trying to work around tensorflow limitations

This commit is contained in:
Philippe Tillet
2019-09-04 01:54:43 -04:00
parent 2ccc915011
commit b747959a57
8 changed files with 48 additions and 31 deletions

View File

@@ -87,19 +87,18 @@ src = '''
else {
int *plock = locks + ridx*nlocks + lockid - 1;
int *pcount = plock + get_num_program(0)*nlocks;
while(__atomic_cas(plock, 0, 1));
while(atomic_cas(plock, 0, 1));
int count = *pcount;
if(count == 0)
*?(checkc) pc = c;
else
*?(checkc) pc = c + *pc;
__atomic_exch(pcount, 1);
__atomic_exch(plock, 0);
atomic_exch(pcount, 1);
atomic_exch(plock, 0);
}
}
'''
# std::string dot::triton_c_src_dw() const {
# bool AT = (op_ == WGRAD);
# bool BT = (op_ == FPROP);