[LANG] Added __debug_barrier() call to force insertion of a CUDA __syncthreads
This commit is contained in:
Philippe Tillet
2021-01-31 20:09:36 -05:00
parent 3fde4b8f5b
commit ad5a30bae1
2 changed files with 5 additions and 0 deletions

View File

@@ -348,6 +348,10 @@ void Generator::VisitFuncCall(FuncCall* funcCall) {
ir::value* false_val = ret_;
return set_ret(bld_->create_select(cond, true_val, false_val));
}
if(name == "__debug_barrier"){
bld_->create_barrier();
return;
}
return error_not_implemented("function calls not implemented");
}

View File

@@ -102,6 +102,7 @@ extern void atomic_add_float_1x1(float*, float, bool);
extern int atomic_cas(int*, int, int);
extern int atomic_xchg(int*, int);
extern int get_program_id(int);
// Debugging aid: takes no arguments and returns nothing. The code generator
// recognizes a call to this name and emits a barrier for it (intended to force
// insertion of a CUDA __syncthreads).
extern void __debug_barrier();
extern int get_num_programs(int);
extern int select(bool, int, int);
extern char __constant__ * calloc(int);