[LANG] Added __debug_barrier() call to force insertion of a CUDA __syncthreads()
This commit is contained in:
@@ -348,6 +348,10 @@ void Generator::VisitFuncCall(FuncCall* funcCall) {
|
||||
ir::value* false_val = ret_;
|
||||
return set_ret(bld_->create_select(cond, true_val, false_val));
|
||||
}
|
||||
if(name == "__debug_barrier"){
|
||||
bld_->create_barrier();
|
||||
return;
|
||||
}
|
||||
return error_not_implemented("function calls not implemented");
|
||||
}
|
||||
|
||||
|
@@ -102,6 +102,7 @@ extern void atomic_add_float_1x1(float*, float, bool);
|
||||
extern int atomic_cas(int*, int, int);
|
||||
extern int atomic_xchg(int*, int);
|
||||
extern int get_program_id(int);
|
||||
extern void __debug_barrier();
|
||||
extern int get_num_programs(int);
|
||||
extern int select(bool, int, int);
|
||||
extern char __constant__ * calloc(int);
|
||||
|
Reference in New Issue
Block a user