From ad5a30bae134378a5553e90444eacf17ab54aa8e Mon Sep 17 00:00:00 2001 From: Philippe Tillet Date: Sun, 31 Jan 2021 20:09:36 -0500 Subject: [PATCH] [LANG] Added __debug_barrier() call to force insertion of a CUDA __syncthreads --- lib/lang/code_gen.cc | 4 ++++ lib/runtime/function.cc | 1 + 2 files changed, 5 insertions(+) diff --git a/lib/lang/code_gen.cc b/lib/lang/code_gen.cc index a23d4125c..0a1a15eab 100644 --- a/lib/lang/code_gen.cc +++ b/lib/lang/code_gen.cc @@ -348,6 +348,10 @@ void Generator::VisitFuncCall(FuncCall* funcCall) { ir::value* false_val = ret_; return set_ret(bld_->create_select(cond, true_val, false_val)); } + if(name == "__debug_barrier"){ + bld_->create_barrier(); + return; + } return error_not_implemented("function calls not implemented"); } diff --git a/lib/runtime/function.cc b/lib/runtime/function.cc index 05049244b..2dc9c1a5a 100644 --- a/lib/runtime/function.cc +++ b/lib/runtime/function.cc @@ -102,6 +102,7 @@ extern void atomic_add_float_1x1(float*, float, bool); extern int atomic_cas(int*, int, int); extern int atomic_xchg(int*, int); extern int get_program_id(int); +extern void __debug_barrier(); extern int get_num_programs(int); extern int select(bool, int, int); extern char __constant__ * calloc(int);