[codegen/optimize_dce.cpp] fixed bugs whereby barriers were removed by DCE

This commit is contained in:
Philippe Tillet
2019-07-31 15:11:10 -07:00
parent 5af7e5adac
commit bb32ac56c9
8 changed files with 60 additions and 17 deletions

View File

@@ -130,7 +130,7 @@ public:
// create profile
triton::dnn::blocksparse::dot dot(N, params_.K, params_.segments, params_.C, "fp16", params_.bsize, params_.locks, params_.blocks, OP);
// blocksparse matmul
- triton::dnn::base* op = dot.enqueue(stream, {&da, &db, &dc, &dlut}, triton::dnn::FULL_TUNING);
+ triton::dnn::base* op = dot.enqueue(stream, {&da, &db, &dc, &dlut}, triton::dnn::NO_TUNING);
triton::driver::buffer* locks_buffer = ((triton::dnn::blocksparse::dot*)op)->get_locks();
Tensor *tmp = nullptr;
TensorShape tmp_shapes;