From e552219104ff24e15045fc9e8e82af3d14776137 Mon Sep 17 00:00:00 2001 From: Philippe Tillet Date: Sun, 11 Dec 2022 23:03:52 -0800 Subject: [PATCH] [FRONTEND] Add possibility for user to force a GPU threadsync barrier (#976) compiler still has pitfalls even in master branch --- python/src/triton.cc | 9 ++++++++- python/triton/language/semantic.py | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/python/src/triton.cc b/python/src/triton.cc index 167c53061..c0b08cd73 100644 --- a/python/src/triton.cc +++ b/python/src/triton.cc @@ -120,6 +120,7 @@ void init_triton_ir(py::module &&m) { // some placeholders self.getOrLoadDialect(); self.getOrLoadDialect(); + self.getOrLoadDialect(); }); // .def(py::init([](){ // mlir::MLIRContext context; @@ -1265,7 +1266,13 @@ void init_triton_ir(py::module &&m) { [](mlir::OpBuilder &self, mlir::Type &type) -> mlir::Value { auto loc = self.getUnknownLoc(); return self.create<::mlir::LLVM::UndefOp>(loc, type); - }); + }) + // Force GPU barrier + .def("create_barrier", + [](mlir::OpBuilder &self) { + auto loc = self.getUnknownLoc(); + self.create(loc); + }); py::class_(m, "pass_manager") .def(py::init()) diff --git a/python/triton/language/semantic.py b/python/triton/language/semantic.py index 6bdc91ebd..2c0e8a475 100644 --- a/python/triton/language/semantic.py +++ b/python/triton/language/semantic.py @@ -1139,7 +1139,7 @@ def max_contiguous(x: tl.tensor, values: List[int]) -> tl.tensor: def debug_barrier(builder: ir.builder) -> tl.tensor: - return tl.tensor(builder.create_barrier(''), tl.void) + return tl.tensor(builder.create_barrier(), tl.void) def printf(prefix: str, args: List[tl.tensor], builder: ir.builder) -> tl.tensor: