[FRONTEND] Add possibility for user to force a GPU threadsync barrier (#976)

The compiler still has pitfalls, even on the master branch.
This commit is contained in:
Philippe Tillet
2022-12-11 23:03:52 -08:00
committed by GitHub
parent 52accd4c2b
commit e552219104
2 changed files with 9 additions and 2 deletions

View File

@@ -120,6 +120,7 @@ void init_triton_ir(py::module &&m) {
// some placeholders
self.getOrLoadDialect<mlir::triton::TritonDialect>();
self.getOrLoadDialect<mlir::LLVM::LLVMDialect>();
self.getOrLoadDialect<mlir::gpu::GPUDialect>();
});
// .def(py::init([](){
// mlir::MLIRContext context;
@@ -1265,7 +1266,13 @@ void init_triton_ir(py::module &&m) {
[](mlir::OpBuilder &self, mlir::Type &type) -> mlir::Value {
auto loc = self.getUnknownLoc();
return self.create<::mlir::LLVM::UndefOp>(loc, type);
});
})
// Force GPU barrier
.def("create_barrier",
[](mlir::OpBuilder &self) {
auto loc = self.getUnknownLoc();
self.create<mlir::gpu::BarrierOp>(loc);
});
py::class_<mlir::PassManager>(m, "pass_manager")
.def(py::init<mlir::MLIRContext *>())

View File

@@ -1139,7 +1139,7 @@ def max_contiguous(x: tl.tensor, values: List[int]) -> tl.tensor:
def debug_barrier(builder: ir.builder) -> tl.tensor:
return tl.tensor(builder.create_barrier(''), tl.void)
return tl.tensor(builder.create_barrier(), tl.void)
def printf(prefix: str, args: List[tl.tensor], builder: ir.builder) -> tl.tensor: