[FRONTEND] Add possibility for user to force a GPU threadsync barrier (#976)
The compiler still has pitfalls, even on the master branch.
This commit is contained in:
@@ -120,6 +120,7 @@ void init_triton_ir(py::module &&m) {
|
||||
// some placeholders
|
||||
self.getOrLoadDialect<mlir::triton::TritonDialect>();
|
||||
self.getOrLoadDialect<mlir::LLVM::LLVMDialect>();
|
||||
self.getOrLoadDialect<mlir::gpu::GPUDialect>();
|
||||
});
|
||||
// .def(py::init([](){
|
||||
// mlir::MLIRContext context;
|
||||
@@ -1265,7 +1266,13 @@ void init_triton_ir(py::module &&m) {
|
||||
[](mlir::OpBuilder &self, mlir::Type &type) -> mlir::Value {
|
||||
auto loc = self.getUnknownLoc();
|
||||
return self.create<::mlir::LLVM::UndefOp>(loc, type);
|
||||
});
|
||||
})
|
||||
// Force GPU barrier
|
||||
.def("create_barrier",
|
||||
[](mlir::OpBuilder &self) {
|
||||
auto loc = self.getUnknownLoc();
|
||||
self.create<mlir::gpu::BarrierOp>(loc);
|
||||
});
|
||||
|
||||
py::class_<mlir::PassManager>(m, "pass_manager")
|
||||
.def(py::init<mlir::MLIRContext *>())
|
||||
|
@@ -1139,7 +1139,7 @@ def max_contiguous(x: tl.tensor, values: List[int]) -> tl.tensor:
|
||||
|
||||
|
||||
def debug_barrier(builder: ir.builder) -> tl.tensor:
|
||||
return tl.tensor(builder.create_barrier(''), tl.void)
|
||||
return tl.tensor(builder.create_barrier(), tl.void)
|
||||
|
||||
|
||||
def printf(prefix: str, args: List[tl.tensor], builder: ir.builder) -> tl.tensor:
|
||||
|
Reference in New Issue
Block a user