[OPTIMIZER] Coalesce pass no longer takes a num-warps argument (#99)

This improves the design by avoiding an inconsistent `num-warps` value between the pass and the parent module of the operation it processes.
2022-09-05 18:09:02 -07:00
parent ea175f689e
commit a0bab9748e
5 changed files with 26 additions and 31 deletions
--- a/test/TritonGPU/coalesce.mlir
+++ b/test/TritonGPU/coalesce.mlir
@@ -4,6 +4,8 @@
 #blocked1 = #triton_gpu.blocked<{sizePerThread = [1, 1], threadsPerWarp = [32, 1], warpsPerCTA = [4, 1], order = [0, 1]}>
 #blocked2 = #triton_gpu.blocked<{sizePerThread = [1, 1], threadsPerWarp = [1, 32], warpsPerCTA = [1, 4], order = [0, 1]}>

+module attributes {"triton_gpu.num-warps" = 4 : i32} {
+

 // CHECK: [[row_layout:#.*]] = #triton_gpu.blocked<{sizePerThread = [1, 4], threadsPerWarp = [2, 16], warpsPerCTA = [1, 4], order = [1, 0]}>
 // CHECK: [[col_layout:#.*]] = #triton_gpu.blocked<{sizePerThread = [4, 1], threadsPerWarp = [16, 2], warpsPerCTA = [4, 1], order = [0, 1]}>
@@ -44,3 +46,5 @@ func @transpose(%arg0: !tt.ptr<f32> {tt.divisibility = 16 : i32},
  tt.store %18, %19, %cst : tensor<64x64xf32, #blocked1>
  return
 }
+
+}