[Triton-MLIR] tt.dot operands now must have DotOperand layout; also added prefetch pass prototype (#712)

Co-authored-by: Jokeren <kerenzhou@openai.com>
Co-authored-by: Phil Tillet <phil@openai.com>
Co-authored-by: Superjomn <yanchunwei@outlook.com>
2022-11-10 13:57:27 +08:00
parent 8832e32683
commit 4946167241
29 changed files with 1227 additions and 507 deletions
--- a/python/triton/compiler.py
+++ b/python/triton/compiler.py
@@ -876,6 +876,9 @@ def ttir_to_ttgir(mod, num_warps, num_stages):
    pm = _triton.ir.pass_manager(mod.context)
    pm.add_convert_triton_to_tritongpu_pass(num_warps)
    pm.enable_debug()
+    # Convert blocked layouts to mma layouts for dot ops before pipelining,
+    # so that the pipeline pass can swizzle shared memory correctly.
+    pm.add_triton_gpu_combine_pass()
    pm.add_tritongpu_pipeline_pass(num_stages)
    pm.add_canonicalizer_pass()
    pm.add_cse_pass()