[OPTIMIZER] Fixed up order of shared layouts (#881)

2022-11-21 06:25:02 +01:00
parent 4d64ffb5fe
commit 23f71daa27
6 changed files with 27 additions and 27 deletions
--- a/python/triton/compiler.py
+++ b/python/triton/compiler.py
@@ -882,6 +882,7 @@ def ttir_to_ttgir(mod, num_warps, num_stages):
    pm.enable_debug()
    # Convert blocked layout to mma layout for dot ops so that pipeline
    # can get shared memory swizzled correctly.
+    pm.add_coalesce_pass()
    pm.add_triton_gpu_combine_pass()
    pm.add_tritongpu_pipeline_pass(num_stages)
    # Prefetch must be done after pipeline pass because pipeline pass
@@ -889,7 +890,6 @@ def ttir_to_ttgir(mod, num_warps, num_stages):
    pm.add_tritongpu_prefetch_pass()
    pm.add_canonicalizer_pass()
    pm.add_cse_pass()
-    pm.add_coalesce_pass()
    pm.add_triton_gpu_combine_pass()
    pm.add_licm_pass()
    pm.add_triton_gpu_combine_pass()