[OPTIMIZER] Fixed up order of shared layouts (#881)

This commit is contained in:
Philippe Tillet
2022-11-21 06:25:02 +01:00
committed by GitHub
parent 4d64ffb5fe
commit 23f71daa27
6 changed files with 27 additions and 27 deletions

View File

@@ -882,6 +882,7 @@ def ttir_to_ttgir(mod, num_warps, num_stages):
pm.enable_debug()
# Convert blocked layout to mma layout for dot ops so that pipeline
# can get shared memory swizzled correctly.
pm.add_coalesce_pass()
pm.add_triton_gpu_combine_pass()
pm.add_tritongpu_pipeline_pass(num_stages)
# Prefetch must be done after pipeline pass because pipeline pass
@@ -889,7 +890,6 @@ def ttir_to_ttgir(mod, num_warps, num_stages):
pm.add_tritongpu_prefetch_pass()
pm.add_canonicalizer_pass()
pm.add_cse_pass()
pm.add_coalesce_pass()
pm.add_triton_gpu_combine_pass()
pm.add_licm_pass()
pm.add_triton_gpu_combine_pass()