[OPTIMIZER] Fixed up order of shared layouts (#881)
This commit is contained in:
@@ -882,6 +882,7 @@ def ttir_to_ttgir(mod, num_warps, num_stages):
|
||||
pm.enable_debug()
|
||||
# Convert blocked layout to mma layout for dot ops so that pipeline
|
||||
# can get shared memory swizzled correctly.
|
||||
+pm.add_coalesce_pass()
|
||||
pm.add_triton_gpu_combine_pass()
|
||||
pm.add_tritongpu_pipeline_pass(num_stages)
|
||||
# Prefetch must be done after pipeline pass because pipeline pass
|
||||
@@ -889,7 +890,6 @@ def ttir_to_ttgir(mod, num_warps, num_stages):
|
||||
pm.add_tritongpu_prefetch_pass()
|
||||
pm.add_canonicalizer_pass()
|
||||
pm.add_cse_pass()
|
||||
-pm.add_coalesce_pass()
|
||||
pm.add_triton_gpu_combine_pass()
|
||||
pm.add_licm_pass()
|
||||
pm.add_triton_gpu_combine_pass()
|
||||
|
Reference in New Issue
Block a user