[Backend] Vectorize Load/Store Ops (#86)

This PR does the following: - Refactors the Load and Store op codegen, rewriting both ops with the same logic so that they share most of their code - Adds support for vectorized load/store
2022-09-07 03:28:09 +08:00
parent 35e346bcff
commit a9464f4993
10 changed files with 433 additions and 295 deletions
--- a/python/triton/compiler.py
+++ b/python/triton/compiler.py
@@ -798,7 +798,8 @@ def optimize_tritongpu_ir(mod, num_stages):
    pm.add_tritongpu_pipeline_pass(num_stages)
    pm.add_canonicalizer_pass()
    pm.add_cse_pass()
-    # pm.add_triton_gpu_combine_pass()
+    pm.add_coalesce_pass()
+    pm.add_triton_gpu_combine_pass()
    pm.add_triton_gpu_verifier_pass()
    pm.run(mod)
    return mod