[OPS] Add performance model for gemm/gemv (#397)

Significantly improves the performance of `triton.ops.matmul` in memory-bound settings by adding many more block configurations, together with a performance model that drives the auto-tuning process.
2021-12-22 01:56:10 +08:00
parent 5cdb948c05
commit 39d4bfed83
12 changed files with 289 additions and 27 deletions
--- a/lib/codegen/pass.cc
+++ b/lib/codegen/pass.cc
@@ -52,7 +52,7 @@ std::unique_ptr<llvm::Module> add_passes_to_emit_bin(ir::module &ir, llvm::LLVMC
  peephole.run(ir);
  dce.run(ir);
  pipeline.run(ir);
-  dce.run(ir);
+  dce.run(ir);  
  disassociate.run(ir);
  dce.run(ir);
  align.run(ir);
@@ -85,6 +85,7 @@ std::unique_ptr<llvm::Module> add_passes_to_emit_bin(ir::module &ir, llvm::LLVMC
  allocation.run(ir);
  prefetch_s.run(ir);
  barriers.run(ir);
+  // ir.print(std::cout);
  isel.visit(ir, *llvm);
  shared_static = allocation.allocated_size();
  return llvm;
--- a/lib/codegen/transform/cts.cc
+++ b/lib/codegen/transform/cts.cc
@@ -94,4 +94,4 @@ void cts::run(ir::module &mod) {

 }
 }
-}
+}
--- a/lib/codegen/transform/pipeline.cc
+++ b/lib/codegen/transform/pipeline.cc
@@ -327,4 +327,4 @@ void pipeline::run(ir::module &mod) {

 }
 }
-}
+}