[OPS] Add performance model for gemm/gemv (#397)
Significantly improves the performance of `triton.ops.matmul` in memory-bound settings by using many more block configurations, coupled with a performance model that drives the auto-tuning process.
This commit is contained in:
@@ -52,7 +52,7 @@ std::unique_ptr<llvm::Module> add_passes_to_emit_bin(ir::module &ir, llvm::LLVMC
|
||||
peephole.run(ir);
|
||||
dce.run(ir);
|
||||
pipeline.run(ir);
|
||||
dce.run(ir);
|
||||
dce.run(ir);
|
||||
disassociate.run(ir);
|
||||
dce.run(ir);
|
||||
align.run(ir);
|
||||
@@ -85,6 +85,7 @@ std::unique_ptr<llvm::Module> add_passes_to_emit_bin(ir::module &ir, llvm::LLVMC
|
||||
allocation.run(ir);
|
||||
prefetch_s.run(ir);
|
||||
barriers.run(ir);
|
||||
// ir.print(std::cout);
|
||||
isel.visit(ir, *llvm);
|
||||
shared_static = allocation.allocated_size();
|
||||
return llvm;
|
||||
|
@@ -94,4 +94,4 @@ void cts::run(ir::module &mod) {
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@@ -327,4 +327,4 @@ void pipeline::run(ir::module &mod) {
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user