[OPS] Add performance model for gemm/gemv (#397)

Significantly improves the performance of `triton.ops.matmul` in memory-bound settings by using many more block configs, coupled with a performance model that drives the auto-tuning process.
This commit is contained in:
daadaada
2021-12-22 01:56:10 +08:00
committed by GitHub
parent 5cdb948c05
commit 39d4bfed83
12 changed files with 289 additions and 27 deletions

View File

@@ -52,7 +52,7 @@ std::unique_ptr<llvm::Module> add_passes_to_emit_bin(ir::module &ir, llvm::LLVMC
peephole.run(ir);
dce.run(ir);
pipeline.run(ir);
dce.run(ir);
dce.run(ir);
disassociate.run(ir);
dce.run(ir);
align.run(ir);
@@ -85,6 +85,7 @@ std::unique_ptr<llvm::Module> add_passes_to_emit_bin(ir::module &ir, llvm::LLVMC
allocation.run(ir);
prefetch_s.run(ir);
barriers.run(ir);
// ir.print(std::cout);
isel.visit(ir, *llvm);
shared_static = allocation.allocated_size();
return llvm;

View File

@@ -94,4 +94,4 @@ void cts::run(ir::module &mod) {
}
}
}
}

View File

@@ -327,4 +327,4 @@ void pipeline::run(ir::module &mod) {
}
}
}
}