[CODEGEN/DRIVER] Tweaks for performance optimization (#193)

This commit is contained in:
Philippe Tillet
2021-08-07 16:41:44 -07:00
committed by GitHub
parent 6cd1ec3955
commit 298da78058
8 changed files with 69 additions and 4 deletions

View File

@@ -331,6 +331,11 @@ std::vector<unsigned> align::populate_max_contiguous_cast(ir::cast_inst* v){
std::vector<unsigned> align::populate_max_contiguous(ir::value *v){
if(max_contiguous_.find(v) != max_contiguous_.end())
return max_contiguous_.at(v);
if(auto *x = dynamic_cast<ir::instruction*>(v)){
unsigned max_contiguous = x->get_metadata(ir::metadata::max_contiguous);
if(max_contiguous > 0)
return add_to_cache(x, {max_contiguous}, max_contiguous_);
}
if(auto *x = dynamic_cast<ir::cast_inst*>(v))
return populate_max_contiguous_cast(x);
if(auto *x = dynamic_cast<ir::splat_inst*>(v))

View File

@@ -29,6 +29,7 @@
#include "triton/tools/sha1.hpp"
#include "triton/tools/sys/getenv.hpp"
#include "triton/tools/sys/mkdir.hpp"
#include "triton/tools/sys/exec.hpp"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Verifier.h"
#include "llvm/IR/IRPrintingPasses.h"
@@ -299,10 +300,13 @@ std::string cu_module::compile_llvm_module(llvm::Module* module, driver::device*
void cu_module::init_from_ptx(const std::string& ptx, driver::cu_device* device) {
// JIT compile source-code
try{
std::string ptxas = tools::getenv("TRITON_PTXAS");
// use ptxas if present in PATH. Otherwise, use JIT from the driver
std::string ptxas = "ptxas";
std::string version;
int use_system_ptxas = tools::exec(ptxas + " --version 2>&1", version) == 0;
// Use PTXAS via system call
if(!ptxas.empty()){
if(use_system_ptxas){
// compile ptx with ptxas
char _fsrc[] = "/tmp/triton_k_XXXXXX";
char _flog[] = "/tmp/triton_l_XXXXXX";
@@ -316,7 +320,7 @@ void cu_module::init_from_ptx(const std::string& ptx, driver::cu_device* device)
std::string cmd;
int err;
std::string cc = std::to_string(device->compute_capability());
cmd = "ptxas -v --gpu-name=sm_" + cc + " " + fsrc + " -o " + fsrc + ".o 2> " + flog;
cmd = ptxas + " -v --gpu-name=sm_" + cc + " " + fsrc + " -o " + fsrc + ".o 2> " + flog;
err = system(cmd.c_str());
dispatch::cuModuleLoad(&*cu_, (fsrc + ".o").c_str());
unlink(_fsrc);

View File

@@ -711,6 +711,14 @@ ir::value *dispatch::multiple_of(ir::value *x, int value, ir::builder *){
return i;
}
// Attaches `value` to `x` as ir::metadata::max_contiguous metadata and
// returns the annotated instruction.
// `x` must be an ir::instruction; any other kind of value is reported
// through throw_unreachable("max_contiguous").
ir::value *dispatch::max_contiguous(ir::value *x, int value, ir::builder *){
  auto *inst = dynamic_cast<ir::instruction*>(x);
  if(inst == nullptr)
    throw_unreachable("max_contiguous");
  inst->set_metadata(ir::metadata::max_contiguous, value);
  return inst;
}
// Emits a barrier through `builder` and hands back the value it created.
ir::value *dispatch::debug_barrier(ir::builder *builder) {
  ir::value *barrier = builder->create_barrier();
  return barrier;
}