[RUNTIME] Added option to print LLVM-IR
Also includes appropriate driver code change for that
This commit is contained in:
@@ -99,44 +99,7 @@ void module::compile_llvm_module(std::unique_ptr<llvm::Module> module, const std
|
||||
llvm::SmallVectorImpl<char> &buffer,
|
||||
const std::string& features,
|
||||
file_type_t ft) {
|
||||
init_llvm();
|
||||
// // debug
|
||||
llvm::legacy::PassManager pm;
|
||||
std::string tmp;
|
||||
// llvm::raw_string_ostream oss(llir_);
|
||||
// pm.add(llvm::createPrintModulePass(llvm::outs()));
|
||||
pm.add(llvm::createVerifierPass());
|
||||
pm.run(*module);
|
||||
// create machine
|
||||
module->setTargetTriple(triple);
|
||||
std::string error;
|
||||
auto target = llvm::TargetRegistry::lookupTarget(module->getTargetTriple(), error);
|
||||
llvm::TargetOptions opt;
|
||||
opt.AllowFPOpFusion = llvm::FPOpFusion::Fast;
|
||||
opt.UnsafeFPMath = false;
|
||||
opt.NoInfsFPMath = false;
|
||||
opt.NoNaNsFPMath = true;
|
||||
llvm::TargetMachine *machine = target->createTargetMachine(module->getTargetTriple(), proc, features, opt,
|
||||
llvm::Reloc::PIC_, llvm::None, llvm::CodeGenOpt::Aggressive);
|
||||
// set data layout
|
||||
if(layout.empty())
|
||||
module->setDataLayout(machine->createDataLayout());
|
||||
else
|
||||
module->setDataLayout(layout);
|
||||
// emit machine code
|
||||
for (llvm::Function &f : module->functions())
|
||||
f.addFnAttr(llvm::Attribute::AlwaysInline);
|
||||
llvm::legacy::PassManager pass;
|
||||
llvm::raw_svector_ostream stream(buffer);
|
||||
// convert triton file type to llvm file type
|
||||
auto ll_file_type = [&](module::file_type_t type){
|
||||
if(type == Object)
|
||||
return llvm::CodeGenFileType::CGFT_ObjectFile;
|
||||
return llvm::CodeGenFileType::CGFT_AssemblyFile;
|
||||
};
|
||||
// emit
|
||||
machine->addPassesToEmitFile(pass, stream, nullptr, ll_file_type(ft));
|
||||
pass.run(*module);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -271,7 +234,41 @@ std::string cu_module::compile_llvm_module(std::unique_ptr<llvm::Module> module,
|
||||
int ptx_minor = ptx % 10;
|
||||
// create
|
||||
llvm::SmallVector<char, 0> buffer;
|
||||
module::compile_llvm_module(std::move(module), "nvptx64-nvidia-cuda", "sm_" + std::to_string(std::min(cc, max_nvvm_cc)), "", buffer, "+ptx" + std::to_string(std::min(ptx, max_nvvm_ptx)), Assembly);
|
||||
std::string triple = "nvptx64-nvidia-cuda";
|
||||
std::string proc = "sm_" + std::to_string(std::min(cc, max_nvvm_cc));
|
||||
std::string layout = "";
|
||||
std::string features = "+ptx" + std::to_string(std::min(ptx, max_nvvm_ptx));
|
||||
init_llvm();
|
||||
// verify and store llvm
|
||||
llvm::legacy::PassManager pm;
|
||||
pm.add(llvm::createVerifierPass());
|
||||
pm.run(*module);
|
||||
// create machine
|
||||
module->setTargetTriple(triple);
|
||||
std::string error;
|
||||
auto target = llvm::TargetRegistry::lookupTarget(module->getTargetTriple(), error);
|
||||
llvm::TargetOptions opt;
|
||||
opt.AllowFPOpFusion = llvm::FPOpFusion::Fast;
|
||||
opt.UnsafeFPMath = false;
|
||||
opt.NoInfsFPMath = false;
|
||||
opt.NoNaNsFPMath = true;
|
||||
llvm::TargetMachine *machine = target->createTargetMachine(module->getTargetTriple(), proc, features, opt,
|
||||
llvm::Reloc::PIC_, llvm::None, llvm::CodeGenOpt::Aggressive);
|
||||
// set data layout
|
||||
if(layout.empty())
|
||||
module->setDataLayout(machine->createDataLayout());
|
||||
else
|
||||
module->setDataLayout(layout);
|
||||
// emit machine code
|
||||
for (llvm::Function &f : module->functions())
|
||||
f.addFnAttr(llvm::Attribute::AlwaysInline);
|
||||
llvm::legacy::PassManager pass;
|
||||
llvm::raw_svector_ostream stream(buffer);
|
||||
// emit
|
||||
machine->addPassesToEmitFile(pass, stream, nullptr, llvm::CodeGenFileType::CGFT_AssemblyFile);
|
||||
pass.run(*module);
|
||||
|
||||
// post-process
|
||||
std::string result(buffer.begin(), buffer.end());
|
||||
find_and_replace(result, ".version", "\n", ".version " + std::to_string(ptx_major) + "." + std::to_string(ptx_minor) + "\n");
|
||||
find_and_replace(result, ".target", "\n", ".target " + sm + "\n");
|
||||
@@ -280,10 +277,7 @@ std::string cu_module::compile_llvm_module(std::unique_ptr<llvm::Module> module,
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
cu_module::cu_module(driver::device* device, std::unique_ptr<llvm::Module> ll_module): cu_module(device, compile_llvm_module(std::move(ll_module), device)) { }
|
||||
|
||||
cu_module::cu_module(driver::device* device, std::string const & source) : module(CUmodule(), true), ptx_(source){
|
||||
void cu_module::init_from_ptx(const std::string& ptx) {
|
||||
// JIT compile source-code
|
||||
|
||||
try{
|
||||
@@ -295,7 +289,7 @@ cu_module::cu_module(driver::device* device, std::string const & source) : modul
|
||||
// std::string fsrc = _fsrc;
|
||||
// std::string flog = _flog;
|
||||
// std::ofstream ofs(fsrc);
|
||||
// ofs << source;
|
||||
// ofs << ptx;
|
||||
// ofs.close();
|
||||
// std::string cmd;
|
||||
// int err;
|
||||
@@ -340,7 +334,7 @@ cu_module::cu_module(driver::device* device, std::string const & source) : modul
|
||||
}
|
||||
catch(exception::cuda::invalid_ptx const &){
|
||||
//#ifdef TRITON_LOG_PTX_ERROR
|
||||
std::cout << source << std::endl;
|
||||
std::cout << ptx << std::endl;
|
||||
std::cerr << "It appears that Triton produced invalid PTX code:" << std::endl;
|
||||
// exit(1);
|
||||
//#endif
|
||||
@@ -348,6 +342,18 @@ cu_module::cu_module(driver::device* device, std::string const & source) : modul
|
||||
}
|
||||
}
|
||||
|
||||
/* Builds a CUDA module from an LLVM module.
 *
 * The textual LLVM-IR is captured into llir_ first, because ownership of
 * ll_module is handed over to compile_llvm_module() right afterwards.
 * The PTX returned by that call is stored in ptx_ and then passed to
 * init_from_ptx().
 */
cu_module::cu_module(driver::device* device, std::unique_ptr<llvm::Module> ll_module)
  : module(CUmodule(), true)
{
  // dump the IR as text into llir_ while we still own the module
  llvm::raw_string_ostream ir_stream(llir_);
  ir_stream << *ll_module;
  ir_stream.flush();

  // lower the IR to PTX for this device, then initialize from that PTX
  ptx_ = compile_llvm_module(std::move(ll_module), device);
  init_from_ptx(ptx_);
}
|
||||
|
||||
/* Builds a CUDA module directly from PTX source.
 *
 * The device argument is unnamed and unused here; the PTX text is copied
 * into ptx_ and then passed to init_from_ptx().
 */
cu_module::cu_module(driver::device*, std::string const & source)
  : module(CUmodule(), true),
    ptx_(source)
{
  init_from_ptx(ptx_);
}
|
||||
|
||||
std::unique_ptr<buffer> cu_module::symbol(const char *name) const{
|
||||
CUdeviceptr handle;
|
||||
size_t size;
|
||||
|
@@ -224,6 +224,45 @@ void kernel::operator()(void *args, size_t args_size, driver::stream *stream, co
|
||||
stream->enqueue(&*ker_, grid, {opt.num_warps * 32, 1, 1}, args, args_size);
|
||||
}
|
||||
|
||||
/* Returns a textual representation of the compiled kernel.
 *
 *  - ASM_LLIR:    the string returned by the module's llir().
 *  - ASM_NV_PTX:  the string returned by the module's ptx().
 *  - ASM_NV_SASS: the PTX is written to a temporary file, assembled with
 *                 `ptxas --gpu-name=sm_<compute capability>`, disassembled
 *                 with `cuobjdump --dump-sass`, and the listing is returned
 *                 with every line that consists solely of a hex-encoding
 *                 comment filtered out.
 *  - any other mode: an empty string.
 *
 * For ASM_NV_SASS an empty string is returned when a temporary name cannot
 * be obtained or when either external tool reports a non-zero status.
 * Temporary files are removed before returning.
 */
std::string kernel::get_asm(asm_mode_t mode) {
  switch(mode){
    case ASM_LLIR:
      return ((driver::cu_module*)mod_.get())->llir();

    case ASM_NV_PTX:
      // plain PTX needs no temporary files and no external tools
      return ((driver::cu_module*)mod_.get())->ptx();

    case ASM_NV_SASS:{
      const std::string ptx = ((driver::cu_module*)mod_.get())->ptx();

      // std::tmpnam(nullptr) returns a pointer to an internal static buffer
      // (or nullptr on failure), so each name is copied before the next call
      const char* input_name = std::tmpnam(nullptr);
      if(input_name == nullptr)
        return "";
      const std::string input = input_name;
      const char* output_name = std::tmpnam(nullptr);
      if(output_name == nullptr)
        return "";
      const std::string output = output_name;
      const std::string object = input + ".o";

      // best-effort removal of everything this branch may have created
      auto cleanup = [&]() {
        std::remove(input.c_str());
        std::remove(object.c_str());
        std::remove(output.c_str());
      };

      // write the PTX to disk so that ptxas can read it
      {
        std::ofstream ofs(input);
        ofs << ptx;
      }

      // compile ptx
      driver::cu_device* cu_device = (driver::cu_device*)dev_;
      std::string cmd = "ptxas --gpu-name=sm_" + std::to_string(cu_device->compute_capability()) + " " + input + " -o " + object;
      int err = system(cmd.c_str());

      // disassemble (only if the object file was produced)
      if(err == 0){
        cmd = "cuobjdump --dump-sass " + object + " > " + output;
        err = system(cmd.c_str());
      }

      std::string sass;
      if(err == 0){
        // drop lines that are nothing but a hex-encoding comment
        std::regex comment(" *\\/\\* 0x[0-9a-f]+ \\*\\/");
        std::ifstream ifs(output);
        std::string line;
        while(std::getline(ifs, line))
          if(!std::regex_match(line, comment))
            sass += line + "\n";
      }

      cleanup();
      return sass;
    }

    default:
      return "";
  }
}
|
||||
/* --------------------------------- */
|
||||
/* --------------------------------- */
|
||||
/* --------------------------------- */
|
||||
|
Reference in New Issue
Block a user