/* Copyright 2015-2017 Philippe Tillet * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files * (the "Software"), to deal in the Software without restriction, * including without limitation the rights to use, copy, modify, merge, * publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #include #include "triton/driver/module.h" #include "triton/driver/context.h" #include "triton/driver/error.h" #include "triton/tools/sys/getenv.hpp" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/Bitcode/BitcodeWriter.h" #include "llvm/IR/Verifier.h" #include "llvm/IR/Module.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/PassManager.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Linker/Linker.h" #include "llvm/IRReader/IRReader.h" #include "llvm/AsmParser/Parser.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/Transforms/Scalar/EarlyCSE.h" #include "llvm/Analysis/LoopPass.h" namespace triton { namespace driver { /* ------------------------ */ // Base // /* ------------------------ */ void module::init_llvm() { static bool init = false; if(!init){ llvm::InitializeAllTargetInfos(); llvm::InitializeAllTargets(); llvm::InitializeAllTargetMCs(); llvm::InitializeAllAsmParsers(); llvm::InitializeAllAsmPrinters(); init = true; } } module::module(driver::context* ctx, CUmodule mod, bool has_ownership) : polymorphic_resource(mod, has_ownership), ctx_(ctx) { } module::module(driver::context* ctx, cl_program mod, bool has_ownership) : polymorphic_resource(mod, has_ownership), ctx_(ctx) { } driver::context* module::context() const { return ctx_; } module* module::create(driver::context* ctx, llvm::Module *src) { switch(ctx->backend()){ case CUDA: return new cu_module(ctx, src); case OpenCL: return new ocl_module(ctx, src); default: throw std::runtime_error("unknown backend"); } } void module::compile_llvm_module(llvm::Module* module, const std::string& triple, const std::string &proc, std::string layout, llvm::SmallVectorImpl &buffer, std::vector files) { init_llvm(); // create machine module->setTargetTriple(triple); std::string error; auto target = llvm::TargetRegistry::lookupTarget(module->getTargetTriple(), error); llvm::TargetOptions opt; opt.AllowFPOpFusion = llvm::FPOpFusion::Fast; opt.UnsafeFPMath = false; opt.NoInfsFPMath = false; opt.NoNaNsFPMath = true; llvm::TargetMachine *machine = target->createTargetMachine(module->getTargetTriple(), proc, "", opt, llvm::Reloc::PIC_, llvm::None, llvm::CodeGenOpt::Aggressive); // set data layout if(layout.empty()) module->setDataLayout(machine->createDataLayout()); else module->setDataLayout(layout); // link for (std::string& file: files) { std::string path = "/opt/rocm/lib/" + file; llvm::SMDiagnostic err; std::unique_ptr mlib = llvm::parseIRFile(path, err, module->getContext()); if (mlib.get() == nullptr) { std::string msg = err.getMessage(); std::cerr << "Fail to load bitcode file " << path << "\n" << "line " << err.getLineNo() << ":" << msg; } mlib->setTargetTriple(module->getTargetTriple()); mlib->setDataLayout(module->getDataLayout()); for (llvm::Function &f : mlib->functions()) { f.addFnAttr(llvm::Attribute::AlwaysInline); } llvm::Linker::linkModules(*module, std::move(mlib)); } // emit machine code llvm::legacy::PassManager pass; llvm::raw_svector_ostream stream(buffer); machine->addPassesToEmitFile(pass, stream, nullptr, llvm::TargetMachine::CGFT_ObjectFile); pass.run(*module); // std::cout << std::string(buffer.begin(), buffer.end()) << std::endl; } /* ------------------------ */ // OpenCL // /* ------------------------ */ ocl_module::ocl_module(driver::context * context, llvm::Module* src): module(context, cl_program(), true) { // const char* x = "__kernel void matmul(){ }"; // cl_int err; // *cl_ = dispatch::clCreateProgramWithSource(*context->cl(), 1, &x, NULL, &err); // check(err); // return; init_llvm(); llvm::SmallVector buffer; std::vector files = { "oclc_daz_opt_on.amdgcn.bc", "ocml.amdgcn.bc", "hc.amdgcn.bc", "ockl.amdgcn.bc", "oclc_correctly_rounded_sqrt_off.amdgcn.bc", "oclc_correctly_rounded_sqrt_on.amdgcn.bc", "oclc_daz_opt_off.amdgcn.bc", "oclc_finite_only_off.amdgcn.bc", "oclc_finite_only_on.amdgcn.bc", "oclc_isa_version_803.amdgcn.bc", "oclc_isa_version_900.amdgcn.bc", "oclc_unsafe_math_off.amdgcn.bc", "oclc_unsafe_math_on.amdgcn.bc", "oclc_isa_version_700.amdgcn.bc", "opencl.amdgcn.bc" }; module::compile_llvm_module(src, "amdgcn-amd-amdpal", "gfx902", "", buffer, files); // llvm::BitcodeWriter writer(buffer); // writer.writeModule(*src); // llvm::legacy::PassManager pass; // llvm::raw_svector_ostream stream(buffer); // pass.add(llvm::createPrintModulePass(stream)); // pass.run(*src); size_t sizes[] = {buffer.size()}; const unsigned char* data[] = {(unsigned char*)buffer.data()}; cl_int status; cl_int err; *cl_ = dispatch::clCreateProgramWithBinary(*context->cl(), 1, &*context->device()->cl(), sizes, data, &status, &err); check(err); check(status); try{ dispatch::clBuildProgram(*cl_, 1, &*context->device()->cl(), NULL, NULL, NULL); } catch(...){ char log[2048]; dispatch::clGetProgramBuildInfo(*cl_, *context->device()->cl(), CL_PROGRAM_BUILD_LOG, 1024, log, NULL); std::cout << log << std::endl; } } /* ------------------------ */ // CUDA // /* ------------------------ */ std::string cu_module::compile_llvm_module(llvm::Module* module) { // set data layout std::string layout = "e"; bool is_64bit = true; bool use_short_pointers = true; if (!is_64bit) layout += "-p:32:32"; else if (use_short_pointers) layout += "-p3:32:32-p4:32:32-p5:32:32"; layout += "-i64:64-i128:128-v16:16-v32:32-n16:32:64"; // create llvm::SmallVector buffer; module::compile_llvm_module(module, "nvptx64-nvidia-cuda", "sm_52", layout, buffer); return std::string(buffer.begin(), buffer.end()); } cu_module::cu_module(driver::context * context, llvm::Module* ll_module): cu_module(context, compile_llvm_module(ll_module)) { } cu_module::cu_module(driver::context * context, std::string const & source) : module(context, CUmodule(), true), source_(source){ cu_context::context_switcher ctx_switch(*context); // JIT compile source-code CUjit_option opt[] = {CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES, CU_JIT_ERROR_LOG_BUFFER}; unsigned int errbufsize = 8096; std::string errbuf(errbufsize, 0); void* optval[] = {(void*)(uintptr_t)errbufsize, (void*)errbuf.data()}; try{ dispatch::cuModuleLoadDataEx(&*cu_, source_.data(), 2, opt, optval); }catch(exception::cuda::base const &){ std::cerr << "Compilation Failed! Log: " << std::endl; std::cerr << errbuf << std::endl; throw; } } cu_buffer cu_module::symbol(const char *name) const{ CUdeviceptr handle; size_t size; dispatch::cuModuleGetGlobal_v2(&handle, &size, *cu_, name); return cu_buffer(ctx_, handle, false); } } }