From deb7a1cc5cea0f966f90a20bbafbb8b4c0f70e1c Mon Sep 17 00:00:00 2001 From: Philippe Tillet Date: Sat, 23 Mar 2019 18:58:25 -0700 Subject: [PATCH] Hack to make OpenCL for AMD work --- examples/matrix.cpp | 7 +++-- lib/codegen/selection.cpp | 2 ++ lib/driver/module.cpp | 64 ++++++++++++++------------------------- 3 files changed, 28 insertions(+), 45 deletions(-) diff --git a/examples/matrix.cpp b/examples/matrix.cpp index bbe9e25bf..e16d6b2dd 100644 --- a/examples/matrix.cpp +++ b/examples/matrix.cpp @@ -111,7 +111,7 @@ int main() { triton::jit jit(context); // matrix multiplication parameters - size_t M = 512, N = 512, K = 512; + int32_t M = 128, N = 128, K = 128; std::vector hc(M*N); std::vector rc(M*N); std::vector ha(M*K); @@ -163,8 +163,9 @@ int main() { stream->enqueue(kernel, grid, {nthreads, 1, 1}); stream->synchronize(); // benchmark - double ts = bench([&](){stream->enqueue(kernel, grid, {nthreads, 1, 1});}, - [&](){ stream->synchronize(); }); +// double ts = bench([&](){stream->enqueue(kernel, grid, {nthreads, 1, 1});}, +// [&](){ stream->synchronize(); }); + double ts = 1; ts = ts * 1e-9; double tflops = 2*M*N*K / ts * 1e-12; return tflops; diff --git a/lib/codegen/selection.cpp b/lib/codegen/selection.cpp index d893dbeee..aff4dfbff 100644 --- a/lib/codegen/selection.cpp +++ b/lib/codegen/selection.cpp @@ -748,6 +748,8 @@ void selection::lower_tile_instruction(ir::instruction *ins, llvm::IRBuilder<> & indices_t b_idx = {idx[1], builder.getInt32(K)}; Value *a = TA->get_value(a_idx); Value *b = TB->get_value(b_idx); +// a = ConstantFP::get(builder.getFloatTy(), 1); +// b = ConstantFP::get(builder.getFloatTy(), 1); res = builder.CreateCall(f_mul_add, {a, b, res}); } result->set_value(idx, res); diff --git a/lib/driver/module.cpp b/lib/driver/module.cpp index 1e7407234..4c0018c3d 100755 --- a/lib/driver/module.cpp +++ b/lib/driver/module.cpp @@ -1,22 +1,22 @@ /* Copyright 2015-2017 Philippe Tillet -* -* Permission is hereby granted, free of charge, to any person obtaining -* a copy of this software and associated documentation files -* (the "Software"), to deal in the Software without restriction, -* including without limitation the rights to use, copy, modify, merge, -* publish, distribute, sublicense, and/or sell copies of the Software, -* and to permit persons to whom the Software is furnished to do so, +* +* Permission is hereby granted, free of charge, to any person obtaining +* a copy of this software and associated documentation files +* (the "Software"), to deal in the Software without restriction, +* including without limitation the rights to use, copy, modify, merge, +* publish, distribute, sublicense, and/or sell copies of the Software, +* and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: -* -* The above copyright notice and this permission notice shall be +* +* The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ @@ -185,37 +185,17 @@ host_module::host_module(driver::context * context, llvm::Module* src): module(c ocl_module::ocl_module(driver::context * context, llvm::Module* src): module(context, cl_program(), true) { init_llvm(); -// std::vector files = { -// "opencl.amdgcn.bc", -// "ocml.amdgcn.bc", -// "ockl.amdgcn.bc", -// "oclc_correctly_rounded_sqrt_off.amdgcn.bc", -// "oclc_daz_opt_on.amdgcn.bc", -// "oclc_finite_only_off.amdgcn.bc", -// "oclc_isa_version_902.amdgcn.bc", -// "oclc_unsafe_math_off.amdgcn.bc" -// }; -// for(auto&x : files) -// x = "/opt/rocm/lib/" + x; - - llvm::LLVMContext ctx; -// llvm::IRBuilder<> builder(ctx); -// auto dummy = new llvm::Module("matmul", ctx); -// llvm::Function *fn = llvm::Function::Create(llvm::FunctionType::get(builder.getVoidTy(), {}, false), llvm::Function::ExternalLinkage, "matmul", dummy); -// llvm::BasicBlock *entry = llvm::BasicBlock::Create(ctx, "entry", fn); -// builder.SetInsertPoint(entry); -// builder.CreateRetVoid(); llvm::SmallVector buffer; - llvm::SMDiagnostic error; - auto dummy = llvm::parseIRFile("test.bc", error, ctx); - module::compile_llvm_module(dummy.get(), "amdgcn-amd-amdhsa-amdgizcl", "gfx902", "", buffer); + module::compile_llvm_module(src, "amdgcn-amd-amdhsa-amdgizcl", "gfx902", "", buffer); + std::ofstream output("tmp.o", std::ios::binary); + std::copy(buffer.begin(), buffer.end(), std::ostreambuf_iterator(output)); + system("ld.lld tmp.o -shared -o test.o"); -// std::ifstream fin("test.o", std::ios::in | std::ios::binary ); -// std::vector buffer(9296); -// fin.read(buffer.data(), buffer.size()); - size_t sizes[] = {buffer.size()}; - const unsigned char* data[] = {(unsigned char*)buffer.data()}; + std::ifstream input("test.o", std::ios::in | std::ios::binary ); + std::vector in_buffer(std::istreambuf_iterator(input), {}); + size_t sizes[] = {in_buffer.size()}; + const unsigned char* data[] = {(unsigned char*)in_buffer.data()}; cl_int status; cl_int err; *cl_ = dispatch::clCreateProgramWithBinary(*context->cl(), 1, &*context->device()->cl(), sizes, data, &status, &err);