diff --git a/CMakeLists.txt b/CMakeLists.txt index 09559a40a..fddc6f1f6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,7 +21,7 @@ endif() # Compiler flags include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__STDC_FORMAT_MACROS -fvisibility=default -std=gnu++11") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__STDC_FORMAT_MACROS -fvisibility=default -std=gnu++14") # Tests if(BUILD_TESTS) diff --git a/README.md b/README.md index 51907a240..f03181354 100644 --- a/README.md +++ b/README.md @@ -14,11 +14,11 @@ The formal foundations of this project are described in the following MAPL2019 p ## Installation -Triton is a fairly self-contained package and uses its own parser (forked from [wgtcc](https://github.com/wgtdkp/wgtcc)) and LLVM-8.0+ for code generation. +Triton is a fairly self-contained package and uses its own parser (forked from [wgtcc](https://github.com/wgtdkp/wgtcc)) and LLVM-10+ for code generation. You can install the latest release with pip as follows: ``` -sudo apt-get install llvm-9-dev +sudo apt-get install llvm-10-dev pip install triton ``` diff --git a/cmake/FindLLVM.cmake b/cmake/FindLLVM.cmake index 30ebcbd89..dcc0488b5 100644 --- a/cmake/FindLLVM.cmake +++ b/cmake/FindLLVM.cmake @@ -28,7 +28,8 @@ # We also want an user-specified LLVM_ROOT_DIR to take precedence over the # system default locations such as /usr/local/bin. Executing find_program() # multiples times is the approach recommended in the docs. -set(llvm_config_names llvm-config-9 llvm-config-9.0 llvm-config90 +set(llvm_config_names llvm-config-10 llvm-config-10.0 llvm-config100 + llvm-config-9 llvm-config-9.0 llvm-config90 llvm-config-8 llvm-config-8.0 llvm-config80 llvm-config) find_program(LLVM_CONFIG diff --git a/lib/codegen/selection/generator.cc b/lib/codegen/selection/generator.cc index 18f2dafcd..dd9f0592c 100644 --- a/lib/codegen/selection/generator.cc +++ b/lib/codegen/selection/generator.cc @@ -695,7 +695,7 @@ void generator::visit_atomic_add_inst(ir::atomic_add_inst* add) { builder_->CreateCondBr(rmw_msk, mask_then_bb, mask_done_bb); builder_->SetInsertPoint(mask_then_bb); builder_->CreateAtomicRMW(AtomicRMWInst::FAdd, rmw_ptr, rmw_val, - AtomicOrdering::Monotonic, + AtomicOrdering::Unordered, SyncScope::System); builder_->CreateBr(mask_done_bb); builder_->SetInsertPoint(mask_done_bb); diff --git a/lib/codegen/target.cc b/lib/codegen/target.cc index f63b4b899..253dfc709 100644 --- a/lib/codegen/target.cc +++ b/lib/codegen/target.cc @@ -2,6 +2,8 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Function.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsNVPTX.h" +#include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/Value.h" #include "llvm/IR/IRBuilder.h" #include @@ -49,13 +51,7 @@ Value* amd_cl_target::get_block_id(Module *module, IRBuilder<>& builder, unsigne } Value* amd_cl_target::get_num_blocks(Module *module, IRBuilder<>& builder, unsigned ax) { - static std::array ids = { - Intrinsic::r600_read_ngroups_x, - Intrinsic::r600_read_ngroups_y, - Intrinsic::r600_read_ngroups_z - }; - Value* get_num_group = Intrinsic::getDeclaration(module, ids[ax]); - return builder.CreateCall(get_num_group, {}); + throw std::runtime_error("not implemented on AMD"); } Value* amd_cl_target::get_local_id(Module *module, IRBuilder<>& builder, unsigned ax) { diff --git a/lib/driver/module.cc b/lib/driver/module.cc index 20586f57f..526f93c8d 100755 --- a/lib/driver/module.cc +++ b/lib/driver/module.cc @@ -28,6 +28,8 @@ #include "llvm/IR/Verifier.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/Module.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" @@ -115,8 +117,8 @@ void module::compile_llvm_module(std::unique_ptr module, const std // convert triton file type to llvm file type auto ll_file_type = [&](module::file_type_t type){ if(type == Object) - return llvm::TargetMachine::CGFT_ObjectFile; - return llvm::TargetMachine::CGFT_AssemblyFile; + return llvm::CodeGenFileType::CGFT_ObjectFile; + return llvm::CodeGenFileType::CGFT_AssemblyFile; }; // emit machine->addPassesToEmitFile(pass, stream, nullptr, ll_file_type(ft)); @@ -198,7 +200,7 @@ host_module::host_module(driver::context * context, std::unique_ptrerror); - builder.setMCJITMemoryManager(llvm::make_unique()); + builder.setMCJITMemoryManager(std::make_unique()); builder.setOptLevel(llvm::CodeGenOpt::Aggressive); builder.setEngineKind(llvm::EngineKind::JIT); hst_->engine = builder.create(); diff --git a/lib/lang/code_gen.cc b/lib/lang/code_gen.cc index 823062d45..94be4e639 100644 --- a/lib/lang/code_gen.cc +++ b/lib/lang/code_gen.cc @@ -278,10 +278,7 @@ void Generator::VisitFuncCall(FuncCall* funcCall) { ir::value* val = ret_; return set_ret(bld_->create_atomic_exch(ptr, val)); } - if(name == "f32_atomic_add" || - name == "atomic_add_32x32" || name == "atomic_add_32x64" || name == "atomic_add_32x128" || - name == "atomic_add_64x32" || name == "atomic_add_64x64" || name == "atomic_add_64x128" || - name == "atomic_add_128x32"|| name == "atomic_add_128x64"|| name == "atomic_add_128x128"){ + if(name.substr(0, 10) == "atomic_add"){ VisitExpr(funcCall->Args()->at(0)); ir::value* ptr = ret_; VisitExpr(funcCall->Args()->at(1)); diff --git a/lib/runtime/function.cc b/lib/runtime/function.cc index 3fd8b5dca..2e6bcfc2c 100644 --- a/lib/runtime/function.cc +++ b/lib/runtime/function.cc @@ -359,17 +359,11 @@ std::string function::preheader() { #define PASTER(a, b, _) a ## _ ## b #define EVALUATOR(a, b, _) PASTER(a, b, _) -#define atomic_add(TM, TN) EVALUATOR(atomic_add, EVALUATOR(TM, TN, x), _) -extern void atomic_add_64(float*[64], float[64], bool[64]); -extern void atomic_add_32x32(float*[32, 32], float[32, 32], bool[32, 32]); -extern void atomic_add_32x64(float*[32, 64], float[32, 64], bool[32, 64]); -extern void atomic_add_32x128(float*[32, 128], float[32, 128], bool[32, 128]); -extern void atomic_add_64x32(float*[64, 32], float[64, 32], bool[64, 32]); -extern void atomic_add_64x64(float*[64, 64], float[64, 64], bool[64, 64]); -extern void atomic_add_64x128(float*[64, 128], float[64, 128], bool[64, 128]); -extern void atomic_add_128x32(float*[128, 32], float[128, 32], bool[128, 32]); -extern void atomic_add_128x64(float*[128, 64], float[128, 64], bool[128, 64]); -extern void atomic_add_128x128(float*[128, 128], float[128, 128], bool[128, 128]); +#define atomic_add(TYPE, TM, TN) EVALUATOR(atomic_add, EVALUATOR(TYPE, EVALUATOR(TM, TN, x), _), _) +#define DECLARATION(TYPE, TM, TN) extern void atomic_add(TYPE, TM, TN)(TYPE*[TM, TN], TYPE[TM, TN], bool[TM, TN]) + +DECLARATION(float, 64, 64); +DECLARATION(half , 64, 64); extern int atomic_cas(int*, int, int); extern int atomic_xchg(int*, int); diff --git a/python/setup.py b/python/setup.py index 97fc52957..69e3aa58a 100644 --- a/python/setup.py +++ b/python/setup.py @@ -16,7 +16,7 @@ import torch def find_llvm(): - versions = ['-9.0', '-9', '-90', '-8.0', '-8', '-80', ''] + versions = ['-10', '-9.0', '-9', '-90', '-8.0', '-8', '-80', ''] supported = ['llvm-config{v}'.format(v=v) for v in versions] paths = [distutils.spawn.find_executable(cfg) for cfg in supported] paths = [p for p in paths if p is not None]