diff --git a/CMakeLists.txt b/CMakeLists.txt
index 09559a40a..fddc6f1f6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -21,7 +21,7 @@ endif()
 
 # Compiler flags
 include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include)
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__STDC_FORMAT_MACROS -fvisibility=default -std=gnu++11")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__STDC_FORMAT_MACROS -fvisibility=default -std=gnu++14")
 
 # Tests
 if(BUILD_TESTS)
diff --git a/README.md b/README.md
index 51907a240..f03181354 100644
--- a/README.md
+++ b/README.md
@@ -14,11 +14,11 @@ The formal foundations of this project are described in the following MAPL2019 p
 
 ## Installation
 
-Triton is a fairly self-contained package and uses its own parser (forked from [wgtcc](https://github.com/wgtdkp/wgtcc)) and LLVM-8.0+ for code generation. 
+Triton is a fairly self-contained package and uses its own parser (forked from [wgtcc](https://github.com/wgtdkp/wgtcc)) and LLVM-10+ for code generation. 
 
 You can install the latest release with pip as follows:
 ```
-sudo apt-get install llvm-9-dev
+sudo apt-get install llvm-10-dev
 pip install triton
 ```
 
diff --git a/cmake/FindLLVM.cmake b/cmake/FindLLVM.cmake
index 30ebcbd89..dcc0488b5 100644
--- a/cmake/FindLLVM.cmake
+++ b/cmake/FindLLVM.cmake
@@ -28,7 +28,8 @@
 # We also want an user-specified LLVM_ROOT_DIR to take precedence over the
 # system default locations such as /usr/local/bin. Executing find_program()
 # multiples times is the approach recommended in the docs.
-set(llvm_config_names llvm-config-9 llvm-config-9.0 llvm-config90
+set(llvm_config_names llvm-config-10 llvm-config-10.0 llvm-config100
+                      llvm-config-9 llvm-config-9.0 llvm-config90
                       llvm-config-8 llvm-config-8.0 llvm-config80
                       llvm-config)
 find_program(LLVM_CONFIG
diff --git a/lib/codegen/selection/generator.cc b/lib/codegen/selection/generator.cc
index 18f2dafcd..dd9f0592c 100644
--- a/lib/codegen/selection/generator.cc
+++ b/lib/codegen/selection/generator.cc
@@ -695,7 +695,7 @@ void generator::visit_atomic_add_inst(ir::atomic_add_inst* add) {
       builder_->CreateCondBr(rmw_msk, mask_then_bb, mask_done_bb);
       builder_->SetInsertPoint(mask_then_bb);
       builder_->CreateAtomicRMW(AtomicRMWInst::FAdd, rmw_ptr, rmw_val,
-                                AtomicOrdering::Monotonic,
+                                AtomicOrdering::Unordered,
                                 SyncScope::System);
       builder_->CreateBr(mask_done_bb);
       builder_->SetInsertPoint(mask_done_bb);
diff --git a/lib/codegen/target.cc b/lib/codegen/target.cc
index f63b4b899..253dfc709 100644
--- a/lib/codegen/target.cc
+++ b/lib/codegen/target.cc
@@ -2,6 +2,8 @@
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsNVPTX.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
 #include "llvm/IR/Value.h"
 #include "llvm/IR/IRBuilder.h"
 #include <iostream>
@@ -49,13 +51,7 @@ Value* amd_cl_target::get_block_id(Module *module, IRBuilder<>& builder, unsigne
 }
 
 Value* amd_cl_target::get_num_blocks(Module *module, IRBuilder<>& builder, unsigned ax) {
-  static std::array<Intrinsic::ID, 3> ids = {
-    Intrinsic::r600_read_ngroups_x,
-    Intrinsic::r600_read_ngroups_y,
-    Intrinsic::r600_read_ngroups_z
-  };
-  Value* get_num_group = Intrinsic::getDeclaration(module, ids[ax]);
-  return builder.CreateCall(get_num_group, {});
+  throw std::runtime_error("not implemented on AMD");
 }
 
 Value* amd_cl_target::get_local_id(Module *module, IRBuilder<>& builder, unsigned ax) {
diff --git a/lib/driver/module.cc b/lib/driver/module.cc
index 20586f57f..526f93c8d 100755
--- a/lib/driver/module.cc
+++ b/lib/driver/module.cc
@@ -28,6 +28,8 @@
 #include "llvm/IR/Verifier.h"
 #include "llvm/IR/IRPrintingPasses.h"
 #include "llvm/IR/Module.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/TargetRegistry.h"
@@ -115,8 +117,8 @@ void module::compile_llvm_module(std::unique_ptr<llvm::Module> module, const std
   // convert triton file type to llvm file type
   auto ll_file_type = [&](module::file_type_t type){
     if(type == Object)
-      return llvm::TargetMachine::CGFT_ObjectFile;
-    return llvm::TargetMachine::CGFT_AssemblyFile;
+      return llvm::CodeGenFileType::CGFT_ObjectFile;
+    return llvm::CodeGenFileType::CGFT_AssemblyFile;
   };
   // emit
   machine->addPassesToEmitFile(pass, stream, nullptr, ll_file_type(ft));
@@ -198,7 +200,7 @@ host_module::host_module(driver::context * context, std::unique_ptr<llvm::Module
 
   llvm::EngineBuilder builder(std::move(src));
   builder.setErrorStr(&hst_->error);
-  builder.setMCJITMemoryManager(llvm::make_unique<llvm::SectionMemoryManager>());
+  builder.setMCJITMemoryManager(std::make_unique<llvm::SectionMemoryManager>());
   builder.setOptLevel(llvm::CodeGenOpt::Aggressive);
   builder.setEngineKind(llvm::EngineKind::JIT);
   hst_->engine = builder.create();
diff --git a/lib/lang/code_gen.cc b/lib/lang/code_gen.cc
index 823062d45..94be4e639 100644
--- a/lib/lang/code_gen.cc
+++ b/lib/lang/code_gen.cc
@@ -278,10 +278,7 @@ void Generator::VisitFuncCall(FuncCall* funcCall) {
     ir::value* val = ret_;
     return set_ret(bld_->create_atomic_exch(ptr, val));
   }
-  if(name == "f32_atomic_add" || 
-     name == "atomic_add_32x32" || name == "atomic_add_32x64" || name == "atomic_add_32x128" ||
-     name == "atomic_add_64x32" || name == "atomic_add_64x64" || name == "atomic_add_64x128" ||
-     name == "atomic_add_128x32"|| name == "atomic_add_128x64"|| name == "atomic_add_128x128"){
+  if(name.substr(0, 10) == "atomic_add"){
     VisitExpr(funcCall->Args()->at(0));
     ir::value* ptr = ret_;
     VisitExpr(funcCall->Args()->at(1));
diff --git a/lib/runtime/function.cc b/lib/runtime/function.cc
index 3fd8b5dca..2e6bcfc2c 100644
--- a/lib/runtime/function.cc
+++ b/lib/runtime/function.cc
@@ -359,17 +359,11 @@ std::string function::preheader() {
 
 #define PASTER(a, b, _) a ## _ ## b
 #define EVALUATOR(a, b, _)  PASTER(a, b, _)
-#define atomic_add(TM, TN) EVALUATOR(atomic_add, EVALUATOR(TM, TN, x), _)
-extern void atomic_add_64(float*[64], float[64], bool[64]);
-extern void atomic_add_32x32(float*[32, 32], float[32, 32], bool[32, 32]);
-extern void atomic_add_32x64(float*[32, 64], float[32, 64], bool[32, 64]);
-extern void atomic_add_32x128(float*[32, 128], float[32, 128], bool[32, 128]);
-extern void atomic_add_64x32(float*[64, 32], float[64, 32], bool[64, 32]);
-extern void atomic_add_64x64(float*[64, 64], float[64, 64], bool[64, 64]);
-extern void atomic_add_64x128(float*[64, 128], float[64, 128], bool[64, 128]);
-extern void atomic_add_128x32(float*[128, 32], float[128, 32], bool[128, 32]);
-extern void atomic_add_128x64(float*[128, 64], float[128, 64], bool[128, 64]);
-extern void atomic_add_128x128(float*[128, 128], float[128, 128], bool[128, 128]);
+#define atomic_add(TYPE, TM, TN) EVALUATOR(atomic_add, EVALUATOR(TYPE, EVALUATOR(TM, TN, x), _), _)
+#define DECLARATION(TYPE, TM, TN) extern void atomic_add(TYPE, TM, TN)(TYPE*[TM, TN], TYPE[TM, TN], bool[TM, TN])
+
+DECLARATION(float, 64, 64);
+DECLARATION(half , 64, 64);
 
 extern int atomic_cas(int*, int, int);
 extern int atomic_xchg(int*, int);
diff --git a/python/setup.py b/python/setup.py
index 97fc52957..69e3aa58a 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -16,7 +16,7 @@ import torch
 
 
 def find_llvm():
-    versions = ['-9.0', '-9', '-90', '-8.0', '-8', '-80', '']
+    versions = ['-10', '-9.0', '-9', '-90', '-8.0', '-8', '-80', '']
     supported = ['llvm-config{v}'.format(v=v) for v in versions]
     paths = [distutils.spawn.find_executable(cfg) for cfg in supported]
     paths = [p for p in paths if p is not None]