Merge branch 'master' into v2.0

2022-01-30 20:25:01 -08:00
parent 807d8a1945 6f7acad48f
commit 2922dc141c
3 changed files with 3 additions and 59 deletions
--- a/lib/driver/llvm.cc
+++ b/lib/driver/llvm.cc
@@ -218,63 +218,6 @@ std::string ptx_to_cubin(const std::string& ptx, int cc) {
  return cubin;
 }

-//CUmodule ptx_to_cumodule(const std::string& ptx, int cc) {
-//  // JIT compile source-code
-//  try{
-//    // use ptxas if present in PATH. Otherwise, use JIT from the driver
-//    std::string ptxas = "ptxas";
-//    std::string version;
-//    int use_system_ptxas = tools::exec(ptxas + " --version 2>&1", version) == 0;
-
-//    // Use PTXAS via system call
-//    if(use_system_ptxas){
-//      // compile ptx with ptxas
-//      char _fsrc[] = "/tmp/triton_k_XXXXXX";
-//      char _flog[] = "/tmp/triton_l_XXXXXX";
-//      mkstemp(_fsrc);
-//      mkstemp(_flog);
-//      std::string fsrc = _fsrc;
-//      std::string flog = _flog;
-//      std::string fbin = fsrc + ".o";
-//      const char* _fbin = fbin.c_str();
-//      std::ofstream ofs(fsrc);
-//      ofs << ptx;
-//      ofs.close();
-//      std::string cmd;
-//      int err;
-//      cmd = ptxas + " -v --gpu-name=sm_" + std::to_string(cc) + " " + fsrc + " -o " + fsrc + ".o 2> " + flog;
-//      err = system(cmd.c_str());
-//      CUmodule ret;
-//      std::ifstream _cubin(_fbin, std::ios::binary );
-//      std::string cubin(std::istreambuf_iterator<char>(_cubin), {});
-//      _cubin.close();
-//      dispatch::cuModuleLoadData(&ret, cubin.c_str());
-//      unlink(_fsrc);
-//      unlink(_flog);
-//      unlink(_fbin);
-//      return ret;
-//    }
-
-//    // Use PTXAS included in driver
-//    CUjit_option opt[] = {CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES, CU_JIT_ERROR_LOG_BUFFER,
-//                          CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES, CU_JIT_INFO_LOG_BUFFER,
-//                          CU_JIT_LOG_VERBOSE};
-//    unsigned int errbufsize = 8192;
-//    unsigned int logbufsize = 8192;
-//    char _err[errbufsize];
-//    char _log[logbufsize];
-//    void* optval[] = {(void*)(uintptr_t)errbufsize, (void*)_err, (void*)(uintptr_t)logbufsize, (void*)_log, (void*)1};
-//    CUmodule ret;
-//    dispatch::cuModuleLoadDataEx(&ret, ptx.data(), 5, opt, optval);
-//    return ret;
-//  }
-//  catch(exception::cuda::invalid_ptx const &){
-//    std::cout << ptx << std::endl;
-//    std::cerr << "It appears that Triton produced invalid PTX code:" << std::endl;
-//    throw;
-//  }
-//}
-
 /* ------------------------ */
 //         HIP              //
 /* ------------------------ */
--- a/python/triton/code_gen.py
+++ b/python/triton/code_gen.py
@@ -472,6 +472,7 @@ class CodeGenerator(ast.NodeVisitor):
        def visit_Str(self, node):
            return triton.language.constexpr(ast.literal_eval(node))

+
    def visit_Attribute(self, node):
        lhs = self.visit(node.value)
        return getattr(lhs, node.attr)
--- a/python/tutorials/01-vector-add.py
+++ b/python/tutorials/01-vector-add.py
@@ -38,8 +38,8 @@ def add_kernel(
    offsets = block_start + tl.arange(0, BLOCK_SIZE)
    # Create a mask to guard memory operations against out-of-bounds accesses
    mask = offsets < n_elements
-    # Load x and y from DRAM, masking out any extra elements in case
-    # the input is not a multiple of the block size
+    # Load x and y from DRAM, masking out any extra elements in case the input is not a
+    # multiple of the block size
    x = tl.load(x_ptr + offsets, mask=mask)
    y = tl.load(y_ptr + offsets, mask=mask)
    output = x + y