From 7d50b87681f76923375ac2e29311a9f25ffacd6c Mon Sep 17 00:00:00 2001
From: Philippe Tillet
Date: Tue, 11 Jun 2019 10:45:19 -0700
Subject: [PATCH] [selection/codegen] bugfix in distributed tile indices initialization

---
 lib/codegen/selection.cpp | 7 ++-----
 lib/driver/module.cpp     | 8 +++++---
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/lib/codegen/selection.cpp b/lib/codegen/selection.cpp
index fa000d6a7..5463e7090 100644
--- a/lib/codegen/selection.cpp
+++ b/lib/codegen/selection.cpp
@@ -29,7 +29,8 @@ void distributed_tile::init_indices() {
     indices_t current;
     for(size_t d = 0; d < id.size(); d++)
       current.push_back(axes_[d].values[id[d]]);
-    indices_[current] = indices_.size();
+    size_t sz = indices_.size();
+    indices_[current] = sz;
     values_[current] = UndefValue::get(ty_);
     ordered_indices_.push_back(current);
     id[0]++;
@@ -840,10 +841,6 @@ void selection::lower_tile_instruction(ir::instruction *ins, llvm::IRBuilder<> &
         unsigned id = linear / vector_size;
         if(linear % vector_size == 0)
           packets[id] = result->get_value(idx);
-      });
-      in->for_each([&](indices_t idx){
-        unsigned linear = in->get_linear_index(idx);
-        unsigned id = linear / vector_size;
         packets[id] = builder.CreateInsertElement(packets.at(id), in->get_value(idx), linear % vector_size);
       });
       result->for_each([&](indices_t idx){
diff --git a/lib/driver/module.cpp b/lib/driver/module.cpp
index ebc876559..c3139ece6 100755
--- a/lib/driver/module.cpp
+++ b/lib/driver/module.cpp
@@ -243,10 +243,12 @@ std::string cu_module::compile_llvm_module(llvm::Module* module) {
   layout += "-i64:64-i128:128-v16:16-v32:32-n16:32:64";
   // create
   llvm::SmallVector buffer;
-  module::compile_llvm_module(module, "nvptx64-nvidia-cuda", "sm_75", layout, buffer, "", Assembly);
+  module::compile_llvm_module(module, "nvptx64-nvidia-cuda", "sm_70", layout, buffer, "", Assembly);
   std::string result(buffer.begin(), buffer.end());
-  std::string to_replace = ".version 6.3";
-  result.replace(result.find(to_replace), to_replace.size(), ".version 6.4");
+  size_t start_replace = result.find(".version");
+  size_t end_replace = result.find('\n', start_replace);
+  assert(start_replace != std::string::npos);
+  result.replace(start_replace, end_replace - start_replace, ".version 6.4");
   return result;
 }
 