[CODEGEN] Fixed bug in atomic_add

2020-11-19 18:19:55 -05:00
parent 7d095ec686
commit baa858aa74
5 changed files with 37 additions and 24 deletions
--- a/include/triton/ir/instructions.h
+++ b/include/triton/ir/instructions.h
@@ -431,6 +431,18 @@ public:
  _TRITON_DEFINE_ACCEPT(masked_load_inst)
 };

+class atomic_add_inst: public io_inst { // atomic-add instruction taking pointer, value and mask operands; derives from io_inst
+private:
+  atomic_add_inst(value *ptr, value *val, value *msk, const std::string &name = "", instruction *next = nullptr); // private constructor; the public entry point is the static create() below
+  std::string repr_impl() const { return "atomic_add"; } // returns the fixed string used as this instruction's representation
+  _TRITON_DEFINE_CLONE(atomic_add_inst) // NOTE(review): macro defined elsewhere; presumably supplies the clone hook - confirm
+  _TRITON_DEFINE_ACCEPT(atomic_add_inst) // NOTE(review): macro defined elsewhere; presumably supplies the visitor accept hook - confirm
+
+public:
+  static instruction* create(value *ptr, value *val, value *msk, const std::string &name = "", instruction *next = nullptr); // factory; same parameters as the private constructor
+};
+
+
 // store
 class store_inst: public io_inst {
 protected:
@@ -601,16 +613,6 @@ public:
  static instruction* create(value *ptr, value *val, const std::string &name = "", instruction *next = nullptr);
 };

-class atomic_add_inst: public builtin_inst {
-private:
-  atomic_add_inst(value *ptr, value *val, value *msk, const std::string &name = "", instruction *next = nullptr);
-  std::string repr_impl() const { return "atomic_add"; }
-  _TRITON_DEFINE_CLONE(atomic_add_inst)
-  _TRITON_DEFINE_ACCEPT(atomic_add_inst)
-
-public:
-  static instruction* create(value *ptr, value *val, value *msk, const std::string &name = "", instruction *next = nullptr);
-};

 class exp_inst: public builtin_inst {
 private:
--- a/lib/codegen/analysis/align.cc
+++ b/lib/codegen/analysis/align.cc
@@ -312,6 +312,7 @@ std::vector<unsigned> align::populate_max_contiguous_gep(ir::getelementptr_inst*
    if(rhs_cst_info[d].num_cst)
      rvalue = lhs_max_contiguous[d];
    result[d] = std::max(lvalue, rvalue);
+//    std::cout << "max contiguous: " << x->get_name() << " " << d << " " << result[d] << std::endl;
  }
  return add_to_cache(x, result, max_contiguous_);
 }
@@ -416,8 +417,10 @@ std::vector<unsigned> align::populate_starting_multiple_gep(ir::getelementptr_in
  auto lhs = populate_starting_multiple(x->get_operand(0));
  auto rhs = populate_starting_multiple(x->get_operand(1));
  std::vector<unsigned> result(lhs.size(), 1);
-  for(size_t d = 0; d < lhs.size(); d++)
+  for(size_t d = 0; d < lhs.size(); d++){
    result[d] = gcd(lhs[d], rhs[d]);
+//    std::cout << "starting multiple: " << x->get_name() << " " << d << " " << result[d] << std::endl;
+  }
  return add_to_cache(x, result, starting_multiple_);
 }

--- a/lib/codegen/analysis/layout.cc
+++ b/lib/codegen/analysis/layout.cc
@@ -108,6 +108,10 @@ data_layout::data_layout(id_t id,
    std::sort(order_.begin(), order_.end(), [&](unsigned a, unsigned b) {
      return max_contiguous[a] > max_contiguous[b];
    });
+//    std::cout << "===" << std::endl;
+//    std::cout << (*largest)->get_name() << std::endl;
+//    for(ir::value* x: ptr)
+//      std::cout << x->get_name() << std::endl;
  }
 }

--- a/lib/ir/instructions.cc
+++ b/lib/ir/instructions.cc
@@ -463,6 +463,20 @@ masked_load_inst* masked_load_inst::create(value *ptr, value *mask, value *false
  return new masked_load_inst(ptr, mask, false_value, name, next);
 }

+// atomic add
+
+atomic_add_inst::atomic_add_inst(value *ptr, value *val, value *msk, const std::string &name, instruction *next) // builds the instruction from its pointer, value and mask
+  : io_inst(ptr->get_type()->get_pointer_element_ty(), INST_ATOMIC_ADD, 3, name, next) { // type passed to io_inst is ptr's pointee type; 3 is the operand count
+  set_operand(0, ptr); // operand 0: pointer
+  set_operand(1, val); // operand 1: value
+  set_operand(2, msk); // operand 2: mask
+}
+
+instruction* atomic_add_inst::create(value *ptr, value *val, value *msk, const std::string &name, instruction *next) { // static factory: forwards every argument to the private constructor
+  return new atomic_add_inst(ptr, val, msk, name, next); // allocated with new and returned as instruction*; who frees it is not visible in this hunk
+}
+
+// store

 store_inst::store_inst(value *ptr, value_id_t id, unsigned num_ops, const std::string &name, instruction *next)
  : io_inst(type::get_void_ty(ptr->get_type()->get_context()), id, num_ops, name, next)
@@ -734,18 +748,6 @@ instruction* atomic_exch_inst::create(value *ptr, value *val, const std::string
  return new atomic_exch_inst(ptr, val, name, next);
 }

-// atomic add
-
-atomic_add_inst::atomic_add_inst(value *ptr, value *val, value *msk, const std::string &name, instruction *next)
-  : builtin_inst(ptr->get_type()->get_pointer_element_ty(), INST_ATOMIC_ADD, 3, name, next) {
-  set_operand(0, ptr);
-  set_operand(1, val);
-  set_operand(2, msk);
-}
-
-instruction* atomic_add_inst::create(value *ptr, value *val, value *msk, const std::string &name, instruction *next) {
-  return new atomic_add_inst(ptr, val, msk, name, next);
-}

 // exp

--- a/lib/runtime/function.cc
+++ b/lib/runtime/function.cc
@@ -230,6 +230,7 @@ std::unique_ptr<driver::module> function::make_bin(ir::module &module,
  }
  peephole.run(module);
  dce.run(module);
+//  ir::print(module, std::cout);
  align.run(module);
  axes.run(module);
  layouts.run(module);
@@ -238,7 +239,6 @@ std::unique_ptr<driver::module> function::make_bin(ir::module &module,
  if(allocation.allocated_size() > context->device()->max_shared_memory())
    throw std::runtime_error("using too much shared memory");
  barriers.run(module);
-  //ir::print(module, std::cout);
  isel.visit(module, *llvm);
  std::unique_ptr<driver::module> res(driver::module::create(context, std::move(llvm)));
  return res;
@@ -364,6 +364,8 @@ std::string function::preheader() {

 DECLARATION(float, 64, 64);
 DECLARATION(half , 64, 64);
+DECLARATION(half , 64, 128);
+DECLARATION(half , 128, 64);
 DECLARATION(half , 128, 128);

 extern int atomic_cas(int*, int, int);