From baa858aa7435b848e93ce7c1dc5a2bf65219e6cf Mon Sep 17 00:00:00 2001 From: Philippe Tillet Date: Thu, 19 Nov 2020 18:19:55 -0500 Subject: [PATCH] [CODEGEN] Fixed bug in atomic_add --- include/triton/ir/instructions.h | 22 ++++++++++++---------- lib/codegen/analysis/align.cc | 5 ++++- lib/codegen/analysis/layout.cc | 4 ++++ lib/ir/instructions.cc | 26 ++++++++++++++------------ lib/runtime/function.cc | 4 +++- 5 files changed, 37 insertions(+), 24 deletions(-) diff --git a/include/triton/ir/instructions.h b/include/triton/ir/instructions.h index 755fb172d..23219b6c9 100644 --- a/include/triton/ir/instructions.h +++ b/include/triton/ir/instructions.h @@ -431,6 +431,18 @@ public: _TRITON_DEFINE_ACCEPT(masked_load_inst) }; +class atomic_add_inst: public io_inst { +private: + atomic_add_inst(value *ptr, value *val, value *msk, const std::string &name = "", instruction *next = nullptr); + std::string repr_impl() const { return "atomic_add"; } + _TRITON_DEFINE_CLONE(atomic_add_inst) + _TRITON_DEFINE_ACCEPT(atomic_add_inst) + +public: + static instruction* create(value *ptr, value *val, value *msk, const std::string &name = "", instruction *next = nullptr); +}; + + // store class store_inst: public io_inst { protected: @@ -601,16 +613,6 @@ public: static instruction* create(value *ptr, value *val, const std::string &name = "", instruction *next = nullptr); }; -class atomic_add_inst: public builtin_inst { -private: - atomic_add_inst(value *ptr, value *val, value *msk, const std::string &name = "", instruction *next = nullptr); - std::string repr_impl() const { return "atomic_add"; } - _TRITON_DEFINE_CLONE(atomic_add_inst) - _TRITON_DEFINE_ACCEPT(atomic_add_inst) - -public: - static instruction* create(value *ptr, value *val, value *msk, const std::string &name = "", instruction *next = nullptr); -}; class exp_inst: public builtin_inst { private: diff --git a/lib/codegen/analysis/align.cc b/lib/codegen/analysis/align.cc index 8eae89f86..a40257bdb 100644 --- a/lib/codegen/analysis/align.cc +++ b/lib/codegen/analysis/align.cc @@ -312,6 +312,7 @@ std::vector align::populate_max_contiguous_gep(ir::getelementptr_inst* if(rhs_cst_info[d].num_cst) rvalue = lhs_max_contiguous[d]; result[d] = std::max(lvalue, rvalue); +// std::cout << "max contiguous: " << x->get_name() << " " << d << " " << result[d] << std::endl; } return add_to_cache(x, result, max_contiguous_); } @@ -416,8 +417,10 @@ std::vector align::populate_starting_multiple_gep(ir::getelementptr_in auto lhs = populate_starting_multiple(x->get_operand(0)); auto rhs = populate_starting_multiple(x->get_operand(1)); std::vector result(lhs.size(), 1); - for(size_t d = 0; d < lhs.size(); d++) + for(size_t d = 0; d < lhs.size(); d++){ result[d] = gcd(lhs[d], rhs[d]); +// std::cout << "starting multiple: " << x->get_name() << " " << d << " " << result[d] << std::endl; + } return add_to_cache(x, result, starting_multiple_); } diff --git a/lib/codegen/analysis/layout.cc b/lib/codegen/analysis/layout.cc index 5397642bc..9c9929d01 100644 --- a/lib/codegen/analysis/layout.cc +++ b/lib/codegen/analysis/layout.cc @@ -108,6 +108,10 @@ data_layout::data_layout(id_t id, std::sort(order_.begin(), order_.end(), [&](unsigned a, unsigned b) { return max_contiguous[a] > max_contiguous[b]; }); +// std::cout << "===" << std::endl; +// std::cout << (*largest)->get_name() << std::endl; +// for(ir::value* x: ptr) +// std::cout << x->get_name() << std::endl; } } diff --git a/lib/ir/instructions.cc b/lib/ir/instructions.cc index 828d7081e..684c0d7d9 100644 --- a/lib/ir/instructions.cc +++ b/lib/ir/instructions.cc @@ -463,6 +463,20 @@ masked_load_inst* masked_load_inst::create(value *ptr, value *mask, value *false return new masked_load_inst(ptr, mask, false_value, name, next); } +// atomic add + +atomic_add_inst::atomic_add_inst(value *ptr, value *val, value *msk, const std::string &name, instruction *next) + : io_inst(ptr->get_type()->get_pointer_element_ty(), INST_ATOMIC_ADD, 3, name, next) { + set_operand(0, ptr); + set_operand(1, val); + set_operand(2, msk); +} + +instruction* atomic_add_inst::create(value *ptr, value *val, value *msk, const std::string &name, instruction *next) { + return new atomic_add_inst(ptr, val, msk, name, next); +} + +// store store_inst::store_inst(value *ptr, value_id_t id, unsigned num_ops, const std::string &name, instruction *next) : io_inst(type::get_void_ty(ptr->get_type()->get_context()), id, num_ops, name, next) @@ -734,18 +748,6 @@ instruction* atomic_exch_inst::create(value *ptr, value *val, const std::string return new atomic_exch_inst(ptr, val, name, next); } -// atomic add - -atomic_add_inst::atomic_add_inst(value *ptr, value *val, value *msk, const std::string &name, instruction *next) - : builtin_inst(ptr->get_type()->get_pointer_element_ty(), INST_ATOMIC_ADD, 3, name, next) { - set_operand(0, ptr); - set_operand(1, val); - set_operand(2, msk); -} - -instruction* atomic_add_inst::create(value *ptr, value *val, value *msk, const std::string &name, instruction *next) { - return new atomic_add_inst(ptr, val, msk, name, next); -} // exp diff --git a/lib/runtime/function.cc b/lib/runtime/function.cc index 579aa46f0..b60074a0f 100644 --- a/lib/runtime/function.cc +++ b/lib/runtime/function.cc @@ -230,6 +230,7 @@ std::unique_ptr function::make_bin(ir::module &module, } peephole.run(module); dce.run(module); +// ir::print(module, std::cout); align.run(module); axes.run(module); layouts.run(module); @@ -238,7 +239,6 @@ std::unique_ptr function::make_bin(ir::module &module, if(allocation.allocated_size() > context->device()->max_shared_memory()) throw std::runtime_error("using too much shared memory"); barriers.run(module); - //ir::print(module, std::cout); isel.visit(module, *llvm); std::unique_ptr res(driver::module::create(context, std::move(llvm))); return res; @@ -364,6 +364,8 @@ std::string function::preheader() { DECLARATION(float, 64, 64); DECLARATION(half , 64, 64); +DECLARATION(half , 64, 128); +DECLARATION(half , 128, 64); DECLARATION(half , 128, 128); extern int atomic_cas(int*, int, int);