[CODEGEN] Fixed bug in atomic_add
This commit is contained in:
@@ -431,6 +431,18 @@ public:
|
||||
_TRITON_DEFINE_ACCEPT(masked_load_inst)
|
||||
};
|
||||
|
||||
class atomic_add_inst: public io_inst {
|
||||
private:
|
||||
atomic_add_inst(value *ptr, value *val, value *msk, const std::string &name = "", instruction *next = nullptr);
|
||||
std::string repr_impl() const { return "atomic_add"; }
|
||||
_TRITON_DEFINE_CLONE(atomic_add_inst)
|
||||
_TRITON_DEFINE_ACCEPT(atomic_add_inst)
|
||||
|
||||
public:
|
||||
static instruction* create(value *ptr, value *val, value *msk, const std::string &name = "", instruction *next = nullptr);
|
||||
};
|
||||
|
||||
|
||||
// store
|
||||
class store_inst: public io_inst {
|
||||
protected:
|
||||
@@ -601,16 +613,6 @@ public:
|
||||
static instruction* create(value *ptr, value *val, const std::string &name = "", instruction *next = nullptr);
|
||||
};
|
||||
|
||||
class atomic_add_inst: public builtin_inst {
|
||||
private:
|
||||
atomic_add_inst(value *ptr, value *val, value *msk, const std::string &name = "", instruction *next = nullptr);
|
||||
std::string repr_impl() const { return "atomic_add"; }
|
||||
_TRITON_DEFINE_CLONE(atomic_add_inst)
|
||||
_TRITON_DEFINE_ACCEPT(atomic_add_inst)
|
||||
|
||||
public:
|
||||
static instruction* create(value *ptr, value *val, value *msk, const std::string &name = "", instruction *next = nullptr);
|
||||
};
|
||||
|
||||
class exp_inst: public builtin_inst {
|
||||
private:
|
||||
|
@@ -312,6 +312,7 @@ std::vector<unsigned> align::populate_max_contiguous_gep(ir::getelementptr_inst*
|
||||
if(rhs_cst_info[d].num_cst)
|
||||
rvalue = lhs_max_contiguous[d];
|
||||
result[d] = std::max(lvalue, rvalue);
|
||||
// std::cout << "max contiguous: " << x->get_name() << " " << d << " " << result[d] << std::endl;
|
||||
}
|
||||
return add_to_cache(x, result, max_contiguous_);
|
||||
}
|
||||
@@ -416,8 +417,10 @@ std::vector<unsigned> align::populate_starting_multiple_gep(ir::getelementptr_in
|
||||
auto lhs = populate_starting_multiple(x->get_operand(0));
|
||||
auto rhs = populate_starting_multiple(x->get_operand(1));
|
||||
std::vector<unsigned> result(lhs.size(), 1);
|
||||
for(size_t d = 0; d < lhs.size(); d++)
|
||||
for(size_t d = 0; d < lhs.size(); d++){
|
||||
result[d] = gcd(lhs[d], rhs[d]);
|
||||
// std::cout << "starting multiple: " << x->get_name() << " " << d << " " << result[d] << std::endl;
|
||||
}
|
||||
return add_to_cache(x, result, starting_multiple_);
|
||||
}
|
||||
|
||||
|
@@ -108,6 +108,10 @@ data_layout::data_layout(id_t id,
|
||||
std::sort(order_.begin(), order_.end(), [&](unsigned a, unsigned b) {
|
||||
return max_contiguous[a] > max_contiguous[b];
|
||||
});
|
||||
// std::cout << "===" << std::endl;
|
||||
// std::cout << (*largest)->get_name() << std::endl;
|
||||
// for(ir::value* x: ptr)
|
||||
// std::cout << x->get_name() << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -463,6 +463,20 @@ masked_load_inst* masked_load_inst::create(value *ptr, value *mask, value *false
|
||||
return new masked_load_inst(ptr, mask, false_value, name, next);
|
||||
}
|
||||
|
||||
// atomic add
|
||||
|
||||
atomic_add_inst::atomic_add_inst(value *ptr, value *val, value *msk, const std::string &name, instruction *next)
|
||||
: io_inst(ptr->get_type()->get_pointer_element_ty(), INST_ATOMIC_ADD, 3, name, next) {
|
||||
set_operand(0, ptr);
|
||||
set_operand(1, val);
|
||||
set_operand(2, msk);
|
||||
}
|
||||
|
||||
instruction* atomic_add_inst::create(value *ptr, value *val, value *msk, const std::string &name, instruction *next) {
|
||||
return new atomic_add_inst(ptr, val, msk, name, next);
|
||||
}
|
||||
|
||||
// store
|
||||
|
||||
store_inst::store_inst(value *ptr, value_id_t id, unsigned num_ops, const std::string &name, instruction *next)
|
||||
: io_inst(type::get_void_ty(ptr->get_type()->get_context()), id, num_ops, name, next)
|
||||
@@ -734,18 +748,6 @@ instruction* atomic_exch_inst::create(value *ptr, value *val, const std::string
|
||||
return new atomic_exch_inst(ptr, val, name, next);
|
||||
}
|
||||
|
||||
// atomic add
|
||||
|
||||
atomic_add_inst::atomic_add_inst(value *ptr, value *val, value *msk, const std::string &name, instruction *next)
|
||||
: builtin_inst(ptr->get_type()->get_pointer_element_ty(), INST_ATOMIC_ADD, 3, name, next) {
|
||||
set_operand(0, ptr);
|
||||
set_operand(1, val);
|
||||
set_operand(2, msk);
|
||||
}
|
||||
|
||||
instruction* atomic_add_inst::create(value *ptr, value *val, value *msk, const std::string &name, instruction *next) {
|
||||
return new atomic_add_inst(ptr, val, msk, name, next);
|
||||
}
|
||||
|
||||
// exp
|
||||
|
||||
|
@@ -230,6 +230,7 @@ std::unique_ptr<driver::module> function::make_bin(ir::module &module,
|
||||
}
|
||||
peephole.run(module);
|
||||
dce.run(module);
|
||||
// ir::print(module, std::cout);
|
||||
align.run(module);
|
||||
axes.run(module);
|
||||
layouts.run(module);
|
||||
@@ -238,7 +239,6 @@ std::unique_ptr<driver::module> function::make_bin(ir::module &module,
|
||||
if(allocation.allocated_size() > context->device()->max_shared_memory())
|
||||
throw std::runtime_error("using too much shared memory");
|
||||
barriers.run(module);
|
||||
//ir::print(module, std::cout);
|
||||
isel.visit(module, *llvm);
|
||||
std::unique_ptr<driver::module> res(driver::module::create(context, std::move(llvm)));
|
||||
return res;
|
||||
@@ -364,6 +364,8 @@ std::string function::preheader() {
|
||||
|
||||
DECLARATION(float, 64, 64);
|
||||
DECLARATION(half , 64, 64);
|
||||
DECLARATION(half , 64, 128);
|
||||
DECLARATION(half , 128, 64);
|
||||
DECLARATION(half , 128, 128);
|
||||
|
||||
extern int atomic_cas(int*, int, int);
|
||||
|
Reference in New Issue
Block a user