[CODEGEN] Fixed bug in atomic_add

This commit is contained in:
Philippe Tillet
2020-11-19 18:19:55 -05:00
parent 7d095ec686
commit baa858aa74
5 changed files with 37 additions and 24 deletions

View File

@@ -431,6 +431,18 @@ public:
_TRITON_DEFINE_ACCEPT(masked_load_inst)
};
// Atomic-add IR instruction, declared as a kind of io_inst.
// The constructor is private; instances are obtained through create().
class atomic_add_inst: public io_inst {
private:
// Builds the instruction from three operands: a pointer, a value and a mask.
// NOTE(review): msk presumably predicates which elements take part in the
// update -- confirm against the code generator.
atomic_add_inst(value *ptr, value *val, value *msk, const std::string &name = "", instruction *next = nullptr);
// Returns the literal "atomic_add"; presumably the mnemonic used when the IR is printed.
std::string repr_impl() const { return "atomic_add"; }
// Project macros; presumably generate the clone and visitor-accept members
// for this class -- see their definitions.
_TRITON_DEFINE_CLONE(atomic_add_inst)
_TRITON_DEFINE_ACCEPT(atomic_add_inst)
public:
// Factory taking the same arguments as the private constructor.
// name defaults to "" and next defaults to nullptr.
static instruction* create(value *ptr, value *val, value *msk, const std::string &name = "", instruction *next = nullptr);
};
// store
class store_inst: public io_inst {
protected:
@@ -601,16 +613,6 @@ public:
static instruction* create(value *ptr, value *val, const std::string &name = "", instruction *next = nullptr);
};
// Atomic-add IR instruction, declared here as a kind of builtin_inst.
// The constructor is private; instances are obtained through create().
class atomic_add_inst: public builtin_inst {
private:
// Builds the instruction from three operands: a pointer, a value and a mask.
// NOTE(review): msk presumably predicates which elements take part in the
// update -- confirm against the code generator.
atomic_add_inst(value *ptr, value *val, value *msk, const std::string &name = "", instruction *next = nullptr);
// Returns the literal "atomic_add"; presumably the mnemonic used when the IR is printed.
std::string repr_impl() const { return "atomic_add"; }
// Project macros; presumably generate the clone and visitor-accept members
// for this class -- see their definitions.
_TRITON_DEFINE_CLONE(atomic_add_inst)
_TRITON_DEFINE_ACCEPT(atomic_add_inst)
public:
// Factory taking the same arguments as the private constructor.
// name defaults to "" and next defaults to nullptr.
static instruction* create(value *ptr, value *val, value *msk, const std::string &name = "", instruction *next = nullptr);
};
class exp_inst: public builtin_inst {
private:

View File

@@ -312,6 +312,7 @@ std::vector<unsigned> align::populate_max_contiguous_gep(ir::getelementptr_inst*
if(rhs_cst_info[d].num_cst)
rvalue = lhs_max_contiguous[d];
result[d] = std::max(lvalue, rvalue);
// std::cout << "max contiguous: " << x->get_name() << " " << d << " " << result[d] << std::endl;
}
return add_to_cache(x, result, max_contiguous_);
}
@@ -416,8 +417,10 @@ std::vector<unsigned> align::populate_starting_multiple_gep(ir::getelementptr_in
auto lhs = populate_starting_multiple(x->get_operand(0));
auto rhs = populate_starting_multiple(x->get_operand(1));
std::vector<unsigned> result(lhs.size(), 1);
for(size_t d = 0; d < lhs.size(); d++)
for(size_t d = 0; d < lhs.size(); d++){
result[d] = gcd(lhs[d], rhs[d]);
// std::cout << "starting multiple: " << x->get_name() << " " << d << " " << result[d] << std::endl;
}
return add_to_cache(x, result, starting_multiple_);
}

View File

@@ -108,6 +108,10 @@ data_layout::data_layout(id_t id,
std::sort(order_.begin(), order_.end(), [&](unsigned a, unsigned b) {
return max_contiguous[a] > max_contiguous[b];
});
// std::cout << "===" << std::endl;
// std::cout << (*largest)->get_name() << std::endl;
// for(ir::value* x: ptr)
// std::cout << x->get_name() << std::endl;
}
}

View File

@@ -463,6 +463,20 @@ masked_load_inst* masked_load_inst::create(value *ptr, value *mask, value *false
return new masked_load_inst(ptr, mask, false_value, name, next);
}
// atomic add
// Constructs an atomic-add instruction on top of io_inst.
//  - The instruction's type is the element type that ptr's type points to
//    (assumes ptr has a pointer type -- TODO confirm callers guarantee this).
//  - It is tagged INST_ATOMIC_ADD and declared with three operand slots.
//  - Slots are filled in order: 0 = ptr, 1 = val, 2 = msk.
atomic_add_inst::atomic_add_inst(value *ptr, value *val, value *msk, const std::string &name, instruction *next)
  : io_inst(ptr->get_type()->get_pointer_element_ty(), INST_ATOMIC_ADD, 3, name, next) {
  // Operand table in slot order, so slot index and operand stay in lock-step.
  value *const operands[] = {ptr, val, msk};
  unsigned slot = 0;
  for(value *operand : operands)
    set_operand(slot++, operand);
}
// Factory for atomic_add_inst: heap-allocates a new instruction from the given
// pointer, value and mask operands and returns it as a base `instruction*`.
// The arguments are passed to the private constructor unchanged.
instruction* atomic_add_inst::create(value *ptr, value *val, value *msk, const std::string &name, instruction *next) {
  atomic_add_inst *created = new atomic_add_inst(ptr, val, msk, name, next);
  return created;
}
// store
store_inst::store_inst(value *ptr, value_id_t id, unsigned num_ops, const std::string &name, instruction *next)
: io_inst(type::get_void_ty(ptr->get_type()->get_context()), id, num_ops, name, next)
@@ -734,18 +748,6 @@ instruction* atomic_exch_inst::create(value *ptr, value *val, const std::string
return new atomic_exch_inst(ptr, val, name, next);
}
// atomic add
// Constructs an atomic-add instruction on top of builtin_inst.
//  - The instruction's type is the element type that ptr's type points to
//    (assumes ptr has a pointer type -- TODO confirm callers guarantee this).
//  - It is tagged INST_ATOMIC_ADD and declared with three operand slots.
//  - Slots are filled in order: 0 = ptr, 1 = val, 2 = msk.
atomic_add_inst::atomic_add_inst(value *ptr, value *val, value *msk, const std::string &name, instruction *next)
  : builtin_inst(ptr->get_type()->get_pointer_element_ty(), INST_ATOMIC_ADD, 3, name, next) {
  // Operand table in slot order, so slot index and operand stay in lock-step.
  value *const operands[] = {ptr, val, msk};
  unsigned slot = 0;
  for(value *operand : operands)
    set_operand(slot++, operand);
}
// Factory for atomic_add_inst: heap-allocates a new instruction from the given
// pointer, value and mask operands and returns it as a base `instruction*`.
// The arguments are passed to the private constructor unchanged.
instruction* atomic_add_inst::create(value *ptr, value *val, value *msk, const std::string &name, instruction *next) {
  atomic_add_inst *created = new atomic_add_inst(ptr, val, msk, name, next);
  return created;
}
// exp

View File

@@ -230,6 +230,7 @@ std::unique_ptr<driver::module> function::make_bin(ir::module &module,
}
peephole.run(module);
dce.run(module);
// ir::print(module, std::cout);
align.run(module);
axes.run(module);
layouts.run(module);
@@ -238,7 +239,6 @@ std::unique_ptr<driver::module> function::make_bin(ir::module &module,
if(allocation.allocated_size() > context->device()->max_shared_memory())
throw std::runtime_error("using too much shared memory");
barriers.run(module);
//ir::print(module, std::cout);
isel.visit(module, *llvm);
std::unique_ptr<driver::module> res(driver::module::create(context, std::move(llvm)));
return res;
@@ -364,6 +364,8 @@ std::string function::preheader() {
DECLARATION(float, 64, 64);
DECLARATION(half , 64, 64);
DECLARATION(half , 64, 128);
DECLARATION(half , 128, 64);
DECLARATION(half , 128, 128);
extern int atomic_cas(int*, int, int);