[FRONTEND] Added volatile flag for load (#407)

This commit is contained in:
Philippe Tillet
2021-12-30 22:33:24 -08:00
committed by GitHub
parent 985798f101
commit 03f1256f60
11 changed files with 51 additions and 37 deletions

View File

@@ -130,9 +130,9 @@ public:
value *create_xor(value *lhs, value *rhs); value *create_xor(value *lhs, value *rhs);
value *create_or(value *lhs, value *rhs); value *create_or(value *lhs, value *rhs);
// Input/Output // Input/Output
value *create_load(value *arg, load_inst::CACHE_MODIFIER cache); value *create_load(value *arg, load_inst::CACHE_MODIFIER cache, bool is_volatile);
value *create_store(value *ptr, value *val); value *create_store(value *ptr, value *val);
value *create_masked_load(value *arg, value *mask, value *false_value, load_inst::CACHE_MODIFIER cache); value *create_masked_load(value *arg, value *mask, value *false_value, load_inst::CACHE_MODIFIER cache, bool is_volatile);
value *create_masked_store(value *ptr, value *val, value *mask); value *create_masked_store(value *ptr, value *val, value *mask);
// Block instruction // Block instruction
value *create_splat(value *arg, const type::block_shapes_t &shapes); value *create_splat(value *arg, const type::block_shapes_t &shapes);

View File

@@ -68,7 +68,7 @@ struct dispatch{
static ir::value *cast(ir::value *input, ir::type *type, ir::builder *builder); static ir::value *cast(ir::value *input, ir::type *type, ir::builder *builder);
// memory operators // memory operators
static ir::value *load(ir::value* ptr, ir::value* mask, ir::value* other, const std::string &cache, ir::builder *builder); static ir::value *load(ir::value* ptr, ir::value* mask, ir::value* other, const std::string &cache, int is_volatile, ir::builder *builder);
static ir::value *store(ir::value* ptr, ir::value *value, ir::value *mask, ir::builder *builder); static ir::value *store(ir::value* ptr, ir::value *value, ir::value *mask, ir::builder *builder);
static ir::value *atomic_cas(ir::value* ptr, ir::value *cmp, ir::value *val, ir::builder *builder); static ir::value *atomic_cas(ir::value* ptr, ir::value *cmp, ir::value *val, ir::builder *builder);
static ir::value *atomic_add(ir::value* ptr, ir::value *val, ir::value *msk, ir::builder *builder); static ir::value *atomic_add(ir::value* ptr, ir::value *val, ir::value *msk, ir::builder *builder);

View File

@@ -402,8 +402,11 @@ public:
}; };
CACHE_MODIFIER get_cache_modifier() const { return cache_; } CACHE_MODIFIER get_cache_modifier() const { return cache_; }
// Accessor for the volatile flag stored on this load instruction.
bool get_is_volatile() const {
  return is_volatile_;
}
protected: protected:
load_inst(value *ptr, value_id_t id, unsigned num_ops, CACHE_MODIFIER cache, load_inst(value *ptr, value_id_t id, unsigned num_ops, CACHE_MODIFIER cache,
bool is_volatile,
const std::string &name = "", instruction *next = nullptr); const std::string &name = "", instruction *next = nullptr);
std::string get_cache_modifier_repr() const { std::string get_cache_modifier_repr() const {
if (cache_ == CA) return ".ca"; if (cache_ == CA) return ".ca";
@@ -412,20 +415,24 @@ protected:
} }
CACHE_MODIFIER cache_; CACHE_MODIFIER cache_;
std::string get_volatile_repr() {
return is_volatile_ ? ".volatile" : "";
}
bool is_volatile_;
private: private:
static type *get_pointee_type(type *ty); static type *get_pointee_type(type *ty);
}; };
// unmasked load // unmasked load
class unmasked_load_inst: public load_inst { class unmasked_load_inst: public load_inst {
private: private:
std::string repr_impl() const { return "unmasked_load" + get_cache_modifier_repr(); } std::string repr_impl() const { return "unmasked_load" + get_cache_modifier_repr(); }
unmasked_load_inst(value *ptr, load_inst::CACHE_MODIFIER cache, const std::string &name, instruction *next); unmasked_load_inst(value *ptr, load_inst::CACHE_MODIFIER cache, bool is_volatile, const std::string &name, instruction *next);
public: public:
static unmasked_load_inst* create(value *ptr, static unmasked_load_inst* create(value *ptr,
CACHE_MODIFIER cache, CACHE_MODIFIER cache, bool is_volatile,
const std::string &name = "", const std::string &name = "",
instruction *next = nullptr); instruction *next = nullptr);
_TRITON_DEFINE_CLONE(unmasked_load_inst) _TRITON_DEFINE_CLONE(unmasked_load_inst)
@@ -436,7 +443,7 @@ public:
class masked_load_inst: public load_inst { class masked_load_inst: public load_inst {
private: private:
std::string repr_impl() const { return "masked_load" + get_cache_modifier_repr(); } std::string repr_impl() const { return "masked_load" + get_cache_modifier_repr(); }
masked_load_inst(value *ptr, value *mask, value *false_value, load_inst::CACHE_MODIFIER cache, masked_load_inst(value *ptr, value *mask, value *false_value, load_inst::CACHE_MODIFIER cache, bool is_volatile,
const std::string &name, instruction *next); const std::string &name, instruction *next);
public: public:
@@ -445,7 +452,7 @@ public:
value *get_false_value_operand() { return get_operand(2); } value *get_false_value_operand() { return get_operand(2); }
// factory method // factory method
static masked_load_inst* create(value *ptr, value *mask, value *false_value, static masked_load_inst* create(value *ptr, value *mask, value *false_value,
CACHE_MODIFIER cache, CACHE_MODIFIER cache, bool is_volatile,
const std::string &name = "", const std::string &name = "",
instruction *next = nullptr); instruction *next = nullptr);
_TRITON_DEFINE_CLONE(masked_load_inst) _TRITON_DEFINE_CLONE(masked_load_inst)

View File

@@ -629,7 +629,11 @@ void generator::visit_load_inst(ir::load_inst* x){
// ----- // -----
std::ostringstream asm_oss; std::ostringstream asm_oss;
asm_oss << "@$" << n_words; // predicate asm_oss << "@$" << n_words; // predicate
asm_oss << " ld.global"; asm_oss << " ld";
// Emit the optional .volatile qualifier directly after `ld`, ahead of the
// .global state space.
// NOTE(review): the cache modifiers (.ca/.cg) are still appended afterwards
// even for volatile loads — confirm PTX accepts that combination.
if(x->get_is_volatile())
asm_oss << ".volatile";
asm_oss << ".global";
if (x->get_cache_modifier() == ir::load_inst::CA) asm_oss << ".ca"; if (x->get_cache_modifier() == ir::load_inst::CA) asm_oss << ".ca";
if (x->get_cache_modifier() == ir::load_inst::CG) asm_oss << ".cg"; if (x->get_cache_modifier() == ir::load_inst::CG) asm_oss << ".cg";
if(n_words > 1) if(n_words > 1)

View File

@@ -207,7 +207,8 @@ bool peephole::rewrite_select_masked_load(ir::instruction *value, ir::builder& b
ir::value* new_load = builder.create_masked_load(if_value->get_pointer_operand(), ir::value* new_load = builder.create_masked_load(if_value->get_pointer_operand(),
if_value->get_mask_operand(), if_value->get_mask_operand(),
select->get_else_value_op(), select->get_else_value_op(),
if_value->get_cache_modifier()); if_value->get_cache_modifier(),
if_value->get_is_volatile());
select->replace_all_uses_with(new_load); select->replace_all_uses_with(new_load);
return true; return true;
} }

View File

@@ -178,7 +178,7 @@ void pipeline::run(ir::module &mod) {
false_value = remat_false_value; false_value = remat_false_value;
} else } else
false_value = builder.create_splat(ir::undef_value::get(ty->get_scalar_ty()), ty->get_block_shapes()); false_value = builder.create_splat(ir::undef_value::get(ty->get_scalar_ty()), ty->get_block_shapes());
first_loads[0] = builder.create_masked_load(first_ptrs[0], first_masks[0], false_value, load->get_cache_modifier()); first_loads[0] = builder.create_masked_load(first_ptrs[0], first_masks[0], false_value, load->get_cache_modifier(), load->get_is_volatile());
for (int stage = 1; stage < num_stages-1; ++stage) { for (int stage = 1; stage < num_stages-1; ++stage) {
// mask is the loop condition of the previous iteration // mask is the loop condition of the previous iteration
@@ -193,7 +193,7 @@ void pipeline::run(ir::module &mod) {
first_masks[stage] = builder.create_and(first_masks[stage], remat_mask); first_masks[stage] = builder.create_and(first_masks[stage], remat_mask);
false_value = remat_false_value; false_value = remat_false_value;
} }
first_loads[stage] = builder.create_masked_load(first_ptrs[stage], first_masks[stage], false_value, load->get_cache_modifier()); first_loads[stage] = builder.create_masked_load(first_ptrs[stage], first_masks[stage], false_value, load->get_cache_modifier(), load->get_is_volatile());
} }
// create new phis for induction variables // create new phis for induction variables
@@ -222,7 +222,7 @@ void pipeline::run(ir::module &mod) {
next_mask = builder.create_and(next_mask, remat_mask); next_mask = builder.create_and(next_mask, remat_mask);
false_value = remat_false_value; false_value = remat_false_value;
} }
ir::value* next_load = builder.create_masked_load(next_ptr, next_mask, false_value, load->get_cache_modifier()); ir::value* next_load = builder.create_masked_load(next_ptr, next_mask, false_value, load->get_cache_modifier(), load->get_is_volatile());
// phi node // phi node
@@ -257,7 +257,7 @@ void pipeline::run(ir::module &mod) {
} }
else else
false_value = builder.create_splat(ir::undef_value::get(ty->get_scalar_ty()), ty->get_block_shapes()); false_value = builder.create_splat(ir::undef_value::get(ty->get_scalar_ty()), ty->get_block_shapes());
ir::value* first_load = builder.create_masked_load(first_ptr, first_mask, false_value, load->get_cache_modifier()); ir::value* first_load = builder.create_masked_load(first_ptr, first_mask, false_value, load->get_cache_modifier(), load->get_is_volatile());
// pre-fetch next iteration // pre-fetch next iteration
builder.set_insert_point(block->get_inst_list().back()); builder.set_insert_point(block->get_inst_list().back());
ir::value* next_ptr = ptr->get_value_for_block(block); ir::value* next_ptr = ptr->get_value_for_block(block);
@@ -268,7 +268,7 @@ void pipeline::run(ir::module &mod) {
next_mask = builder.create_and(next_mask, remat_mask); next_mask = builder.create_and(next_mask, remat_mask);
false_value = remat_false_value; false_value = remat_false_value;
} }
ir::value* next_load = builder.create_masked_load(next_ptr, next_mask, false_value, load->get_cache_modifier()); ir::value* next_load = builder.create_masked_load(next_ptr, next_mask, false_value, load->get_cache_modifier(), load->get_is_volatile());
// phi node // phi node
builder.set_insert_point(block->get_first_non_phi()); builder.set_insert_point(block->get_first_non_phi());
ir::phi_node* new_load = builder.create_phi(ty, 2); ir::phi_node* new_load = builder.create_phi(ty, 2);
@@ -327,4 +327,4 @@ void pipeline::run(ir::module &mod) {
} }
} }
} }

View File

@@ -273,16 +273,16 @@ DEFINE_FCMP_INSTR(UNE, cmp_pred_t::FCMP_UNE)
// load/store instructions // load/store instructions
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
value *builder::create_load(value *ptr, load_inst::CACHE_MODIFIER cache){ value *builder::create_load(value *ptr, load_inst::CACHE_MODIFIER cache, bool is_volatile){
return insert(unmasked_load_inst::create(ptr, cache)); return insert(unmasked_load_inst::create(ptr, cache, is_volatile));
} }
value *builder::create_store(value *ptr, value *val){ value *builder::create_store(value *ptr, value *val){
return insert(unmasked_store_inst::create(ptr, val)); return insert(unmasked_store_inst::create(ptr, val));
} }
value *builder::create_masked_load(value *ptr, value *mask, value *false_value, load_inst::CACHE_MODIFIER cache){ value *builder::create_masked_load(value *ptr, value *mask, value *false_value, load_inst::CACHE_MODIFIER cache, bool is_volatile){
return insert(masked_load_inst::create(ptr, mask, false_value, cache)); return insert(masked_load_inst::create(ptr, mask, false_value, cache, is_volatile));
} }
value *builder::create_masked_store(value *ptr, value *val, value *mask){ value *builder::create_masked_store(value *ptr, value *val, value *mask){

View File

@@ -500,7 +500,7 @@ ir::value *dispatch::cast(ir::value *input, ir::type *dst_ty, ir::builder *build
// Memory Operators // Memory Operators
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
ir::value *dispatch::load(ir::value* ptr, ir::value* mask, ir::value* other, const std::string &cache_modifier, ir::builder* builder) { ir::value *dispatch::load(ir::value* ptr, ir::value* mask, ir::value* other, const std::string &cache_modifier, int is_volatile, ir::builder* builder) {
if(!ptr->get_type()->get_scalar_ty()->is_pointer_ty()) if(!ptr->get_type()->get_scalar_ty()->is_pointer_ty())
throw semantic_error("Pointer argument of load instruction is " + ptr->get_type()->repr()); throw semantic_error("Pointer argument of load instruction is " + ptr->get_type()->repr());
if(ptr->get_type()->is_block_ty()){ if(ptr->get_type()->is_block_ty()){
@@ -520,6 +520,7 @@ ir::value *dispatch::load(ir::value* ptr, ir::value* mask, ir::value* other, con
ptr_ty = pointer_type::get(elt_ty, ptr_ty->get_pointer_address_space()); ptr_ty = pointer_type::get(elt_ty, ptr_ty->get_pointer_address_space());
ptr = dispatch::cast(ptr, ptr_ty, builder); ptr = dispatch::cast(ptr, ptr_ty, builder);
} }
// cache modifier
load_inst::CACHE_MODIFIER cache = load_inst::NONE; // default load_inst::CACHE_MODIFIER cache = load_inst::NONE; // default
if (!cache_modifier.empty()) { if (!cache_modifier.empty()) {
if (cache_modifier == ".ca") if (cache_modifier == ".ca")
@@ -530,7 +531,7 @@ ir::value *dispatch::load(ir::value* ptr, ir::value* mask, ir::value* other, con
throw std::runtime_error(std::string("Cache modifier ") + cache_modifier + " not supported"); throw std::runtime_error(std::string("Cache modifier ") + cache_modifier + " not supported");
} }
if (!mask && !other) if (!mask && !other)
return builder->create_load(ptr, cache); return builder->create_load(ptr, cache, is_volatile);
if (!mask) if (!mask)
throw std::runtime_error("`other` cannot be provided without `mask`"); throw std::runtime_error("`other` cannot be provided without `mask`");
auto shape = ptr->get_type()->get_block_shapes(); auto shape = ptr->get_type()->get_block_shapes();
@@ -539,7 +540,7 @@ ir::value *dispatch::load(ir::value* ptr, ir::value* mask, ir::value* other, con
if(ptr->get_type()->is_block_ty()) if(ptr->get_type()->is_block_ty())
other = builder->create_splat(other, ptr->get_type()->get_block_shapes()); other = builder->create_splat(other, ptr->get_type()->get_block_shapes());
} }
return builder->create_masked_load(ptr, mask, other, cache); return builder->create_masked_load(ptr, mask, other, cache, is_volatile);
} }
ir::value *dispatch::store(ir::value* ptr, ir::value *val, ir::value* mask, ir::builder *builder) { ir::value *dispatch::store(ir::value* ptr, ir::value *val, ir::value* mask, ir::builder *builder) {

View File

@@ -434,8 +434,8 @@ io_inst::io_inst(type *ty, value_id_t id, unsigned num_ops, const std::string &n
{ } { }
// load_inst // load_inst
load_inst::load_inst(value *ptr, value_id_t id, unsigned num_ops, load_inst::CACHE_MODIFIER cache, const std::string &name, instruction *next) load_inst::load_inst(value *ptr, value_id_t id, unsigned num_ops, load_inst::CACHE_MODIFIER cache, bool is_volatile, const std::string &name, instruction *next)
: io_inst(get_pointee_type(ptr->get_type()), id, num_ops, name, next), cache_(cache) : io_inst(get_pointee_type(ptr->get_type()), id, num_ops, name, next), cache_(cache), is_volatile_(is_volatile)
{ } { }
// load // load
@@ -448,35 +448,35 @@ type *load_inst::get_pointee_type(type *ty) {
} }
// unmasked_load // unmasked_load
unmasked_load_inst::unmasked_load_inst(value *ptr, load_inst::CACHE_MODIFIER cache, const std::string &name, instruction *next) unmasked_load_inst::unmasked_load_inst(value *ptr, load_inst::CACHE_MODIFIER cache, bool is_volatile, const std::string &name, instruction *next)
: load_inst(ptr, INST_UNMASKED_LOAD, 1, cache, name, next) { : load_inst(ptr, INST_UNMASKED_LOAD, 1, cache, is_volatile, name, next) {
set_operand(0, ptr); set_operand(0, ptr);
} }
unmasked_load_inst* unmasked_load_inst::create(value *ptr, load_inst::CACHE_MODIFIER cache, const std::string &name, instruction *next) { unmasked_load_inst* unmasked_load_inst::create(value *ptr, load_inst::CACHE_MODIFIER cache, bool is_volatile, const std::string &name, instruction *next) {
return new unmasked_load_inst(ptr, cache, name, next); return new unmasked_load_inst(ptr, cache, is_volatile, name, next);
} }
// masked load // masked load
masked_load_inst::masked_load_inst(value *ptr, value *mask, value *false_value, load_inst::CACHE_MODIFIER cache, masked_load_inst::masked_load_inst(value *ptr, value *mask, value *false_value, load_inst::CACHE_MODIFIER cache, bool is_volatile,
const std::string &name, instruction *next) const std::string &name, instruction *next)
: load_inst(ptr, INST_MASKED_LOAD, 3, cache, name, next) { : load_inst(ptr, INST_MASKED_LOAD, 3, cache, is_volatile, name, next) {
set_operand(0, ptr); set_operand(0, ptr);
set_operand(1, mask); set_operand(1, mask);
set_operand(2, false_value); set_operand(2, false_value);
} }
masked_load_inst* masked_load_inst::create(value *ptr, value *mask, value *false_value, masked_load_inst* masked_load_inst::create(value *ptr, value *mask, value *false_value,
load_inst::CACHE_MODIFIER cache, load_inst::CACHE_MODIFIER cache, bool is_volatile,
const std::string &name, instruction *next) { const std::string &name, instruction *next) {
return new masked_load_inst(ptr, mask, false_value, cache, name, next); return new masked_load_inst(ptr, mask, false_value, cache, is_volatile, name, next);
} }
// masked load async // masked load async
masked_load_async_inst::masked_load_async_inst(value *ptr, value *mask, value *false_value, masked_load_async_inst::masked_load_async_inst(value *ptr, value *mask, value *false_value,
load_inst::CACHE_MODIFIER cache, load_inst::CACHE_MODIFIER cache,
const std::string &name, instruction *next) const std::string &name, instruction *next)
: load_inst(ptr, INST_MASKED_LOAD_ASYNC, 3, cache, name, next) { : load_inst(ptr, INST_MASKED_LOAD_ASYNC, 3, cache, false, name, next) {
set_operand(0, ptr); set_operand(0, ptr);
set_operand(1, mask); set_operand(1, mask);
set_operand(2, false_value); set_operand(2, false_value);

View File

@@ -600,7 +600,8 @@ void init_triton_ir(py::module &&m) {
py::class_<ir::constant_int, ir::constant>(m, "constant_int") py::class_<ir::constant_int, ir::constant>(m, "constant_int")
.def_property_readonly("value", &ir::constant_int::get_value) .def_property_readonly("value", &ir::constant_int::get_value)
.def("__int__", [](ir::constant_int *self) { return self->get_value(); }); .def("__int__", [](ir::constant_int *self) { return self->get_value(); })
.def("__bool__", [](ir::constant_int *self) { return self->get_value(); });
py::class_<ir::constant_fp, ir::constant>(m, "constant_float") py::class_<ir::constant_fp, ir::constant>(m, "constant_float")
.def_property_readonly("value", &ir::constant_fp::get_value); .def_property_readonly("value", &ir::constant_fp::get_value);

View File

@@ -519,7 +519,7 @@ def dot(input, other, _builder=None):
@builtin @builtin
def load(pointer, mask=None, other=None, cache_modifier="", _builder=None): def load(pointer, mask=None, other=None, cache_modifier="", volatile=False, _builder=None):
""" """
Return a block of data whose values are, elementwise, loaded from memory at location defined by :code:`pointer`. Return a block of data whose values are, elementwise, loaded from memory at location defined by :code:`pointer`.
@@ -536,7 +536,7 @@ def load(pointer, mask=None, other=None, cache_modifier="", _builder=None):
:param cache_modifier: changes cache option in nvidia ptx :param cache_modifier: changes cache option in nvidia ptx
:type cache_modifier: str, optional :type cache_modifier: str, optional
""" """
return frontend.load(pointer, mask, other, cache_modifier, _builder) return frontend.load(pointer, mask, other, cache_modifier, volatile, _builder)
@builtin @builtin