[codegen][coalesce] fixed stale users in cloned instructions
This commit is contained in:
@@ -49,7 +49,7 @@ public:
|
|||||||
grids(size_t num_warps, transform::coalesce* reorder);
|
grids(size_t num_warps, transform::coalesce* reorder);
|
||||||
ir::metaparameter* get_param(ir::value *value, const std::string &key) { return params_[value][key]; }
|
ir::metaparameter* get_param(ir::value *value, const std::string &key) { return params_[value][key]; }
|
||||||
unsigned get_param_group(ir::value *value, unsigned ax);
|
unsigned get_param_group(ir::value *value, unsigned ax);
|
||||||
fragment_t get_fragment(ir::value *value, unsigned ax) { return fragments_.at({value, ax}); }
|
fragment_t get_fragment(ir::value *value, unsigned ax);
|
||||||
void copy(ir::value *dst, ir::value *src);
|
void copy(ir::value *dst, ir::value *src);
|
||||||
void run(ir::module &mod);
|
void run(ir::module &mod);
|
||||||
unsigned get_num_threads();
|
unsigned get_num_threads();
|
||||||
|
@@ -182,6 +182,11 @@ unsigned grids::get_param_group(ir::value *value, unsigned ax) {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
grids::fragment_t grids::get_fragment(ir::value *value, unsigned ax) {
|
||||||
|
return fragments_.at({value, ax});
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
//TODO: This shouldn't exist!
|
//TODO: This shouldn't exist!
|
||||||
void grids::copy(ir::value *dst, ir::value *src) {
|
void grids::copy(ir::value *dst, ir::value *src) {
|
||||||
params_[dst] = params_[src];
|
params_[dst] = params_[src];
|
||||||
|
@@ -723,8 +723,9 @@ void selection::create_tile(ir::value *v, IRBuilder<> &builder,
|
|||||||
if(!v->get_type()->is_tile_ty() || !seen.insert(v).second)
|
if(!v->get_type()->is_tile_ty() || !seen.insert(v).second)
|
||||||
return;
|
return;
|
||||||
if(auto *user = dynamic_cast<ir::user*>(v))
|
if(auto *user = dynamic_cast<ir::user*>(v))
|
||||||
for(ir::value *op: user->ops())
|
for(ir::value *op: user->ops()){
|
||||||
create_tile(op, builder, seen, sh_mem_ptr);
|
create_tile(op, builder, seen, sh_mem_ptr);
|
||||||
|
}
|
||||||
LLVMContext &ctx = builder.getContext();
|
LLVMContext &ctx = builder.getContext();
|
||||||
auto shapes = v->get_type()->get_tile_shapes();
|
auto shapes = v->get_type()->get_tile_shapes();
|
||||||
unsigned pad = alloc_->is_ld_padded(v);
|
unsigned pad = alloc_->is_ld_padded(v);
|
||||||
|
@@ -61,18 +61,20 @@ void coalesce::run(ir::module &mod) {
|
|||||||
std::vector<unsigned> order(max_contiguous.size());
|
std::vector<unsigned> order(max_contiguous.size());
|
||||||
std::iota(order.begin(), order.end(), 0);
|
std::iota(order.begin(), order.end(), 0);
|
||||||
std::sort(order.begin(), order.end(), [&](unsigned a, unsigned b) { return max_contiguous[a] > max_contiguous[b]; } );
|
std::sort(order.begin(), order.end(), [&](unsigned a, unsigned b) { return max_contiguous[a] > max_contiguous[b]; } );
|
||||||
std::list<ir::instruction*> work_list;
|
std::list<std::pair<ir::instruction*, ir::instruction*>> work_list;
|
||||||
if(order != order_[i])
|
if(order != order_[i])
|
||||||
work_list.push_back(i);
|
work_list.push_back({i, nullptr});
|
||||||
// rematerialize recursively
|
// rematerialize recursively
|
||||||
while(!work_list.empty()) {
|
while(!work_list.empty()) {
|
||||||
ir::instruction* current = work_list.back();
|
auto pair = work_list.back();
|
||||||
order_[current] = order;
|
ir::instruction* cloned = pair.first;
|
||||||
|
ir::instruction* original = pair.second;
|
||||||
|
order_[cloned] = order;
|
||||||
work_list.pop_back();
|
work_list.pop_back();
|
||||||
for(ir::value *op: current->ops()) {
|
for(ir::value *op: cloned->ops()) {
|
||||||
ir::instruction* i_op = dynamic_cast<ir::instruction*>(op);
|
ir::instruction* i_op = dynamic_cast<ir::instruction*>(op);
|
||||||
if(replaced.find(i_op) != replaced.end()){
|
if(replaced.find(i_op) != replaced.end()){
|
||||||
current->replace_uses_of_with(i_op, replaced.at(i_op));
|
cloned->replace_uses_of_with(i_op, replaced.at(i_op));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if(!i_op)
|
if(!i_op)
|
||||||
@@ -90,17 +92,19 @@ void coalesce::run(ir::module &mod) {
|
|||||||
continue;
|
continue;
|
||||||
if(auto* ld = dynamic_cast<ir::load_inst*>(i_op))
|
if(auto* ld = dynamic_cast<ir::load_inst*>(i_op))
|
||||||
n_op = ir::copy_to_shared_inst::create(ld);
|
n_op = ir::copy_to_shared_inst::create(ld);
|
||||||
// not a load; rematerialize and recurse
|
// not a load; rematerialize and add to worklist
|
||||||
else {
|
else {
|
||||||
n_op = i_op->clone();
|
n_op = i_op->clone();
|
||||||
work_list.push_back(n_op);
|
work_list.push_back({n_op, i_op});
|
||||||
}
|
}
|
||||||
n_op = builder.insert(n_op);
|
n_op = builder.insert(n_op);
|
||||||
replaced.insert({i_op, n_op});
|
replaced.insert({i_op, n_op});
|
||||||
order_[n_op] = order;
|
order_[n_op] = order;
|
||||||
align_->copy(n_op, i_op);
|
align_->copy(n_op, i_op);
|
||||||
// mem_->copy(n_op, i_op);
|
mem_->copy(n_op, i_op);
|
||||||
current->replace_uses_of_with(i_op, n_op);
|
if(original)
|
||||||
|
n_op->erase_use(original);
|
||||||
|
cloned->replace_uses_of_with(i_op, n_op);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -66,7 +66,7 @@ void user::replace_uses_of_with(value *before, value *after) {
|
|||||||
if(ops_[i] == before)
|
if(ops_[i] == before)
|
||||||
ops_[i] = after;
|
ops_[i] = after;
|
||||||
after->add_use(this);
|
after->add_use(this);
|
||||||
erase_use(this);
|
before->erase_use(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user