[OPS] Faster and cleaner block-sparse implementation (#311)

This commit is contained in:
Philippe Tillet
2021-09-27 18:25:16 -07:00
committed by GitHub
parent c3756d1c33
commit 2c287544cb
8 changed files with 518 additions and 582 deletions

View File

@@ -2500,9 +2500,8 @@ void generator::visit_layout_shared(analysis::shared_layout* layout) {
// Visits every instruction of one IR basic block.
//
// The builder is first positioned on the LLVM block that `bbs_` currently
// associates with `block`; each instruction in `block`'s instruction list is
// then passed to visit_value() exactly once, in list order. Finally the
// builder's current insertion block is written back into `bbs_[block]`.
//
// `block`: IR basic block whose instruction list is walked.
void generator::visit_basic_block(ir::basic_block * block) {
  BasicBlock *parent = bbs_[block];
  builder_->SetInsertPoint(parent);
  // One pass only: a second, nested loop over the same list would re-visit
  // the whole block once per instruction.
  for(ir::instruction *i: block->get_inst_list())
    visit_value(i);
  // Update ir bb -> llvm bb mapping
  // NOTE(review): presumably visiting can leave the builder in a different
  // LLVM block than `parent`, hence the refresh - confirm against visit_value.
  bbs_[block] = builder_->GetInsertBlock();
}

View File

@@ -117,7 +117,7 @@ void coalesce::run(ir::module &mod) {
queue.push_back(inst_op);
}
}
if(in_contig.empty() || out_contig==in_contig)
if(in_contig.size() <= 1 || out_contig==in_contig)
continue;
builder.set_insert_point_after(val_inst);
auto new_val = builder.insert(ir::cvt_layout_inst::create(val_inst));

View File

@@ -188,7 +188,7 @@ void pipeline::run(ir::module &mod) {
builder.set_insert_point(block->get_first_non_phi());
std::map<ir::phi_node*, ir::value*> load_ivs;
std::map<ir::phi_node*, ir::value*> next_load_ivs;
for (ir::phi_node* iv : induction_vars) {
for (auto& [iv, val] : prev_phi_vals) {
ir::phi_node* pn = builder.create_phi(iv->get_type(), 2);
pn->add_incoming(prev_phi_vals[iv], header);
load_ivs[iv] = pn;
@@ -198,7 +198,8 @@ void pipeline::run(ir::module &mod) {
// pre-fetch next iteration
builder.set_insert_point(block->get_inst_list().back());
ir::value* next_ptr = ptr->get_value_for_block(block);
// ir::value* next_ptr = ptr->get_value_for_block(block);
ir::value* next_ptr = rematerialize_vals(builder, block, ptr->get_value_for_block(block), load_ivs);
ir::value* next_mask = builder.create_splat(
rematerialize_vals(builder, block, block_cond, load_ivs), ty->get_block_shapes());
if (auto* masked_load = dynamic_cast<ir::masked_load_inst*>(load)) {