[code generation] added double-buffering

This commit is contained in:
Philippe Tillet
2019-02-12 11:47:52 -05:00
parent e45d6bbb60
commit 41aad4800c
8 changed files with 204 additions and 47 deletions

View File

@@ -384,17 +384,42 @@ void selection::create_tile(ir::value *v, IRBuilder<> &builder,
// shared copy
PointerType *ptr_ty = ty->getPointerTo(sh_mem_ptr->getType()->getPointerAddressSpace());
if(dynamic_cast<ir::copy_to_shared_inst*>(v)) {
size_t offset = alloc_->get_offset(v);
Value *ptr = builder.CreateGEP(sh_mem_ptr, builder.getInt32(offset));
ptr = builder.CreateBitCast(ptr, ptr_ty);
tmap_.insert({v, new shared_tile(ty, shapes, ptr, builder)});
if(buffer_info_->get_reference(v) == nullptr){
size_t offset = alloc_->get_offset(v);
Value *ptr = builder.CreateGEP(sh_mem_ptr, builder.getInt32(offset));
ptr = builder.CreateBitCast(ptr, ptr_ty);
tmap_.insert({v, new shared_tile(ty, shapes, ptr, builder)});
}
}
// phi-node (double-buffering)
else if(auto *phi = dynamic_cast<ir::phi_node*>(v)) {
BasicBlock *parent = (BasicBlock*)vmap_[phi->get_parent()];
builder.SetInsertPoint(parent);
unsigned id_pre = 0, id_loop = 1;
if(phi->get_incoming_block(0) == phi->get_parent())
std::swap(id_pre, id_loop);
ir::value *pre_value = phi->get_incoming_value(id_pre);
ir::value *loop_value = phi->get_incoming_value(id_loop);
BasicBlock *pre_block = (BasicBlock*)vmap_[phi->get_incoming_block(id_pre)];
BasicBlock *loop_block = (BasicBlock*)vmap_[phi->get_incoming_block(id_loop)];
if(parent->empty())
builder.SetInsertPoint(parent);
else
builder.SetInsertPoint(&*parent->getFirstInsertionPt());
PHINode *ptr = builder.CreatePHI(ptr_ty, 2);
// offset
PHINode *offset = builder.CreatePHI(builder.getInt32Ty(), 2);
Value *next_offset = builder.CreateNeg(offset);
offset->addIncoming(builder.getInt32(alloc_->get_num_bytes(phi) / 2 / 4), pre_block);
offset->addIncoming(next_offset, loop_block);
// next pointer
Value *pre_ptr = builder.CreateGEP(sh_mem_ptr, builder.getInt32(alloc_->get_offset(phi)));
pre_ptr = builder.CreateBitCast(pre_ptr, ptr->getType());
Value *next_ptr = builder.CreateGEP(ptr, offset);
ptr->addIncoming(pre_ptr, pre_block);
ptr->addIncoming(next_ptr, loop_block);
tmap_.insert({v, new shared_tile(ty, shapes, ptr, builder)});
tmap_.insert({pre_value, new shared_tile(ty, shapes, pre_ptr, builder)});
tmap_.insert({loop_value, new shared_tile(ty, shapes, next_ptr, builder)});
}
else
throw std::runtime_error("unknown shared memory tile");
@@ -633,46 +658,21 @@ void selection::run(ir::module &src, Module &dst){
init_grids(fn, dst_builder, sh_mem_ptr);
// iterate through block
for(ir::basic_block *block: fn->blocks()) {
dst_builder.SetInsertPoint((BasicBlock*)vmap_[block]);
for(ir::instruction *i: block->get_inst_list())
BasicBlock *parent = (BasicBlock*)vmap_[block];
dst_builder.SetInsertPoint(parent);
for(ir::instruction *i: block->get_inst_list()){
if(dynamic_cast<ir::phi_node*>(i))
dst_builder.SetInsertPoint(&*parent->getFirstInsertionPt());
lower_instruction(i, dst_builder);
if(dynamic_cast<ir::phi_node*>(i))
dst_builder.SetInsertPoint(parent);
}
}
// add phi operands
for(ir::basic_block *block: fn->blocks())
for(ir::instruction *inst: block->get_inst_list())
if(auto *phi = dynamic_cast<ir::phi_node*>(inst)){
if(buffer_info_->is_shared(phi)) {
BasicBlock *parent = (BasicBlock*)vmap_[phi->get_parent()];
unsigned id_pre = 0, id_loop = 1;
if(phi->get_incoming_block(0) == phi->get_parent())
std::swap(id_pre, id_loop);
ir::value *pre_value = phi->get_incoming_value(id_pre);
ir::value *loop_value = phi->get_incoming_value(id_loop);
BasicBlock *pre_block = (BasicBlock*)vmap_[phi->get_incoming_block(id_pre)];
BasicBlock *loop_block = (BasicBlock*)vmap_[phi->get_incoming_block(id_loop)];
int pre_offset = alloc_->get_offset(pre_value);
int loop_offset = alloc_->get_offset(loop_value);
dst_builder.SetInsertPoint(&*parent->getFirstInsertionPt());
PHINode *ptr = (PHINode*)(((shared_tile*)tmap_.at(phi))->get_pointer());
// offset
PHINode *offset = dst_builder.CreatePHI(dst_builder.getInt32Ty(), 2);
dst_builder.SetInsertPoint(parent->getFirstNonPHI());
Value *next_offset = dst_builder.CreateNeg(offset);
offset->addIncoming(dst_builder.getInt32((loop_offset - pre_offset)/4), pre_block);
offset->addIncoming(next_offset, loop_block);
// next pointer
Value *pre_ptr = dst_builder.CreateGEP(sh_mem_ptr, dst_builder.getInt32(pre_offset));
pre_ptr = dst_builder.CreateBitCast(pre_ptr, ptr->getType());
Value *next_ptr = dst_builder.CreateGEP(ptr, offset);
ptr->addIncoming(pre_ptr, pre_block);
ptr->addIncoming(next_ptr, loop_block);
// barrier
Function *barrier = Intrinsic::getDeclaration(dst_fn->getParent(), Intrinsic::nvvm_barrier0);
dst_builder.SetInsertPoint(pre_block->getTerminator());
dst_builder.CreateCall(barrier, {});
dst_builder.SetInsertPoint(loop_block->getTerminator());
dst_builder.CreateCall(barrier, {});
continue;
}
for(unsigned n = 0; n < phi->get_num_incoming(); n++){