[CODEGEN] Fixed performance regression on vectorized loads (#259)

This commit is contained in:
Philippe Tillet
2021-09-02 01:07:31 -07:00
committed by GitHub
parent 768e0ded28
commit 8a882b215f

View File

@@ -586,12 +586,15 @@ void generator::visit_load_inst(ir::load_inst* x){
Type* ty = cvt(op->get_type()->get_scalar_ty()->get_pointer_element_ty()); Type* ty = cvt(op->get_type()->get_scalar_ty()->get_pointer_element_ty());
// compute vector width // compute vector width
size_t vec = 1; size_t vec = 1;
if(op->get_type()->is_block_ty() && op->get_type()->get_tile_rank() > 1){ if(op->get_type()->is_block_ty()){
auto ord = ords_.at(op); auto ord = ords_.at(op);
size_t aln = alignment_->get(op, ord[0]); size_t aln = alignment_->get(op, ord[0]);
size_t nts = layouts_->get(x)->to_scanline()->nts(ord[0]); auto layout = layouts_->get(x)->to_scanline();
if(layout){
size_t nts = layout->nts(ord[0]);
vec = std::min(nts, aln); vec = std::min(nts, aln);
} }
}
// code generation // code generation
auto idxs = idxs_.at(x); auto idxs = idxs_.at(x);
for(size_t i = 0; i < idxs.size(); i += vec){ for(size_t i = 0; i < idxs.size(); i += vec){