[codegen/selection] bugfix in code generation for reduction instructions (corrects the argument order of the warp-shuffle call used for the within-warp reduction)

This commit is contained in:
Philippe Tillet
2019-07-08 18:53:37 -07:00
parent f9db0449b7
commit fa3270dcf2
2 changed files with 4 additions and 3 deletions

View File

@@ -787,8 +787,8 @@ void selection::lower_tile_instruction(ir::instruction *ins, llvm::IRBuilder<> &
// reduce within warp
Value *shfl = Intrinsic::getDeclaration(builder.GetInsertBlock()->getModule(), Intrinsic::nvvm_shfl_sync_bfly_f32);
for (int i = 16; i > 0; i >>= 1){
Value *rhs = builder.CreateCall(shfl, {builder.getInt32(0x1f), partial,
builder.getInt32(i), builder.getInt32(0xffffffff)});
Value *rhs = builder.CreateCall(shfl, {builder.getInt32(0xffffffff), partial,
builder.getInt32(i), builder.getInt32(0x1f)});
partial = builder.CreateFAdd(partial, rhs);
}
// reduce within block