[Triton-MLIR][BACKEND] Minor fixes of shared memory in ReduceOpConversion (#924)

This commit is contained in:
Qingyi Liu
2022-11-29 11:50:31 +08:00
committed by GitHub
parent c87fbf886e
commit 661be523c0
3 changed files with 56 additions and 67 deletions

View File

@@ -58,11 +58,6 @@ SmallVector<SmallVector<unsigned>> ReduceOpHelper::getScratchConfigsFast() {
unsigned numWarps = triton::gpu::TritonGPUDialect::getNumWarps(mod);
smemShapes[1].push_back(numWarps * 32);
/// FIXME(Qingyi): This requirement is actually not necessary, because it is
/// always smaller than smemShapes[0].
/// shared memory block2:
smemShapes[2] = convertType<unsigned>(getSrcShape());
smemShapes[2].erase(smemShapes[2].begin() + axis);
return smemShapes;
}