[OPTIMIZER] Fix Num in AsyncWaitOp generated by the pipeline pass (#72)

This commit is contained in:
Da Yan
2022-08-23 06:58:10 +08:00
committed by GitHub
parent 10ba51c3bb
commit 92ef552a54
2 changed files with 5 additions and 4 deletions

View File

@@ -337,8 +337,9 @@ scf::ForOp LoopPipeliner::createNewForOp() {
 for (Operation &op : forOp.getBody()->without_terminator()) {
 if (!asyncWaitInserted && isDirectUserOfAsyncLoad(op)) {
 asyncWaitInserted = true;
+assert(numStages >= 2);
 builder.create<triton::gpu::AsyncWaitOp>(op.getLoc(),
-loads.size() * (numStages - 1));
+loads.size() * (numStages - 2));
 }
 Operation *newOp = builder.clone(op, mapping);
 // update mapping of results