[Triton-MLIR][BACKEND] Pass compute capability from the frontend and code cleanup (#961)

This commit is contained in:
Keren Zhou
2022-12-07 15:03:46 -08:00
committed by GitHub
parent 4eab9dcedf
commit 18e683d9bb
4 changed files with 17 additions and 20 deletions

View File

@@ -4681,8 +4681,7 @@ private:
 // capability does not support async copy, then we do decompose
 if (triton::gpu::InsertSliceAsyncOp::getEligibleLoadByteWidth(
 computeCapability)
-.contains(byteWidth) &&
-computeCapability >= 80)
+.contains(byteWidth))
 return;
 // load
@@ -4716,13 +4715,8 @@ private:
 // async wait is supported in Ampere and later
 mod.walk([&](triton::gpu::AsyncWaitOp asyncWaitOp) -> void {
-if (computeCapability < 80) {
-asyncWaitOp.erase();
-} else if (decomposed) {
-OpBuilder builder(asyncWaitOp);
-// Wait for all previous async ops
-auto newAsyncWaitOp = builder.create<triton::gpu::AsyncWaitOp>(
-asyncWaitOp.getLoc(), builder.getI64IntegerAttr(0));
+if (!triton::gpu::AsyncWaitOp::isSupported(computeCapability) ||
+decomposed) {
 asyncWaitOp.erase();
 }
 });