[Triton-MLIR][BACKEND] Pass compute capability from the frontend and code cleanup (#961)
This commit is contained in:
@@ -4681,8 +4681,7 @@ private:
|
||||
// capability does not support async copy, then we do decompose
|
||||
if (triton::gpu::InsertSliceAsyncOp::getEligibleLoadByteWidth(
|
||||
computeCapability)
|
||||
.contains(byteWidth) &&
|
||||
computeCapability >= 80)
|
||||
.contains(byteWidth))
|
||||
return;
|
||||
|
||||
// load
|
||||
@@ -4716,13 +4715,8 @@ private:
|
||||
|
||||
// async wait is supported in Ampere and later
|
||||
mod.walk([&](triton::gpu::AsyncWaitOp asyncWaitOp) -> void {
|
||||
if (computeCapability < 80) {
|
||||
asyncWaitOp.erase();
|
||||
} else if (decomposed) {
|
||||
OpBuilder builder(asyncWaitOp);
|
||||
// Wait for all previous async ops
|
||||
auto newAsyncWaitOp = builder.create<triton::gpu::AsyncWaitOp>(
|
||||
asyncWaitOp.getLoc(), builder.getI64IntegerAttr(0));
|
||||
if (!triton::gpu::AsyncWaitOp::isSupported(computeCapability) ||
|
||||
decomposed) {
|
||||
asyncWaitOp.erase();
|
||||
}
|
||||
});
|
||||
|
Reference in New Issue
Block a user