[OPTIMIZER] Improved layout simplification pass so it handles swizzled layouts better (#789)

Note: uncommented `test_gemm`, since backend has an issue with swizzling. This will get uncommented in a subsequent PR.
2022-10-20 19:03:37 -07:00
parent 0d22d2bc03
commit dc0588a898
7 changed files with 68 additions and 28 deletions
--- a/lib/Dialect/TritonGPU/Transforms/Coalesce.cpp
+++ b/lib/Dialect/TritonGPU/Transforms/Coalesce.cpp
@@ -71,7 +71,8 @@ struct CoalescePass : public TritonGPUCoalesceBase<CoalescePass> {
    // convert operands
    SmallVector<Value, 4> newArgs;
    for (auto v : op->getOperands()) {
-      if (v.getType().isa<RankedTensorType>())
+      auto vTy = v.getType().dyn_cast<RankedTensorType>();
+      if (vTy && !vTy.getEncoding().isa<triton::gpu::SharedEncodingAttr>())
        newArgs.push_back(builder.create<triton::gpu::ConvertLayoutOp>(
            op->getLoc(), convertType(v.getType()), v));
      else