[Triton-MLIR] Two fixes on allocation and backend related with MMA v1 (#930)

2022-11-30 17:27:26 +08:00
parent 9bb54402b3
commit 4e6a8209ed
5 changed files with 47 additions and 23 deletions
--- a/lib/Dialect/TritonGPU/IR/Dialect.cpp
+++ b/lib/Dialect/TritonGPU/IR/Dialect.cpp
@@ -109,6 +109,8 @@ SmallVector<unsigned> getSizePerThread(Attribute layout) {
    if (mmaLayout.getVersion() == 2) {
      return {2, 2};
    } else if (mmaLayout.getVersion() == 1) {
+      // Note: sizePerThread is only loosely defined for MMA v1; {2, 4} does
+      // not imply that vecSize=4 is supported in the last dimension.
      return {2, 4};
    } else {
      llvm_unreachable("Unexpected mma version");
@@ -140,6 +142,15 @@ SmallVector<unsigned> getSizePerThread(Attribute layout) {
  }
 }

+// MMA (v1/v2) layouts yield {1, 2}; all others defer to getSizePerThread().
+SmallVector<unsigned> getContigPerThread(Attribute layout) {
+  auto mma = layout.dyn_cast<MmaEncodingAttr>();
+  if (!mma)
+    return getSizePerThread(layout);
+  assert(mma.getVersion() == 1 || mma.getVersion() == 2);
+  return {1, 2};
+}
+
 SmallVector<unsigned> getThreadsPerCTA(const Attribute &layout) {
  SmallVector<unsigned> threads;
  if (auto blockedLayout = layout.dyn_cast<BlockedEncodingAttr>()) {