[BACKEND] Added support for mma layouts in reductions (#863)

Validated ad hoc by manually modifying the reduction .ttgir in my local cache. A follow-up PR will add better testing infrastructure for exercising conversions and reductions on arbitrary layouts.
2022-11-10 09:58:07 -08:00
parent 57fd1864a7
commit 2aa538ec2e
6 changed files with 469 additions and 365 deletions
--- a/include/triton/Dialect/TritonGPU/IR/Dialect.h
+++ b/include/triton/Dialect/TritonGPU/IR/Dialect.h
@@ -25,6 +25,10 @@ namespace gpu {

 unsigned getElemsPerThread(Type type);

+SmallVector<unsigned> getThreadsPerWarp(Attribute layout);
+
+SmallVector<unsigned> getWarpsPerCTA(Attribute layout);
+
 SmallVector<unsigned> getSizePerThread(Attribute layout);

 SmallVector<unsigned> getThreadsPerCTA(const Attribute &layout);
--- a/include/triton/Dialect/TritonGPU/IR/TritonGPUAttrDefs.td
+++ b/include/triton/Dialect/TritonGPU/IR/TritonGPUAttrDefs.td
@@ -326,7 +326,8 @@ def SliceEncodingAttr : DistributedEncoding<"SliceEncoding"> {
  );

  let extraClassDeclaration = extraBaseClassDeclaration # [{
-    SmallVector<int64_t> paddedShape(ArrayRef<int64_t> shape) const;
+    template<class T>
+    SmallVector<T> paddedShape(ArrayRef<T> shape) const;
  }];
 }