[BACKEND] Added support for mma layouts in reductions (#863)
Validated in an ad hoc way by manually modifying the reduction .ttgir in my local cache. A follow-up PR will add better testing infrastructure for exercising conversions and reductions on arbitrary layouts.
This commit is contained in:
@@ -25,6 +25,10 @@ namespace gpu {
|
||||
|
||||
// Returns an unsigned count computed from `type`.
// Presumably the number of tensor elements each thread holds for that type —
// TODO confirm against the definition, which is not visible here.
unsigned getElemsPerThread(Type type);
|
||||
|
||||
// Returns a vector of unsigned values derived from the `layout` attribute.
// Presumably the number of threads per warp along each dimension —
// TODO confirm against the definition, which is not visible here.
SmallVector<unsigned> getThreadsPerWarp(Attribute layout);
|
||||
|
||||
// Returns a vector of unsigned values derived from the `layout` attribute.
// Presumably the number of warps per CTA along each dimension —
// TODO confirm against the definition, which is not visible here.
SmallVector<unsigned> getWarpsPerCTA(Attribute layout);
|
||||
|
||||
// Returns a vector of unsigned values derived from the `layout` attribute.
// Presumably the per-dimension extent of the tile owned by a single thread —
// TODO confirm against the definition, which is not visible here.
SmallVector<unsigned> getSizePerThread(Attribute layout);
|
||||
|
||||
// Returns a vector of unsigned values derived from the `layout` attribute.
// Presumably the total number of threads per CTA along each dimension —
// TODO confirm against the definition, which is not visible here.
// NOTE(review): this helper takes `layout` by const reference, whereas the
// neighbouring layout helpers take `Attribute` by value; consider unifying
// the parameter style together with the definition.
SmallVector<unsigned> getThreadsPerCTA(const Attribute &layout);
|
||||
|
@@ -326,7 +326,8 @@ def SliceEncodingAttr : DistributedEncoding<"SliceEncoding"> {
|
||||
);
|
||||
|
||||
let extraClassDeclaration = extraBaseClassDeclaration # [{
|
||||
SmallVector<int64_t> paddedShape(ArrayRef<int64_t> shape) const;
|
||||
template<class T>
|
||||
SmallVector<T> paddedShape(ArrayRef<T> shape) const;
|
||||
}];
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user