[Triton-MLIR][Backend] Fix the order in linearize/delinearize and a few bugs in the reduce conversion (#851)
1. Fix the order in linearize/delinearize, which fixes the ordering error in emitIndices; 2. fix the selection of the fast implementation in the reduce codegen; 3. fix the redundant barrier in the reduce codegen; 4. fix the index mapping of the second round of warp_shuffle in the shuffle version of the reduce codegen. Co-authored-by: Keren Zhou <kerenzhou@openai.com>
This commit is contained in:
@@ -21,11 +21,11 @@ template <typename Int> Int product(llvm::ArrayRef<Int> arr) {
|
||||
// Integer ceiling division: returns (m + n - 1) / n, i.e. m / n rounded up
// when m and n are positive. The caller must pass a non-zero n.
template <typename Int> Int ceil(Int m, Int n) {
  // Bias the numerator by n - 1 so the truncating division rounds up.
  // `auto` keeps the promoted arithmetic type of the expression unchanged.
  const auto biased = m + n - 1;
  return biased / n;
}
|
||||
|
||||
// output[i] = input[order[i]]
|
||||
template <typename T>
|
||||
SmallVector<T> reorder(ArrayRef<T> input, ArrayRef<unsigned> order) {
|
||||
template <typename T, typename RES_T = T>
|
||||
SmallVector<RES_T> reorder(ArrayRef<T> input, ArrayRef<unsigned> order) {
|
||||
size_t rank = order.size();
|
||||
assert(input.size() == rank);
|
||||
SmallVector<T> result(rank);
|
||||
SmallVector<RES_T> result(rank);
|
||||
for (auto it : llvm::enumerate(order)) {
|
||||
result[it.index()] = input[it.value()];
|
||||
}
|
||||
|
Reference in New Issue
Block a user