1. Fix the order in linearize/delinearize, which fixes the ordering error in emitIndices; 2. fix the selection of the fast implementation in reduce codegen; 3. remove the redundant barrier in reduce codegen; 4. fix the index mapping of the second round of warp_shuffle in the shuffle version of reduce codegen. Co-authored-by: Keren Zhou <kerenzhou@openai.com>
38 lines
968 B
C++
38 lines
968 B
C++
#ifndef TRITON_ANALYSIS_UTILITY_H
|
|
#define TRITON_ANALYSIS_UTILITY_H
|
|
|
|
#include "triton/Dialect/TritonGPU/IR/Dialect.h"
|
|
#include <algorithm>
|
|
#include <numeric>
|
|
#include <string>
|
|
|
|
namespace mlir {
|
|
|
|
// Predicate over a single MLIR `Value`. Only the declaration lives in this
// header; the definition is elsewhere.
// NOTE(review): judging by the name, this presumably reports whether `value`
// carries a shared-memory encoding -- confirm against the definition.
bool isSharedEncoding(Value value);
|
|
|
|
// Predicate over an operation pointer. Only the declaration lives in this
// header; the definition is elsewhere.
// NOTE(review): the "maybe" in the name suggests a conservative check for ops
// that could allocate shared memory -- confirm against the definition, and
// confirm whether a null `op` is tolerated.
bool maybeSharedAllocationOp(Operation *op);
|
|
|
|
// Returns a std::string derived from `value`; `state` is taken by non-const
// reference. Only the declaration lives in this header.
// NOTE(review): presumably this prints `value` as an operand using the naming
// held in `state` -- confirm against the definition.
std::string getValueOperandName(Value value, AsmState &state);
|
|
|
|
// Returns the product of all elements of `arr`; an empty array yields 1.
//
// The seed passed to std::accumulate fixes the type of the running value, so
// it must be an `Int`. Seeding with the plain literal `1` would carry the
// running product as `int` and truncate/overflow for wider element types such
// as int64_t before the result is converted back to `Int`.
template <typename Int> Int product(llvm::ArrayRef<Int> arr) {
  return std::accumulate(arr.begin(), arr.end(), static_cast<Int>(1),
                         std::multiplies{});
}
|
|
|
|
// Integer division of `m` by `n`, rounded up (for non-negative `m` and
// positive `n`): adds `n - 1` to the dividend before dividing.
template <typename Int> Int ceil(Int m, Int n) {
  // `auto` keeps whatever type the arithmetic promotes to.
  const auto roundedUpDividend = m + n - 1;
  return roundedUpDividend / n;
}
|
|
|
|
// output[i] = input[order[i]]
|
|
template <typename T, typename RES_T = T>
|
|
SmallVector<RES_T> reorder(ArrayRef<T> input, ArrayRef<unsigned> order) {
|
|
size_t rank = order.size();
|
|
assert(input.size() == rank);
|
|
SmallVector<RES_T> result(rank);
|
|
for (auto it : llvm::enumerate(order)) {
|
|
result[it.index()] = input[it.value()];
|
|
}
|
|
return result;
|
|
}
|
|
|
|
} // namespace mlir
|
|
|
|
#endif // TRITON_ANALYSIS_UTILITY_H
|