- A100 support via mma.16816
- Thread swizzling for conflict-free shared memory accesses without padding
- Complete overhaul of the LLVM code generation in codegen/selection/generator.cc to remove overengineering
- Added debugging capabilities in the Python binding
- Compilation error for kernels that spill
27 lines
282 B
C++
27 lines
282 B
C++
#ifndef TRITON_INCLUDE_IR_CODEGEN_REORDER_H
#define TRITON_INCLUDE_IR_CODEGEN_REORDER_H

namespace triton {

// forward declaration
// (ir::module is only used by reference below, so the full definition
// is not needed in this header)
namespace ir {
class module;
}

namespace codegen{
namespace transform{

// Pass object in codegen::transform exposing a single entry point, run().
// Only the declaration lives here; the definition is provided elsewhere.
// NOTE(review): judging by the name, this presumably reorders IR inside the
// module -- confirm against the implementation.
class reorder {
public:
  // Runs the pass on `module`. The module is taken by non-const reference,
  // so the implementation is allowed to modify it in place.
  void run(ir::module& module);
};

} // namespace transform
} // namespace codegen
} // namespace triton

#endif