Files
triton/include/triton/codegen/transform/cts.h
Philippe Tillet 083bbd1e8d [GENERAL] Merged v1.0alpha into master. Added features are:
- A100 support via mma.16816
- Thread swizzling for conflict-free shared memory accesses without
padding
- Complete overhaul of the LLVM code generation in
codegen/selection/generator.cc to remove overengineering
- Added debugging capabilities in the Python binding
- Compilation error for kernels that spill
2021-07-27 12:38:48 -07:00

37 lines
548 B
C++

#ifndef TDL_INCLUDE_CODEGEN_BUFFER_INFO_PASS_H
#define TDL_INCLUDE_CODEGEN_BUFFER_INFO_PASS_H
#include <set>
#include <map>
namespace triton {
namespace ir {
class module;
class value;
class phi_node;
class instruction;
class builder;
}
namespace codegen{
namespace transform{
class cts {
private:
void add_copy(ir::instruction *parent, ir::value *x, ir::builder &builder, bool to_shared);
public:
cts(bool use_async = false): use_async_(use_async) {}
void run(ir::module &mod);
private:
bool use_async_;
};
}
}
}
#endif