[FRONTEND] Added compilation flag to force use of the .nc cache modifier in DRAM loads (#134).
/!\ USE CAREFULLY - THIS CAN BREAK CORRECTNESS IF MISUSED /!\
This commit is contained in:
committed by
Philippe Tillet
parent
2824345065
commit
01276b5153
@@ -26,7 +26,7 @@ namespace codegen {
|
||||
|
||||
// TODO:
|
||||
// There should be a proper pass manager there!
|
||||
void add_passes_to_emit_bin(ir::module &ir, driver::device *dev, int num_warps, int num_stages,
|
||||
void add_passes_to_emit_bin(ir::module &ir, driver::device *dev, int num_warps, int num_stages, bool force_nc_cache,
|
||||
driver::module *&mod, driver::kernel *&ker, size_t &shared_mem) {
|
||||
// generate llvm code
|
||||
llvm::LLVMContext ctx;
|
||||
@@ -51,7 +51,7 @@ void add_passes_to_emit_bin(ir::module &ir, driver::device *dev, int num_warps,
|
||||
codegen::transform::coalesce coalesce(&align, &layouts);
|
||||
codegen::transform::prefetch prefetch_s(target.get());
|
||||
codegen::transform::membar barriers(&liveness, &layouts, &allocation, &prefetch_s, target.get());
|
||||
codegen::generator isel(&axes, &layouts, &align, &allocation, &swizzle, target.get(), num_warps);
|
||||
codegen::generator isel(&axes, &layouts, &align, &allocation, &swizzle, target.get(), num_warps, force_nc_cache);
|
||||
// run passes
|
||||
dce.run(ir);
|
||||
peephole.run(ir);
|
||||
|
@@ -197,9 +197,9 @@ generator::generator(analysis::axes *a_axes,
|
||||
analysis::allocation *alloc,
|
||||
analysis::swizzle *swizzle,
|
||||
target *tgt,
|
||||
unsigned num_warps)
|
||||
unsigned num_warps, bool force_nc_cache)
|
||||
: a_axes_(a_axes), layouts_(layouts), alignment_(alignment), alloc_(alloc), swizzle_(swizzle),
|
||||
tgt_(tgt), num_warps_(num_warps), add(&builder_), mul(&builder_), gep(&builder_) {
|
||||
tgt_(tgt), num_warps_(num_warps), force_nc_cache_(force_nc_cache), add(&builder_), mul(&builder_), gep(&builder_) {
|
||||
|
||||
}
|
||||
|
||||
@@ -626,7 +626,10 @@ void generator::visit_load_inst(ir::load_inst* x){
|
||||
// -----
|
||||
std::ostringstream asm_oss;
|
||||
asm_oss << "@$" << n_words; // predicate
|
||||
asm_oss << " ld.global.cg";
|
||||
if(force_nc_cache_)
|
||||
asm_oss << " ld.global.nc";
|
||||
else
|
||||
asm_oss << " ld.global.cg";
|
||||
if(n_words > 1)
|
||||
asm_oss << ".v" << n_words; // vector width
|
||||
asm_oss << ".b" << width; // word size
|
||||
|
Reference in New Issue
Block a user