[FRONTEND] Added compilation flag to force use of .nc cache modifier (#134)

in DRAM loads. /!\ USE CAREFULLY - THIS CAN BREAK CORRECTNESS IF MISUSED /!\
This commit is contained in:
Philippe Tillet
2021-07-20 17:58:06 -04:00
committed by Philippe Tillet
parent 2824345065
commit 01276b5153
7 changed files with 27 additions and 18 deletions

View File

@@ -26,7 +26,7 @@ namespace codegen {
// TODO:
// There should be a proper pass manager there!
void add_passes_to_emit_bin(ir::module &ir, driver::device *dev, int num_warps, int num_stages,
void add_passes_to_emit_bin(ir::module &ir, driver::device *dev, int num_warps, int num_stages, bool force_nc_cache,
driver::module *&mod, driver::kernel *&ker, size_t &shared_mem) {
// generate llvm code
llvm::LLVMContext ctx;
@@ -51,7 +51,7 @@ void add_passes_to_emit_bin(ir::module &ir, driver::device *dev, int num_warps,
codegen::transform::coalesce coalesce(&align, &layouts);
codegen::transform::prefetch prefetch_s(target.get());
codegen::transform::membar barriers(&liveness, &layouts, &allocation, &prefetch_s, target.get());
codegen::generator isel(&axes, &layouts, &align, &allocation, &swizzle, target.get(), num_warps);
codegen::generator isel(&axes, &layouts, &align, &allocation, &swizzle, target.get(), num_warps, force_nc_cache);
// run passes
dce.run(ir);
peephole.run(ir);

View File

@@ -197,9 +197,9 @@ generator::generator(analysis::axes *a_axes,
analysis::allocation *alloc,
analysis::swizzle *swizzle,
target *tgt,
unsigned num_warps)
unsigned num_warps, bool force_nc_cache)
: a_axes_(a_axes), layouts_(layouts), alignment_(alignment), alloc_(alloc), swizzle_(swizzle),
tgt_(tgt), num_warps_(num_warps), add(&builder_), mul(&builder_), gep(&builder_) {
tgt_(tgt), num_warps_(num_warps), force_nc_cache_(force_nc_cache), add(&builder_), mul(&builder_), gep(&builder_) {
}
@@ -626,7 +626,10 @@ void generator::visit_load_inst(ir::load_inst* x){
// -----
std::ostringstream asm_oss;
asm_oss << "@$" << n_words; // predicate
asm_oss << " ld.global.cg";
if(force_nc_cache_)
asm_oss << " ld.global.nc";
else
asm_oss << " ld.global.cg";
if(n_words > 1)
asm_oss << ".v" << n_words; // vector width
asm_oss << ".b" << width; // word size