[CODEGEN] Switching to predicated inline PTX for LDGs (#103)

This commit is contained in:
Philippe Tillet
2021-05-09 21:59:25 -04:00
committed by Philippe Tillet
parent ac57812bdc
commit 1e844ba78d
2 changed files with 114 additions and 31 deletions

View File

@@ -81,7 +81,7 @@ cu_kernel::cu_kernel(driver::module *program, const char * name) : kernel(progra
dispatch::cuFuncGetAttribute(&shared_static, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, *cu_);
dispatch::cuFuncGetAttribute(&n_spills, CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES, *cu_);
dispatch::cuFuncGetAttribute(&n_reg, CU_FUNC_ATTRIBUTE_NUM_REGS, *cu_);
std::cout << n_reg << std::endl;
// std::cout << n_reg << std::endl;
if (shared_optin > 49152){
// std::cout << "dynamic shared memory " << shared_optin << " " << shared_static << std::endl;
dispatch::cuFuncSetAttribute(*cu_, CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, shared_optin - shared_static);