Write hipmodule bytes

This commit is contained in:
Michael Melesse
2022-10-24 17:58:25 +00:00
parent eb89e9bdd9
commit 8da4323514
3 changed files with 24 additions and 14 deletions

View File

@@ -501,6 +501,7 @@ void init_triton_codegen(py::module &&m) {
std::string hipmodule;
std::string name;
{
std::cout << "triton.cc: compile_ttir_to_amdgpu:" << std::endl;
// Scope where the GIL is released
py::gil_scoped_release allow_threads;
name = ir.get_function_list()[0]->get_name();
@@ -524,26 +525,27 @@ void init_triton_codegen(py::module &&m) {
int version;
// std::string ptxas_path = drv::path_to_ptxas(version);
// Triton-IR -> AMDGCN LLVM-IR
std::cout << "\t" << ttir.str() << std::endl;
std::cout << "\t" << tmp << std::endl;
triton::codegen::amd_cl_target target;
auto llvm = triton::codegen::add_passes_to_emit_bin(
ir, ctx, &target, num_warps, num_stages, n_shared_bytes, extern_lib_map);
llvm::raw_string_ostream llir(tmp);
llir << *llvm;
llir.flush();
// LLVM-IR -> AMD HSACO
// LLVM-IR -> AMDGPU
std::string amdgpu = drv::llir_to_amdgpu(llvm.get(), "gfx90a");
// HSACO -> GCN
std::cout << "amdgpu = " << amdgpu << std::endl;
// AMDGPU -> Binary
hipModule_t hipmodule = drv::amdgpu_to_hipmodule(amdgpu);
std::cout << "hipmodule = " << hipmodule << std::endl;
}
asm_map_t asm_map;
asm_map["ttir"] = py::cast(ttir.str());
asm_map["llir"] = py::cast(tmp);
asm_map["amdgpu"] = py::cast(amdgpu);
if(!hipmodule.empty()){
py::bytes bytes(hipmodule);
asm_map["hipmodule"] = bytes;
}
asm_map["hipmodule"] = py::bytes(hipmodule);
return std::make_tuple(name, asm_map, n_shared_bytes);
},
py::return_value_policy::take_ownership);

View File

@@ -893,7 +893,7 @@ def _compile(fn, signature: str, device: int = -1, constants=dict(),
if output == "ttir":
return module
assert (output == "cubin" or output == "hsaco")
assert (output == "cubin" or output == "hipmodule")
if torch.version.hip is not None:
backend = _triton.runtime.backend.ROCM
else:
@@ -1285,15 +1285,23 @@ def compile(fn, signature: str, device: int = -1, constants=dict(), num_warps: i
if torch.version.hip is not None:
asm, shared, kernel_name = _compile(fn, signature, device, constants, configs[0], num_warps, num_stages,
extern_libs, "hsaco", cc)
extern_libs, "hipmodule", cc)
# cache AMD assembly and binary
fn_cache_manager.put(asm["hipmodule"], cubin_name)
fn_cache_manager.put(asm["amdgpu"], ptx_name, binary=False)
else:
asm, shared, kernel_name = _compile(fn, signature, device, constants, configs[0], num_warps, num_stages,
extern_libs, "cubin", cc)
metadata = {"name": kernel_name, "shared": shared, "num_warps": num_warps, "num_stages": num_stages}
fn_cache_manager.put(asm["cubin"], cubin_name)
fn_cache_manager.put(asm["ptx"], ptx_name, binary=False)
# cache Nvidia assembly and binary
fn_cache_manager.put(asm["cubin"], cubin_name)
fn_cache_manager.put(asm["ptx"], ptx_name, binary=False)
# cache triton and llvm ir
fn_cache_manager.put(asm["ttir"], ttir_name, binary=False)
fn_cache_manager.put(asm["llir"], llir_name, binary=False)
# cache metadata
metadata = {"name": kernel_name, "shared": shared, "num_warps": num_warps, "num_stages": num_stages}
fn_cache_manager.put(json.dumps(metadata), data_name, binary=False)
if warm_cache_only:

View File

@@ -10,8 +10,8 @@ chmod -R 777 $LOG_DIR
bash scripts/amd/clean.sh
# bash scripts/amd/deps.sh
bash scripts/amd/build.sh
# bash scripts/amd/test.sh 2>&1 |tee $LOG_DIR/test.log
bash scripts/amd/debug.sh
bash scripts/amd/test.sh 2>&1 |tee $LOG_DIR/test.log
# bash scripts/amd/debug.sh
# bash scripts/amd/backtrace.sh 2>&1 |tee $LOG_DIR/backtrace.log