[CODEGEN] Improvements and bugfixes (#463)

This commit is contained in:
Philippe Tillet
2022-02-24 14:56:24 -08:00
committed by GitHub
parent a9dfdcaaa9
commit 98ed7db8c1
14 changed files with 154 additions and 81 deletions

View File

@@ -472,7 +472,7 @@ std::tuple<std::string, asm_map_t, int> cu_compile_ttir(const std::string& name,
size_t minor = cuGetInfo<CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR>(dev);
size_t cc = major*10 + minor;
int version;
drv::dispatch::cuDriverGetVersion(&version);
std::string ptxas_path = drv::path_to_ptxas(version);
// Triton-IR -> NVPTX LLVM-IR
triton::codegen::nvidia_cu_target target(cc);
auto llvm = triton::codegen::add_passes_to_emit_bin(ir, ctx, &target, cc, num_warps, num_stages, n_shared_bytes);
@@ -485,7 +485,7 @@ std::tuple<std::string, asm_map_t, int> cu_compile_ttir(const std::string& name,
std::string ptx = drv::llir_to_ptx(llvm.get(), cc, version);
asm_map["ptx"] = py::cast(ptx);
// PTX -> Binary
std::string cubin = drv::ptx_to_cubin(ptx, cc);
std::string cubin = drv::ptx_to_cubin(ptx, ptxas_path, cc);
if(!cubin.empty()){
py::bytes bytes(cubin);
asm_map["cubin"] = bytes;

View File

@@ -556,7 +556,7 @@ def dot(input, other, allow_tf32=True, _builder=None):
@builtin
def load(pointer, mask=None, other=None, cache_modifier="", volatile=False, _builder=None):
def load(pointer, mask=None, other=None, cache_modifier="", eviction_policy="", volatile=False, _builder=None):
"""
Return a block of data whose values are, elementwise, loaded from memory at location defined by :code:`pointer`.
@@ -573,7 +573,7 @@ def load(pointer, mask=None, other=None, cache_modifier="", volatile=False, _bui
:param cache_modifier: changes cache option in nvidia ptx
:type cache_modifier: str, optional
"""
return frontend.load(pointer, mask, other, cache_modifier, volatile, _builder)
return frontend.load(pointer, mask, other, cache_modifier, eviction_policy, volatile, _builder)
@builtin