import torch import triton from triton.code_gen import JITFunction import triton.language as tl import os import shutil tmpdir = ".tmp" @triton.jit def function_1(i): i = i + 1 i = function_2(i) return i @triton.jit def function_2(i): i = i + 1 return i @triton.jit def kernel(X, i, BLOCK: tl.constexpr): i = i + 1 i = function_1(i) tl.store(X, i) def apply_src_change(target, old, new): delattr(kernel.fn, 'hash') delattr(function_1.fn, 'hash') delattr(function_2.fn, 'hash') function_1.src = function_1.src.replace(old, new) target.src = target.src.replace(old, new) ret = target.cache_key target.src = target.src.replace(new, old) return ret def test_nochange(): baseline = kernel.cache_key updated = apply_src_change(kernel, 'i + 1', 'i + 1') assert baseline == updated def test_toplevel_change(): baseline = kernel.cache_key updated = apply_src_change(kernel, 'i + 1', 'i + 2') assert baseline != updated def test_nested1_change(): baseline = kernel.cache_key updated = apply_src_change(function_1, 'i + 1', 'i + 2') assert baseline != updated def test_reuse(): counter = 0 def inc_counter(key, binary): nonlocal counter counter += 1 os.environ["TRITON_CACHE_DIR"] = tmpdir if os.path.exists(tmpdir): shutil.rmtree(tmpdir) JITFunction.cache_hook = inc_counter x = torch.empty(1, dtype=torch.int32, device='cuda') for i in range(10): kernel[(1,)](x, 43, BLOCK=1024) assert counter == 1