[FRONTEND] Backport new runtime from master (#706)

This PR merges the new runtime back into the `triton-mlir` branch. It adds caching and just-in-time compilation to the triton-mlir project and paves the way for reusing tests from the master branch.
2022-09-23 16:09:43 -07:00
parent ecd1bc33df
commit 22ec22c257
13 changed files with 790 additions and 419 deletions
--- a/python/tests/test_vecadd_no_scf.py
+++ b/python/tests/test_vecadd_no_scf.py
@@ -3,7 +3,6 @@ from torch.testing import assert_allclose

 import triton
 import triton.language as tl
-import triton.runtime as runtime


 def vecadd_no_scf_tester(num_warps, block_size):
@@ -22,27 +21,13 @@ def vecadd_no_scf_tester(num_warps, block_size):
        z_ptrs = z_ptr + offset
        tl.store(z_ptrs, z)

-    torch.zeros([10], device=torch.device('cuda'))
-    device = torch.cuda.current_device()
-    binary = runtime.build_kernel(kernel, "*fp32,*fp32,*fp32,i32",
-                                  constants={"BLOCK_SIZE_N": block_size},
-                                  num_warps=num_warps,
-                                  num_stages=3)
-
    x = torch.randn((block_size,), device='cuda', dtype=torch.float32)
    y = torch.randn((block_size,), device='cuda', dtype=torch.float32)
    z = torch.empty((block_size,), device=x.device, dtype=x.dtype)

-    assert x.shape.numel() % block_size == 0, "Only test load without mask here"
    grid = lambda EA: (x.shape.numel() // block_size,)
+    kernel[grid](x_ptr=x, y_ptr=y, z_ptr=z, BLOCK_SIZE_N=block_size, num_warps=num_warps)

-    runtime.launch_kernel(kernel=binary,
-                          grid=grid,
-                          device=device,
-                          x_ptr=x,
-                          y_ptr=y,
-                          z_ptr=z,
-                          BLOCK_SIZE_N=tl.constexpr(block_size))
    golden_z = x + y
    assert_allclose(z, golden_z, rtol=1e-7, atol=1e-7)