This PR merges the new runtime back into the `triton-mlir` branch. The merge adds caching and just-in-time (JIT) compilation to the `triton-mlir` project and paves the way for reusing tests from the `master` branch.
14 lines
214 B
Python
14 lines
214 B
Python
import torch
|
|
|
|
import triton
|
|
import triton.language as tl
|
|
|
|
|
|
@triton.jit
def kernel(X, stride_xm, stride_xn, BLOCK: tl.constexpr):
    # Deliberately empty kernel: none of the arguments are read and nothing
    # is written. `BLOCK` is declared as a compile-time constant via
    # `tl.constexpr`; the other parameters are ordinary runtime arguments.
    # NOTE(review): presumably this exists only to exercise the JIT
    # compile/launch path of the runtime -- confirm against the PR intent.
    pass
|
|
|
|
|
|
# Single-element tensor allocated on the CUDA device; it is passed to the
# kernel as its first argument.
X = torch.randn(1, device="cuda")
# Launch `kernel` over a one-element grid, passing 1 for both stride
# arguments and BLOCK=1024 for the constexpr parameter.
# NOTE(review): `pgm` presumably holds the handle returned by the launch
# (e.g. the compiled program) -- confirm against the runtime's launcher.
pgm = kernel[(1,)](X, 1, 1, BLOCK=1024)
|