import triton import triton.language as tl @triton.jit def math_kernel(x1_ptr, x2_ptr, x3_ptr, x4_ptr, n, BLOCK_SIZE: tl.constexpr): offsets = tl.arange(0, BLOCK_SIZE) x1 = tl.load(x1_ptr + offsets, mask=offsets < n) x2 = tl.load(x2_ptr + offsets, mask=offsets < n) x3 = tl.load(x3_ptr + offsets, mask=offsets < n) x4 = tl.load(x4_ptr + offsets, mask=offsets < n) y1 = tl.sin(x1) y2 = tl.libdevice.sin(x2) y3 = tl.libdevice.div_rn(x3, x3) y4 = tl.libdevice.fma_rd(x4, x4, x4) tl.store(x1_ptr + offsets, y1, mask=offsets < n) tl.store(x2_ptr + offsets, y2, mask=offsets < n) tl.store(x3_ptr + offsets, y3, mask=offsets < n) tl.store(x4_ptr + offsets, y4, mask=offsets < n) def test_empty_kernel_cubin_compile(): kernel = triton.compiler._compile(math_kernel, "*fp32,*fp32,*fp32,*fp32,i32", device=0, constants={"BLOCK_SIZE": 256}, output="ttgir") # "cubin" assert kernel # TODO: Check if the values are correct. # TODO: Cover all the math operators