[TritonGPU] Improved documentation and semantics of layout encodings (#30)

2022-07-31 13:59:44 -07:00
parent e02c82c765
commit d1593e6ca8
17 changed files with 399 additions and 566 deletions
--- a/python/examples/copy_strided.py
+++ b/python/examples/copy_strided.py
@@ -10,10 +10,10 @@ def kernel(X, stride_xm, stride_xn,
           BLOCK_M: tl.constexpr, BLOCK_N: tl.constexpr):
    off_m = tl.arange(0, BLOCK_M)
    off_n = tl.arange(0, BLOCK_N)
-    Xs = X + off_m[:, None] * stride_xm + off_n[None, :] * stride_xn
-    Zs = Z + off_m[:, None] * stride_zm + off_n[None, :] * stride_zn
+    Xs = X + off_m[:, None] * stride_xm + off_n[None, :] * 1
+    Zs = Z + off_m[:, None] * 1 + off_n[None, :] * stride_zn
    tl.store(Zs, tl.load(Xs))


-ret = triton.compile(kernel, "*fp32,i32,i32,*fp32,i32,i32", constants={"BLOCK_M": 128, "BLOCK_N": 128}, output="ttgir")
+ret = triton.compile(kernel, "*fp32,i32,i32,*fp32,i32,i32", constants={"BLOCK_M": 64, "BLOCK_N": 64}, output="ttgir")
 print(ret)