[PYTHON] Added TRITON_DEBUG_MODE, which reallocates input tensors outside of the PyTorch memory pool so that out-of-bounds accesses are easier to spot

2020-12-26 01:43:38 -05:00
parent c4fceeea49
commit c0bc7ed8b0
3 changed files with 27 additions and 5 deletions
--- a/python/examples/tutorials/vec_add.py
+++ b/python/examples/tutorials/vec_add.py
@@ -14,7 +14,7 @@ __global__ void add(float* z, float* x, float* y, int N) {

    bool check[TILE] = offset < N;

-    *?(check)pz = *?(check)px + *?(check)py;
+    *pz = *px + *py;
 }
    """

@@ -32,9 +32,8 @@ add = _add.apply

 # test
 torch.manual_seed(0)
-x = torch.rand(98432).cuda()
-y = torch.rand(98432).cuda()
+x = torch.rand(900).cuda()
+y = torch.rand(900).cuda()
 za = x + y
 zb = add(x, y)
-
 print(torch.allclose(za,zb))