[DOCS] Updates and improvements (#87)

2021-04-22 10:27:02 -04:00
parent 39f4730305
commit 29e33e50b7
8 changed files with 195 additions and 70 deletions
--- a/python/tutorials/02-fused-softmax.py
+++ b/python/tutorials/02-fused-softmax.py
@@ -100,7 +100,7 @@ def softmax(x):
    # Allocate output
    y = torch.empty_like(x)
    # Enqueue kernel. The launch grid is simple: we have one kernel instance per row of the input matrix
-    _softmax[(M, )](y, x, x.stride(0), y.stride(0), M, N, BLOCK=BLOCK)
+    _softmax[(M, )](y, x, x.stride(0), y.stride(0), M, N, num_warps=num_warps, BLOCK=BLOCK)
    return y