diff --git a/master/.buildinfo b/master/.buildinfo index a52db4f2d..c5e9bda28 100644 --- a/master/.buildinfo +++ b/master/.buildinfo @@ -1,4 +1,4 @@ # Sphinx build info version 1 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. -config: 43dd2d33a5d2138d26dc415193c66c45 +config: 9f90f53619be8e157415c1b1b1939b53 tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/master/.doctrees/environment.pickle b/master/.doctrees/environment.pickle index 82db1ef69..9b91eb791 100644 Binary files a/master/.doctrees/environment.pickle and b/master/.doctrees/environment.pickle differ diff --git a/master/.doctrees/getting-started/installation.doctree b/master/.doctrees/getting-started/installation.doctree index e34e31c61..8595830a4 100644 Binary files a/master/.doctrees/getting-started/installation.doctree and b/master/.doctrees/getting-started/installation.doctree differ diff --git a/master/.doctrees/getting-started/tutorials/01-vector-add.doctree b/master/.doctrees/getting-started/tutorials/01-vector-add.doctree index 072fa4d5c..0d2ce8802 100644 Binary files a/master/.doctrees/getting-started/tutorials/01-vector-add.doctree and b/master/.doctrees/getting-started/tutorials/01-vector-add.doctree differ diff --git a/master/.doctrees/getting-started/tutorials/02-fused-softmax.doctree b/master/.doctrees/getting-started/tutorials/02-fused-softmax.doctree index 4d5b7fdda..75c50c791 100644 Binary files a/master/.doctrees/getting-started/tutorials/02-fused-softmax.doctree and b/master/.doctrees/getting-started/tutorials/02-fused-softmax.doctree differ diff --git a/master/.doctrees/getting-started/tutorials/03-matrix-multiplication.doctree b/master/.doctrees/getting-started/tutorials/03-matrix-multiplication.doctree index cf1e0af8c..3d882cdf5 100644 Binary files a/master/.doctrees/getting-started/tutorials/03-matrix-multiplication.doctree and b/master/.doctrees/getting-started/tutorials/03-matrix-multiplication.doctree differ diff --git a/master/.doctrees/getting-started/tutorials/04-low-memory-dropout.doctree b/master/.doctrees/getting-started/tutorials/04-low-memory-dropout.doctree index 78516e662..9402925e6 100644 Binary files a/master/.doctrees/getting-started/tutorials/04-low-memory-dropout.doctree and b/master/.doctrees/getting-started/tutorials/04-low-memory-dropout.doctree differ diff --git a/master/.doctrees/getting-started/tutorials/05-layer-norm.doctree b/master/.doctrees/getting-started/tutorials/05-layer-norm.doctree index 55b5357ea..715c72e9b 100644 Binary files a/master/.doctrees/getting-started/tutorials/05-layer-norm.doctree and b/master/.doctrees/getting-started/tutorials/05-layer-norm.doctree differ diff --git a/master/.doctrees/getting-started/tutorials/06-fused-attention.doctree b/master/.doctrees/getting-started/tutorials/06-fused-attention.doctree index 078f45abd..e98ebc697 100644 Binary files a/master/.doctrees/getting-started/tutorials/06-fused-attention.doctree and b/master/.doctrees/getting-started/tutorials/06-fused-attention.doctree differ diff --git a/master/.doctrees/getting-started/tutorials/07-libdevice-function.doctree b/master/.doctrees/getting-started/tutorials/07-libdevice-function.doctree index be747e1bf..69c4ed09c 100644 Binary files a/master/.doctrees/getting-started/tutorials/07-libdevice-function.doctree and b/master/.doctrees/getting-started/tutorials/07-libdevice-function.doctree differ diff --git a/master/.doctrees/getting-started/tutorials/index.doctree b/master/.doctrees/getting-started/tutorials/index.doctree index 024ad43fe..66d4db8cb 100644 Binary files a/master/.doctrees/getting-started/tutorials/index.doctree and b/master/.doctrees/getting-started/tutorials/index.doctree differ diff --git a/master/.doctrees/getting-started/tutorials/sg_execution_times.doctree b/master/.doctrees/getting-started/tutorials/sg_execution_times.doctree index d260291b4..ced0a043c 100644 Binary files a/master/.doctrees/getting-started/tutorials/sg_execution_times.doctree and b/master/.doctrees/getting-started/tutorials/sg_execution_times.doctree differ diff --git a/master/.doctrees/index.doctree b/master/.doctrees/index.doctree index 2d4d1da38..7ea8e4e07 100644 Binary files a/master/.doctrees/index.doctree and b/master/.doctrees/index.doctree differ diff --git a/master/.doctrees/programming-guide/chapter-1/introduction.doctree b/master/.doctrees/programming-guide/chapter-1/introduction.doctree index 29f8dd8f0..a823341dd 100644 Binary files a/master/.doctrees/programming-guide/chapter-1/introduction.doctree and b/master/.doctrees/programming-guide/chapter-1/introduction.doctree differ diff --git a/master/.doctrees/programming-guide/chapter-2/related-work.doctree b/master/.doctrees/programming-guide/chapter-2/related-work.doctree index 159a13cd6..a636ba9a5 100644 Binary files a/master/.doctrees/programming-guide/chapter-2/related-work.doctree and b/master/.doctrees/programming-guide/chapter-2/related-work.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.Config.doctree b/master/.doctrees/python-api/generated/triton.Config.doctree index cd878ecd6..a2828e746 100644 Binary files a/master/.doctrees/python-api/generated/triton.Config.doctree and b/master/.doctrees/python-api/generated/triton.Config.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.autotune.doctree b/master/.doctrees/python-api/generated/triton.autotune.doctree index cf593a95e..d6c4ac612 100644 Binary files a/master/.doctrees/python-api/generated/triton.autotune.doctree and b/master/.doctrees/python-api/generated/triton.autotune.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.heuristics.doctree b/master/.doctrees/python-api/generated/triton.heuristics.doctree index 38abeea91..e3fbb9156 100644 Binary files a/master/.doctrees/python-api/generated/triton.heuristics.doctree and b/master/.doctrees/python-api/generated/triton.heuristics.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.jit.doctree b/master/.doctrees/python-api/generated/triton.jit.doctree index aa15963c7..a2207c5c1 100644 Binary files a/master/.doctrees/python-api/generated/triton.jit.doctree and b/master/.doctrees/python-api/generated/triton.jit.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.arange.doctree b/master/.doctrees/python-api/generated/triton.language.arange.doctree index be9b5be18..4ff50baf5 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.arange.doctree and b/master/.doctrees/python-api/generated/triton.language.arange.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.atomic_add.doctree b/master/.doctrees/python-api/generated/triton.language.atomic_add.doctree index 39b44e8df..b0402bfb1 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.atomic_add.doctree and b/master/.doctrees/python-api/generated/triton.language.atomic_add.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.atomic_and.doctree b/master/.doctrees/python-api/generated/triton.language.atomic_and.doctree index 77a3cd06f..45f05096c 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.atomic_and.doctree and b/master/.doctrees/python-api/generated/triton.language.atomic_and.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.atomic_cas.doctree b/master/.doctrees/python-api/generated/triton.language.atomic_cas.doctree index c1d26e593..a4b71e169 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.atomic_cas.doctree and b/master/.doctrees/python-api/generated/triton.language.atomic_cas.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.atomic_max.doctree b/master/.doctrees/python-api/generated/triton.language.atomic_max.doctree index e2550225c..8ecff32bf 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.atomic_max.doctree and b/master/.doctrees/python-api/generated/triton.language.atomic_max.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.atomic_min.doctree b/master/.doctrees/python-api/generated/triton.language.atomic_min.doctree index f30373ce0..534a84157 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.atomic_min.doctree and b/master/.doctrees/python-api/generated/triton.language.atomic_min.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.atomic_or.doctree b/master/.doctrees/python-api/generated/triton.language.atomic_or.doctree index d43436194..9ab4ebf3b 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.atomic_or.doctree and b/master/.doctrees/python-api/generated/triton.language.atomic_or.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.atomic_xchg.doctree b/master/.doctrees/python-api/generated/triton.language.atomic_xchg.doctree index fe81aa2eb..1e87dcbaf 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.atomic_xchg.doctree and b/master/.doctrees/python-api/generated/triton.language.atomic_xchg.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.atomic_xor.doctree b/master/.doctrees/python-api/generated/triton.language.atomic_xor.doctree index 7fcc68996..12d91c06e 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.atomic_xor.doctree and b/master/.doctrees/python-api/generated/triton.language.atomic_xor.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.broadcast_to.doctree b/master/.doctrees/python-api/generated/triton.language.broadcast_to.doctree index c3d651baa..56663ff57 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.broadcast_to.doctree and b/master/.doctrees/python-api/generated/triton.language.broadcast_to.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.cos.doctree b/master/.doctrees/python-api/generated/triton.language.cos.doctree index f1c33027d..e19d7189a 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.cos.doctree and b/master/.doctrees/python-api/generated/triton.language.cos.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.dot.doctree b/master/.doctrees/python-api/generated/triton.language.dot.doctree index 727d34eb6..17102bbf0 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.dot.doctree and b/master/.doctrees/python-api/generated/triton.language.dot.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.exp.doctree b/master/.doctrees/python-api/generated/triton.language.exp.doctree index 937637d92..4881374d4 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.exp.doctree and b/master/.doctrees/python-api/generated/triton.language.exp.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.load.doctree b/master/.doctrees/python-api/generated/triton.language.load.doctree index 49f55b876..7f6c217b7 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.load.doctree and b/master/.doctrees/python-api/generated/triton.language.load.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.log.doctree b/master/.doctrees/python-api/generated/triton.language.log.doctree index 834681af8..4273f257c 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.log.doctree and b/master/.doctrees/python-api/generated/triton.language.log.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.max.doctree b/master/.doctrees/python-api/generated/triton.language.max.doctree index 953e9c3fa..6143cbf41 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.max.doctree and b/master/.doctrees/python-api/generated/triton.language.max.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.maximum.doctree b/master/.doctrees/python-api/generated/triton.language.maximum.doctree index 8147f6568..dd3ec9380 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.maximum.doctree and b/master/.doctrees/python-api/generated/triton.language.maximum.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.min.doctree b/master/.doctrees/python-api/generated/triton.language.min.doctree index e8a8c84ad..fef69d209 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.min.doctree and b/master/.doctrees/python-api/generated/triton.language.min.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.minimum.doctree b/master/.doctrees/python-api/generated/triton.language.minimum.doctree index 267854c1a..10bd62ced 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.minimum.doctree and b/master/.doctrees/python-api/generated/triton.language.minimum.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.multiple_of.doctree b/master/.doctrees/python-api/generated/triton.language.multiple_of.doctree index 0f93dcaad..5640cf1c5 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.multiple_of.doctree and b/master/.doctrees/python-api/generated/triton.language.multiple_of.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.num_programs.doctree b/master/.doctrees/python-api/generated/triton.language.num_programs.doctree index 2e2f86b35..410a1b9d3 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.num_programs.doctree and b/master/.doctrees/python-api/generated/triton.language.num_programs.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.program_id.doctree b/master/.doctrees/python-api/generated/triton.language.program_id.doctree index b45c70588..5f7ce233a 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.program_id.doctree and b/master/.doctrees/python-api/generated/triton.language.program_id.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.rand.doctree b/master/.doctrees/python-api/generated/triton.language.rand.doctree index 260d632d9..143e7cf3b 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.rand.doctree and b/master/.doctrees/python-api/generated/triton.language.rand.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.randint.doctree b/master/.doctrees/python-api/generated/triton.language.randint.doctree index 0e9bf9c28..d8401b1f2 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.randint.doctree and b/master/.doctrees/python-api/generated/triton.language.randint.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.randint4x.doctree b/master/.doctrees/python-api/generated/triton.language.randint4x.doctree index ca71da0a7..e0d4aa737 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.randint4x.doctree and b/master/.doctrees/python-api/generated/triton.language.randint4x.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.randn.doctree b/master/.doctrees/python-api/generated/triton.language.randn.doctree index 723d76077..4f190c6bc 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.randn.doctree and b/master/.doctrees/python-api/generated/triton.language.randn.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.ravel.doctree b/master/.doctrees/python-api/generated/triton.language.ravel.doctree index d8cb9afc0..30b494275 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.ravel.doctree and b/master/.doctrees/python-api/generated/triton.language.ravel.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.reshape.doctree b/master/.doctrees/python-api/generated/triton.language.reshape.doctree index 2a1e2074e..4e19fde23 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.reshape.doctree and b/master/.doctrees/python-api/generated/triton.language.reshape.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.sigmoid.doctree b/master/.doctrees/python-api/generated/triton.language.sigmoid.doctree index b26be601c..b3a27aad4 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.sigmoid.doctree and b/master/.doctrees/python-api/generated/triton.language.sigmoid.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.sin.doctree b/master/.doctrees/python-api/generated/triton.language.sin.doctree index 8add1a512..8a5331254 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.sin.doctree and b/master/.doctrees/python-api/generated/triton.language.sin.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.softmax.doctree b/master/.doctrees/python-api/generated/triton.language.softmax.doctree index 98c350505..0db201ee8 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.softmax.doctree and b/master/.doctrees/python-api/generated/triton.language.softmax.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.sqrt.doctree b/master/.doctrees/python-api/generated/triton.language.sqrt.doctree index 4a30dc933..48c950bb9 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.sqrt.doctree and b/master/.doctrees/python-api/generated/triton.language.sqrt.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.store.doctree b/master/.doctrees/python-api/generated/triton.language.store.doctree index 92c2eaa52..48ef9ade5 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.store.doctree and b/master/.doctrees/python-api/generated/triton.language.store.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.sum.doctree b/master/.doctrees/python-api/generated/triton.language.sum.doctree index e7b7476cb..7fa19f1a1 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.sum.doctree and b/master/.doctrees/python-api/generated/triton.language.sum.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.where.doctree b/master/.doctrees/python-api/generated/triton.language.where.doctree index 759ea0990..a6ddd9e88 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.where.doctree and b/master/.doctrees/python-api/generated/triton.language.where.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.zeros.doctree b/master/.doctrees/python-api/generated/triton.language.zeros.doctree index f9a665bf5..0c678c7c7 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.zeros.doctree and b/master/.doctrees/python-api/generated/triton.language.zeros.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.testing.Benchmark.doctree b/master/.doctrees/python-api/generated/triton.testing.Benchmark.doctree index f6e9e0d9d..66a0834b1 100644 Binary files a/master/.doctrees/python-api/generated/triton.testing.Benchmark.doctree and b/master/.doctrees/python-api/generated/triton.testing.Benchmark.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.testing.do_bench.doctree b/master/.doctrees/python-api/generated/triton.testing.do_bench.doctree index 71a7a95e8..855d9e8ea 100644 Binary files a/master/.doctrees/python-api/generated/triton.testing.do_bench.doctree and b/master/.doctrees/python-api/generated/triton.testing.do_bench.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.testing.perf_report.doctree b/master/.doctrees/python-api/generated/triton.testing.perf_report.doctree index b67ec4438..1e632c542 100644 Binary files a/master/.doctrees/python-api/generated/triton.testing.perf_report.doctree and b/master/.doctrees/python-api/generated/triton.testing.perf_report.doctree differ diff --git a/master/.doctrees/python-api/triton.doctree b/master/.doctrees/python-api/triton.doctree index b03584e26..4f6a2d6d0 100644 Binary files a/master/.doctrees/python-api/triton.doctree and b/master/.doctrees/python-api/triton.doctree differ diff --git a/master/.doctrees/python-api/triton.language.doctree b/master/.doctrees/python-api/triton.language.doctree index 19cc40980..01740589c 100644 Binary files a/master/.doctrees/python-api/triton.language.doctree and b/master/.doctrees/python-api/triton.language.doctree differ diff --git a/master/.doctrees/python-api/triton.testing.doctree b/master/.doctrees/python-api/triton.testing.doctree index cbca9700b..6ace21870 100644 Binary files a/master/.doctrees/python-api/triton.testing.doctree and b/master/.doctrees/python-api/triton.testing.doctree differ diff --git a/master/_downloads/662999063954282841dc90b8945f85ce/tutorials_jupyter.zip b/master/_downloads/662999063954282841dc90b8945f85ce/tutorials_jupyter.zip index efd2a2523..a6a59be00 100644 Binary files a/master/_downloads/662999063954282841dc90b8945f85ce/tutorials_jupyter.zip and b/master/_downloads/662999063954282841dc90b8945f85ce/tutorials_jupyter.zip differ diff --git a/master/_downloads/763344228ae6bc253ed1a6cf586aa30d/tutorials_python.zip b/master/_downloads/763344228ae6bc253ed1a6cf586aa30d/tutorials_python.zip index 0af89958e..07e924c29 100644 Binary files a/master/_downloads/763344228ae6bc253ed1a6cf586aa30d/tutorials_python.zip and b/master/_downloads/763344228ae6bc253ed1a6cf586aa30d/tutorials_python.zip differ diff --git a/master/_images/sphx_glr_01-vector-add_001.png b/master/_images/sphx_glr_01-vector-add_001.png index e1f676720..419821c3b 100644 Binary files a/master/_images/sphx_glr_01-vector-add_001.png and b/master/_images/sphx_glr_01-vector-add_001.png differ diff --git a/master/_images/sphx_glr_01-vector-add_thumb.png b/master/_images/sphx_glr_01-vector-add_thumb.png index 87849868e..494007a01 100644 Binary files a/master/_images/sphx_glr_01-vector-add_thumb.png and b/master/_images/sphx_glr_01-vector-add_thumb.png differ diff --git a/master/_images/sphx_glr_02-fused-softmax_001.png b/master/_images/sphx_glr_02-fused-softmax_001.png index b750cdef5..8123d70d0 100644 Binary files a/master/_images/sphx_glr_02-fused-softmax_001.png and b/master/_images/sphx_glr_02-fused-softmax_001.png differ diff --git a/master/_images/sphx_glr_02-fused-softmax_thumb.png b/master/_images/sphx_glr_02-fused-softmax_thumb.png index 55ef93dda..c422dc99f 100644 Binary files a/master/_images/sphx_glr_02-fused-softmax_thumb.png and b/master/_images/sphx_glr_02-fused-softmax_thumb.png differ diff --git a/master/_images/sphx_glr_03-matrix-multiplication_001.png b/master/_images/sphx_glr_03-matrix-multiplication_001.png index 813cbe70e..5138654d4 100644 Binary files a/master/_images/sphx_glr_03-matrix-multiplication_001.png and b/master/_images/sphx_glr_03-matrix-multiplication_001.png differ diff --git a/master/_images/sphx_glr_03-matrix-multiplication_thumb.png b/master/_images/sphx_glr_03-matrix-multiplication_thumb.png index dc53a4ffe..8fd0f8423 100644 Binary files a/master/_images/sphx_glr_03-matrix-multiplication_thumb.png and b/master/_images/sphx_glr_03-matrix-multiplication_thumb.png differ diff --git a/master/_images/sphx_glr_05-layer-norm_001.png b/master/_images/sphx_glr_05-layer-norm_001.png index f8d6f63f3..29f06be67 100644 Binary files a/master/_images/sphx_glr_05-layer-norm_001.png and b/master/_images/sphx_glr_05-layer-norm_001.png differ diff --git a/master/_images/sphx_glr_05-layer-norm_thumb.png b/master/_images/sphx_glr_05-layer-norm_thumb.png index 524b67080..786b70b08 100644 Binary files a/master/_images/sphx_glr_05-layer-norm_thumb.png and b/master/_images/sphx_glr_05-layer-norm_thumb.png differ diff --git a/master/_sources/getting-started/tutorials/01-vector-add.rst.txt b/master/_sources/getting-started/tutorials/01-vector-add.rst.txt index 2231c9cdc..53755e3c8 100644 --- a/master/_sources/getting-started/tutorials/01-vector-add.rst.txt +++ b/master/_sources/getting-started/tutorials/01-vector-add.rst.txt @@ -233,12 +233,12 @@ We can now run the decorated function above. Pass `print_data=True` to see the p vector-add-performance: size Triton Torch 0 4096.0 9.600000 9.600000 - 1 8192.0 19.200000 19.200000 - 2 16384.0 31.999999 38.400001 + 1 8192.0 15.999999 15.999999 + 2 16384.0 38.400001 38.400001 3 32768.0 76.800002 76.800002 4 65536.0 127.999995 127.999995 5 131072.0 219.428568 219.428568 - 6 262144.0 384.000001 341.333321 + 6 262144.0 384.000001 384.000001 7 524288.0 472.615390 472.615390 8 1048576.0 614.400016 614.400016 9 2097152.0 722.823517 722.823517 @@ -255,7 +255,7 @@ We can now run the decorated function above. Pass `print_data=True` to see the p .. rst-class:: sphx-glr-timing - **Total running time of the script:** ( 1 minutes 44.610 seconds) + **Total running time of the script:** ( 1 minutes 43.794 seconds) .. _sphx_glr_download_getting-started_tutorials_01-vector-add.py: diff --git a/master/_sources/getting-started/tutorials/02-fused-softmax.rst.txt b/master/_sources/getting-started/tutorials/02-fused-softmax.rst.txt index 8f24a1c20..1ecd716b0 100644 --- a/master/_sources/getting-started/tutorials/02-fused-softmax.rst.txt +++ b/master/_sources/getting-started/tutorials/02-fused-softmax.rst.txt @@ -278,17 +278,17 @@ We will then compare its performance against (1) :code:`torch.softmax` and (2) t softmax-performance: N Triton Torch (native) Torch (jit) - 0 256.0 546.133347 546.133347 186.181817 + 0 256.0 546.133347 546.133347 188.321838 1 384.0 614.400016 585.142862 153.600004 2 512.0 655.360017 606.814814 154.566038 3 640.0 706.206879 640.000002 160.000000 4 768.0 722.823517 664.216187 162.754967 .. ... ... ... ... - 93 12160.0 812.359066 406.179533 198.530610 - 94 12288.0 812.429770 416.101597 198.895304 - 95 12416.0 812.498981 412.149375 198.457532 - 96 12544.0 810.925276 412.971190 198.766042 - 97 12672.0 811.007961 412.097543 198.873965 + 93 12160.0 812.359066 406.179533 198.733401 + 94 12288.0 812.429770 415.661740 198.995960 + 95 12416.0 812.498981 412.149375 198.655991 + 96 12544.0 812.566838 412.971190 198.864492 + 97 12672.0 811.007961 412.097543 198.971549 [98 rows x 4 columns] @@ -306,7 +306,7 @@ In the above plot, we can see that: .. rst-class:: sphx-glr-timing - **Total running time of the script:** ( 3 minutes 31.794 seconds) + **Total running time of the script:** ( 3 minutes 29.999 seconds) .. _sphx_glr_download_getting-started_tutorials_02-fused-softmax.py: diff --git a/master/_sources/getting-started/tutorials/03-matrix-multiplication.rst.txt b/master/_sources/getting-started/tutorials/03-matrix-multiplication.rst.txt index 92fea389b..86d6ee0a4 100644 --- a/master/_sources/getting-started/tutorials/03-matrix-multiplication.rst.txt +++ b/master/_sources/getting-started/tutorials/03-matrix-multiplication.rst.txt @@ -459,37 +459,37 @@ We can now compare the performance of our kernel against that of cuBLAS. Here we matmul-performance: M cuBLAS ... Triton Triton (+ LeakyReLU) - 0 256.0 2.730667 ... 3.276800 2.978909 + 0 256.0 2.730667 ... 2.978909 3.276800 1 384.0 7.372800 ... 8.507077 8.507077 2 512.0 14.563555 ... 16.384000 16.384000 3 640.0 22.260869 ... 24.380953 24.380953 4 768.0 32.768000 ... 35.389441 34.028308 5 896.0 39.025776 ... 40.140799 39.025776 6 1024.0 49.932191 ... 53.773130 52.428801 - 7 1152.0 44.566925 ... 47.396572 47.396572 + 7 1152.0 45.242181 ... 47.396572 47.396572 8 1280.0 51.200001 ... 57.690139 57.690139 9 1408.0 64.138541 ... 68.147202 67.305878 - 10 1536.0 80.430545 ... 81.355034 78.643199 + 10 1536.0 80.430545 ... 81.355034 79.526831 11 1664.0 62.929456 ... 63.372618 62.492442 12 1792.0 72.512412 ... 73.460287 59.467852 - 13 1920.0 69.120002 ... 71.626943 71.257735 - 14 2048.0 73.908442 ... 78.398206 77.314362 - 15 2176.0 83.500614 ... 87.876193 86.367588 - 16 2304.0 68.446623 ... 78.064941 77.307030 - 17 2432.0 71.305746 ... 86.711310 85.653855 + 13 1920.0 69.120002 ... 71.257735 71.257735 + 14 2048.0 73.584279 ... 78.398206 77.314362 + 15 2176.0 83.500614 ... 87.494120 85.998493 + 16 2304.0 68.251065 ... 78.064941 77.307030 + 17 2432.0 71.305746 ... 86.711310 83.614477 18 2560.0 78.019048 ... 82.747477 81.715711 - 19 2688.0 83.737433 ... 91.185232 89.464755 - 20 2816.0 83.074685 ... 84.687779 83.712490 - 21 2944.0 82.102191 ... 81.034195 82.784108 - 22 3072.0 82.540970 ... 89.170242 87.516392 - 23 3200.0 84.432717 ... 96.822991 95.808380 - 24 3328.0 83.130825 ... 85.500351 84.447271 - 25 3456.0 81.974138 ... 91.719645 90.079964 - 26 3584.0 83.876297 ... 90.549237 94.847460 - 27 3712.0 85.601834 ... 81.285505 86.044224 - 28 3840.0 82.840447 ... 92.236860 86.130841 - 29 3968.0 93.148045 ... 80.120775 84.856701 - 30 4096.0 86.258181 ... 85.762979 90.565269 + 19 2688.0 83.737433 ... 90.316801 89.254248 + 20 2816.0 79.733474 ... 84.197315 83.074685 + 21 2944.0 82.034625 ... 83.060049 82.237674 + 22 3072.0 82.661468 ... 85.147525 88.750943 + 23 3200.0 84.768213 ... 94.814812 95.808380 + 24 3328.0 83.034941 ... 85.096096 81.346098 + 25 3456.0 81.026701 ... 89.579522 83.545665 + 26 3584.0 85.633710 ... 93.661869 94.947616 + 27 3712.0 85.455380 ... 87.246590 87.552452 + 28 3840.0 81.738356 ... 89.766237 89.693434 + 29 3968.0 88.938731 ... 92.163097 85.093402 + 30 4096.0 93.401342 ... 86.009438 85.543487 [31 rows x 5 columns] @@ -499,7 +499,7 @@ We can now compare the performance of our kernel against that of cuBLAS. Here we .. rst-class:: sphx-glr-timing - **Total running time of the script:** ( 6 minutes 45.893 seconds) + **Total running time of the script:** ( 6 minutes 31.264 seconds) .. _sphx_glr_download_getting-started_tutorials_03-matrix-multiplication.py: diff --git a/master/_sources/getting-started/tutorials/05-layer-norm.rst.txt b/master/_sources/getting-started/tutorials/05-layer-norm.rst.txt index aa2d03dc4..7569a7b86 100644 --- a/master/_sources/getting-started/tutorials/05-layer-norm.rst.txt +++ b/master/_sources/getting-started/tutorials/05-layer-norm.rst.txt @@ -40,24 +40,24 @@ Layer Normalization N Triton Torch Apex 0 1024.0 585.142849 277.694907 468.114273 1 1536.0 630.153868 323.368435 511.999982 - 2 2048.0 682.666643 334.367358 520.126988 + 2 2048.0 668.734716 334.367358 520.126988 3 2560.0 694.237267 365.714281 518.481028 4 3072.0 712.347810 378.092307 496.484863 - 5 3584.0 725.873439 384.859062 451.527536 + 5 3584.0 725.873439 384.859062 455.111115 6 4096.0 728.177767 381.023256 455.111095 7 4608.0 670.254540 394.267384 421.302872 8 5120.0 688.403381 397.669909 424.455959 - 9 5632.0 704.000002 395.228063 411.470331 + 9 5632.0 704.000002 395.228063 413.357796 10 6144.0 697.191505 402.885254 409.600010 11 6656.0 700.631610 400.360920 400.360920 - 12 7168.0 686.754468 396.844306 387.459443 - 13 7680.0 682.666656 393.846167 386.415087 + 12 7168.0 690.891575 396.844306 387.459443 + 13 7680.0 678.895043 393.846167 386.415087 14 8192.0 636.271854 393.609605 371.308771 - 15 8704.0 627.315309 389.005597 381.545190 + 15 8704.0 627.315309 389.005597 380.502740 16 9216.0 606.814809 407.337026 383.999986 17 9728.0 587.350922 409.599987 383.369452 18 10240.0 564.965524 408.578556 382.803739 - 19 10752.0 546.133312 411.559798 381.445676 + 19 10752.0 547.872604 411.559798 381.445676 20 11264.0 533.207081 406.826188 373.134567 21 11776.0 520.486200 409.599991 377.587162 22 12288.0 514.680630 413.911572 383.251457 @@ -67,7 +67,7 @@ Layer Normalization 26 14336.0 471.967074 406.695045 374.185964 27 14848.0 461.297068 408.192434 375.304904 28 15360.0 454.269882 406.214870 378.092307 - 29 15872.0 447.887117 406.974373 376.225175 + 29 15872.0 447.098578 406.974373 376.225175 @@ -393,7 +393,7 @@ Layer Normalization .. rst-class:: sphx-glr-timing - **Total running time of the script:** ( 5 minutes 39.453 seconds) + **Total running time of the script:** ( 5 minutes 33.449 seconds) .. _sphx_glr_download_getting-started_tutorials_05-layer-norm.py: diff --git a/master/_sources/getting-started/tutorials/07-libdevice-function.rst.txt b/master/_sources/getting-started/tutorials/07-libdevice-function.rst.txt index 228c0d6c8..297ebf116 100644 --- a/master/_sources/getting-started/tutorials/07-libdevice-function.rst.txt +++ b/master/_sources/getting-started/tutorials/07-libdevice-function.rst.txt @@ -152,7 +152,7 @@ We can also customize the libdevice library path by passing the path to the `lib .. rst-class:: sphx-glr-timing - **Total running time of the script:** ( 0 minutes 0.011 seconds) + **Total running time of the script:** ( 0 minutes 0.010 seconds) .. _sphx_glr_download_getting-started_tutorials_07-libdevice-function.py: diff --git a/master/_sources/getting-started/tutorials/sg_execution_times.rst.txt b/master/_sources/getting-started/tutorials/sg_execution_times.rst.txt index 27e926cf3..21aa8209e 100644 --- a/master/_sources/getting-started/tutorials/sg_execution_times.rst.txt +++ b/master/_sources/getting-started/tutorials/sg_execution_times.rst.txt @@ -5,20 +5,20 @@ Computation times ================= -**17:41.846** total execution time for **getting-started_tutorials** files: +**17:18.602** total execution time for **getting-started_tutorials** files: +---------------------------------------------------------------------------------------------------------+-----------+--------+ -| :ref:`sphx_glr_getting-started_tutorials_03-matrix-multiplication.py` (``03-matrix-multiplication.py``) | 06:45.893 | 0.0 MB | +| :ref:`sphx_glr_getting-started_tutorials_03-matrix-multiplication.py` (``03-matrix-multiplication.py``) | 06:31.264 | 0.0 MB | +---------------------------------------------------------------------------------------------------------+-----------+--------+ -| :ref:`sphx_glr_getting-started_tutorials_05-layer-norm.py` (``05-layer-norm.py``) | 05:39.453 | 0.0 MB | +| :ref:`sphx_glr_getting-started_tutorials_05-layer-norm.py` (``05-layer-norm.py``) | 05:33.449 | 0.0 MB | +---------------------------------------------------------------------------------------------------------+-----------+--------+ -| :ref:`sphx_glr_getting-started_tutorials_02-fused-softmax.py` (``02-fused-softmax.py``) | 03:31.794 | 0.0 MB | +| :ref:`sphx_glr_getting-started_tutorials_02-fused-softmax.py` (``02-fused-softmax.py``) | 03:29.999 | 0.0 MB | +---------------------------------------------------------------------------------------------------------+-----------+--------+ -| :ref:`sphx_glr_getting-started_tutorials_01-vector-add.py` (``01-vector-add.py``) | 01:44.610 | 0.0 MB | +| :ref:`sphx_glr_getting-started_tutorials_01-vector-add.py` (``01-vector-add.py``) | 01:43.794 | 0.0 MB | +---------------------------------------------------------------------------------------------------------+-----------+--------+ | :ref:`sphx_glr_getting-started_tutorials_06-fused-attention.py` (``06-fused-attention.py``) | 00:00.073 | 0.0 MB | +---------------------------------------------------------------------------------------------------------+-----------+--------+ | :ref:`sphx_glr_getting-started_tutorials_04-low-memory-dropout.py` (``04-low-memory-dropout.py``) | 00:00.012 | 0.0 MB | +---------------------------------------------------------------------------------------------------------+-----------+--------+ -| :ref:`sphx_glr_getting-started_tutorials_07-libdevice-function.py` (``07-libdevice-function.py``) | 00:00.011 | 0.0 MB | +| :ref:`sphx_glr_getting-started_tutorials_07-libdevice-function.py` (``07-libdevice-function.py``) | 00:00.010 | 0.0 MB | +---------------------------------------------------------------------------------------------------------+-----------+--------+ diff --git a/master/getting-started/tutorials/01-vector-add.html b/master/getting-started/tutorials/01-vector-add.html index a55280650..c120d7f77 100644 --- a/master/getting-started/tutorials/01-vector-add.html +++ b/master/getting-started/tutorials/01-vector-add.html @@ -325,12 +325,12 @@ for different problem sizes.

vector-add-performance:
            size      Triton       Torch
 0        4096.0    9.600000    9.600000
-1        8192.0   19.200000   19.200000
-2       16384.0   31.999999   38.400001
+1        8192.0   15.999999   15.999999
+2       16384.0   38.400001   38.400001
 3       32768.0   76.800002   76.800002
 4       65536.0  127.999995  127.999995
 5      131072.0  219.428568  219.428568
-6      262144.0  384.000001  341.333321
+6      262144.0  384.000001  384.000001
 7      524288.0  472.615390  472.615390
 8     1048576.0  614.400016  614.400016
 9     2097152.0  722.823517  722.823517
@@ -342,7 +342,7 @@ for different problem sizes.

15 134217728.0 849.737435 850.656574
-

Total running time of the script: ( 1 minutes 44.610 seconds)

+

Total running time of the script: ( 1 minutes 43.794 seconds)