diff --git a/master/.buildinfo b/master/.buildinfo index b4909c5b9..058664b94 100644 --- a/master/.buildinfo +++ b/master/.buildinfo @@ -1,4 +1,4 @@ # Sphinx build info version 1 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. -config: 41a8f7ded8f17e74cdc1cd298d4905e1 +config: 7854d4ee64add3f68246c6489202978a tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/master/.doctrees/environment.pickle b/master/.doctrees/environment.pickle index 19e122332..e6629c9f6 100644 Binary files a/master/.doctrees/environment.pickle and b/master/.doctrees/environment.pickle differ diff --git a/master/.doctrees/getting-started/installation.doctree b/master/.doctrees/getting-started/installation.doctree index f7ad37cd1..3549d3770 100644 Binary files a/master/.doctrees/getting-started/installation.doctree and b/master/.doctrees/getting-started/installation.doctree differ diff --git a/master/.doctrees/getting-started/tutorials/01-vector-add.doctree b/master/.doctrees/getting-started/tutorials/01-vector-add.doctree index 8a5c83d81..1433801ed 100644 Binary files a/master/.doctrees/getting-started/tutorials/01-vector-add.doctree and b/master/.doctrees/getting-started/tutorials/01-vector-add.doctree differ diff --git a/master/.doctrees/getting-started/tutorials/02-fused-softmax.doctree b/master/.doctrees/getting-started/tutorials/02-fused-softmax.doctree index a209f3609..88c61a7ea 100644 Binary files a/master/.doctrees/getting-started/tutorials/02-fused-softmax.doctree and b/master/.doctrees/getting-started/tutorials/02-fused-softmax.doctree differ diff --git a/master/.doctrees/getting-started/tutorials/03-matrix-multiplication.doctree b/master/.doctrees/getting-started/tutorials/03-matrix-multiplication.doctree index 5d2074c72..dbc3fbab4 100644 Binary files a/master/.doctrees/getting-started/tutorials/03-matrix-multiplication.doctree and b/master/.doctrees/getting-started/tutorials/03-matrix-multiplication.doctree differ diff --git a/master/.doctrees/getting-started/tutorials/04-low-memory-dropout.doctree b/master/.doctrees/getting-started/tutorials/04-low-memory-dropout.doctree index 18f6d47d8..a99ec4a04 100644 Binary files a/master/.doctrees/getting-started/tutorials/04-low-memory-dropout.doctree and b/master/.doctrees/getting-started/tutorials/04-low-memory-dropout.doctree differ diff --git a/master/.doctrees/getting-started/tutorials/05-layer-norm.doctree b/master/.doctrees/getting-started/tutorials/05-layer-norm.doctree index 9c6384a06..d06af20a7 100644 Binary files a/master/.doctrees/getting-started/tutorials/05-layer-norm.doctree and b/master/.doctrees/getting-started/tutorials/05-layer-norm.doctree differ diff --git a/master/.doctrees/getting-started/tutorials/index.doctree b/master/.doctrees/getting-started/tutorials/index.doctree index e7f8a3f05..97026dfa3 100644 Binary files a/master/.doctrees/getting-started/tutorials/index.doctree and b/master/.doctrees/getting-started/tutorials/index.doctree differ diff --git a/master/.doctrees/getting-started/tutorials/sg_execution_times.doctree b/master/.doctrees/getting-started/tutorials/sg_execution_times.doctree index 107c397cb..656fc563c 100644 Binary files a/master/.doctrees/getting-started/tutorials/sg_execution_times.doctree and b/master/.doctrees/getting-started/tutorials/sg_execution_times.doctree differ diff --git a/master/.doctrees/index.doctree b/master/.doctrees/index.doctree index 2ecf29721..9436dfc14 100644 Binary files a/master/.doctrees/index.doctree and b/master/.doctrees/index.doctree differ diff --git a/master/.doctrees/programming-guide/chapter-1/introduction.doctree b/master/.doctrees/programming-guide/chapter-1/introduction.doctree index 07fdcc8f7..f98a21cc0 100644 Binary files a/master/.doctrees/programming-guide/chapter-1/introduction.doctree and b/master/.doctrees/programming-guide/chapter-1/introduction.doctree differ diff --git a/master/.doctrees/programming-guide/chapter-2/related-work.doctree b/master/.doctrees/programming-guide/chapter-2/related-work.doctree index 0a6d0058b..b957f25e7 100644 Binary files a/master/.doctrees/programming-guide/chapter-2/related-work.doctree and b/master/.doctrees/programming-guide/chapter-2/related-work.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.Config.doctree b/master/.doctrees/python-api/generated/triton.Config.doctree index 368678183..ee4188a92 100644 Binary files a/master/.doctrees/python-api/generated/triton.Config.doctree and b/master/.doctrees/python-api/generated/triton.Config.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.autotune.doctree b/master/.doctrees/python-api/generated/triton.autotune.doctree index 75e2dc0a6..cc059c08f 100644 Binary files a/master/.doctrees/python-api/generated/triton.autotune.doctree and b/master/.doctrees/python-api/generated/triton.autotune.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.heuristics.doctree b/master/.doctrees/python-api/generated/triton.heuristics.doctree index 0afe77637..d173a5cff 100644 Binary files a/master/.doctrees/python-api/generated/triton.heuristics.doctree and b/master/.doctrees/python-api/generated/triton.heuristics.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.jit.doctree b/master/.doctrees/python-api/generated/triton.jit.doctree index 46f0ea1ff..248b53ebe 100644 Binary files a/master/.doctrees/python-api/generated/triton.jit.doctree and b/master/.doctrees/python-api/generated/triton.jit.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.arange.doctree b/master/.doctrees/python-api/generated/triton.language.arange.doctree index 65eb7e84e..be644b96e 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.arange.doctree and b/master/.doctrees/python-api/generated/triton.language.arange.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.atomic_add.doctree b/master/.doctrees/python-api/generated/triton.language.atomic_add.doctree index 16cb6e9b3..d38085e16 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.atomic_add.doctree and b/master/.doctrees/python-api/generated/triton.language.atomic_add.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.atomic_cas.doctree b/master/.doctrees/python-api/generated/triton.language.atomic_cas.doctree index c4e01dd01..9be921507 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.atomic_cas.doctree and b/master/.doctrees/python-api/generated/triton.language.atomic_cas.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.atomic_max.doctree b/master/.doctrees/python-api/generated/triton.language.atomic_max.doctree index e76d71812..c6fb7d116 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.atomic_max.doctree and b/master/.doctrees/python-api/generated/triton.language.atomic_max.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.atomic_min.doctree b/master/.doctrees/python-api/generated/triton.language.atomic_min.doctree index 3aac0eb65..5c3105613 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.atomic_min.doctree and b/master/.doctrees/python-api/generated/triton.language.atomic_min.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.atomic_xchg.doctree b/master/.doctrees/python-api/generated/triton.language.atomic_xchg.doctree index aa1768370..5d0b9769d 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.atomic_xchg.doctree and b/master/.doctrees/python-api/generated/triton.language.atomic_xchg.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.broadcast_to.doctree b/master/.doctrees/python-api/generated/triton.language.broadcast_to.doctree index c4c1a48da..662d8b3cc 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.broadcast_to.doctree and b/master/.doctrees/python-api/generated/triton.language.broadcast_to.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.cos.doctree b/master/.doctrees/python-api/generated/triton.language.cos.doctree index ea3a0b0b2..e09e1ac3b 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.cos.doctree and b/master/.doctrees/python-api/generated/triton.language.cos.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.dot.doctree b/master/.doctrees/python-api/generated/triton.language.dot.doctree index 6e25469cc..989b20482 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.dot.doctree and b/master/.doctrees/python-api/generated/triton.language.dot.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.exp.doctree b/master/.doctrees/python-api/generated/triton.language.exp.doctree index 613a263fd..c98c39c5f 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.exp.doctree and b/master/.doctrees/python-api/generated/triton.language.exp.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.load.doctree b/master/.doctrees/python-api/generated/triton.language.load.doctree index 9d61c772c..9a8d1fa97 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.load.doctree and b/master/.doctrees/python-api/generated/triton.language.load.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.log.doctree b/master/.doctrees/python-api/generated/triton.language.log.doctree index 7fe805dbe..a1b3c3b06 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.log.doctree and b/master/.doctrees/python-api/generated/triton.language.log.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.max.doctree b/master/.doctrees/python-api/generated/triton.language.max.doctree index 353b6e78f..03a157d99 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.max.doctree and b/master/.doctrees/python-api/generated/triton.language.max.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.maximum.doctree b/master/.doctrees/python-api/generated/triton.language.maximum.doctree index e06f2d462..dcffbf689 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.maximum.doctree and b/master/.doctrees/python-api/generated/triton.language.maximum.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.min.doctree b/master/.doctrees/python-api/generated/triton.language.min.doctree index 2ee2c8b21..c5faa8792 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.min.doctree and b/master/.doctrees/python-api/generated/triton.language.min.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.minimum.doctree b/master/.doctrees/python-api/generated/triton.language.minimum.doctree index 38e5ccf57..5a074bdfc 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.minimum.doctree and b/master/.doctrees/python-api/generated/triton.language.minimum.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.multiple_of.doctree b/master/.doctrees/python-api/generated/triton.language.multiple_of.doctree index 9dd9564cd..d6be0ad65 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.multiple_of.doctree and b/master/.doctrees/python-api/generated/triton.language.multiple_of.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.num_programs.doctree b/master/.doctrees/python-api/generated/triton.language.num_programs.doctree index cfae99d14..33d664f4f 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.num_programs.doctree and b/master/.doctrees/python-api/generated/triton.language.num_programs.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.program_id.doctree b/master/.doctrees/python-api/generated/triton.language.program_id.doctree index 9ae3c9432..8ad9a918f 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.program_id.doctree and b/master/.doctrees/python-api/generated/triton.language.program_id.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.rand.doctree b/master/.doctrees/python-api/generated/triton.language.rand.doctree index 9f7294dfd..8c3fe21c0 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.rand.doctree and b/master/.doctrees/python-api/generated/triton.language.rand.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.randint.doctree b/master/.doctrees/python-api/generated/triton.language.randint.doctree index ba5431a37..c528d2788 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.randint.doctree and b/master/.doctrees/python-api/generated/triton.language.randint.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.randint4x.doctree b/master/.doctrees/python-api/generated/triton.language.randint4x.doctree index bcfc6f60e..0f02567cf 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.randint4x.doctree and b/master/.doctrees/python-api/generated/triton.language.randint4x.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.randn.doctree b/master/.doctrees/python-api/generated/triton.language.randn.doctree index ae2712ac4..1ecec0692 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.randn.doctree and b/master/.doctrees/python-api/generated/triton.language.randn.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.ravel.doctree b/master/.doctrees/python-api/generated/triton.language.ravel.doctree index 792a833da..a28777b8c 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.ravel.doctree and b/master/.doctrees/python-api/generated/triton.language.ravel.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.reshape.doctree b/master/.doctrees/python-api/generated/triton.language.reshape.doctree index 5cd9b381c..10341e4bc 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.reshape.doctree and b/master/.doctrees/python-api/generated/triton.language.reshape.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.sigmoid.doctree b/master/.doctrees/python-api/generated/triton.language.sigmoid.doctree index f631e3e52..aaadeddf3 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.sigmoid.doctree and b/master/.doctrees/python-api/generated/triton.language.sigmoid.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.sin.doctree b/master/.doctrees/python-api/generated/triton.language.sin.doctree index 3e0700f98..9518c6630 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.sin.doctree and b/master/.doctrees/python-api/generated/triton.language.sin.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.softmax.doctree b/master/.doctrees/python-api/generated/triton.language.softmax.doctree index 231974269..04cedbd9c 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.softmax.doctree and b/master/.doctrees/python-api/generated/triton.language.softmax.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.sqrt.doctree b/master/.doctrees/python-api/generated/triton.language.sqrt.doctree index 8a5d94820..35f14ba0f 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.sqrt.doctree and b/master/.doctrees/python-api/generated/triton.language.sqrt.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.store.doctree b/master/.doctrees/python-api/generated/triton.language.store.doctree index 565795fa0..87af60026 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.store.doctree and b/master/.doctrees/python-api/generated/triton.language.store.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.sum.doctree b/master/.doctrees/python-api/generated/triton.language.sum.doctree index e24908f37..4a04d2959 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.sum.doctree and b/master/.doctrees/python-api/generated/triton.language.sum.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.where.doctree b/master/.doctrees/python-api/generated/triton.language.where.doctree index ae9014eb6..1b6b310ce 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.where.doctree and b/master/.doctrees/python-api/generated/triton.language.where.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.zeros.doctree b/master/.doctrees/python-api/generated/triton.language.zeros.doctree index b62990d88..546eb09d1 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.zeros.doctree and b/master/.doctrees/python-api/generated/triton.language.zeros.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.testing.Benchmark.doctree b/master/.doctrees/python-api/generated/triton.testing.Benchmark.doctree index 7ba03be06..3d7ba96a5 100644 Binary files a/master/.doctrees/python-api/generated/triton.testing.Benchmark.doctree and b/master/.doctrees/python-api/generated/triton.testing.Benchmark.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.testing.do_bench.doctree b/master/.doctrees/python-api/generated/triton.testing.do_bench.doctree index 1fca9c0f6..2ff6337e3 100644 Binary files a/master/.doctrees/python-api/generated/triton.testing.do_bench.doctree and b/master/.doctrees/python-api/generated/triton.testing.do_bench.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.testing.perf_report.doctree b/master/.doctrees/python-api/generated/triton.testing.perf_report.doctree index 387a7ae72..20155d22d 100644 Binary files a/master/.doctrees/python-api/generated/triton.testing.perf_report.doctree and b/master/.doctrees/python-api/generated/triton.testing.perf_report.doctree differ diff --git a/master/.doctrees/python-api/triton.doctree b/master/.doctrees/python-api/triton.doctree index e0f707188..d03a96627 100644 Binary files a/master/.doctrees/python-api/triton.doctree and b/master/.doctrees/python-api/triton.doctree differ diff --git a/master/.doctrees/python-api/triton.language.doctree b/master/.doctrees/python-api/triton.language.doctree index e85ac0b28..4b4fe3da5 100644 Binary files a/master/.doctrees/python-api/triton.language.doctree and b/master/.doctrees/python-api/triton.language.doctree differ diff --git a/master/.doctrees/python-api/triton.testing.doctree b/master/.doctrees/python-api/triton.testing.doctree index 5a8375538..8242afdd6 100644 Binary files a/master/.doctrees/python-api/triton.testing.doctree and b/master/.doctrees/python-api/triton.testing.doctree differ diff --git a/master/_downloads/662999063954282841dc90b8945f85ce/tutorials_jupyter.zip b/master/_downloads/662999063954282841dc90b8945f85ce/tutorials_jupyter.zip index bab9aa2fe..bbcaa53c9 100644 Binary files a/master/_downloads/662999063954282841dc90b8945f85ce/tutorials_jupyter.zip and b/master/_downloads/662999063954282841dc90b8945f85ce/tutorials_jupyter.zip differ diff --git a/master/_downloads/763344228ae6bc253ed1a6cf586aa30d/tutorials_python.zip b/master/_downloads/763344228ae6bc253ed1a6cf586aa30d/tutorials_python.zip index 65ff005b7..5b1e1e8da 100644 Binary files a/master/_downloads/763344228ae6bc253ed1a6cf586aa30d/tutorials_python.zip and b/master/_downloads/763344228ae6bc253ed1a6cf586aa30d/tutorials_python.zip differ diff --git a/master/_images/sphx_glr_01-vector-add_001.png b/master/_images/sphx_glr_01-vector-add_001.png index eb7cbcfda..1ac9f0395 100644 Binary files a/master/_images/sphx_glr_01-vector-add_001.png and b/master/_images/sphx_glr_01-vector-add_001.png differ diff --git a/master/_images/sphx_glr_01-vector-add_thumb.png b/master/_images/sphx_glr_01-vector-add_thumb.png index 3c720fbe6..da83a5e49 100644 Binary files a/master/_images/sphx_glr_01-vector-add_thumb.png and b/master/_images/sphx_glr_01-vector-add_thumb.png differ diff --git a/master/_images/sphx_glr_02-fused-softmax_001.png b/master/_images/sphx_glr_02-fused-softmax_001.png index cdb283cab..ab46cebe7 100644 Binary files a/master/_images/sphx_glr_02-fused-softmax_001.png and b/master/_images/sphx_glr_02-fused-softmax_001.png differ diff --git a/master/_images/sphx_glr_02-fused-softmax_thumb.png b/master/_images/sphx_glr_02-fused-softmax_thumb.png index 2f3f915d5..35484573f 100644 Binary files a/master/_images/sphx_glr_02-fused-softmax_thumb.png and b/master/_images/sphx_glr_02-fused-softmax_thumb.png differ diff --git a/master/_images/sphx_glr_03-matrix-multiplication_001.png b/master/_images/sphx_glr_03-matrix-multiplication_001.png index 6dbdf3c7a..44b9698bc 100644 Binary files a/master/_images/sphx_glr_03-matrix-multiplication_001.png and b/master/_images/sphx_glr_03-matrix-multiplication_001.png differ diff --git a/master/_images/sphx_glr_03-matrix-multiplication_thumb.png b/master/_images/sphx_glr_03-matrix-multiplication_thumb.png index 9566f060d..c6ce08d4d 100644 Binary files a/master/_images/sphx_glr_03-matrix-multiplication_thumb.png and b/master/_images/sphx_glr_03-matrix-multiplication_thumb.png differ diff --git a/master/_images/sphx_glr_05-layer-norm_001.png b/master/_images/sphx_glr_05-layer-norm_001.png index 334f3d040..9825584af 100644 Binary files a/master/_images/sphx_glr_05-layer-norm_001.png and b/master/_images/sphx_glr_05-layer-norm_001.png differ diff --git a/master/_images/sphx_glr_05-layer-norm_thumb.png b/master/_images/sphx_glr_05-layer-norm_thumb.png index 23a032109..7386415c8 100644 Binary files a/master/_images/sphx_glr_05-layer-norm_thumb.png and b/master/_images/sphx_glr_05-layer-norm_thumb.png differ diff --git a/master/_sources/getting-started/tutorials/01-vector-add.rst.txt b/master/_sources/getting-started/tutorials/01-vector-add.rst.txt index 2c25184ee..a009fdbc3 100644 --- a/master/_sources/getting-started/tutorials/01-vector-add.rst.txt +++ b/master/_sources/getting-started/tutorials/01-vector-add.rst.txt @@ -245,7 +245,7 @@ We can now run the decorated function above. Pass `print_data=True` to see the p 10 4194304.0 780.190482 780.190482 11 8388608.0 812.429770 812.429770 12 16777216.0 833.084721 833.084721 - 13 33554432.0 842.004273 843.811163 + 13 33554432.0 842.004273 842.004273 14 67108864.0 847.448255 848.362445 15 134217728.0 849.737435 850.656574 @@ -255,7 +255,7 @@ We can now run the decorated function above. Pass `print_data=True` to see the p .. rst-class:: sphx-glr-timing - **Total running time of the script:** ( 1 minutes 38.076 seconds) + **Total running time of the script:** ( 1 minutes 40.793 seconds) .. _sphx_glr_download_getting-started_tutorials_01-vector-add.py: diff --git a/master/_sources/getting-started/tutorials/02-fused-softmax.rst.txt b/master/_sources/getting-started/tutorials/02-fused-softmax.rst.txt index 6b349ffa5..8fe2ff15b 100644 --- a/master/_sources/getting-started/tutorials/02-fused-softmax.rst.txt +++ b/master/_sources/getting-started/tutorials/02-fused-softmax.rst.txt @@ -279,16 +279,16 @@ We will then compare its performance against (1) :code:`torch.softmax` and (2) t softmax-performance: N Triton Torch (native) Torch (jit) 0 256.0 512.000001 546.133347 188.321838 - 1 384.0 614.400016 585.142862 153.600004 - 2 512.0 655.360017 606.814814 154.566038 + 1 384.0 585.142862 585.142862 151.703707 + 2 512.0 655.360017 585.142849 154.566038 3 640.0 706.206879 640.000002 160.000000 4 768.0 722.823517 664.216187 162.754967 .. ... ... ... ... - 93 12160.0 812.359066 406.179533 198.834951 - 94 12288.0 812.429770 415.222812 199.096718 - 95 12416.0 812.498981 412.149375 198.755369 - 96 12544.0 810.925276 412.971190 198.913776 - 97 12672.0 811.007961 411.888249 198.971549 + 93 12160.0 812.359066 405.333344 198.530610 + 94 12288.0 812.429770 415.661740 198.794749 + 95 12416.0 812.498981 411.296057 198.457532 + 96 12544.0 810.925276 412.971190 198.618504 + 97 12672.0 811.007961 412.097543 198.776477 [98 rows x 4 columns] @@ -306,7 +306,7 @@ In the above plot, we can see that: .. rst-class:: sphx-glr-timing - **Total running time of the script:** ( 3 minutes 21.149 seconds) + **Total running time of the script:** ( 3 minutes 23.140 seconds) .. _sphx_glr_download_getting-started_tutorials_02-fused-softmax.py: diff --git a/master/_sources/getting-started/tutorials/03-matrix-multiplication.rst.txt b/master/_sources/getting-started/tutorials/03-matrix-multiplication.rst.txt index 496844458..630f0851b 100644 --- a/master/_sources/getting-started/tutorials/03-matrix-multiplication.rst.txt +++ b/master/_sources/getting-started/tutorials/03-matrix-multiplication.rst.txt @@ -459,37 +459,37 @@ We can now compare the performance of our kernel against that of cuBLAS. Here we matmul-performance: M cuBLAS ... Triton Triton (+ LeakyReLU) - 0 256.0 2.730667 ... 3.276800 2.978909 - 1 384.0 7.372800 ... 8.507077 8.507077 - 2 512.0 14.563555 ... 16.384000 16.384000 + 0 256.0 2.730667 ... 2.978909 2.978909 + 1 384.0 7.372800 ... 8.507077 7.899428 + 2 512.0 14.563555 ... 15.420235 15.420235 3 640.0 22.260869 ... 24.380953 24.380953 - 4 768.0 32.768000 ... 35.389441 34.028308 - 5 896.0 39.025776 ... 40.140799 39.025776 + 4 768.0 31.597714 ... 34.028308 34.028308 + 5 896.0 37.971025 ... 40.140799 39.025776 6 1024.0 49.932191 ... 53.773130 52.428801 7 1152.0 45.242181 ... 48.161033 47.396572 8 1280.0 51.200001 ... 57.690139 57.690139 9 1408.0 64.138541 ... 68.147202 66.485074 - 10 1536.0 80.430545 ... 81.355034 78.643199 - 11 1664.0 63.372618 ... 63.372618 62.492442 + 10 1536.0 79.526831 ... 81.355034 78.643199 + 11 1664.0 62.929456 ... 63.372618 62.492442 12 1792.0 72.983276 ... 73.460287 59.467852 - 13 1920.0 68.776119 ... 71.626943 70.892307 - 14 2048.0 73.262953 ... 78.398206 76.959706 - 15 2176.0 83.155572 ... 87.876193 85.998493 - 16 2304.0 68.251065 ... 78.064941 77.057651 - 17 2432.0 71.305746 ... 86.711310 84.115159 - 18 2560.0 77.833728 ... 82.539044 81.512437 - 19 2688.0 83.737433 ... 90.966561 89.044730 - 20 2816.0 83.712490 ... 84.523664 83.712490 - 21 2944.0 82.784108 ... 83.060049 82.646820 - 22 3072.0 82.661468 ... 90.020831 87.787755 - 23 3200.0 85.219705 ... 96.822991 95.952022 - 24 3328.0 83.613586 ... 86.528001 84.200347 - 25 3456.0 81.849303 ... 91.928814 88.595129 - 26 3584.0 86.540320 ... 99.463928 91.563533 - 27 3712.0 82.355598 ... 89.154253 81.283434 - 28 3840.0 84.874902 ... 93.722032 85.399230 - 29 3968.0 93.469117 ... 89.198780 86.175099 - 30 4096.0 92.436452 ... 86.313653 82.241256 + 13 1920.0 69.467336 ... 71.257735 70.892307 + 14 2048.0 73.262953 ... 78.033565 77.314362 + 15 2176.0 83.500614 ... 87.876193 85.998493 + 16 2304.0 68.446623 ... 77.810656 77.057651 + 17 2432.0 71.305746 ... 86.711310 85.393507 + 18 2560.0 77.833728 ... 82.539044 81.715711 + 19 2688.0 83.552988 ... 90.966561 88.422041 + 20 2816.0 84.035084 ... 84.687779 83.712490 + 21 2944.0 79.104810 ... 84.040530 82.646820 + 22 3072.0 78.972252 ... 89.593522 87.924073 + 23 3200.0 83.116885 ... 96.240602 94.674553 + 24 3328.0 82.369902 ... 85.398926 84.795401 + 25 3456.0 81.108217 ... 92.562076 85.767626 + 26 3584.0 87.296493 ... 97.628001 97.416461 + 27 3712.0 84.230479 ... 88.718781 87.552452 + 28 3840.0 80.313725 ... 90.835321 86.875096 + 29 3968.0 89.525997 ... 85.479929 89.855624 + 30 4096.0 91.616198 ... 92.214171 89.359338 [31 rows x 5 columns] @@ -499,7 +499,7 @@ We can now compare the performance of our kernel against that of cuBLAS. Here we .. rst-class:: sphx-glr-timing - **Total running time of the script:** ( 6 minutes 2.032 seconds) + **Total running time of the script:** ( 6 minutes 6.178 seconds) .. _sphx_glr_download_getting-started_tutorials_03-matrix-multiplication.py: diff --git a/master/_sources/getting-started/tutorials/04-low-memory-dropout.rst.txt b/master/_sources/getting-started/tutorials/04-low-memory-dropout.rst.txt index e1a328a7d..0f9d8dc7a 100644 --- a/master/_sources/getting-started/tutorials/04-low-memory-dropout.rst.txt +++ b/master/_sources/getting-started/tutorials/04-low-memory-dropout.rst.txt @@ -240,7 +240,7 @@ References .. rst-class:: sphx-glr-timing - **Total running time of the script:** ( 0 minutes 0.474 seconds) + **Total running time of the script:** ( 0 minutes 0.476 seconds) .. _sphx_glr_download_getting-started_tutorials_04-low-memory-dropout.py: diff --git a/master/_sources/getting-started/tutorials/05-layer-norm.rst.txt b/master/_sources/getting-started/tutorials/05-layer-norm.rst.txt index 40a10a93b..b5e161731 100644 --- a/master/_sources/getting-started/tutorials/05-layer-norm.rst.txt +++ b/master/_sources/getting-started/tutorials/05-layer-norm.rst.txt @@ -39,35 +39,35 @@ Layer Normalization layer-norm: N Triton Torch Apex 0 1024.0 585.142849 277.694907 468.114273 - 1 1536.0 630.153868 323.368435 511.999982 - 2 2048.0 682.666643 334.367358 520.126988 + 1 1536.0 614.400016 323.368435 511.999982 + 2 2048.0 682.666643 337.814445 520.126988 3 2560.0 694.237267 362.477870 512.000013 - 4 3072.0 712.347810 378.092307 501.551037 - 5 3584.0 725.873439 384.859062 451.527536 - 6 4096.0 728.177767 381.023256 451.972420 - 7 4608.0 676.403666 396.387087 431.157877 - 8 5120.0 688.403381 397.669909 422.268057 - 9 5632.0 704.000002 395.228063 415.262685 - 10 6144.0 702.171410 402.885254 411.313806 - 11 6656.0 700.631610 398.861429 400.360920 - 12 7168.0 686.754468 395.475867 386.154893 - 13 7680.0 682.666656 392.587863 386.415087 - 14 8192.0 642.509816 387.786968 368.179771 - 15 8704.0 630.153861 387.922008 378.434774 - 16 9216.0 609.322328 407.337026 383.002605 - 17 9728.0 589.575753 408.524944 383.369452 + 4 3072.0 712.347810 375.206126 496.484863 + 5 3584.0 725.873439 384.859062 455.111115 + 6 4096.0 728.177767 381.023256 458.293714 + 7 4608.0 670.254540 396.387087 421.302872 + 8 5120.0 694.237267 397.669909 420.102563 + 9 5632.0 704.000002 396.969169 413.357796 + 10 6144.0 702.171410 402.885254 409.600010 + 11 6656.0 705.271522 400.360920 400.360920 + 12 7168.0 690.891575 394.116833 387.459443 + 13 7680.0 682.666656 392.587863 387.634072 + 14 8192.0 636.271854 391.259714 371.308771 + 15 8704.0 633.018177 390.095225 380.502740 + 16 9216.0 611.850618 403.989025 381.023249 + 17 9728.0 591.817503 408.524944 382.427505 18 10240.0 566.920437 409.600010 382.803739 - 19 10752.0 549.623009 410.577576 381.445676 - 20 11264.0 534.789310 406.826188 374.686074 - 21 11776.0 523.377770 409.599991 376.831982 - 22 12288.0 516.031509 413.911572 383.251457 - 23 12800.0 505.679014 409.599981 376.470582 - 24 13312.0 494.180982 405.699062 376.976995 - 25 13824.0 481.882350 412.656711 379.389355 - 26 14336.0 471.967074 405.975225 372.969090 - 27 14848.0 461.297068 406.794504 375.304904 - 28 15360.0 454.269882 406.887417 377.511515 - 29 15872.0 447.887117 406.974373 376.225175 + 19 10752.0 551.384634 410.577576 380.601764 + 20 11264.0 536.380957 403.185684 370.831272 + 21 11776.0 523.377770 408.711507 377.587162 + 22 12288.0 517.389457 413.042029 382.505826 + 23 12800.0 505.679014 410.420828 377.163903 + 24 13312.0 494.754948 404.927765 376.976995 + 25 13824.0 482.934503 409.600016 378.092325 + 26 14336.0 471.967074 403.121247 373.576536 + 27 14848.0 461.297068 406.099164 375.304904 + 28 15360.0 454.269882 406.214870 378.092307 + 29 15872.0 447.098578 408.282944 376.783377 @@ -389,7 +389,7 @@ Layer Normalization .. rst-class:: sphx-glr-timing - **Total running time of the script:** ( 5 minutes 22.534 seconds) + **Total running time of the script:** ( 5 minutes 24.882 seconds) .. _sphx_glr_download_getting-started_tutorials_05-layer-norm.py: diff --git a/master/_sources/getting-started/tutorials/sg_execution_times.rst.txt b/master/_sources/getting-started/tutorials/sg_execution_times.rst.txt index 9af8fe93f..adb9e2157 100644 --- a/master/_sources/getting-started/tutorials/sg_execution_times.rst.txt +++ b/master/_sources/getting-started/tutorials/sg_execution_times.rst.txt @@ -5,16 +5,16 @@ Computation times ================= -**16:24.265** total execution time for **getting-started_tutorials** files: +**16:35.469** total execution time for **getting-started_tutorials** files: +---------------------------------------------------------------------------------------------------------+-----------+--------+ -| :ref:`sphx_glr_getting-started_tutorials_03-matrix-multiplication.py` (``03-matrix-multiplication.py``) | 06:02.032 | 0.0 MB | +| :ref:`sphx_glr_getting-started_tutorials_03-matrix-multiplication.py` (``03-matrix-multiplication.py``) | 06:06.178 | 0.0 MB | +---------------------------------------------------------------------------------------------------------+-----------+--------+ -| :ref:`sphx_glr_getting-started_tutorials_05-layer-norm.py` (``05-layer-norm.py``) | 05:22.534 | 0.0 MB | +| :ref:`sphx_glr_getting-started_tutorials_05-layer-norm.py` (``05-layer-norm.py``) | 05:24.882 | 0.0 MB | +---------------------------------------------------------------------------------------------------------+-----------+--------+ -| :ref:`sphx_glr_getting-started_tutorials_02-fused-softmax.py` (``02-fused-softmax.py``) | 03:21.149 | 0.0 MB | +| :ref:`sphx_glr_getting-started_tutorials_02-fused-softmax.py` (``02-fused-softmax.py``) | 03:23.140 | 0.0 MB | +---------------------------------------------------------------------------------------------------------+-----------+--------+ -| :ref:`sphx_glr_getting-started_tutorials_01-vector-add.py` (``01-vector-add.py``) | 01:38.076 | 0.0 MB | +| :ref:`sphx_glr_getting-started_tutorials_01-vector-add.py` (``01-vector-add.py``) | 01:40.793 | 0.0 MB | +---------------------------------------------------------------------------------------------------------+-----------+--------+ -| :ref:`sphx_glr_getting-started_tutorials_04-low-memory-dropout.py` (``04-low-memory-dropout.py``) | 00:00.474 | 0.0 MB | +| :ref:`sphx_glr_getting-started_tutorials_04-low-memory-dropout.py` (``04-low-memory-dropout.py``) | 00:00.476 | 0.0 MB | +---------------------------------------------------------------------------------------------------------+-----------+--------+ diff --git a/master/getting-started/tutorials/01-vector-add.html b/master/getting-started/tutorials/01-vector-add.html index 30e6c0529..286e42028 100644 --- a/master/getting-started/tutorials/01-vector-add.html +++ b/master/getting-started/tutorials/01-vector-add.html @@ -335,12 +335,12 @@ for different problem sizes.

10 4194304.0 780.190482 780.190482 11 8388608.0 812.429770 812.429770 12 16777216.0 833.084721 833.084721 -13 33554432.0 842.004273 843.811163 +13 33554432.0 842.004273 842.004273 14 67108864.0 847.448255 848.362445 15 134217728.0 849.737435 850.656574 -

Total running time of the script: ( 1 minutes 38.076 seconds)

+

Total running time of the script: ( 1 minutes 40.793 seconds)