diff --git a/master/.buildinfo b/master/.buildinfo index 72ec0780c..fb7c524af 100644 --- a/master/.buildinfo +++ b/master/.buildinfo @@ -1,4 +1,4 @@ # Sphinx build info version 1 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. -config: b8d6e434bf0cea6b075c4f17e1d5f683 +config: 129fe056d957bceb5eb7005a87aa6582 tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/master/.doctrees/environment.pickle b/master/.doctrees/environment.pickle index 33f4448bd..22ce875ae 100644 Binary files a/master/.doctrees/environment.pickle and b/master/.doctrees/environment.pickle differ diff --git a/master/.doctrees/getting-started/installation.doctree b/master/.doctrees/getting-started/installation.doctree index 4e8dbde25..69f41b558 100644 Binary files a/master/.doctrees/getting-started/installation.doctree and b/master/.doctrees/getting-started/installation.doctree differ diff --git a/master/.doctrees/getting-started/tutorials/01-vector-add.doctree b/master/.doctrees/getting-started/tutorials/01-vector-add.doctree index b0bac0278..9c6eadb12 100644 Binary files a/master/.doctrees/getting-started/tutorials/01-vector-add.doctree and b/master/.doctrees/getting-started/tutorials/01-vector-add.doctree differ diff --git a/master/.doctrees/getting-started/tutorials/02-fused-softmax.doctree b/master/.doctrees/getting-started/tutorials/02-fused-softmax.doctree index 43aae2e65..f45a0662d 100644 Binary files a/master/.doctrees/getting-started/tutorials/02-fused-softmax.doctree and b/master/.doctrees/getting-started/tutorials/02-fused-softmax.doctree differ diff --git a/master/.doctrees/getting-started/tutorials/03-matrix-multiplication.doctree b/master/.doctrees/getting-started/tutorials/03-matrix-multiplication.doctree index 17ba33c20..4a7f92923 100644 Binary files a/master/.doctrees/getting-started/tutorials/03-matrix-multiplication.doctree and b/master/.doctrees/getting-started/tutorials/03-matrix-multiplication.doctree differ diff --git a/master/.doctrees/getting-started/tutorials/04-low-memory-dropout.doctree b/master/.doctrees/getting-started/tutorials/04-low-memory-dropout.doctree index 83b307eb8..5c33be468 100644 Binary files a/master/.doctrees/getting-started/tutorials/04-low-memory-dropout.doctree and b/master/.doctrees/getting-started/tutorials/04-low-memory-dropout.doctree differ diff --git a/master/.doctrees/getting-started/tutorials/05-layer-norm.doctree b/master/.doctrees/getting-started/tutorials/05-layer-norm.doctree index cd1168bb8..5b612456d 100644 Binary files a/master/.doctrees/getting-started/tutorials/05-layer-norm.doctree and b/master/.doctrees/getting-started/tutorials/05-layer-norm.doctree differ diff --git a/master/.doctrees/getting-started/tutorials/index.doctree b/master/.doctrees/getting-started/tutorials/index.doctree index 1a9951b93..2eb7905fa 100644 Binary files a/master/.doctrees/getting-started/tutorials/index.doctree and b/master/.doctrees/getting-started/tutorials/index.doctree differ diff --git a/master/.doctrees/getting-started/tutorials/sg_execution_times.doctree b/master/.doctrees/getting-started/tutorials/sg_execution_times.doctree index ea2358f7d..c04fcff95 100644 Binary files a/master/.doctrees/getting-started/tutorials/sg_execution_times.doctree and b/master/.doctrees/getting-started/tutorials/sg_execution_times.doctree differ diff --git a/master/.doctrees/index.doctree b/master/.doctrees/index.doctree index 25b617730..1ba38d7dd 100644 Binary files a/master/.doctrees/index.doctree and b/master/.doctrees/index.doctree differ diff --git a/master/.doctrees/programming-guide/chapter-1/introduction.doctree b/master/.doctrees/programming-guide/chapter-1/introduction.doctree index 315edd43c..4a886c025 100644 Binary files a/master/.doctrees/programming-guide/chapter-1/introduction.doctree and b/master/.doctrees/programming-guide/chapter-1/introduction.doctree differ diff --git a/master/.doctrees/programming-guide/chapter-2/related-work.doctree b/master/.doctrees/programming-guide/chapter-2/related-work.doctree index 84f922b20..794bb44cc 100644 Binary files a/master/.doctrees/programming-guide/chapter-2/related-work.doctree and b/master/.doctrees/programming-guide/chapter-2/related-work.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.Config.doctree b/master/.doctrees/python-api/generated/triton.Config.doctree index f7eb29acd..52bf7cc02 100644 Binary files a/master/.doctrees/python-api/generated/triton.Config.doctree and b/master/.doctrees/python-api/generated/triton.Config.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.autotune.doctree b/master/.doctrees/python-api/generated/triton.autotune.doctree index e7f46c00b..0537a7ebd 100644 Binary files a/master/.doctrees/python-api/generated/triton.autotune.doctree and b/master/.doctrees/python-api/generated/triton.autotune.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.heuristics.doctree b/master/.doctrees/python-api/generated/triton.heuristics.doctree index a78e4de13..e2f4ba1e6 100644 Binary files a/master/.doctrees/python-api/generated/triton.heuristics.doctree and b/master/.doctrees/python-api/generated/triton.heuristics.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.jit.doctree b/master/.doctrees/python-api/generated/triton.jit.doctree index 30f4fcc6c..1a3dac78a 100644 Binary files a/master/.doctrees/python-api/generated/triton.jit.doctree and b/master/.doctrees/python-api/generated/triton.jit.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.arange.doctree b/master/.doctrees/python-api/generated/triton.language.arange.doctree index 92a01e204..4337f62ff 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.arange.doctree and b/master/.doctrees/python-api/generated/triton.language.arange.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.atomic_add.doctree b/master/.doctrees/python-api/generated/triton.language.atomic_add.doctree index 8aaf4f591..b223291ff 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.atomic_add.doctree and b/master/.doctrees/python-api/generated/triton.language.atomic_add.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.atomic_cas.doctree b/master/.doctrees/python-api/generated/triton.language.atomic_cas.doctree index ef3a4effa..5999317c0 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.atomic_cas.doctree and b/master/.doctrees/python-api/generated/triton.language.atomic_cas.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.atomic_max.doctree b/master/.doctrees/python-api/generated/triton.language.atomic_max.doctree index 9ffa73716..c5106911e 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.atomic_max.doctree and b/master/.doctrees/python-api/generated/triton.language.atomic_max.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.atomic_min.doctree b/master/.doctrees/python-api/generated/triton.language.atomic_min.doctree index 75ccfeeeb..9d298744f 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.atomic_min.doctree and b/master/.doctrees/python-api/generated/triton.language.atomic_min.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.atomic_xchg.doctree b/master/.doctrees/python-api/generated/triton.language.atomic_xchg.doctree index c41023337..157f636cd 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.atomic_xchg.doctree and b/master/.doctrees/python-api/generated/triton.language.atomic_xchg.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.broadcast_to.doctree b/master/.doctrees/python-api/generated/triton.language.broadcast_to.doctree index 93313e906..94a11a263 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.broadcast_to.doctree and b/master/.doctrees/python-api/generated/triton.language.broadcast_to.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.cos.doctree b/master/.doctrees/python-api/generated/triton.language.cos.doctree index 30b01cea0..b7bef27bd 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.cos.doctree and b/master/.doctrees/python-api/generated/triton.language.cos.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.dot.doctree b/master/.doctrees/python-api/generated/triton.language.dot.doctree index 978b6cfd5..da0d5dafd 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.dot.doctree and b/master/.doctrees/python-api/generated/triton.language.dot.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.exp.doctree b/master/.doctrees/python-api/generated/triton.language.exp.doctree index 4810bf987..73452a9b1 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.exp.doctree and b/master/.doctrees/python-api/generated/triton.language.exp.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.load.doctree b/master/.doctrees/python-api/generated/triton.language.load.doctree index 9916a09eb..c339fded6 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.load.doctree and b/master/.doctrees/python-api/generated/triton.language.load.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.log.doctree b/master/.doctrees/python-api/generated/triton.language.log.doctree index fb1aaf6c8..376937376 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.log.doctree and b/master/.doctrees/python-api/generated/triton.language.log.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.max.doctree b/master/.doctrees/python-api/generated/triton.language.max.doctree index 2ad96c4a5..dfb589ed6 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.max.doctree and b/master/.doctrees/python-api/generated/triton.language.max.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.maximum.doctree b/master/.doctrees/python-api/generated/triton.language.maximum.doctree index 61e96b61b..9884d7611 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.maximum.doctree and b/master/.doctrees/python-api/generated/triton.language.maximum.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.min.doctree b/master/.doctrees/python-api/generated/triton.language.min.doctree index 3c41e2b18..51b559a84 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.min.doctree and b/master/.doctrees/python-api/generated/triton.language.min.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.minimum.doctree b/master/.doctrees/python-api/generated/triton.language.minimum.doctree index 6056221df..853ebc899 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.minimum.doctree and b/master/.doctrees/python-api/generated/triton.language.minimum.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.multiple_of.doctree b/master/.doctrees/python-api/generated/triton.language.multiple_of.doctree index 58458832d..fd15fdd77 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.multiple_of.doctree and b/master/.doctrees/python-api/generated/triton.language.multiple_of.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.num_programs.doctree b/master/.doctrees/python-api/generated/triton.language.num_programs.doctree index 41a8e0126..014fccdf9 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.num_programs.doctree and b/master/.doctrees/python-api/generated/triton.language.num_programs.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.program_id.doctree b/master/.doctrees/python-api/generated/triton.language.program_id.doctree index 336e6c664..27d26fa16 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.program_id.doctree and b/master/.doctrees/python-api/generated/triton.language.program_id.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.rand.doctree b/master/.doctrees/python-api/generated/triton.language.rand.doctree index 496c4240f..7056cffff 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.rand.doctree and b/master/.doctrees/python-api/generated/triton.language.rand.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.randint.doctree b/master/.doctrees/python-api/generated/triton.language.randint.doctree index 148cc9d10..1d409a3d6 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.randint.doctree and b/master/.doctrees/python-api/generated/triton.language.randint.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.randint4x.doctree b/master/.doctrees/python-api/generated/triton.language.randint4x.doctree index 0ac6badd0..20f5fbc6d 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.randint4x.doctree and b/master/.doctrees/python-api/generated/triton.language.randint4x.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.randn.doctree b/master/.doctrees/python-api/generated/triton.language.randn.doctree index af2776a50..a7d702e74 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.randn.doctree and b/master/.doctrees/python-api/generated/triton.language.randn.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.ravel.doctree b/master/.doctrees/python-api/generated/triton.language.ravel.doctree index 195deed72..b124d3a4f 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.ravel.doctree and b/master/.doctrees/python-api/generated/triton.language.ravel.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.reshape.doctree b/master/.doctrees/python-api/generated/triton.language.reshape.doctree index 4f6877a32..210c3b64a 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.reshape.doctree and b/master/.doctrees/python-api/generated/triton.language.reshape.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.sigmoid.doctree b/master/.doctrees/python-api/generated/triton.language.sigmoid.doctree index 7dc52f4b9..8f55d7fb9 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.sigmoid.doctree and b/master/.doctrees/python-api/generated/triton.language.sigmoid.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.sin.doctree b/master/.doctrees/python-api/generated/triton.language.sin.doctree index 728882a1a..952ccff31 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.sin.doctree and b/master/.doctrees/python-api/generated/triton.language.sin.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.softmax.doctree b/master/.doctrees/python-api/generated/triton.language.softmax.doctree index 5f56e0358..931e58709 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.softmax.doctree and b/master/.doctrees/python-api/generated/triton.language.softmax.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.sqrt.doctree b/master/.doctrees/python-api/generated/triton.language.sqrt.doctree index 2ba75d669..7b143a344 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.sqrt.doctree and b/master/.doctrees/python-api/generated/triton.language.sqrt.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.store.doctree b/master/.doctrees/python-api/generated/triton.language.store.doctree index 9cac4421d..9a8088019 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.store.doctree and b/master/.doctrees/python-api/generated/triton.language.store.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.sum.doctree b/master/.doctrees/python-api/generated/triton.language.sum.doctree index ac764a42b..deda59db3 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.sum.doctree and b/master/.doctrees/python-api/generated/triton.language.sum.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.where.doctree b/master/.doctrees/python-api/generated/triton.language.where.doctree index 5007b347d..eb90ee6ab 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.where.doctree and b/master/.doctrees/python-api/generated/triton.language.where.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.language.zeros.doctree b/master/.doctrees/python-api/generated/triton.language.zeros.doctree index 490c97888..dfa29de29 100644 Binary files a/master/.doctrees/python-api/generated/triton.language.zeros.doctree and b/master/.doctrees/python-api/generated/triton.language.zeros.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.testing.Benchmark.doctree b/master/.doctrees/python-api/generated/triton.testing.Benchmark.doctree index 79f20c11c..493a0badd 100644 Binary files a/master/.doctrees/python-api/generated/triton.testing.Benchmark.doctree and b/master/.doctrees/python-api/generated/triton.testing.Benchmark.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.testing.do_bench.doctree b/master/.doctrees/python-api/generated/triton.testing.do_bench.doctree index fd91dae96..b95b05649 100644 Binary files a/master/.doctrees/python-api/generated/triton.testing.do_bench.doctree and b/master/.doctrees/python-api/generated/triton.testing.do_bench.doctree differ diff --git a/master/.doctrees/python-api/generated/triton.testing.perf_report.doctree b/master/.doctrees/python-api/generated/triton.testing.perf_report.doctree index 247f09af9..876e09aa2 100644 Binary files a/master/.doctrees/python-api/generated/triton.testing.perf_report.doctree and b/master/.doctrees/python-api/generated/triton.testing.perf_report.doctree differ diff --git a/master/.doctrees/python-api/triton.doctree b/master/.doctrees/python-api/triton.doctree index db0f4c5ae..fbb48fd5d 100644 Binary files a/master/.doctrees/python-api/triton.doctree and b/master/.doctrees/python-api/triton.doctree differ diff --git a/master/.doctrees/python-api/triton.language.doctree b/master/.doctrees/python-api/triton.language.doctree index a47d188f5..1665f687c 100644 Binary files a/master/.doctrees/python-api/triton.language.doctree and b/master/.doctrees/python-api/triton.language.doctree differ diff --git a/master/.doctrees/python-api/triton.testing.doctree b/master/.doctrees/python-api/triton.testing.doctree index 04558c930..3e8e256ce 100644 Binary files a/master/.doctrees/python-api/triton.testing.doctree and b/master/.doctrees/python-api/triton.testing.doctree differ diff --git a/master/_downloads/662999063954282841dc90b8945f85ce/tutorials_jupyter.zip b/master/_downloads/662999063954282841dc90b8945f85ce/tutorials_jupyter.zip index 2563b5a09..7dec37ece 100644 Binary files a/master/_downloads/662999063954282841dc90b8945f85ce/tutorials_jupyter.zip and b/master/_downloads/662999063954282841dc90b8945f85ce/tutorials_jupyter.zip differ diff --git a/master/_downloads/763344228ae6bc253ed1a6cf586aa30d/tutorials_python.zip b/master/_downloads/763344228ae6bc253ed1a6cf586aa30d/tutorials_python.zip index 9944c8df4..93b0c5a77 100644 Binary files a/master/_downloads/763344228ae6bc253ed1a6cf586aa30d/tutorials_python.zip and b/master/_downloads/763344228ae6bc253ed1a6cf586aa30d/tutorials_python.zip differ diff --git a/master/_images/sphx_glr_01-vector-add_001.png b/master/_images/sphx_glr_01-vector-add_001.png index 38343430d..dcf198269 100644 Binary files a/master/_images/sphx_glr_01-vector-add_001.png and b/master/_images/sphx_glr_01-vector-add_001.png differ diff --git a/master/_images/sphx_glr_01-vector-add_thumb.png b/master/_images/sphx_glr_01-vector-add_thumb.png index 46f161b20..b02d989c3 100644 Binary files a/master/_images/sphx_glr_01-vector-add_thumb.png and b/master/_images/sphx_glr_01-vector-add_thumb.png differ diff --git a/master/_images/sphx_glr_02-fused-softmax_001.png b/master/_images/sphx_glr_02-fused-softmax_001.png index 3d6ef7ae2..ff114e19b 100644 Binary files a/master/_images/sphx_glr_02-fused-softmax_001.png and b/master/_images/sphx_glr_02-fused-softmax_001.png differ diff --git a/master/_images/sphx_glr_02-fused-softmax_thumb.png b/master/_images/sphx_glr_02-fused-softmax_thumb.png index 3e9f9170e..3b1becdc4 100644 Binary files a/master/_images/sphx_glr_02-fused-softmax_thumb.png and b/master/_images/sphx_glr_02-fused-softmax_thumb.png differ diff --git a/master/_images/sphx_glr_03-matrix-multiplication_001.png b/master/_images/sphx_glr_03-matrix-multiplication_001.png index d989c4e45..f1ba6c1a7 100644 Binary files a/master/_images/sphx_glr_03-matrix-multiplication_001.png and b/master/_images/sphx_glr_03-matrix-multiplication_001.png differ diff --git a/master/_images/sphx_glr_03-matrix-multiplication_thumb.png b/master/_images/sphx_glr_03-matrix-multiplication_thumb.png index d14b6023d..02ff4e548 100644 Binary files a/master/_images/sphx_glr_03-matrix-multiplication_thumb.png and b/master/_images/sphx_glr_03-matrix-multiplication_thumb.png differ diff --git a/master/_images/sphx_glr_05-layer-norm_001.png b/master/_images/sphx_glr_05-layer-norm_001.png index d72fdacdc..6ce0b41cc 100644 Binary files a/master/_images/sphx_glr_05-layer-norm_001.png and b/master/_images/sphx_glr_05-layer-norm_001.png differ diff --git a/master/_images/sphx_glr_05-layer-norm_thumb.png b/master/_images/sphx_glr_05-layer-norm_thumb.png index f8546e407..787da668f 100644 Binary files a/master/_images/sphx_glr_05-layer-norm_thumb.png and b/master/_images/sphx_glr_05-layer-norm_thumb.png differ diff --git a/master/_sources/getting-started/tutorials/01-vector-add.rst.txt b/master/_sources/getting-started/tutorials/01-vector-add.rst.txt index 5ddde3123..fe069bd79 100644 --- a/master/_sources/getting-started/tutorials/01-vector-add.rst.txt +++ b/master/_sources/getting-started/tutorials/01-vector-add.rst.txt @@ -232,20 +232,20 @@ We can now run the decorated function above. Pass `print_data=True` to see the p vector-add-performance: size Triton Torch - 0 4096.0 9.600000 9.600000 + 0 4096.0 8.000000 9.600000 1 8192.0 19.200000 19.200000 2 16384.0 38.400001 38.400001 3 32768.0 76.800002 76.800002 4 65536.0 127.999995 127.999995 5 131072.0 219.428568 219.428568 - 6 262144.0 341.333321 341.333321 + 6 262144.0 341.333321 384.000001 7 524288.0 472.615390 472.615390 8 1048576.0 614.400016 614.400016 - 9 2097152.0 722.823517 702.171410 + 9 2097152.0 722.823517 722.823517 10 4194304.0 780.190482 780.190482 11 8388608.0 812.429770 812.429770 12 16777216.0 833.084721 833.084721 - 13 33554432.0 842.004273 843.811163 + 13 33554432.0 842.004273 842.004273 14 67108864.0 847.448255 848.362445 15 134217728.0 849.737435 850.656574 @@ -255,7 +255,7 @@ We can now run the decorated function above. Pass `print_data=True` to see the p .. rst-class:: sphx-glr-timing - **Total running time of the script:** ( 1 minutes 47.140 seconds) + **Total running time of the script:** ( 1 minutes 46.935 seconds) .. _sphx_glr_download_getting-started_tutorials_01-vector-add.py: diff --git a/master/_sources/getting-started/tutorials/02-fused-softmax.rst.txt b/master/_sources/getting-started/tutorials/02-fused-softmax.rst.txt index b7b7fc8f4..54858bd9f 100644 --- a/master/_sources/getting-started/tutorials/02-fused-softmax.rst.txt +++ b/master/_sources/getting-started/tutorials/02-fused-softmax.rst.txt @@ -278,17 +278,17 @@ We will then compare its performance against (1) :code:`torch.softmax` and (2) t softmax-performance: N Triton Torch (native) Torch (jit) - 0 256.0 546.133347 512.000001 186.181817 - 1 384.0 585.142862 585.142862 151.703707 + 0 256.0 512.000001 546.133347 188.321838 + 1 384.0 585.142862 558.545450 151.703707 2 512.0 655.360017 606.814814 154.566038 3 640.0 682.666684 640.000002 158.759699 - 4 768.0 722.823517 664.216187 162.754967 + 4 768.0 722.823517 664.216187 163.839992 .. ... ... ... ... - 93 12160.0 814.058574 405.755985 198.936606 + 93 12160.0 814.058574 405.755985 198.733401 94 12288.0 814.111783 415.661740 198.995960 - 95 12416.0 814.163950 411.296057 198.805107 - 96 12544.0 814.214963 412.971190 198.913776 - 97 12672.0 814.265046 411.888249 198.971549 + 95 12416.0 814.163950 411.722274 198.705656 + 96 12544.0 814.214963 412.971190 198.815254 + 97 12672.0 814.265046 411.679167 198.971549 [98 rows x 4 columns] @@ -306,7 +306,7 @@ In the above plot, we can see that: .. rst-class:: sphx-glr-timing - **Total running time of the script:** ( 3 minutes 27.130 seconds) + **Total running time of the script:** ( 3 minutes 27.513 seconds) .. _sphx_glr_download_getting-started_tutorials_02-fused-softmax.py: diff --git a/master/_sources/getting-started/tutorials/03-matrix-multiplication.rst.txt b/master/_sources/getting-started/tutorials/03-matrix-multiplication.rst.txt index 83692fe09..f3a2dc1a2 100644 --- a/master/_sources/getting-started/tutorials/03-matrix-multiplication.rst.txt +++ b/master/_sources/getting-started/tutorials/03-matrix-multiplication.rst.txt @@ -458,37 +458,37 @@ We can now compare the performance of our kernel against that of cuBLAS. Here we matmul-performance: M cuBLAS ... Triton Triton (+ LeakyReLU) - 0 256.0 2.730667 ... 3.276800 2.978909 - 1 384.0 7.372800 ... 8.507077 8.507077 - 2 512.0 14.563555 ... 16.384000 16.384000 + 0 256.0 2.730667 ... 2.978909 2.978909 + 1 384.0 7.372800 ... 7.899428 7.899428 + 2 512.0 14.563555 ... 15.420235 15.420235 3 640.0 22.260869 ... 24.380953 24.380953 4 768.0 32.768000 ... 35.389441 34.028308 - 5 896.0 37.971025 ... 40.140799 40.140799 - 6 1024.0 49.932191 ... 53.773130 53.773130 + 5 896.0 37.971025 ... 40.140799 39.025776 + 6 1024.0 49.932191 ... 53.773130 52.428801 7 1152.0 45.242181 ... 48.161033 47.396572 8 1280.0 51.200001 ... 57.690139 57.690139 - 9 1408.0 64.138541 ... 68.147202 68.147202 - 10 1536.0 80.430545 ... 81.355034 79.526831 - 11 1664.0 62.929456 ... 63.372618 62.929456 + 9 1408.0 64.138541 ... 69.009825 68.147202 + 10 1536.0 80.430545 ... 80.430545 80.430545 + 11 1664.0 63.372618 ... 63.372618 63.372618 12 1792.0 72.983276 ... 63.499573 63.142831 - 13 1920.0 68.776119 ... 71.257735 71.257735 + 13 1920.0 69.120002 ... 71.626943 70.892307 14 2048.0 73.262953 ... 78.033565 77.672296 15 2176.0 83.155572 ... 87.115360 86.739860 - 16 2304.0 68.251065 ... 78.064941 77.810656 + 16 2304.0 68.251065 ... 78.064941 77.558029 17 2432.0 71.305746 ... 75.726318 75.522751 - 18 2560.0 77.833728 ... 82.125311 81.715711 - 19 2688.0 83.369354 ... 90.748936 90.748936 - 20 2816.0 82.290955 ... 84.523664 84.360174 - 21 2944.0 82.373605 ... 83.617504 82.373605 - 22 3072.0 82.062468 ... 89.735509 88.473602 - 23 3200.0 80.503145 ... 94.674553 95.380032 - 24 3328.0 82.369902 ... 86.320498 86.736504 - 25 3456.0 78.578525 ... 84.332184 87.823058 - 26 3584.0 87.381330 ... 99.684470 99.463928 - 27 3712.0 83.247783 ... 89.916604 87.018592 - 28 3840.0 84.874902 ... 92.275341 86.332554 - 29 3968.0 92.864488 ... 86.664727 92.512459 - 30 4096.0 86.592080 ... 87.552332 93.271527 + 18 2560.0 77.833728 ... 82.125311 82.125311 + 19 2688.0 83.552988 ... 90.316801 90.966561 + 20 2816.0 83.233226 ... 83.233226 84.278666 + 21 2944.0 81.166173 ... 83.617504 84.182483 + 22 3072.0 81.825298 ... 89.451983 89.451983 + 23 3200.0 83.989503 ... 96.530922 94.256261 + 24 3328.0 83.905938 ... 86.946008 86.217120 + 25 3456.0 80.300370 ... 92.350019 88.497878 + 26 3584.0 87.381330 ... 98.053863 99.463928 + 27 3712.0 85.528545 ... 87.629253 88.326564 + 28 3840.0 82.716526 ... 88.971840 91.625518 + 29 3968.0 87.004591 ... 92.652949 85.841672 + 30 4096.0 93.727466 ... 89.092421 87.495257 [31 rows x 5 columns] @@ -498,7 +498,7 @@ We can now compare the performance of our kernel against that of cuBLAS. Here we .. rst-class:: sphx-glr-timing - **Total running time of the script:** ( 6 minutes 16.209 seconds) + **Total running time of the script:** ( 6 minutes 7.255 seconds) .. _sphx_glr_download_getting-started_tutorials_03-matrix-multiplication.py: diff --git a/master/_sources/getting-started/tutorials/04-low-memory-dropout.rst.txt b/master/_sources/getting-started/tutorials/04-low-memory-dropout.rst.txt index d1778b13a..1644ef41a 100644 --- a/master/_sources/getting-started/tutorials/04-low-memory-dropout.rst.txt +++ b/master/_sources/getting-started/tutorials/04-low-memory-dropout.rst.txt @@ -240,7 +240,7 @@ References .. rst-class:: sphx-glr-timing - **Total running time of the script:** ( 0 minutes 0.012 seconds) + **Total running time of the script:** ( 0 minutes 0.014 seconds) .. _sphx_glr_download_getting-started_tutorials_04-low-memory-dropout.py: diff --git a/master/_sources/getting-started/tutorials/05-layer-norm.rst.txt b/master/_sources/getting-started/tutorials/05-layer-norm.rst.txt index 661a15717..e8154c5c6 100644 --- a/master/_sources/getting-started/tutorials/05-layer-norm.rst.txt +++ b/master/_sources/getting-started/tutorials/05-layer-norm.rst.txt @@ -38,36 +38,36 @@ Layer Normalization layer-norm-backward: N Triton Torch Apex - 0 1024.0 361.411758 99.497980 311.088617 - 1 1536.0 405.098894 133.083026 344.523365 - 2 2048.0 496.484863 158.045011 334.367350 - 3 2560.0 461.954908 182.857144 330.322572 - 4 3072.0 519.211251 191.501303 320.556515 - 5 3584.0 554.941930 208.271186 308.301075 - 6 4096.0 564.965515 220.412561 298.796351 - 7 4608.0 500.416301 232.336141 286.507772 - 8 5120.0 529.655159 243.809526 286.433562 - 9 5632.0 542.843364 244.426754 291.310338 - 10 6144.0 548.163546 251.202731 286.879370 - 11 6656.0 534.260858 256.410903 286.793541 - 12 7168.0 513.528374 253.360829 277.470965 - 13 7680.0 486.332448 266.743841 284.884090 - 14 8192.0 468.114289 258.694729 277.694924 - 15 8704.0 414.476194 267.472468 286.158893 - 16 9216.0 428.651187 273.066667 289.507855 - 17 9728.0 438.857162 279.942444 288.593329 - 18 10240.0 445.217381 287.102804 290.153487 - 19 10752.0 427.231788 246.699797 289.941565 - 20 11264.0 427.071098 246.432094 288.204696 - 21 11776.0 421.826879 249.667843 289.573776 - 22 12288.0 417.131525 254.673582 294.323369 - 23 12800.0 411.244989 253.989249 290.084977 - 24 13312.0 409.599999 253.160074 289.391298 - 25 13824.0 405.842204 256.991469 292.313649 - 26 14336.0 395.475867 255.619613 288.402346 - 27 14848.0 383.999990 256.922861 288.544136 - 28 15360.0 380.041240 258.513318 288.225185 - 29 15872.0 372.000001 261.446802 289.239176 + 0 1024.0 361.411758 97.912354 303.407414 + 1 1536.0 405.098894 134.540150 341.333333 + 2 2048.0 491.520012 161.154101 334.367350 + 3 2560.0 465.454542 181.238943 330.322572 + 4 3072.0 519.211251 192.501302 323.368415 + 5 3584.0 558.545477 208.271186 311.652167 + 6 4096.0 564.965515 220.907859 298.796351 + 7 4608.0 502.690905 232.825259 287.251954 + 8 5120.0 527.381977 242.366855 284.444444 + 9 5632.0 542.843364 243.107920 289.438969 + 10 6144.0 546.133354 248.661056 286.322318 + 11 6656.0 527.207907 256.000009 285.767438 + 12 7168.0 503.017523 259.867079 284.821192 + 13 7680.0 482.513091 263.314295 280.121579 + 14 8192.0 463.698115 266.767970 284.526763 + 15 8704.0 414.476194 267.815384 284.599455 + 16 9216.0 427.822068 271.724806 287.999990 + 17 9728.0 437.213490 280.615388 290.027323 + 18 10240.0 446.836366 286.433562 290.496460 + 19 10752.0 429.364408 246.935876 290.267711 + 20 11264.0 426.397479 245.313973 286.980888 + 21 11776.0 420.571432 249.667843 288.981596 + 22 12288.0 416.542386 254.673582 294.323369 + 23 12800.0 411.244989 253.779426 289.811310 + 24 13312.0 409.075539 252.959629 290.443638 + 25 13824.0 405.098897 257.390218 292.056329 + 26 14336.0 395.021816 255.051144 286.719986 + 27 14848.0 386.080180 257.852379 289.481735 + 28 15360.0 380.433442 257.970599 286.879376 + 29 15872.0 371.094003 261.626369 289.679087 @@ -339,7 +339,7 @@ Layer Normalization .. rst-class:: sphx-glr-timing - **Total running time of the script:** ( 2 minutes 13.538 seconds) + **Total running time of the script:** ( 2 minutes 14.074 seconds) .. _sphx_glr_download_getting-started_tutorials_05-layer-norm.py: diff --git a/master/_sources/getting-started/tutorials/sg_execution_times.rst.txt b/master/_sources/getting-started/tutorials/sg_execution_times.rst.txt index ed0d5b820..4b7972935 100644 --- a/master/_sources/getting-started/tutorials/sg_execution_times.rst.txt +++ b/master/_sources/getting-started/tutorials/sg_execution_times.rst.txt @@ -5,16 +5,16 @@ Computation times ================= -**13:44.030** total execution time for **getting-started_tutorials** files: +**13:35.790** total execution time for **getting-started_tutorials** files: +---------------------------------------------------------------------------------------------------------+-----------+--------+ -| :ref:`sphx_glr_getting-started_tutorials_03-matrix-multiplication.py` (``03-matrix-multiplication.py``) | 06:16.209 | 0.0 MB | +| :ref:`sphx_glr_getting-started_tutorials_03-matrix-multiplication.py` (``03-matrix-multiplication.py``) | 06:07.255 | 0.0 MB | +---------------------------------------------------------------------------------------------------------+-----------+--------+ -| :ref:`sphx_glr_getting-started_tutorials_02-fused-softmax.py` (``02-fused-softmax.py``) | 03:27.130 | 0.0 MB | +| :ref:`sphx_glr_getting-started_tutorials_02-fused-softmax.py` (``02-fused-softmax.py``) | 03:27.513 | 0.0 MB | +---------------------------------------------------------------------------------------------------------+-----------+--------+ -| :ref:`sphx_glr_getting-started_tutorials_05-layer-norm.py` (``05-layer-norm.py``) | 02:13.538 | 0.0 MB | +| :ref:`sphx_glr_getting-started_tutorials_05-layer-norm.py` (``05-layer-norm.py``) | 02:14.074 | 0.0 MB | +---------------------------------------------------------------------------------------------------------+-----------+--------+ -| :ref:`sphx_glr_getting-started_tutorials_01-vector-add.py` (``01-vector-add.py``) | 01:47.140 | 0.0 MB | +| :ref:`sphx_glr_getting-started_tutorials_01-vector-add.py` (``01-vector-add.py``) | 01:46.935 | 0.0 MB | +---------------------------------------------------------------------------------------------------------+-----------+--------+ -| :ref:`sphx_glr_getting-started_tutorials_04-low-memory-dropout.py` (``04-low-memory-dropout.py``) | 00:00.012 | 0.0 MB | +| :ref:`sphx_glr_getting-started_tutorials_04-low-memory-dropout.py` (``04-low-memory-dropout.py``) | 00:00.014 | 0.0 MB | +---------------------------------------------------------------------------------------------------------+-----------+--------+ diff --git a/master/getting-started/tutorials/01-vector-add.html b/master/getting-started/tutorials/01-vector-add.html index 58398594e..2f023868b 100644 --- a/master/getting-started/tutorials/01-vector-add.html +++ b/master/getting-started/tutorials/01-vector-add.html @@ -322,25 +322,25 @@ for different problem sizes.

Out:

vector-add-performance:
            size      Triton       Torch
-0        4096.0    9.600000    9.600000
+0        4096.0    8.000000    9.600000
 1        8192.0   19.200000   19.200000
 2       16384.0   38.400001   38.400001
 3       32768.0   76.800002   76.800002
 4       65536.0  127.999995  127.999995
 5      131072.0  219.428568  219.428568
-6      262144.0  341.333321  341.333321
+6      262144.0  341.333321  384.000001
 7      524288.0  472.615390  472.615390
 8     1048576.0  614.400016  614.400016
-9     2097152.0  722.823517  702.171410
+9     2097152.0  722.823517  722.823517
 10    4194304.0  780.190482  780.190482
 11    8388608.0  812.429770  812.429770
 12   16777216.0  833.084721  833.084721
-13   33554432.0  842.004273  843.811163
+13   33554432.0  842.004273  842.004273
 14   67108864.0  847.448255  848.362445
 15  134217728.0  849.737435  850.656574
 
-

Total running time of the script: ( 1 minutes 47.140 seconds)

+

Total running time of the script: ( 1 minutes 46.935 seconds)