diff --git a/v1.1.2/.buildinfo b/v1.1.2/.buildinfo index 6b5ddd235..d9d1e670e 100644 --- a/v1.1.2/.buildinfo +++ b/v1.1.2/.buildinfo @@ -1,4 +1,4 @@ # Sphinx build info version 1 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. -config: 3b893644f95de1dc720267b27aa33f8c +config: f3d1ccbddb6a63c7385d319834b5692f tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/v1.1.2/.doctrees/environment.pickle b/v1.1.2/.doctrees/environment.pickle index 863fac586..d2ddfd26c 100644 Binary files a/v1.1.2/.doctrees/environment.pickle and b/v1.1.2/.doctrees/environment.pickle differ diff --git a/v1.1.2/.doctrees/getting-started/installation.doctree b/v1.1.2/.doctrees/getting-started/installation.doctree index 0fdef684e..1503ed4ce 100644 Binary files a/v1.1.2/.doctrees/getting-started/installation.doctree and b/v1.1.2/.doctrees/getting-started/installation.doctree differ diff --git a/v1.1.2/.doctrees/getting-started/tutorials/01-vector-add.doctree b/v1.1.2/.doctrees/getting-started/tutorials/01-vector-add.doctree index 8debe5c1f..41ea5f71e 100644 Binary files a/v1.1.2/.doctrees/getting-started/tutorials/01-vector-add.doctree and b/v1.1.2/.doctrees/getting-started/tutorials/01-vector-add.doctree differ diff --git a/v1.1.2/.doctrees/getting-started/tutorials/02-fused-softmax.doctree b/v1.1.2/.doctrees/getting-started/tutorials/02-fused-softmax.doctree index fa1231a08..84867e6ad 100644 Binary files a/v1.1.2/.doctrees/getting-started/tutorials/02-fused-softmax.doctree and b/v1.1.2/.doctrees/getting-started/tutorials/02-fused-softmax.doctree differ diff --git a/v1.1.2/.doctrees/getting-started/tutorials/03-matrix-multiplication.doctree b/v1.1.2/.doctrees/getting-started/tutorials/03-matrix-multiplication.doctree index 2b8b0a6b4..6e9c8a4ae 100644 Binary files a/v1.1.2/.doctrees/getting-started/tutorials/03-matrix-multiplication.doctree and b/v1.1.2/.doctrees/getting-started/tutorials/03-matrix-multiplication.doctree differ diff --git a/v1.1.2/.doctrees/getting-started/tutorials/04-low-memory-dropout.doctree b/v1.1.2/.doctrees/getting-started/tutorials/04-low-memory-dropout.doctree index 0705549d8..38db01f44 100644 Binary files a/v1.1.2/.doctrees/getting-started/tutorials/04-low-memory-dropout.doctree and b/v1.1.2/.doctrees/getting-started/tutorials/04-low-memory-dropout.doctree differ diff --git a/v1.1.2/.doctrees/getting-started/tutorials/05-layer-norm.doctree b/v1.1.2/.doctrees/getting-started/tutorials/05-layer-norm.doctree index a3dddb06c..bc40e4bbf 100644 Binary files a/v1.1.2/.doctrees/getting-started/tutorials/05-layer-norm.doctree and b/v1.1.2/.doctrees/getting-started/tutorials/05-layer-norm.doctree differ diff --git a/v1.1.2/.doctrees/getting-started/tutorials/index.doctree b/v1.1.2/.doctrees/getting-started/tutorials/index.doctree index 28c8680ba..3725b99c1 100644 Binary files a/v1.1.2/.doctrees/getting-started/tutorials/index.doctree and b/v1.1.2/.doctrees/getting-started/tutorials/index.doctree differ diff --git a/v1.1.2/.doctrees/getting-started/tutorials/sg_execution_times.doctree b/v1.1.2/.doctrees/getting-started/tutorials/sg_execution_times.doctree index 7d5bee5f6..eab12f873 100644 Binary files a/v1.1.2/.doctrees/getting-started/tutorials/sg_execution_times.doctree and b/v1.1.2/.doctrees/getting-started/tutorials/sg_execution_times.doctree differ diff --git a/v1.1.2/.doctrees/index.doctree b/v1.1.2/.doctrees/index.doctree index f193ffc6a..2a33d2621 100644 Binary files a/v1.1.2/.doctrees/index.doctree and b/v1.1.2/.doctrees/index.doctree differ diff --git a/v1.1.2/.doctrees/programming-guide/chapter-1/introduction.doctree b/v1.1.2/.doctrees/programming-guide/chapter-1/introduction.doctree index a434ed199..510b9b8a9 100644 Binary files a/v1.1.2/.doctrees/programming-guide/chapter-1/introduction.doctree and b/v1.1.2/.doctrees/programming-guide/chapter-1/introduction.doctree differ diff --git a/v1.1.2/.doctrees/programming-guide/chapter-2/related-work.doctree b/v1.1.2/.doctrees/programming-guide/chapter-2/related-work.doctree index 83316a9bd..795999837 100644 Binary files a/v1.1.2/.doctrees/programming-guide/chapter-2/related-work.doctree and b/v1.1.2/.doctrees/programming-guide/chapter-2/related-work.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.Config.doctree b/v1.1.2/.doctrees/python-api/generated/triton.Config.doctree index 6a77604fa..65c02d840 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.Config.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.Config.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.autotune.doctree b/v1.1.2/.doctrees/python-api/generated/triton.autotune.doctree index 49e7699f8..8c5c9f20e 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.autotune.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.autotune.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.heuristics.doctree b/v1.1.2/.doctrees/python-api/generated/triton.heuristics.doctree index db24ac7d8..31bae86b6 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.heuristics.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.heuristics.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.jit.doctree b/v1.1.2/.doctrees/python-api/generated/triton.jit.doctree index ee6d06702..290957ae4 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.jit.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.jit.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.language.arange.doctree b/v1.1.2/.doctrees/python-api/generated/triton.language.arange.doctree index 954e0868f..37e742b57 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.language.arange.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.language.arange.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.language.atomic_add.doctree b/v1.1.2/.doctrees/python-api/generated/triton.language.atomic_add.doctree index b6e56a009..a2edc598f 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.language.atomic_add.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.language.atomic_add.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.language.atomic_cas.doctree b/v1.1.2/.doctrees/python-api/generated/triton.language.atomic_cas.doctree index 42e897fb6..63d0645c2 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.language.atomic_cas.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.language.atomic_cas.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.language.atomic_max.doctree b/v1.1.2/.doctrees/python-api/generated/triton.language.atomic_max.doctree index b57a4db1e..904b06d83 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.language.atomic_max.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.language.atomic_max.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.language.atomic_min.doctree b/v1.1.2/.doctrees/python-api/generated/triton.language.atomic_min.doctree index f895bf74e..3f791191d 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.language.atomic_min.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.language.atomic_min.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.language.atomic_xchg.doctree b/v1.1.2/.doctrees/python-api/generated/triton.language.atomic_xchg.doctree index 3c0b31f79..fdfd4c6ca 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.language.atomic_xchg.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.language.atomic_xchg.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.language.broadcast_to.doctree b/v1.1.2/.doctrees/python-api/generated/triton.language.broadcast_to.doctree index 8b18763ff..15b708cee 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.language.broadcast_to.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.language.broadcast_to.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.language.cos.doctree b/v1.1.2/.doctrees/python-api/generated/triton.language.cos.doctree index b4f670dbc..55ee49cdf 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.language.cos.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.language.cos.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.language.dot.doctree b/v1.1.2/.doctrees/python-api/generated/triton.language.dot.doctree index 08738f005..84707756f 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.language.dot.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.language.dot.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.language.exp.doctree b/v1.1.2/.doctrees/python-api/generated/triton.language.exp.doctree index 64ac21de7..3810c72ce 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.language.exp.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.language.exp.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.language.load.doctree b/v1.1.2/.doctrees/python-api/generated/triton.language.load.doctree index e236a2124..f8e6ef6e8 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.language.load.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.language.load.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.language.log.doctree b/v1.1.2/.doctrees/python-api/generated/triton.language.log.doctree index 2608267d0..7ae6e3b51 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.language.log.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.language.log.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.language.max.doctree b/v1.1.2/.doctrees/python-api/generated/triton.language.max.doctree index e8a59e0b9..f57fc0335 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.language.max.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.language.max.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.language.maximum.doctree b/v1.1.2/.doctrees/python-api/generated/triton.language.maximum.doctree index 92839b798..f8792863e 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.language.maximum.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.language.maximum.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.language.min.doctree b/v1.1.2/.doctrees/python-api/generated/triton.language.min.doctree index ba8bf5c24..7cf8347f0 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.language.min.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.language.min.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.language.minimum.doctree b/v1.1.2/.doctrees/python-api/generated/triton.language.minimum.doctree index 02df0e795..6aed502c7 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.language.minimum.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.language.minimum.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.language.multiple_of.doctree b/v1.1.2/.doctrees/python-api/generated/triton.language.multiple_of.doctree index 77e2164dc..ffe631d26 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.language.multiple_of.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.language.multiple_of.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.language.num_programs.doctree b/v1.1.2/.doctrees/python-api/generated/triton.language.num_programs.doctree index a2167f0e2..815813827 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.language.num_programs.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.language.num_programs.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.language.program_id.doctree b/v1.1.2/.doctrees/python-api/generated/triton.language.program_id.doctree index d902eb545..addcc8b2f 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.language.program_id.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.language.program_id.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.language.rand.doctree b/v1.1.2/.doctrees/python-api/generated/triton.language.rand.doctree index 59f8b2f58..892b001c3 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.language.rand.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.language.rand.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.language.randint.doctree b/v1.1.2/.doctrees/python-api/generated/triton.language.randint.doctree index 427b18daf..40c1272c1 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.language.randint.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.language.randint.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.language.randint4x.doctree b/v1.1.2/.doctrees/python-api/generated/triton.language.randint4x.doctree index a3a62c895..aaa1546a5 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.language.randint4x.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.language.randint4x.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.language.randn.doctree b/v1.1.2/.doctrees/python-api/generated/triton.language.randn.doctree index 1c17f99ac..794588a46 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.language.randn.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.language.randn.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.language.ravel.doctree b/v1.1.2/.doctrees/python-api/generated/triton.language.ravel.doctree index 569216fd6..ae8910ed9 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.language.ravel.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.language.ravel.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.language.reshape.doctree b/v1.1.2/.doctrees/python-api/generated/triton.language.reshape.doctree index 7d68e69b2..fefd08066 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.language.reshape.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.language.reshape.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.language.sigmoid.doctree b/v1.1.2/.doctrees/python-api/generated/triton.language.sigmoid.doctree index ff270e74d..60a38b124 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.language.sigmoid.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.language.sigmoid.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.language.sin.doctree b/v1.1.2/.doctrees/python-api/generated/triton.language.sin.doctree index fca47f53c..80808df5b 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.language.sin.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.language.sin.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.language.softmax.doctree b/v1.1.2/.doctrees/python-api/generated/triton.language.softmax.doctree index fab3f87e4..046615c4d 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.language.softmax.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.language.softmax.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.language.sqrt.doctree b/v1.1.2/.doctrees/python-api/generated/triton.language.sqrt.doctree index 366e88f9b..b8a43dd46 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.language.sqrt.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.language.sqrt.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.language.store.doctree b/v1.1.2/.doctrees/python-api/generated/triton.language.store.doctree index 7be370c06..c3bd3c6e3 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.language.store.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.language.store.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.language.sum.doctree b/v1.1.2/.doctrees/python-api/generated/triton.language.sum.doctree index 336a359da..32e27f5a4 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.language.sum.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.language.sum.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.language.where.doctree b/v1.1.2/.doctrees/python-api/generated/triton.language.where.doctree index 354cc11ae..1cfabc859 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.language.where.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.language.where.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.language.zeros.doctree b/v1.1.2/.doctrees/python-api/generated/triton.language.zeros.doctree index 6fd44c3e3..dd8ad3f49 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.language.zeros.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.language.zeros.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.testing.Benchmark.doctree b/v1.1.2/.doctrees/python-api/generated/triton.testing.Benchmark.doctree index 0e2f9aa65..23e297f3b 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.testing.Benchmark.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.testing.Benchmark.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.testing.do_bench.doctree b/v1.1.2/.doctrees/python-api/generated/triton.testing.do_bench.doctree index 762bff243..0be102199 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.testing.do_bench.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.testing.do_bench.doctree differ diff --git a/v1.1.2/.doctrees/python-api/generated/triton.testing.perf_report.doctree b/v1.1.2/.doctrees/python-api/generated/triton.testing.perf_report.doctree index 67a179730..1551aa6cd 100644 Binary files a/v1.1.2/.doctrees/python-api/generated/triton.testing.perf_report.doctree and b/v1.1.2/.doctrees/python-api/generated/triton.testing.perf_report.doctree differ diff --git a/v1.1.2/.doctrees/python-api/triton.doctree b/v1.1.2/.doctrees/python-api/triton.doctree index 95fd6312b..3b87c2b20 100644 Binary files a/v1.1.2/.doctrees/python-api/triton.doctree and b/v1.1.2/.doctrees/python-api/triton.doctree differ diff --git a/v1.1.2/.doctrees/python-api/triton.language.doctree b/v1.1.2/.doctrees/python-api/triton.language.doctree index 0507bb4e4..59828cca1 100644 Binary files a/v1.1.2/.doctrees/python-api/triton.language.doctree and b/v1.1.2/.doctrees/python-api/triton.language.doctree differ diff --git a/v1.1.2/.doctrees/python-api/triton.testing.doctree b/v1.1.2/.doctrees/python-api/triton.testing.doctree index a61ba3be2..c4a354e34 100644 Binary files a/v1.1.2/.doctrees/python-api/triton.testing.doctree and b/v1.1.2/.doctrees/python-api/triton.testing.doctree differ diff --git a/v1.1.2/_downloads/662999063954282841dc90b8945f85ce/tutorials_jupyter.zip b/v1.1.2/_downloads/662999063954282841dc90b8945f85ce/tutorials_jupyter.zip index a3adb2549..7d16c1dbc 100644 Binary files a/v1.1.2/_downloads/662999063954282841dc90b8945f85ce/tutorials_jupyter.zip and b/v1.1.2/_downloads/662999063954282841dc90b8945f85ce/tutorials_jupyter.zip differ diff --git a/v1.1.2/_downloads/763344228ae6bc253ed1a6cf586aa30d/tutorials_python.zip b/v1.1.2/_downloads/763344228ae6bc253ed1a6cf586aa30d/tutorials_python.zip index a3f7abbf2..dd2d179c9 100644 Binary files a/v1.1.2/_downloads/763344228ae6bc253ed1a6cf586aa30d/tutorials_python.zip and b/v1.1.2/_downloads/763344228ae6bc253ed1a6cf586aa30d/tutorials_python.zip differ diff --git a/v1.1.2/_images/sphx_glr_01-vector-add_001.png b/v1.1.2/_images/sphx_glr_01-vector-add_001.png index 609d1e8f0..53d3422b2 100644 Binary files a/v1.1.2/_images/sphx_glr_01-vector-add_001.png and b/v1.1.2/_images/sphx_glr_01-vector-add_001.png differ diff --git a/v1.1.2/_images/sphx_glr_01-vector-add_thumb.png b/v1.1.2/_images/sphx_glr_01-vector-add_thumb.png index ace27542e..3662e4bc8 100644 Binary files a/v1.1.2/_images/sphx_glr_01-vector-add_thumb.png and b/v1.1.2/_images/sphx_glr_01-vector-add_thumb.png differ diff --git a/v1.1.2/_images/sphx_glr_02-fused-softmax_001.png b/v1.1.2/_images/sphx_glr_02-fused-softmax_001.png index a51fdbac4..866fe5e25 100644 Binary files a/v1.1.2/_images/sphx_glr_02-fused-softmax_001.png and b/v1.1.2/_images/sphx_glr_02-fused-softmax_001.png differ diff --git a/v1.1.2/_images/sphx_glr_02-fused-softmax_thumb.png b/v1.1.2/_images/sphx_glr_02-fused-softmax_thumb.png index 42cd0e187..51407ff30 100644 Binary files a/v1.1.2/_images/sphx_glr_02-fused-softmax_thumb.png and b/v1.1.2/_images/sphx_glr_02-fused-softmax_thumb.png differ diff --git a/v1.1.2/_images/sphx_glr_03-matrix-multiplication_001.png b/v1.1.2/_images/sphx_glr_03-matrix-multiplication_001.png index 0403f4d14..0ab4fb948 100644 Binary files a/v1.1.2/_images/sphx_glr_03-matrix-multiplication_001.png and b/v1.1.2/_images/sphx_glr_03-matrix-multiplication_001.png differ diff --git a/v1.1.2/_images/sphx_glr_03-matrix-multiplication_thumb.png b/v1.1.2/_images/sphx_glr_03-matrix-multiplication_thumb.png index 1b903baf5..5f60fa5d4 100644 Binary files a/v1.1.2/_images/sphx_glr_03-matrix-multiplication_thumb.png and b/v1.1.2/_images/sphx_glr_03-matrix-multiplication_thumb.png differ diff --git a/v1.1.2/_images/sphx_glr_05-layer-norm_001.png b/v1.1.2/_images/sphx_glr_05-layer-norm_001.png index 842288144..a2fbfb814 100644 Binary files a/v1.1.2/_images/sphx_glr_05-layer-norm_001.png and b/v1.1.2/_images/sphx_glr_05-layer-norm_001.png differ diff --git a/v1.1.2/_images/sphx_glr_05-layer-norm_thumb.png b/v1.1.2/_images/sphx_glr_05-layer-norm_thumb.png index c90e750d6..2348ab874 100644 Binary files a/v1.1.2/_images/sphx_glr_05-layer-norm_thumb.png and b/v1.1.2/_images/sphx_glr_05-layer-norm_thumb.png differ diff --git a/v1.1.2/_sources/getting-started/tutorials/01-vector-add.rst.txt b/v1.1.2/_sources/getting-started/tutorials/01-vector-add.rst.txt index 61b6c4f45..4c274b3db 100644 --- a/v1.1.2/_sources/getting-started/tutorials/01-vector-add.rst.txt +++ b/v1.1.2/_sources/getting-started/tutorials/01-vector-add.rst.txt @@ -232,19 +232,19 @@ We can now run the decorated function above. Pass `print_data=True` to see the p vector-add-performance: size Triton Torch 0 4096.0 9.600000 9.600000 - 1 8192.0 19.200000 19.200000 + 1 8192.0 19.200000 15.999999 2 16384.0 38.400001 38.400001 - 3 32768.0 63.999998 63.999998 + 3 32768.0 76.800002 76.800002 4 65536.0 127.999995 127.999995 5 131072.0 219.428568 219.428568 - 6 262144.0 341.333321 341.333321 + 6 262144.0 341.333321 384.000001 7 524288.0 472.615390 472.615390 8 1048576.0 614.400016 614.400016 9 2097152.0 722.823517 702.171410 10 4194304.0 780.190482 780.190482 11 8388608.0 812.429770 812.429770 12 16777216.0 833.084721 833.084721 - 13 33554432.0 842.004273 842.004273 + 13 33554432.0 842.004273 843.811163 14 67108864.0 847.448255 848.362445 15 134217728.0 849.737435 850.656574 @@ -254,7 +254,7 @@ We can now run the decorated function above. Pass `print_data=True` to see the p .. rst-class:: sphx-glr-timing - **Total running time of the script:** ( 1 minutes 31.098 seconds) + **Total running time of the script:** ( 1 minutes 28.027 seconds) .. _sphx_glr_download_getting-started_tutorials_01-vector-add.py: diff --git a/v1.1.2/_sources/getting-started/tutorials/02-fused-softmax.rst.txt b/v1.1.2/_sources/getting-started/tutorials/02-fused-softmax.rst.txt index a65fb6102..fb566741a 100644 --- a/v1.1.2/_sources/getting-started/tutorials/02-fused-softmax.rst.txt +++ b/v1.1.2/_sources/getting-started/tutorials/02-fused-softmax.rst.txt @@ -286,17 +286,17 @@ We will then compare its performance against (1) :code:`torch.softmax` and (2) t softmax-performance: N Triton Torch (native) Torch (jit) - 0 256.0 512.000001 546.133347 184.089886 - 1 384.0 585.142862 558.545450 151.703707 + 0 256.0 512.000001 512.000001 192.752942 + 1 384.0 585.142862 585.142862 153.600004 2 512.0 655.360017 585.142849 154.566038 3 640.0 682.666684 640.000002 160.000000 - 4 768.0 702.171410 664.216187 162.754967 + 4 768.0 722.823517 664.216187 162.754967 .. ... ... ... ... - 93 12160.0 812.359066 406.179533 198.834951 - 94 12288.0 814.111783 415.661740 199.096718 - 95 12416.0 812.498981 412.149375 198.655991 - 96 12544.0 812.566838 412.971190 198.913776 - 97 12672.0 812.633240 412.097543 199.069228 + 93 12160.0 814.058574 406.179533 198.429370 + 94 12288.0 814.111783 415.661740 198.694297 + 95 12416.0 812.498981 411.722274 198.259492 + 96 12544.0 812.566838 412.971190 198.618504 + 97 12672.0 812.633240 411.679167 198.679085 [98 rows x 4 columns] @@ -314,7 +314,7 @@ In the above plot, we can see that: .. rst-class:: sphx-glr-timing - **Total running time of the script:** ( 3 minutes 16.603 seconds) + **Total running time of the script:** ( 3 minutes 19.106 seconds) .. _sphx_glr_download_getting-started_tutorials_02-fused-softmax.py: diff --git a/v1.1.2/_sources/getting-started/tutorials/03-matrix-multiplication.rst.txt b/v1.1.2/_sources/getting-started/tutorials/03-matrix-multiplication.rst.txt index 81a2e2603..481dbb055 100644 --- a/v1.1.2/_sources/getting-started/tutorials/03-matrix-multiplication.rst.txt +++ b/v1.1.2/_sources/getting-started/tutorials/03-matrix-multiplication.rst.txt @@ -464,35 +464,35 @@ We can now compare the performance of our kernel against that of cuBLAS. Here we M cuBLAS ... Triton Triton (+ LeakyReLU) 0 256.0 2.730667 ... 2.978909 2.978909 1 384.0 7.372800 ... 8.507077 8.507077 - 2 512.0 14.563555 ... 16.384000 16.384000 + 2 512.0 14.563555 ... 15.420235 15.420235 3 640.0 22.260869 ... 24.380953 24.380953 4 768.0 32.768000 ... 34.028308 34.028308 - 5 896.0 37.971025 ... 39.025776 39.025776 - 6 1024.0 49.932191 ... 52.428801 52.428801 + 5 896.0 39.025776 ... 40.140799 39.025776 + 6 1024.0 49.932191 ... 53.773130 52.428801 7 1152.0 45.242181 ... 46.656000 46.656000 8 1280.0 51.200001 ... 56.888887 56.109587 - 9 1408.0 64.138541 ... 67.305878 66.485074 + 9 1408.0 64.138541 ... 67.305878 65.684049 10 1536.0 80.430545 ... 79.526831 78.643199 - 11 1664.0 63.372618 ... 62.061463 62.061463 - 12 1792.0 72.983276 ... 72.047592 71.588687 - 13 1920.0 69.120002 ... 70.172588 69.818184 - 14 2048.0 73.584279 ... 76.959706 76.608294 - 15 2176.0 83.155572 ... 85.998493 85.269692 - 16 2304.0 68.251065 ... 76.809875 76.563695 - 17 2432.0 71.125224 ... 83.614477 84.367759 - 18 2560.0 77.833728 ... 80.709358 80.313727 - 19 2688.0 83.737433 ... 89.888756 89.254248 - 20 2816.0 84.035084 ... 83.392363 83.392363 - 21 2944.0 81.431424 ... 83.060049 82.921853 - 22 3072.0 81.943708 ... 88.890270 88.681451 - 23 3200.0 85.106381 ... 92.888243 95.309011 - 24 3328.0 83.371507 ... 84.995628 84.695641 - 25 3456.0 82.519518 ... 91.511426 90.994998 - 26 3584.0 84.825838 ... 93.661869 95.350361 - 27 3712.0 85.163978 ... 88.092894 89.035062 - 28 3840.0 84.356981 ... 91.930177 90.426819 - 29 3968.0 93.648452 ... 84.154440 91.266964 - 30 4096.0 88.504930 ... 86.760004 92.372834 + 11 1664.0 63.372618 ... 62.492442 62.061463 + 12 1792.0 72.983276 ... 72.512412 71.588687 + 13 1920.0 69.120002 ... 70.530615 70.530615 + 14 2048.0 73.908442 ... 77.314362 76.959706 + 15 2176.0 83.500614 ... 86.367588 85.632545 + 16 2304.0 68.251065 ... 77.057651 76.563695 + 17 2432.0 71.125224 ... 85.134737 84.367759 + 18 2560.0 77.833728 ... 81.310171 80.908642 + 19 2688.0 83.552988 ... 90.102270 89.464755 + 20 2816.0 83.392363 ... 83.392363 82.916747 + 21 2944.0 82.102191 ... 82.646820 82.102191 + 22 3072.0 82.301023 ... 88.612060 88.473602 + 23 3200.0 84.880639 ... 95.665176 95.096582 + 24 3328.0 84.101981 ... 84.496824 84.496824 + 25 3456.0 81.890873 ... 88.790274 90.892410 + 26 3584.0 87.211821 ... 90.549237 97.628001 + 27 3712.0 85.748791 ... 93.014284 87.475786 + 28 3840.0 83.591840 ... 92.083268 88.261772 + 29 3968.0 93.648452 ... 90.154371 86.788006 + 30 4096.0 92.627833 ... 87.438257 82.340585 [31 rows x 5 columns] @@ -502,7 +502,7 @@ We can now compare the performance of our kernel against that of cuBLAS. Here we .. rst-class:: sphx-glr-timing - **Total running time of the script:** ( 5 minutes 11.940 seconds) + **Total running time of the script:** ( 5 minutes 26.690 seconds) .. _sphx_glr_download_getting-started_tutorials_03-matrix-multiplication.py: diff --git a/v1.1.2/_sources/getting-started/tutorials/04-low-memory-dropout.rst.txt b/v1.1.2/_sources/getting-started/tutorials/04-low-memory-dropout.rst.txt index 3a385a5bb..488f56f60 100644 --- a/v1.1.2/_sources/getting-started/tutorials/04-low-memory-dropout.rst.txt +++ b/v1.1.2/_sources/getting-started/tutorials/04-low-memory-dropout.rst.txt @@ -238,7 +238,7 @@ References .. rst-class:: sphx-glr-timing - **Total running time of the script:** ( 0 minutes 0.011 seconds) + **Total running time of the script:** ( 0 minutes 0.372 seconds) .. _sphx_glr_download_getting-started_tutorials_04-low-memory-dropout.py: diff --git a/v1.1.2/_sources/getting-started/tutorials/05-layer-norm.rst.txt b/v1.1.2/_sources/getting-started/tutorials/05-layer-norm.rst.txt index e8789c7f7..5786f7831 100644 --- a/v1.1.2/_sources/getting-started/tutorials/05-layer-norm.rst.txt +++ b/v1.1.2/_sources/getting-started/tutorials/05-layer-norm.rst.txt @@ -38,36 +38,36 @@ Layer Normalization layer-norm-backward: N Triton Torch Apex - 0 1024.0 307.200008 98.303995 303.407414 - 1 1536.0 351.085717 133.083026 341.333333 - 2 2048.0 420.102553 161.684218 334.367350 - 3 2560.0 461.954908 182.314537 326.808501 - 4 3072.0 511.999982 191.501303 317.793096 - 5 3584.0 551.384634 207.768111 309.410081 - 6 4096.0 568.231237 219.919464 296.096389 - 7 4608.0 498.162157 232.336141 286.507772 - 8 5120.0 525.128191 242.366855 284.444444 - 9 5632.0 538.517949 243.107920 289.438969 - 10 6144.0 540.131844 248.242431 285.767458 - 11 6656.0 527.207907 256.000009 285.767438 - 12 7168.0 507.469040 261.446807 287.678923 - 13 7680.0 482.513091 261.446804 278.850215 - 14 8192.0 461.521112 267.858310 287.018988 - 15 8704.0 417.791980 267.130429 284.987724 - 16 9216.0 430.319054 272.059034 288.563595 - 17 9728.0 439.683593 280.278512 289.308559 - 18 10240.0 445.217381 286.433562 291.184826 - 19 10752.0 427.940303 246.699797 290.267711 - 20 11264.0 428.424741 245.091565 286.372873 - 21 11776.0 421.826879 249.447482 288.391833 - 22 12288.0 419.504980 254.673582 294.911986 - 23 12800.0 414.016170 253.674644 288.180121 - 24 13312.0 412.242569 252.360194 289.653667 - 25 13824.0 405.594132 257.190689 291.543045 - 26 14336.0 397.761846 254.109315 286.481278 - 27 14848.0 386.498925 257.293872 289.952797 - 28 15360.0 376.932517 257.970599 287.550706 - 29 15872.0 366.982663 262.708969 291.229369 + 0 1024.0 307.200008 99.096776 307.200008 + 1 1536.0 351.085717 133.083026 338.201833 + 2 2048.0 423.724127 161.684218 334.367350 + 3 2560.0 458.507457 182.857144 328.556154 + 4 3072.0 511.999982 191.501303 320.556515 + 5 3584.0 551.384634 208.271186 308.301075 + 6 4096.0 568.231237 220.412561 298.796351 + 7 4608.0 495.928261 231.849059 286.507772 + 8 5120.0 522.893618 242.845844 283.787523 + 9 5632.0 536.380957 243.107920 291.310338 + 10 6144.0 542.117638 248.661056 286.322318 + 11 6656.0 525.473708 256.000009 286.279570 + 12 7168.0 505.976473 261.844750 288.160801 + 13 7680.0 481.253256 260.338991 277.172933 + 14 8192.0 460.440290 268.957600 286.600589 + 15 8704.0 416.958106 267.815384 284.987724 + 16 9216.0 428.651187 272.729961 289.507855 + 17 9728.0 438.857162 280.278512 288.950501 + 18 10240.0 446.836366 286.433562 290.153487 + 19 10752.0 428.651173 246.464170 289.941565 + 20 11264.0 429.104745 245.091565 285.767446 + 21 11776.0 421.826879 249.447482 288.686414 + 22 12288.0 420.102570 254.453844 294.911986 + 23 12800.0 415.135142 253.256381 289.811310 + 24 13312.0 412.242569 252.559690 290.179836 + 25 13824.0 404.604870 257.390218 292.571423 + 26 14336.0 397.761846 254.862216 286.242939 + 27 14848.0 383.999990 257.108233 289.012175 + 28 15360.0 374.253788 257.610071 287.326580 + 29 15872.0 366.982663 262.708969 291.006885 @@ -329,7 +329,7 @@ Layer Normalization .. rst-class:: sphx-glr-timing - **Total running time of the script:** ( 2 minutes 13.206 seconds) + **Total running time of the script:** ( 2 minutes 11.692 seconds) .. _sphx_glr_download_getting-started_tutorials_05-layer-norm.py: diff --git a/v1.1.2/_sources/getting-started/tutorials/sg_execution_times.rst.txt b/v1.1.2/_sources/getting-started/tutorials/sg_execution_times.rst.txt index a9b4edfeb..d2651caed 100644 --- a/v1.1.2/_sources/getting-started/tutorials/sg_execution_times.rst.txt +++ b/v1.1.2/_sources/getting-started/tutorials/sg_execution_times.rst.txt @@ -5,16 +5,16 @@ Computation times ================= -**12:12.858** total execution time for **getting-started_tutorials** files: +**12:25.887** total execution time for **getting-started_tutorials** files: +---------------------------------------------------------------------------------------------------------+-----------+--------+ -| :ref:`sphx_glr_getting-started_tutorials_03-matrix-multiplication.py` (``03-matrix-multiplication.py``) | 05:11.940 | 0.0 MB | +| :ref:`sphx_glr_getting-started_tutorials_03-matrix-multiplication.py` (``03-matrix-multiplication.py``) | 05:26.690 | 0.0 MB | +---------------------------------------------------------------------------------------------------------+-----------+--------+ -| :ref:`sphx_glr_getting-started_tutorials_02-fused-softmax.py` (``02-fused-softmax.py``) | 03:16.603 | 0.0 MB | +| :ref:`sphx_glr_getting-started_tutorials_02-fused-softmax.py` (``02-fused-softmax.py``) | 03:19.106 | 0.0 MB | +---------------------------------------------------------------------------------------------------------+-----------+--------+ -| :ref:`sphx_glr_getting-started_tutorials_05-layer-norm.py` (``05-layer-norm.py``) | 02:13.206 | 0.0 MB | +| :ref:`sphx_glr_getting-started_tutorials_05-layer-norm.py` (``05-layer-norm.py``) | 02:11.692 | 0.0 MB | +---------------------------------------------------------------------------------------------------------+-----------+--------+ -| :ref:`sphx_glr_getting-started_tutorials_01-vector-add.py` (``01-vector-add.py``) | 01:31.098 | 0.0 MB | +| :ref:`sphx_glr_getting-started_tutorials_01-vector-add.py` (``01-vector-add.py``) | 01:28.027 | 0.0 MB | +---------------------------------------------------------------------------------------------------------+-----------+--------+ -| :ref:`sphx_glr_getting-started_tutorials_04-low-memory-dropout.py` (``04-low-memory-dropout.py``) | 00:00.011 | 0.0 MB | +| :ref:`sphx_glr_getting-started_tutorials_04-low-memory-dropout.py` (``04-low-memory-dropout.py``) | 00:00.372 | 0.0 MB | +---------------------------------------------------------------------------------------------------------+-----------+--------+ diff --git a/v1.1.2/getting-started/tutorials/01-vector-add.html b/v1.1.2/getting-started/tutorials/01-vector-add.html index 4383d868b..f7ca792d7 100644 --- a/v1.1.2/getting-started/tutorials/01-vector-add.html +++ b/v1.1.2/getting-started/tutorials/01-vector-add.html @@ -322,24 +322,24 @@ for different problem sizes.
vector-add-performance:
size Triton Torch
0 4096.0 9.600000 9.600000
-1 8192.0 19.200000 19.200000
+1 8192.0 19.200000 15.999999
2 16384.0 38.400001 38.400001
-3 32768.0 63.999998 63.999998
+3 32768.0 76.800002 76.800002
4 65536.0 127.999995 127.999995
5 131072.0 219.428568 219.428568
-6 262144.0 341.333321 341.333321
+6 262144.0 341.333321 384.000001
7 524288.0 472.615390 472.615390
8 1048576.0 614.400016 614.400016
9 2097152.0 722.823517 702.171410
10 4194304.0 780.190482 780.190482
11 8388608.0 812.429770 812.429770
12 16777216.0 833.084721 833.084721
-13 33554432.0 842.004273 842.004273
+13 33554432.0 842.004273 843.811163
14 67108864.0 847.448255 848.362445
15 134217728.0 849.737435 850.656574
Total running time of the script: ( 1 minutes 31.098 seconds)
+Total running time of the script: ( 1 minutes 28.027 seconds)