[GH-PAGES] Updated website

2022-08-06 00:49:10 +00:00
parent 844e79e14c
commit 73ee4b1d0d
167 changed files with 288 additions and 288 deletions
--- a/master/.buildinfo
+++ b/master/.buildinfo
@@ -1,4 +1,4 @@
 # Sphinx build info version 1
 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: f293410811a23e0582da8ef1fa35bfc6
+config: 4d6b85d7c9a7d93ae015e5e1fbc4850d
 tags: 645f666f9bcd5a90fca523b33c5a78b7
--- a/master/.doctrees/environment.pickle
+++ b/master/.doctrees/environment.pickle
--- a/master/.doctrees/getting-started/installation.doctree
+++ b/master/.doctrees/getting-started/installation.doctree
--- a/master/.doctrees/getting-started/tutorials/01-vector-add.doctree
+++ b/master/.doctrees/getting-started/tutorials/01-vector-add.doctree
--- a/master/.doctrees/getting-started/tutorials/02-fused-softmax.doctree
+++ b/master/.doctrees/getting-started/tutorials/02-fused-softmax.doctree
--- a/master/.doctrees/getting-started/tutorials/03-matrix-multiplication.doctree
+++ b/master/.doctrees/getting-started/tutorials/03-matrix-multiplication.doctree
--- a/master/.doctrees/getting-started/tutorials/04-low-memory-dropout.doctree
+++ b/master/.doctrees/getting-started/tutorials/04-low-memory-dropout.doctree
--- a/master/.doctrees/getting-started/tutorials/05-layer-norm.doctree
+++ b/master/.doctrees/getting-started/tutorials/05-layer-norm.doctree
--- a/master/.doctrees/getting-started/tutorials/06-fused-attention.doctree
+++ b/master/.doctrees/getting-started/tutorials/06-fused-attention.doctree
--- a/master/.doctrees/getting-started/tutorials/07-libdevice-function.doctree
+++ b/master/.doctrees/getting-started/tutorials/07-libdevice-function.doctree
--- a/master/.doctrees/getting-started/tutorials/index.doctree
+++ b/master/.doctrees/getting-started/tutorials/index.doctree
--- a/master/.doctrees/getting-started/tutorials/sg_execution_times.doctree
+++ b/master/.doctrees/getting-started/tutorials/sg_execution_times.doctree
--- a/master/.doctrees/index.doctree
+++ b/master/.doctrees/index.doctree
--- a/master/.doctrees/programming-guide/chapter-1/introduction.doctree
+++ b/master/.doctrees/programming-guide/chapter-1/introduction.doctree
--- a/master/.doctrees/programming-guide/chapter-2/related-work.doctree
+++ b/master/.doctrees/programming-guide/chapter-2/related-work.doctree
--- a/master/.doctrees/python-api/generated/triton.Config.doctree
+++ b/master/.doctrees/python-api/generated/triton.Config.doctree
--- a/master/.doctrees/python-api/generated/triton.autotune.doctree
+++ b/master/.doctrees/python-api/generated/triton.autotune.doctree
--- a/master/.doctrees/python-api/generated/triton.heuristics.doctree
+++ b/master/.doctrees/python-api/generated/triton.heuristics.doctree
--- a/master/.doctrees/python-api/generated/triton.jit.doctree
+++ b/master/.doctrees/python-api/generated/triton.jit.doctree
--- a/master/.doctrees/python-api/generated/triton.language.arange.doctree
+++ b/master/.doctrees/python-api/generated/triton.language.arange.doctree
--- a/master/.doctrees/python-api/generated/triton.language.atomic_add.doctree
+++ b/master/.doctrees/python-api/generated/triton.language.atomic_add.doctree
--- a/master/.doctrees/python-api/generated/triton.language.atomic_and.doctree
+++ b/master/.doctrees/python-api/generated/triton.language.atomic_and.doctree
--- a/master/.doctrees/python-api/generated/triton.language.atomic_cas.doctree
+++ b/master/.doctrees/python-api/generated/triton.language.atomic_cas.doctree
--- a/master/.doctrees/python-api/generated/triton.language.atomic_max.doctree
+++ b/master/.doctrees/python-api/generated/triton.language.atomic_max.doctree
--- a/master/.doctrees/python-api/generated/triton.language.atomic_min.doctree
+++ b/master/.doctrees/python-api/generated/triton.language.atomic_min.doctree
--- a/master/.doctrees/python-api/generated/triton.language.atomic_or.doctree
+++ b/master/.doctrees/python-api/generated/triton.language.atomic_or.doctree
--- a/master/.doctrees/python-api/generated/triton.language.atomic_xchg.doctree
+++ b/master/.doctrees/python-api/generated/triton.language.atomic_xchg.doctree
--- a/master/.doctrees/python-api/generated/triton.language.atomic_xor.doctree
+++ b/master/.doctrees/python-api/generated/triton.language.atomic_xor.doctree
--- a/master/.doctrees/python-api/generated/triton.language.broadcast_to.doctree
+++ b/master/.doctrees/python-api/generated/triton.language.broadcast_to.doctree
--- a/master/.doctrees/python-api/generated/triton.language.cos.doctree
+++ b/master/.doctrees/python-api/generated/triton.language.cos.doctree
--- a/master/.doctrees/python-api/generated/triton.language.dot.doctree
+++ b/master/.doctrees/python-api/generated/triton.language.dot.doctree
--- a/master/.doctrees/python-api/generated/triton.language.exp.doctree
+++ b/master/.doctrees/python-api/generated/triton.language.exp.doctree
--- a/master/.doctrees/python-api/generated/triton.language.load.doctree
+++ b/master/.doctrees/python-api/generated/triton.language.load.doctree
--- a/master/.doctrees/python-api/generated/triton.language.log.doctree
+++ b/master/.doctrees/python-api/generated/triton.language.log.doctree
--- a/master/.doctrees/python-api/generated/triton.language.max.doctree
+++ b/master/.doctrees/python-api/generated/triton.language.max.doctree
--- a/master/.doctrees/python-api/generated/triton.language.maximum.doctree
+++ b/master/.doctrees/python-api/generated/triton.language.maximum.doctree
--- a/master/.doctrees/python-api/generated/triton.language.min.doctree
+++ b/master/.doctrees/python-api/generated/triton.language.min.doctree
--- a/master/.doctrees/python-api/generated/triton.language.minimum.doctree
+++ b/master/.doctrees/python-api/generated/triton.language.minimum.doctree
--- a/master/.doctrees/python-api/generated/triton.language.multiple_of.doctree
+++ b/master/.doctrees/python-api/generated/triton.language.multiple_of.doctree
--- a/master/.doctrees/python-api/generated/triton.language.num_programs.doctree
+++ b/master/.doctrees/python-api/generated/triton.language.num_programs.doctree
--- a/master/.doctrees/python-api/generated/triton.language.program_id.doctree
+++ b/master/.doctrees/python-api/generated/triton.language.program_id.doctree
--- a/master/.doctrees/python-api/generated/triton.language.rand.doctree
+++ b/master/.doctrees/python-api/generated/triton.language.rand.doctree
--- a/master/.doctrees/python-api/generated/triton.language.randint.doctree
+++ b/master/.doctrees/python-api/generated/triton.language.randint.doctree
--- a/master/.doctrees/python-api/generated/triton.language.randint4x.doctree
+++ b/master/.doctrees/python-api/generated/triton.language.randint4x.doctree
--- a/master/.doctrees/python-api/generated/triton.language.randn.doctree
+++ b/master/.doctrees/python-api/generated/triton.language.randn.doctree
--- a/master/.doctrees/python-api/generated/triton.language.ravel.doctree
+++ b/master/.doctrees/python-api/generated/triton.language.ravel.doctree
--- a/master/.doctrees/python-api/generated/triton.language.reshape.doctree
+++ b/master/.doctrees/python-api/generated/triton.language.reshape.doctree
--- a/master/.doctrees/python-api/generated/triton.language.sigmoid.doctree
+++ b/master/.doctrees/python-api/generated/triton.language.sigmoid.doctree
--- a/master/.doctrees/python-api/generated/triton.language.sin.doctree
+++ b/master/.doctrees/python-api/generated/triton.language.sin.doctree
--- a/master/.doctrees/python-api/generated/triton.language.softmax.doctree
+++ b/master/.doctrees/python-api/generated/triton.language.softmax.doctree
--- a/master/.doctrees/python-api/generated/triton.language.sqrt.doctree
+++ b/master/.doctrees/python-api/generated/triton.language.sqrt.doctree
--- a/master/.doctrees/python-api/generated/triton.language.store.doctree
+++ b/master/.doctrees/python-api/generated/triton.language.store.doctree
--- a/master/.doctrees/python-api/generated/triton.language.sum.doctree
+++ b/master/.doctrees/python-api/generated/triton.language.sum.doctree
--- a/master/.doctrees/python-api/generated/triton.language.where.doctree
+++ b/master/.doctrees/python-api/generated/triton.language.where.doctree
--- a/master/.doctrees/python-api/generated/triton.language.zeros.doctree
+++ b/master/.doctrees/python-api/generated/triton.language.zeros.doctree
--- a/master/.doctrees/python-api/generated/triton.testing.Benchmark.doctree
+++ b/master/.doctrees/python-api/generated/triton.testing.Benchmark.doctree
--- a/master/.doctrees/python-api/generated/triton.testing.do_bench.doctree
+++ b/master/.doctrees/python-api/generated/triton.testing.do_bench.doctree
--- a/master/.doctrees/python-api/generated/triton.testing.perf_report.doctree
+++ b/master/.doctrees/python-api/generated/triton.testing.perf_report.doctree
--- a/master/.doctrees/python-api/triton.doctree
+++ b/master/.doctrees/python-api/triton.doctree
--- a/master/.doctrees/python-api/triton.language.doctree
+++ b/master/.doctrees/python-api/triton.language.doctree
--- a/master/.doctrees/python-api/triton.testing.doctree
+++ b/master/.doctrees/python-api/triton.testing.doctree
--- a/master/_downloads/662999063954282841dc90b8945f85ce/tutorials_jupyter.zip
+++ b/master/_downloads/662999063954282841dc90b8945f85ce/tutorials_jupyter.zip
--- a/master/_downloads/763344228ae6bc253ed1a6cf586aa30d/tutorials_python.zip
+++ b/master/_downloads/763344228ae6bc253ed1a6cf586aa30d/tutorials_python.zip
--- a/master/_images/sphx_glr_01-vector-add_001.png
+++ b/master/_images/sphx_glr_01-vector-add_001.png
--- a/master/_images/sphx_glr_01-vector-add_thumb.png
+++ b/master/_images/sphx_glr_01-vector-add_thumb.png
--- a/master/_images/sphx_glr_02-fused-softmax_001.png
+++ b/master/_images/sphx_glr_02-fused-softmax_001.png
--- a/master/_images/sphx_glr_02-fused-softmax_thumb.png
+++ b/master/_images/sphx_glr_02-fused-softmax_thumb.png
--- a/master/_images/sphx_glr_03-matrix-multiplication_001.png
+++ b/master/_images/sphx_glr_03-matrix-multiplication_001.png
--- a/master/_images/sphx_glr_03-matrix-multiplication_thumb.png
+++ b/master/_images/sphx_glr_03-matrix-multiplication_thumb.png
--- a/master/_images/sphx_glr_05-layer-norm_001.png
+++ b/master/_images/sphx_glr_05-layer-norm_001.png
--- a/master/_images/sphx_glr_05-layer-norm_thumb.png
+++ b/master/_images/sphx_glr_05-layer-norm_thumb.png
--- a/master/_sources/getting-started/tutorials/01-vector-add.rst.txt
+++ b/master/_sources/getting-started/tutorials/01-vector-add.rst.txt
@@ -234,8 +234,8 @@ We can now run the decorated function above. Pass `print_data=True` to see the p
               size      Triton       Torch
    0        4096.0    9.600000    9.600000
    1        8192.0   19.200000   19.200000
-    2       16384.0   38.400001   38.400001
-    3       32768.0   63.999998   63.999998
+    2       16384.0   31.999999   38.400001
+    3       32768.0   76.800002   63.999998
    4       65536.0  127.999995  127.999995
    5      131072.0  219.428568  219.428568
    6      262144.0  384.000001  384.000001
@@ -245,7 +245,7 @@ We can now run the decorated function above. Pass `print_data=True` to see the p
    10    4194304.0  780.190482  780.190482
    11    8388608.0  812.429770  812.429770
    12   16777216.0  833.084721  833.084721
-    13   33554432.0  842.004273  843.811163
+    13   33554432.0  842.004273  842.906750
    14   67108864.0  847.448255  848.362445
    15  134217728.0  849.737435  850.656574

@@ -255,7 +255,7 @@ We can now run the decorated function above. Pass `print_data=True` to see the p

 .. rst-class:: sphx-glr-timing

-   **Total running time of the script:** ( 1 minutes  50.498 seconds)
+   **Total running time of the script:** ( 1 minutes  50.715 seconds)


 .. _sphx_glr_download_getting-started_tutorials_01-vector-add.py:
--- a/master/_sources/getting-started/tutorials/02-fused-softmax.rst.txt
+++ b/master/_sources/getting-started/tutorials/02-fused-softmax.rst.txt
@@ -278,17 +278,17 @@ We will then compare its performance against (1) :code:`torch.softmax` and (2) t

    softmax-performance:
              N      Triton  Torch (native)  Torch (jit)
-    0     256.0  546.133347      546.133347   190.511628
-    1     384.0  614.400016      585.142862   153.600004
-    2     512.0  655.360017      606.814814   154.566038
+    0     256.0  546.133347      546.133347   188.321838
+    1     384.0  614.400016      585.142862   151.703707
+    2     512.0  655.360017      606.814814   156.038096
    3     640.0  706.206879      640.000002   160.000000
    4     768.0  722.823517      664.216187   162.754967
    ..      ...         ...             ...          ...
-    93  12160.0  812.359066      406.179533   198.530610
-    94  12288.0  812.429770      415.222812   198.794749
-    95  12416.0  812.498981      412.149375   198.556711
-    96  12544.0  810.925276      412.546756   198.716830
-    97  12672.0  811.007961      412.097543   198.776477
+    93  12160.0  812.359066      405.755985   198.834951
+    94  12288.0  812.429770      415.222812   199.096718
+    95  12416.0  812.498981      411.722274   198.655991
+    96  12544.0  810.925276      412.971190   198.913776
+    97  12672.0  811.007961      412.516771   199.069228

    [98 rows x 4 columns]

@@ -306,7 +306,7 @@ In the above plot, we can see that:

 .. rst-class:: sphx-glr-timing

-   **Total running time of the script:** ( 3 minutes  32.107 seconds)
+   **Total running time of the script:** ( 3 minutes  30.914 seconds)


 .. _sphx_glr_download_getting-started_tutorials_02-fused-softmax.py:
--- a/master/_sources/getting-started/tutorials/03-matrix-multiplication.rst.txt
+++ b/master/_sources/getting-started/tutorials/03-matrix-multiplication.rst.txt
@@ -459,37 +459,37 @@ We can now compare the performance of our kernel against that of cuBLAS. Here we

    matmul-performance:
             M     cuBLAS  ...     Triton  Triton (+ LeakyReLU)
-    0    256.0   2.978909  ...   2.978909              3.276800
+    0    256.0   2.730667  ...   2.978909              2.978909
    1    384.0   7.372800  ...   8.507077              8.507077
    2    512.0  14.563555  ...  16.384000             16.384000
    3    640.0  22.260869  ...  24.380953             24.380953
    4    768.0  32.768000  ...  35.389441             34.028308
    5    896.0  39.025776  ...  40.140799             39.025776
    6   1024.0  49.932191  ...  53.773130             52.428801
-    7   1152.0  44.566925  ...  47.396572             47.396572
+    7   1152.0  45.242181  ...  48.161033             47.396572
    8   1280.0  51.200001  ...  57.690139             57.690139
-    9   1408.0  64.138541  ...  68.147202             67.305878
-    10  1536.0  80.430545  ...  81.355034             79.526831
+    9   1408.0  64.138541  ...  69.009825             67.305878
+    10  1536.0  80.430545  ...  80.430545             79.526831
    11  1664.0  62.929456  ...  63.372618             62.492442
    12  1792.0  72.512412  ...  73.460287             59.467852
    13  1920.0  69.120002  ...  71.626943             71.257735
    14  2048.0  73.908442  ...  78.398206             77.314362
    15  2176.0  83.500614  ...  87.876193             86.367588
-    16  2304.0  68.446623  ...  78.064941             77.307030
-    17  2432.0  71.305746  ...  86.711310             85.653855
-    18  2560.0  78.019048  ...  82.956960             81.715711
-    19  2688.0  83.922689  ...  90.316801             90.102270
-    20  2816.0  79.587973  ...  83.873477             83.873477
-    21  2944.0  81.832567  ...  83.617504             82.921853
-    22  3072.0  81.707223  ...  90.164177             88.197981
-    23  3200.0  82.262212  ...  97.190583             95.238096
-    24  3328.0  82.843841  ...  85.196803             84.200347
-    25  3456.0  79.430113  ...  85.133652             82.604067
-    26  3584.0  87.381330  ...  99.244365             91.563533
-    27  3712.0  84.874549  ...  82.902362             86.044224
-    28  3840.0  84.550462  ...  93.405401             84.809814
-    29  3968.0  92.512459  ...  84.975809             90.859224
-    30  4096.0  86.313653  ...  93.792965             90.321484
+    16  2304.0  68.251065  ...  78.064941             77.307030
+    17  2432.0  71.305746  ...  86.179335             85.393507
+    18  2560.0  77.833728  ...  82.539044             81.512437
+    19  2688.0  83.737433  ...  91.185232             89.254248
+    20  2816.0  79.879498  ...  82.602666             83.392363
+    21  2944.0  82.102191  ...  82.990890             83.337844
+    22  3072.0  80.202695  ...  89.170242             87.381335
+    23  3200.0  82.474230  ...  96.676741             95.238096
+    24  3328.0  82.843841  ...  86.062515             84.795401
+    25  3456.0  81.026701  ...  91.200871             87.347312
+    26  3584.0  87.381330  ...  95.350361             98.268190
+    27  3712.0  85.970176  ...  89.353616             87.552452
+    28  3840.0  79.192264  ...  91.853823             85.796739
+    29  3968.0  87.850207  ...  86.449828             89.988156
+    30  4096.0  86.509232  ...  92.948562             87.352901

    [31 rows x 5 columns]

@@ -499,7 +499,7 @@ We can now compare the performance of our kernel against that of cuBLAS. Here we

 .. rst-class:: sphx-glr-timing

-   **Total running time of the script:** ( 6 minutes  38.502 seconds)
+   **Total running time of the script:** ( 7 minutes  14.457 seconds)


 .. _sphx_glr_download_getting-started_tutorials_03-matrix-multiplication.py:
--- a/master/_sources/getting-started/tutorials/04-low-memory-dropout.rst.txt
+++ b/master/_sources/getting-started/tutorials/04-low-memory-dropout.rst.txt
@@ -240,7 +240,7 @@ References

 .. rst-class:: sphx-glr-timing

-   **Total running time of the script:** ( 0 minutes  0.012 seconds)
+   **Total running time of the script:** ( 0 minutes  0.282 seconds)


 .. _sphx_glr_download_getting-started_tutorials_04-low-memory-dropout.py:
--- a/master/_sources/getting-started/tutorials/05-layer-norm.rst.txt
+++ b/master/_sources/getting-started/tutorials/05-layer-norm.rst.txt
@@ -38,35 +38,35 @@ Layer Normalization

    layer-norm:
              N      Triton       Torch        Apex
-    0    1024.0  606.814814  277.694907  468.114273
+    0    1024.0  585.142849  277.694907  468.114273
    1    1536.0  630.153868  323.368435  511.999982
-    2    2048.0  682.666643  334.367358  520.126988
-    3    2560.0  694.237267  365.714281  512.000013
-    4    3072.0  712.347810  378.092307  496.484863
-    5    3584.0  725.873439  384.859062  451.527536
-    6    4096.0  728.177767  381.023256  455.111095
-    7    4608.0  670.254540  394.267384  426.173427
-    8    5120.0  688.403381  397.669909  422.268057
-    9    5632.0  704.000002  395.228063  415.262685
+    2    2048.0  682.666643  337.814445  520.126988
+    3    2560.0  694.237267  365.714281  518.481028
+    4    3072.0  712.347810  375.206126  496.484863
+    5    3584.0  725.873439  384.859062  455.111115
+    6    4096.0  728.177767  381.023256  442.810792
+    7    4608.0  670.254540  396.387087  426.173427
+    8    5120.0  688.403381  397.669909  426.666652
+    9    5632.0  698.542675  398.725657  411.470331
    10   6144.0  697.191505  402.885254  409.600010
-    11   6656.0  700.631610  400.360920  400.360920
-    12   7168.0  690.891575  396.844306  387.459443
-    13   7680.0  678.895043  393.846167  386.415087
-    14   8192.0  636.271854  393.609605  371.308771
-    15   8704.0  627.315309  389.005597  381.545190
-    16   9216.0  606.814809  407.337026  383.999986
-    17   9728.0  587.350922  409.599987  383.369452
-    18  10240.0  564.965524  408.578556  382.803739
-    19  10752.0  547.872604  411.559798  381.445676
-    20  11264.0  533.207081  406.826188  373.134567
-    21  11776.0  520.486200  409.599991  377.587162
-    22  12288.0  514.680630  413.911572  383.251457
-    23  12800.0  504.433489  410.420828  376.470582
-    24  13312.0  494.180982  405.699062  376.976995
-    25  13824.0  482.934503  411.888257  379.389355
-    26  14336.0  471.967074  406.695045  374.185964
-    27  14848.0  461.297068  408.192434  375.304904
-    28  15360.0  454.269882  406.214870  378.092307
+    11   6656.0  700.631610  400.360920  398.861429
+    12   7168.0  690.891575  382.293315  382.293315
+    13   7680.0  678.895043  392.587863  386.415087
+    14   8192.0  636.271854  392.431125  374.491442
+    15   8704.0  624.502255  392.292962  380.502740
+    16   9216.0  606.814809  403.989025  383.002605
+    17   9728.0  587.350922  407.455499  382.427505
+    18  10240.0  566.920437  407.562184  381.911416
+    19  10752.0  547.872604  410.577576  380.601764
+    20  11264.0  533.207081  396.096702  369.311483
+    21  11776.0  521.927959  407.826843  377.587162
+    22  12288.0  516.031509  413.042029  382.505826
+    23  12800.0  504.433489  408.782457  376.470582
+    24  13312.0  494.180982  401.871683  375.647260
+    25  13824.0  482.934503  409.600016  378.092325
+    26  14336.0  471.967074  398.914774  372.969090
+    27  14848.0  461.297068  403.341254  374.712936
+    28  15360.0  454.269882  406.887417  378.092307
    29  15872.0  447.887117  406.974373  376.225175


@@ -393,7 +393,7 @@ Layer Normalization

 .. rst-class:: sphx-glr-timing

-   **Total running time of the script:** ( 5 minutes  38.270 seconds)
+   **Total running time of the script:** ( 5 minutes  38.714 seconds)


 .. _sphx_glr_download_getting-started_tutorials_05-layer-norm.py:
--- a/master/_sources/getting-started/tutorials/06-fused-attention.rst.txt
+++ b/master/_sources/getting-started/tutorials/06-fused-attention.rst.txt
@@ -385,7 +385,7 @@ This is a Triton implementation of the Flash Attention algorithm

 .. rst-class:: sphx-glr-timing

-   **Total running time of the script:** ( 0 minutes  0.073 seconds)
+   **Total running time of the script:** ( 0 minutes  0.074 seconds)


 .. _sphx_glr_download_getting-started_tutorials_06-fused-attention.py:
--- a/master/_sources/getting-started/tutorials/07-libdevice-function.rst.txt
+++ b/master/_sources/getting-started/tutorials/07-libdevice-function.rst.txt
@@ -152,7 +152,7 @@ We can also customize the libdevice library path by passing the path to the `lib

 .. rst-class:: sphx-glr-timing

-   **Total running time of the script:** ( 0 minutes  0.010 seconds)
+   **Total running time of the script:** ( 0 minutes  0.250 seconds)


 .. _sphx_glr_download_getting-started_tutorials_07-libdevice-function.py:
--- a/master/_sources/getting-started/tutorials/sg_execution_times.rst.txt
+++ b/master/_sources/getting-started/tutorials/sg_execution_times.rst.txt
@@ -5,20 +5,20 @@

 Computation times
 =================
-**17:39.472** total execution time for **getting-started_tutorials** files:
+**18:15.408** total execution time for **getting-started_tutorials** files:

 +---------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_getting-started_tutorials_03-matrix-multiplication.py` (``03-matrix-multiplication.py``) | 06:38.502 | 0.0 MB |
+| :ref:`sphx_glr_getting-started_tutorials_03-matrix-multiplication.py` (``03-matrix-multiplication.py``) | 07:14.457 | 0.0 MB |
 +---------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_getting-started_tutorials_05-layer-norm.py` (``05-layer-norm.py``)                       | 05:38.270 | 0.0 MB |
+| :ref:`sphx_glr_getting-started_tutorials_05-layer-norm.py` (``05-layer-norm.py``)                       | 05:38.714 | 0.0 MB |
 +---------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_getting-started_tutorials_02-fused-softmax.py` (``02-fused-softmax.py``)                 | 03:32.107 | 0.0 MB |
+| :ref:`sphx_glr_getting-started_tutorials_02-fused-softmax.py` (``02-fused-softmax.py``)                 | 03:30.914 | 0.0 MB |
 +---------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_getting-started_tutorials_01-vector-add.py` (``01-vector-add.py``)                       | 01:50.498 | 0.0 MB |
+| :ref:`sphx_glr_getting-started_tutorials_01-vector-add.py` (``01-vector-add.py``)                       | 01:50.715 | 0.0 MB |
 +---------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_getting-started_tutorials_06-fused-attention.py` (``06-fused-attention.py``)             | 00:00.073 | 0.0 MB |
+| :ref:`sphx_glr_getting-started_tutorials_04-low-memory-dropout.py` (``04-low-memory-dropout.py``)       | 00:00.282 | 0.0 MB |
 +---------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_getting-started_tutorials_04-low-memory-dropout.py` (``04-low-memory-dropout.py``)       | 00:00.012 | 0.0 MB |
+| :ref:`sphx_glr_getting-started_tutorials_07-libdevice-function.py` (``07-libdevice-function.py``)       | 00:00.250 | 0.0 MB |
 +---------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_getting-started_tutorials_07-libdevice-function.py` (``07-libdevice-function.py``)       | 00:00.010 | 0.0 MB |
+| :ref:`sphx_glr_getting-started_tutorials_06-fused-attention.py` (``06-fused-attention.py``)             | 00:00.074 | 0.0 MB |
 +---------------------------------------------------------------------------------------------------------+-----------+--------+
--- a/master/getting-started/tutorials/01-vector-add.html
+++ b/master/getting-started/tutorials/01-vector-add.html
@@ -326,8 +326,8 @@ for different problem sizes.</p>
           size      Triton       Torch
 0        4096.0    9.600000    9.600000
 1        8192.0   19.200000   19.200000
-2       16384.0   38.400001   38.400001
-3       32768.0   63.999998   63.999998
+2       16384.0   31.999999   38.400001
+3       32768.0   76.800002   63.999998
 4       65536.0  127.999995  127.999995
 5      131072.0  219.428568  219.428568
 6      262144.0  384.000001  384.000001
@@ -337,12 +337,12 @@ for different problem sizes.</p>
 10    4194304.0  780.190482  780.190482
 11    8388608.0  812.429770  812.429770
 12   16777216.0  833.084721  833.084721
-13   33554432.0  842.004273  843.811163
+13   33554432.0  842.004273  842.906750
 14   67108864.0  847.448255  848.362445
 15  134217728.0  849.737435  850.656574
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  50.498 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  50.715 seconds)</p>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-getting-started-tutorials-01-vector-add-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/62d97d49a32414049819dd8bb8378080/01-vector-add.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">01-vector-add.py</span></code></a></p>
--- a/master/getting-started/tutorials/02-fused-softmax.html
+++ b/master/getting-started/tutorials/02-fused-softmax.html
@@ -371,17 +371,17 @@ We will then compare its performance against (1) <code class="code docutils lite
 <p class="sphx-glr-script-out">Out:</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>softmax-performance:
          N      Triton  Torch (native)  Torch (jit)
-0     256.0  546.133347      546.133347   190.511628
-1     384.0  614.400016      585.142862   153.600004
-2     512.0  655.360017      606.814814   154.566038
+0     256.0  546.133347      546.133347   188.321838
+1     384.0  614.400016      585.142862   151.703707
+2     512.0  655.360017      606.814814   156.038096
 3     640.0  706.206879      640.000002   160.000000
 4     768.0  722.823517      664.216187   162.754967
 ..      ...         ...             ...          ...
-93  12160.0  812.359066      406.179533   198.530610
-94  12288.0  812.429770      415.222812   198.794749
-95  12416.0  812.498981      412.149375   198.556711
-96  12544.0  810.925276      412.546756   198.716830
-97  12672.0  811.007961      412.097543   198.776477
+93  12160.0  812.359066      405.755985   198.834951
+94  12288.0  812.429770      415.222812   199.096718
+95  12416.0  812.498981      411.722274   198.655991
+96  12544.0  810.925276      412.971190   198.913776
+97  12672.0  811.007961      412.516771   199.069228

 [98 rows x 4 columns]
 </pre></div>
@@ -394,7 +394,7 @@ We will then compare its performance against (1) <code class="code docutils lite
 Note however that the PyTorch <cite>softmax</cite> operation is more general and will works on tensors of any shape.</p></li>
 </ul>
 </div></blockquote>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 3 minutes  32.107 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 3 minutes  30.914 seconds)</p>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-getting-started-tutorials-02-fused-softmax-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/d91442ac2982c4e0cc3ab0f43534afbc/02-fused-softmax.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">02-fused-softmax.py</span></code></a></p>
--- a/master/getting-started/tutorials/03-matrix-multiplication.html
+++ b/master/getting-started/tutorials/03-matrix-multiplication.html
@@ -567,42 +567,42 @@ torch_output=tensor([[  1.1045, -36.9688,  31.4688,  ..., -11.3906,  24.4531, -3
 <p class="sphx-glr-script-out">Out:</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>matmul-performance:
         M     cuBLAS  ...     Triton  Triton (+ LeakyReLU)
-0    256.0   2.978909  ...   2.978909              3.276800
+0    256.0   2.730667  ...   2.978909              2.978909
 1    384.0   7.372800  ...   8.507077              8.507077
 2    512.0  14.563555  ...  16.384000             16.384000
 3    640.0  22.260869  ...  24.380953             24.380953
 4    768.0  32.768000  ...  35.389441             34.028308
 5    896.0  39.025776  ...  40.140799             39.025776
 6   1024.0  49.932191  ...  53.773130             52.428801
-7   1152.0  44.566925  ...  47.396572             47.396572
+7   1152.0  45.242181  ...  48.161033             47.396572
 8   1280.0  51.200001  ...  57.690139             57.690139
-9   1408.0  64.138541  ...  68.147202             67.305878
-10  1536.0  80.430545  ...  81.355034             79.526831
+9   1408.0  64.138541  ...  69.009825             67.305878
+10  1536.0  80.430545  ...  80.430545             79.526831
 11  1664.0  62.929456  ...  63.372618             62.492442
 12  1792.0  72.512412  ...  73.460287             59.467852
 13  1920.0  69.120002  ...  71.626943             71.257735
 14  2048.0  73.908442  ...  78.398206             77.314362
 15  2176.0  83.500614  ...  87.876193             86.367588
-16  2304.0  68.446623  ...  78.064941             77.307030
-17  2432.0  71.305746  ...  86.711310             85.653855
-18  2560.0  78.019048  ...  82.956960             81.715711
-19  2688.0  83.922689  ...  90.316801             90.102270
-20  2816.0  79.587973  ...  83.873477             83.873477
-21  2944.0  81.832567  ...  83.617504             82.921853
-22  3072.0  81.707223  ...  90.164177             88.197981
-23  3200.0  82.262212  ...  97.190583             95.238096
-24  3328.0  82.843841  ...  85.196803             84.200347
-25  3456.0  79.430113  ...  85.133652             82.604067
-26  3584.0  87.381330  ...  99.244365             91.563533
-27  3712.0  84.874549  ...  82.902362             86.044224
-28  3840.0  84.550462  ...  93.405401             84.809814
-29  3968.0  92.512459  ...  84.975809             90.859224
-30  4096.0  86.313653  ...  93.792965             90.321484
+16  2304.0  68.251065  ...  78.064941             77.307030
+17  2432.0  71.305746  ...  86.179335             85.393507
+18  2560.0  77.833728  ...  82.539044             81.512437
+19  2688.0  83.737433  ...  91.185232             89.254248
+20  2816.0  79.879498  ...  82.602666             83.392363
+21  2944.0  82.102191  ...  82.990890             83.337844
+22  3072.0  80.202695  ...  89.170242             87.381335
+23  3200.0  82.474230  ...  96.676741             95.238096
+24  3328.0  82.843841  ...  86.062515             84.795401
+25  3456.0  81.026701  ...  91.200871             87.347312
+26  3584.0  87.381330  ...  95.350361             98.268190
+27  3712.0  85.970176  ...  89.353616             87.552452
+28  3840.0  79.192264  ...  91.853823             85.796739
+29  3968.0  87.850207  ...  86.449828             89.988156
+30  4096.0  86.509232  ...  92.948562             87.352901

 [31 rows x 5 columns]
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 6 minutes  38.502 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 7 minutes  14.457 seconds)</p>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-getting-started-tutorials-03-matrix-multiplication-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/d5fee5b55a64e47f1b5724ec39adf171/03-matrix-multiplication.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">03-matrix-multiplication.py</span></code></a></p>
--- a/master/getting-started/tutorials/04-low-memory-dropout.html
+++ b/master/getting-started/tutorials/04-low-memory-dropout.html
@@ -374,7 +374,7 @@ to explore the <cite>triton/language/random</cite> folder!</p>
 <dd><p>Nitish Srivastava and Geoffrey Hinton and Alex Krizhevsky and Ilya Sutskever and Ruslan Salakhutdinov, “Dropout: A Simple Way to Prevent Neural Networks from Overfitting”, JMLR 2014</p>
 </dd>
 </dl>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 0 minutes  0.012 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 0 minutes  0.282 seconds)</p>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-getting-started-tutorials-04-low-memory-dropout-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/c9aed78977a4c05741d675a38dde3d7d/04-low-memory-dropout.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">04-low-memory-dropout.py</span></code></a></p>
--- a/master/getting-started/tutorials/05-layer-norm.html
+++ b/master/getting-started/tutorials/05-layer-norm.html
@@ -196,35 +196,35 @@ to download the full example code</p>
 <p class="sphx-glr-script-out">Out:</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>layer-norm:
          N      Triton       Torch        Apex
-0    1024.0  606.814814  277.694907  468.114273
+0    1024.0  585.142849  277.694907  468.114273
 1    1536.0  630.153868  323.368435  511.999982
-2    2048.0  682.666643  334.367358  520.126988
-3    2560.0  694.237267  365.714281  512.000013
-4    3072.0  712.347810  378.092307  496.484863
-5    3584.0  725.873439  384.859062  451.527536
-6    4096.0  728.177767  381.023256  455.111095
-7    4608.0  670.254540  394.267384  426.173427
-8    5120.0  688.403381  397.669909  422.268057
-9    5632.0  704.000002  395.228063  415.262685
+2    2048.0  682.666643  337.814445  520.126988
+3    2560.0  694.237267  365.714281  518.481028
+4    3072.0  712.347810  375.206126  496.484863
+5    3584.0  725.873439  384.859062  455.111115
+6    4096.0  728.177767  381.023256  442.810792
+7    4608.0  670.254540  396.387087  426.173427
+8    5120.0  688.403381  397.669909  426.666652
+9    5632.0  698.542675  398.725657  411.470331
 10   6144.0  697.191505  402.885254  409.600010
-11   6656.0  700.631610  400.360920  400.360920
-12   7168.0  690.891575  396.844306  387.459443
-13   7680.0  678.895043  393.846167  386.415087
-14   8192.0  636.271854  393.609605  371.308771
-15   8704.0  627.315309  389.005597  381.545190
-16   9216.0  606.814809  407.337026  383.999986
-17   9728.0  587.350922  409.599987  383.369452
-18  10240.0  564.965524  408.578556  382.803739
-19  10752.0  547.872604  411.559798  381.445676
-20  11264.0  533.207081  406.826188  373.134567
-21  11776.0  520.486200  409.599991  377.587162
-22  12288.0  514.680630  413.911572  383.251457
-23  12800.0  504.433489  410.420828  376.470582
-24  13312.0  494.180982  405.699062  376.976995
-25  13824.0  482.934503  411.888257  379.389355
-26  14336.0  471.967074  406.695045  374.185964
-27  14848.0  461.297068  408.192434  375.304904
-28  15360.0  454.269882  406.214870  378.092307
+11   6656.0  700.631610  400.360920  398.861429
+12   7168.0  690.891575  382.293315  382.293315
+13   7680.0  678.895043  392.587863  386.415087
+14   8192.0  636.271854  392.431125  374.491442
+15   8704.0  624.502255  392.292962  380.502740
+16   9216.0  606.814809  403.989025  383.002605
+17   9728.0  587.350922  407.455499  382.427505
+18  10240.0  566.920437  407.562184  381.911416
+19  10752.0  547.872604  410.577576  380.601764
+20  11264.0  533.207081  396.096702  369.311483
+21  11776.0  521.927959  407.826843  377.587162
+22  12288.0  516.031509  413.042029  382.505826
+23  12800.0  504.433489  408.782457  376.470582
+24  13312.0  494.180982  401.871683  375.647260
+25  13824.0  482.934503  409.600016  378.092325
+26  14336.0  471.967074  398.914774  372.969090
+27  14848.0  461.297068  403.341254  374.712936
+28  15360.0  454.269882  406.887417  378.092307
 29  15872.0  447.887117  406.974373  376.225175
 </pre></div>
 </div>
@@ -543,7 +543,7 @@ to download the full example code</p>
 <span class="n">bench_layer_norm</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">save_path</span><span class="o">=</span><span class="s1">&#39;.&#39;</span><span class="p">,</span> <span class="n">print_data</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 5 minutes  38.270 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 5 minutes  38.714 seconds)</p>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-getting-started-tutorials-05-layer-norm-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/935c0dd0fbeb4b2e69588471cbb2d4b2/05-layer-norm.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">05-layer-norm.py</span></code></a></p>
--- a/master/getting-started/tutorials/06-fused-attention.html
+++ b/master/getting-started/tutorials/06-fused-attention.html
@@ -543,7 +543,7 @@ to download the full example code</p>
 <span class="c1"># bench_flash_attention.run(save_path=&#39;.&#39;, print_data=True)</span>
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 0 minutes  0.073 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 0 minutes  0.074 seconds)</p>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-getting-started-tutorials-06-fused-attention-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/54a35f6ec55f9746935b9566fb6bb1df/06-fused-attention.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">06-fused-attention.py</span></code></a></p>
--- a/master/getting-started/tutorials/07-libdevice-function.html
+++ b/master/getting-started/tutorials/07-libdevice-function.html
@@ -276,7 +276,7 @@ tensor([0.4105, 0.5430, 0.0249,  ..., 0.0424, 0.5351, 0.8149], device=&#39;cuda:
 The maximum difference between torch and triton is 2.384185791015625e-07
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 0 minutes  0.010 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 0 minutes  0.250 seconds)</p>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-getting-started-tutorials-07-libdevice-function-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/3ff29f967ace7985da24aab10352fc76/07-libdevice-function.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">07-libdevice-function.py</span></code></a></p>
--- a/master/getting-started/tutorials/sg_execution_times.html
+++ b/master/getting-started/tutorials/sg_execution_times.html
@@ -174,7 +174,7 @@
            
  <div class="section" id="computation-times">
 <span id="sphx-glr-getting-started-tutorials-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>17:39.472</strong> total execution time for <strong>getting-started_tutorials</strong> files:</p>
+<p><strong>18:15.408</strong> total execution time for <strong>getting-started_tutorials</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 85%" />
@@ -183,31 +183,31 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="03-matrix-multiplication.html#sphx-glr-getting-started-tutorials-03-matrix-multiplication-py"><span class="std std-ref">Matrix Multiplication</span></a> (<code class="docutils literal notranslate"><span class="pre">03-matrix-multiplication.py</span></code>)</p></td>
-<td><p>06:38.502</p></td>
+<td><p>07:14.457</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="05-layer-norm.html#sphx-glr-getting-started-tutorials-05-layer-norm-py"><span class="std std-ref">Layer Normalization</span></a> (<code class="docutils literal notranslate"><span class="pre">05-layer-norm.py</span></code>)</p></td>
-<td><p>05:38.270</p></td>
+<td><p>05:38.714</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="02-fused-softmax.html#sphx-glr-getting-started-tutorials-02-fused-softmax-py"><span class="std std-ref">Fused Softmax</span></a> (<code class="docutils literal notranslate"><span class="pre">02-fused-softmax.py</span></code>)</p></td>
-<td><p>03:32.107</p></td>
+<td><p>03:30.914</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="01-vector-add.html#sphx-glr-getting-started-tutorials-01-vector-add-py"><span class="std std-ref">Vector Addition</span></a> (<code class="docutils literal notranslate"><span class="pre">01-vector-add.py</span></code>)</p></td>
-<td><p>01:50.498</p></td>
+<td><p>01:50.715</p></td>
+<td><p>0.0 MB</p></td>
+</tr>
+<tr class="row-odd"><td><p><a class="reference internal" href="04-low-memory-dropout.html#sphx-glr-getting-started-tutorials-04-low-memory-dropout-py"><span class="std std-ref">Low-Memory Dropout</span></a> (<code class="docutils literal notranslate"><span class="pre">04-low-memory-dropout.py</span></code>)</p></td>
+<td><p>00:00.282</p></td>
+<td><p>0.0 MB</p></td>
+</tr>
+<tr class="row-even"><td><p><a class="reference internal" href="07-libdevice-function.html#sphx-glr-getting-started-tutorials-07-libdevice-function-py"><span class="std std-ref">Libdevice function</span></a> (<code class="docutils literal notranslate"><span class="pre">07-libdevice-function.py</span></code>)</p></td>
+<td><p>00:00.250</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="06-fused-attention.html#sphx-glr-getting-started-tutorials-06-fused-attention-py"><span class="std std-ref">Fused Attention</span></a> (<code class="docutils literal notranslate"><span class="pre">06-fused-attention.py</span></code>)</p></td>
-<td><p>00:00.073</p></td>
-<td><p>0.0 MB</p></td>
-</tr>
-<tr class="row-even"><td><p><a class="reference internal" href="04-low-memory-dropout.html#sphx-glr-getting-started-tutorials-04-low-memory-dropout-py"><span class="std std-ref">Low-Memory Dropout</span></a> (<code class="docutils literal notranslate"><span class="pre">04-low-memory-dropout.py</span></code>)</p></td>
-<td><p>00:00.012</p></td>
-<td><p>0.0 MB</p></td>
-</tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="07-libdevice-function.html#sphx-glr-getting-started-tutorials-07-libdevice-function-py"><span class="std std-ref">Libdevice function</span></a> (<code class="docutils literal notranslate"><span class="pre">07-libdevice-function.py</span></code>)</p></td>
-<td><p>00:00.010</p></td>
+<td><p>00:00.074</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 </tbody>
--- a/master/searchindex.js
+++ b/master/searchindex.js
--- a/v1.1.2/.buildinfo
+++ b/v1.1.2/.buildinfo
@@ -1,4 +1,4 @@
 # Sphinx build info version 1
 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: 67b632ed73e130ff238d935883579ed2
+config: b6a148ad6f1fecfeeccdf8b46659d01a
 tags: 645f666f9bcd5a90fca523b33c5a78b7
--- a/v1.1.2/.doctrees/environment.pickle
+++ b/v1.1.2/.doctrees/environment.pickle
--- a/v1.1.2/.doctrees/getting-started/installation.doctree
+++ b/v1.1.2/.doctrees/getting-started/installation.doctree
--- a/v1.1.2/.doctrees/getting-started/tutorials/01-vector-add.doctree
+++ b/v1.1.2/.doctrees/getting-started/tutorials/01-vector-add.doctree
--- a/v1.1.2/.doctrees/getting-started/tutorials/02-fused-softmax.doctree
+++ b/v1.1.2/.doctrees/getting-started/tutorials/02-fused-softmax.doctree
--- a/v1.1.2/.doctrees/getting-started/tutorials/03-matrix-multiplication.doctree
+++ b/v1.1.2/.doctrees/getting-started/tutorials/03-matrix-multiplication.doctree
--- a/v1.1.2/.doctrees/getting-started/tutorials/04-low-memory-dropout.doctree
+++ b/v1.1.2/.doctrees/getting-started/tutorials/04-low-memory-dropout.doctree
--- a/v1.1.2/.doctrees/getting-started/tutorials/05-layer-norm.doctree
+++ b/v1.1.2/.doctrees/getting-started/tutorials/05-layer-norm.doctree
--- a/v1.1.2/.doctrees/getting-started/tutorials/index.doctree
+++ b/v1.1.2/.doctrees/getting-started/tutorials/index.doctree
--- a/v1.1.2/.doctrees/getting-started/tutorials/sg_execution_times.doctree
+++ b/v1.1.2/.doctrees/getting-started/tutorials/sg_execution_times.doctree
--- a/v1.1.2/.doctrees/index.doctree
+++ b/v1.1.2/.doctrees/index.doctree
--- a/v1.1.2/.doctrees/programming-guide/chapter-1/introduction.doctree
+++ b/v1.1.2/.doctrees/programming-guide/chapter-1/introduction.doctree
--- a/Show More
+++ b/Show More