[GH-PAGES] Updated website

2022-04-12 00:41:56 +00:00
parent 56933acae6
commit d19191fa7e
156 changed files with 276 additions and 276 deletions
--- a/v1.1.2/getting-started/tutorials/01-vector-add.html
+++ b/v1.1.2/getting-started/tutorials/01-vector-add.html
@@ -324,7 +324,7 @@ for different problem sizes.</p>
 0        4096.0    9.600000    9.600000
 1        8192.0   19.200000   19.200000
 2       16384.0   38.400001   38.400001
-3       32768.0   63.999998   76.800002
+3       32768.0   76.800002   76.800002
 4       65536.0  127.999995  127.999995
 5      131072.0  219.428568  219.428568
 6      262144.0  341.333321  341.333321
@@ -339,7 +339,7 @@ for different problem sizes.</p>
 15  134217728.0  849.737435  850.656574
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  46.221 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  43.041 seconds)</p>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-getting-started-tutorials-01-vector-add-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/62d97d49a32414049819dd8bb8378080/01-vector-add.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">01-vector-add.py</span></code></a></p>
--- a/v1.1.2/getting-started/tutorials/02-fused-softmax.html
+++ b/v1.1.2/getting-started/tutorials/02-fused-softmax.html
@@ -374,17 +374,17 @@ We will then compare its performance against (1) <code class="code docutils lite
 <p class="sphx-glr-script-out">Out:</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>softmax-performance:
          N      Triton  Torch (native)  Torch (jit)
-0     256.0  512.000001      546.133347   190.511628
+0     256.0  512.000001      546.133347   188.321838
 1     384.0  585.142862      585.142862   151.703707
 2     512.0  655.360017      585.142849   156.038096
 3     640.0  682.666684      640.000002   158.759699
-4     768.0  722.823517      646.736871   162.754967
+4     768.0  722.823517      646.736871   163.839992
 ..      ...         ...             ...          ...
 93  12160.0  812.359066      405.755985   198.530610
-94  12288.0  814.111783      415.661740   198.794749
+94  12288.0  814.111783      415.222812   198.895304
 95  12416.0  812.498981      412.149375   198.457532
-96  12544.0  812.566838      412.971190   198.618504
-97  12672.0  812.633240      412.097543   198.776477
+96  12544.0  812.566838      412.971190   198.716830
+97  12672.0  812.633240      412.097543   198.873965

 [98 rows x 4 columns]
 </pre></div>
@@ -397,7 +397,7 @@ We will then compare its performance against (1) <code class="code docutils lite
 Note however that the PyTorch <cite>softmax</cite> operation is more general and will works on tensors of any shape.</p></li>
 </ul>
 </div></blockquote>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 3 minutes  22.095 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 3 minutes  22.770 seconds)</p>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-getting-started-tutorials-02-fused-softmax-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/d91442ac2982c4e0cc3ab0f43534afbc/02-fused-softmax.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">02-fused-softmax.py</span></code></a></p>
--- a/v1.1.2/getting-started/tutorials/03-matrix-multiplication.html
+++ b/v1.1.2/getting-started/tutorials/03-matrix-multiplication.html
@@ -568,12 +568,12 @@ torch_output=tensor([[  1.1045, -36.9688,  31.4688,  ..., -11.3906,  24.4531, -3
 <p class="sphx-glr-script-out">Out:</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>matmul-performance:
         M     cuBLAS  ...     Triton  Triton (+ LeakyReLU)
-0    256.0   2.730667  ...   3.276800              2.978909
+0    256.0   2.730667  ...   2.978909              2.978909
 1    384.0   7.372800  ...   8.507077              7.899428
 2    512.0  14.563555  ...  16.384000             16.384000
 3    640.0  22.260869  ...  24.380953             24.380953
 4    768.0  32.768000  ...  34.028308             34.028308
-5    896.0  37.971025  ...  39.025776             37.971025
+5    896.0  37.971025  ...  39.025776             39.025776
 6   1024.0  49.932191  ...  52.428801             52.428801
 7   1152.0  44.566925  ...  46.656000             46.656000
 8   1280.0  51.200001  ...  56.888887             56.888887
@@ -583,27 +583,27 @@ torch_output=tensor([[  1.1045, -36.9688,  31.4688,  ..., -11.3906,  24.4531, -3
 12  1792.0  72.983276  ...  72.047592             71.588687
 13  1920.0  68.776119  ...  70.172588             70.172588
 14  2048.0  73.908442  ...  76.959706             76.608294
-15  2176.0  83.155572  ...  85.998493             85.269692
+15  2176.0  83.155572  ...  85.632545             85.632545
 16  2304.0  68.251065  ...  76.563695             76.319081
-17  2432.0  71.125224  ...  82.874527             83.614477
-18  2560.0  77.833728  ...  81.108913             80.908642
-19  2688.0  83.552988  ...  90.102270             89.676257
-20  2816.0  84.035084  ...  83.233226             83.233226
-21  2944.0  82.102191  ...  82.784108             82.784108
-22  3072.0  81.707223  ...  88.612060             88.820552
-23  3200.0  84.936964  ...  95.309011             94.955488
-24  3328.0  82.939284  ...  84.795401             83.857070
-25  3456.0  81.108217  ...  84.864807             88.400840
-26  3584.0  86.540320  ...  97.734120             98.483450
-27  3712.0  82.491612  ...  86.942857             88.483034
-28  3840.0  81.798814  ...  84.614136             91.549669
-29  3968.0  85.811488  ...  91.403695             83.805851
-30  4096.0  93.466385  ...  82.441739             83.261615
+17  2432.0  71.125224  ...  74.818811             81.669953
+18  2560.0  77.833728  ...  80.709358             80.908642
+19  2688.0  83.186525  ...  89.676257             89.888756
+20  2816.0  80.173175  ...  83.074685             82.916747
+21  2944.0  81.832567  ...  82.237674             82.237674
+22  3072.0  81.707223  ...  89.310890             88.335577
+23  3200.0  84.432717  ...  95.380032             94.955488
+24  3328.0  82.939284  ...  81.346098             81.162679
+25  3456.0  82.519518  ...  85.767626             90.382926
+26  3584.0  87.381330  ...  98.808123             90.458141
+27  3712.0  81.615477  ...  88.640059             84.017953
+28  3840.0  84.228485  ...  92.159996             84.354966
+29  3968.0  90.388098  ...  87.976885             90.994735
+30  4096.0  86.592080  ...  87.495257             90.260743

 [31 rows x 5 columns]
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 5 minutes  25.787 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 5 minutes  25.557 seconds)</p>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-getting-started-tutorials-03-matrix-multiplication-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/d5fee5b55a64e47f1b5724ec39adf171/03-matrix-multiplication.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">03-matrix-multiplication.py</span></code></a></p>
--- a/v1.1.2/getting-started/tutorials/05-layer-norm.html
+++ b/v1.1.2/getting-started/tutorials/05-layer-norm.html
@@ -194,36 +194,36 @@ to download the full example code</p>
 <p class="sphx-glr-script-out">Out:</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>layer-norm-backward:
          N      Triton       Torch        Apex
-0    1024.0  307.200008   99.497980  311.088617
+0    1024.0  307.200008   99.497980  315.076934
 1    1536.0  351.085717  134.050910  344.523365
 2    2048.0  423.724127  159.067963  332.108094
 3    2560.0  458.507457  182.314537  330.322572
 4    3072.0  515.580429  191.501303  316.429186
-5    3584.0  544.405080  207.768111  311.652167
-6    4096.0  564.965515  220.907859  298.796351
-7    4608.0  498.162157  232.336141  288.751954
-8    5120.0  527.381977  243.809526  289.129408
+5    3584.0  547.872604  207.768111  310.527060
+6    4096.0  568.231237  220.412561  298.796351
+7    4608.0  500.416301  232.336141  289.507855
+8    5120.0  529.655159  243.326731  289.129408
 9    5632.0  540.671974  244.426754  291.310338
-10   6144.0  548.163546  250.775512  286.879370
+10   6144.0  550.208948  250.775512  287.438593
 11   6656.0  536.053693  255.590406  286.793541
-12   7168.0  516.612607  253.360829  277.470965
+12   7168.0  516.612607  253.734520  277.470965
 13   7680.0  488.912481  266.743841  284.884090
-14   8192.0  463.698115  258.354805  278.087683
-15   8704.0  416.958106  267.472468  284.987724
-16   9216.0  431.157889  272.394084  289.887291
+14   8192.0  464.794337  258.354805  278.087683
+15   8704.0  416.958106  267.472468  285.377055
+16   9216.0  431.157889  272.059034  289.887291
 17   9728.0  439.683593  279.942444  288.950501
-18  10240.0  446.836366  287.102804  290.153487
-19  10752.0  430.079980  246.699797  289.941565
-20  11264.0  429.786952  245.313973  286.069848
-21  11776.0  420.571432  249.447482  288.981596
-22  12288.0  418.909088  254.453844  294.323369
+18  10240.0  446.836366  287.102804  290.496460
+19  10752.0  430.079980  246.464170  289.941565
+20  11264.0  430.471331  245.313973  286.069848
+21  11776.0  419.946507  249.227509  288.981596
+22  12288.0  418.314886  254.453844  294.323369
 23  12800.0  414.016170  253.884294  288.721817
-24  13312.0  411.181478  252.360194  289.391298
-25  13824.0  404.112047  256.991469  291.799461
+24  13312.0  411.711355  252.459903  289.129403
+25  13824.0  404.604870  256.991469  291.799461
 26  14336.0  395.021816  255.809666  289.129416
-27  14848.0  384.829370  257.479779  289.012175
-28  15360.0  376.932517  258.332158  287.775181
-29  15872.0  369.832994  261.626369  290.562936
+27  14848.0  385.245405  257.479779  289.012175
+28  15360.0  376.547496  258.332158  287.550706
+29  15872.0  369.474279  261.446802  290.562936
 </pre></div>
 </div>
 <div class="line-block">
@@ -477,7 +477,7 @@ to download the full example code</p>
 <span class="n">bench_layer_norm</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">save_path</span><span class="o">=</span><span class="s1">&#39;.&#39;</span><span class="p">,</span> <span class="n">print_data</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 2 minutes  11.638 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 2 minutes  10.802 seconds)</p>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-getting-started-tutorials-05-layer-norm-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/935c0dd0fbeb4b2e69588471cbb2d4b2/05-layer-norm.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">05-layer-norm.py</span></code></a></p>
--- a/v1.1.2/getting-started/tutorials/sg_execution_times.html
+++ b/v1.1.2/getting-started/tutorials/sg_execution_times.html
@@ -174,7 +174,7 @@
            
  <div class="section" id="computation-times">
 <span id="sphx-glr-getting-started-tutorials-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>12:45.752</strong> total execution time for <strong>getting-started_tutorials</strong> files:</p>
+<p><strong>12:42.179</strong> total execution time for <strong>getting-started_tutorials</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 85%" />
@@ -183,19 +183,19 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="03-matrix-multiplication.html#sphx-glr-getting-started-tutorials-03-matrix-multiplication-py"><span class="std std-ref">Matrix Multiplication</span></a> (<code class="docutils literal notranslate"><span class="pre">03-matrix-multiplication.py</span></code>)</p></td>
-<td><p>05:25.787</p></td>
+<td><p>05:25.557</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="02-fused-softmax.html#sphx-glr-getting-started-tutorials-02-fused-softmax-py"><span class="std std-ref">Fused Softmax</span></a> (<code class="docutils literal notranslate"><span class="pre">02-fused-softmax.py</span></code>)</p></td>
-<td><p>03:22.095</p></td>
+<td><p>03:22.770</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="05-layer-norm.html#sphx-glr-getting-started-tutorials-05-layer-norm-py"><span class="std std-ref">Layer Normalization</span></a> (<code class="docutils literal notranslate"><span class="pre">05-layer-norm.py</span></code>)</p></td>
-<td><p>02:11.638</p></td>
+<td><p>02:10.802</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="01-vector-add.html#sphx-glr-getting-started-tutorials-01-vector-add-py"><span class="std std-ref">Vector Addition</span></a> (<code class="docutils literal notranslate"><span class="pre">01-vector-add.py</span></code>)</p></td>
-<td><p>01:46.221</p></td>
+<td><p>01:43.041</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="04-low-memory-dropout.html#sphx-glr-getting-started-tutorials-04-low-memory-dropout-py"><span class="std std-ref">Low-Memory Dropout</span></a> (<code class="docutils literal notranslate"><span class="pre">04-low-memory-dropout.py</span></code>)</p></td>