[GH-PAGES] Updated website
This commit is contained in:
@@ -406,32 +406,32 @@ and (1) checks any shape constraint; (2) allocates the output; (3) launches the
|
||||
</pre></div>
|
||||
</div>
|
||||
<p class="sphx-glr-script-out">Out:</p>
|
||||
<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>tensor([[-0.0000e+00, 2.9438e+01, -1.3113e-06, ..., 9.7266e+00,
|
||||
-3.4237e-04, -0.0000e+00],
|
||||
[-1.7615e-01, -0.0000e+00, 6.1914e+00, ..., 3.7562e+01,
|
||||
-0.0000e+00, -0.0000e+00],
|
||||
[ 9.9531e+00, 1.9078e+01, -0.0000e+00, ..., 3.6934e+00,
|
||||
1.6578e+01, 2.1031e+01],
|
||||
<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>tensor([[-5.9605e-08, 5.1094e+01, -1.8477e-05, ..., 2.6547e+01,
|
||||
-7.2598e-05, -4.2510e-04],
|
||||
[-2.7100e-01, -3.0220e-05, 5.9414e+00, ..., 2.8340e+00,
|
||||
-1.8644e-04, 1.3094e+01],
|
||||
[-1.5332e-01, 4.8125e+00, 8.4277e-01, ..., 3.6387e+00,
|
||||
4.3375e+01, 1.6865e+00],
|
||||
...,
|
||||
[ 2.6547e+01, -1.1802e-05, 7.7852e+00, ..., 5.2156e+01,
|
||||
3.5469e+01, 1.5602e+01],
|
||||
[-0.0000e+00, -0.0000e+00, 1.6531e+01, ..., 2.1211e+00,
|
||||
1.7412e+00, 1.1422e+01],
|
||||
[-2.6550e-02, -1.1325e-05, 3.0344e+01, ..., -9.1248e-03,
|
||||
-1.5199e-05, 3.8164e+00]], device='cuda:0', dtype=torch.float16)
|
||||
tensor([[-0.0000e+00, 2.9438e+01, -1.3113e-06, ..., 9.7266e+00,
|
||||
-3.4261e-04, -0.0000e+00],
|
||||
[-1.7615e-01, -0.0000e+00, 6.1914e+00, ..., 3.7562e+01,
|
||||
-0.0000e+00, -0.0000e+00],
|
||||
[ 9.9531e+00, 1.9078e+01, -0.0000e+00, ..., 3.6934e+00,
|
||||
1.6578e+01, 2.1031e+01],
|
||||
[-0.0000e+00, 2.9453e+01, -4.7684e-07, ..., 6.2617e+00,
|
||||
4.1133e+00, -0.0000e+00],
|
||||
[ 1.6562e+01, -8.1539e-04, 1.3836e+01, ..., 1.9844e+00,
|
||||
-1.1238e-02, 8.4375e+00],
|
||||
[-1.0876e-01, -2.7295e-01, 3.2156e+01, ..., -1.6907e-02,
|
||||
-0.0000e+00, -0.0000e+00]], device='cuda:0', dtype=torch.float16)
|
||||
tensor([[-5.9605e-08, 5.1094e+01, -1.8537e-05, ..., 2.6547e+01,
|
||||
-7.2658e-05, -4.2605e-04],
|
||||
[-2.7100e-01, -3.0220e-05, 5.9414e+00, ..., 2.8340e+00,
|
||||
-1.8632e-04, 1.3094e+01],
|
||||
[-1.5332e-01, 4.8125e+00, 8.4277e-01, ..., 3.6387e+00,
|
||||
4.3375e+01, 1.6875e+00],
|
||||
...,
|
||||
[ 2.6547e+01, -1.1802e-05, 7.7852e+00, ..., 5.2156e+01,
|
||||
3.5469e+01, 1.5602e+01],
|
||||
[-0.0000e+00, -0.0000e+00, 1.6531e+01, ..., 2.1211e+00,
|
||||
1.7412e+00, 1.1422e+01],
|
||||
[-2.6550e-02, -1.1325e-05, 3.0344e+01, ..., -9.1324e-03,
|
||||
-1.5199e-05, 3.8164e+00]], device='cuda:0', dtype=torch.float16)
|
||||
[-0.0000e+00, 2.9453e+01, -4.7684e-07, ..., 6.2617e+00,
|
||||
4.1133e+00, -0.0000e+00],
|
||||
[ 1.6562e+01, -8.1778e-04, 1.3836e+01, ..., 1.9844e+00,
|
||||
-1.1238e-02, 8.4375e+00],
|
||||
[-1.0876e-01, -2.7295e-01, 3.2156e+01, ..., -1.6891e-02,
|
||||
-0.0000e+00, -0.0000e+00]], device='cuda:0', dtype=torch.float16)
|
||||
tensor(True, device='cuda:0')
|
||||
</pre></div>
|
||||
</div>
|
||||
@@ -472,39 +472,39 @@ tensor(True, device='cuda:0')
|
||||
<p class="sphx-glr-script-out">Out:</p>
|
||||
<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span> M cuBLAS Triton
|
||||
0 512.0 20.164923 15.420235
|
||||
1 768.0 58.982401 42.130286
|
||||
1 768.0 58.982401 40.215272
|
||||
2 1024.0 91.180520 72.315584
|
||||
3 1280.0 157.538463 117.028568
|
||||
4 1536.0 150.593357 147.455995
|
||||
5 1792.0 212.064605 193.783168
|
||||
6 2048.0 197.379013 151.146088
|
||||
7 2304.0 243.753804 179.608068
|
||||
8 2560.0 237.449270 217.006622
|
||||
9 2816.0 233.231062 200.987140
|
||||
4 1536.0 153.867127 144.446699
|
||||
5 1792.0 208.137481 190.498706
|
||||
6 2048.0 199.728763 152.520144
|
||||
7 2304.0 246.266731 178.267699
|
||||
8 2560.0 235.741014 215.578957
|
||||
9 2816.0 231.990461 198.246398
|
||||
10 3072.0 236.916752 221.184001
|
||||
11 3328.0 234.499328 210.500857
|
||||
11 3328.0 239.173747 210.500857
|
||||
12 3584.0 248.385067 230.552287
|
||||
13 3840.0 252.493157 223.418188
|
||||
14 4096.0 263.689066 244.922869
|
||||
15 4352.0 247.295210 231.639115
|
||||
16 4608.0 274.573240 254.803966
|
||||
17 4864.0 266.298229 245.366501
|
||||
18 5120.0 259.548513 238.312729
|
||||
19 5376.0 252.676487 237.081606
|
||||
20 5632.0 270.685535 249.046163
|
||||
21 5888.0 264.382140 242.069377
|
||||
22 6144.0 262.447761 240.565495
|
||||
23 6400.0 257.028108 235.078047
|
||||
24 6656.0 254.386204 232.699140
|
||||
25 6912.0 252.040861 232.926171
|
||||
26 7168.0 253.193644 231.815375
|
||||
27 7424.0 251.789150 232.860938
|
||||
28 7680.0 250.988932 231.727608
|
||||
29 7936.0 253.622108 232.094986
|
||||
30 8192.0 253.121589 231.859598
|
||||
13 3840.0 251.917998 222.519114
|
||||
14 4096.0 263.172024 244.032234
|
||||
15 4352.0 249.595626 232.307632
|
||||
16 4608.0 276.560014 254.803966
|
||||
17 4864.0 266.614125 245.366501
|
||||
18 5120.0 257.003930 238.096276
|
||||
19 5376.0 252.676487 236.527241
|
||||
20 5632.0 270.057027 248.514009
|
||||
21 5888.0 264.206935 242.511113
|
||||
22 6144.0 259.441481 241.205983
|
||||
23 6400.0 257.157204 235.078047
|
||||
24 6656.0 254.161678 232.699140
|
||||
25 6912.0 251.844029 233.178785
|
||||
26 7168.0 253.282797 231.740709
|
||||
27 7424.0 251.868505 230.377264
|
||||
28 7680.0 250.988932 231.606284
|
||||
29 7936.0 253.293068 229.692102
|
||||
30 8192.0 253.002304 231.360005
|
||||
</pre></div>
|
||||
</div>
|
||||
<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 0 minutes 36.230 seconds)</p>
|
||||
<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 0 minutes 32.933 seconds)</p>
|
||||
<div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-getting-started-tutorials-03-matrix-multiplication-py">
|
||||
<div class="sphx-glr-download sphx-glr-download-python docutils container">
|
||||
<p><a class="reference download internal" download="" href="../../_downloads/d5fee5b55a64e47f1b5724ec39adf171/03-matrix-multiplication.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">03-matrix-multiplication.py</span></code></a></p>
|
||||
|
Reference in New Issue
Block a user