[OPS] Add performance model for gemm/gemv (#397)

Significantly improves the performance of `triton.ops.matmul` in memory-bound settings by using many more block configs, coupled with a performance model that drives the auto-tuning process.
This commit is contained in:
daadaada
2021-12-22 01:56:10 +08:00
committed by GitHub
parent 5cdb948c05
commit 39d4bfed83
12 changed files with 289 additions and 27 deletions

View File

@@ -25,7 +25,7 @@ def nvsmi(attrs):
matmul_data = {
# square
(256 , 256 , 256 ) : {'v100': 0.027},
(512 , 512 , 512 ) : {'v100': 0.141},
(512 , 512 , 512 ) : {'v100': 0.158},
(1024, 1024, 1024 ) : {'v100': 0.466},
(2048, 2048, 2048 ) : {'v100': 0.680},
(4096, 4096, 4096 ) : {'v100': 0.831},
@@ -35,10 +35,10 @@ matmul_data = {
(16 , 4096, 4096 ) : {'v100': 0.0883},
(16 , 8192, 8192 ) : {'v100': 0.101},
(64 , 1024, 1024 ) : {'v100': 0.073},
(64 , 4096, 4096 ) : {'v100': 0.228},
(64 , 4096, 4096 ) : {'v100': 0.270},
(64 , 8192, 8192 ) : {'v100': 0.360},
(1024, 64 , 1024 ) : {'v100': 0.0692},
(4096, 64 , 4096 ) : {'v100': 0.223},
(4096, 64 , 4096 ) : {'v100': 0.264},
(8192, 64 , 8192 ) : {'v100': 0.323},
# # deep reductions
# (64 , 64 , 16384) : {'v100': 0.},