[OPS] Add performance model for gemm/gemv (#397)
Significantly improves the performance of `triton.ops.matmul` in memory-bound settings via the use of many more block configs coupled with a performance model to drive the auto-tuning process.
This commit is contained in:
@@ -25,7 +25,7 @@ def nvsmi(attrs):
|
||||
matmul_data = {
|
||||
# square
|
||||
(256 , 256 , 256 ) : {'v100': 0.027},
|
||||
(512 , 512 , 512 ) : {'v100': 0.141},
|
||||
(512 , 512 , 512 ) : {'v100': 0.158},
|
||||
(1024, 1024, 1024 ) : {'v100': 0.466},
|
||||
(2048, 2048, 2048 ) : {'v100': 0.680},
|
||||
(4096, 4096, 4096 ) : {'v100': 0.831},
|
||||
@@ -35,10 +35,10 @@ matmul_data = {
|
||||
(16 , 4096, 4096 ) : {'v100': 0.0883},
|
||||
(16 , 8192, 8192 ) : {'v100': 0.101},
|
||||
(64 , 1024, 1024 ) : {'v100': 0.073},
|
||||
(64 , 4096, 4096 ) : {'v100': 0.228},
|
||||
(64 , 4096, 4096 ) : {'v100': 0.270},
|
||||
(64 , 8192, 8192 ) : {'v100': 0.360},
|
||||
(1024, 64 , 1024 ) : {'v100': 0.0692},
|
||||
(4096, 64 , 4096 ) : {'v100': 0.223},
|
||||
(4096, 64 , 4096 ) : {'v100': 0.264},
|
||||
(8192, 64 , 8192 ) : {'v100': 0.323},
|
||||
# # deep reductions
|
||||
# (64 , 64 , 16384) : {'v100': 0.},
|
||||
|
Reference in New Issue
Block a user