Database: Updated Maxwell profile
This commit is contained in:
@@ -40,9 +40,9 @@ namespace templates
|
|||||||
unsigned int gemm::lmem_usage(expression_tree const & expression) const
|
unsigned int gemm::lmem_usage(expression_tree const & expression) const
|
||||||
{
|
{
|
||||||
unsigned int N = 0;
|
unsigned int N = 0;
|
||||||
size_t llda = (A_trans_=='N')?mL_:kL_+1;
|
size_t llda = (A_trans_=='N')?mL_:kL_+vwidth_;
|
||||||
size_t lnda = (A_trans_=='N')?kL_:mL_;
|
size_t lnda = (A_trans_=='N')?kL_:mL_;
|
||||||
size_t lldb = (B_trans_=='T')?nL_:kL_+1;
|
size_t lldb = (B_trans_=='T')?nL_:kL_+vwidth_;
|
||||||
size_t lndb = (B_trans_=='T')?kL_:nL_;
|
size_t lndb = (B_trans_=='T')?kL_:nL_;
|
||||||
N += llda*lnda;
|
N += llda*lnda;
|
||||||
N += lldb*lndb;
|
N += lldb*lndb;
|
||||||
@@ -173,9 +173,9 @@ namespace templates
|
|||||||
stream << std::endl;
|
stream << std::endl;
|
||||||
|
|
||||||
stream << "//pointers" << std::endl;
|
stream << "//pointers" << std::endl;
|
||||||
size_t llda = (A_trans_=='N')?mL_:kL_+1;
|
size_t llda = (A_trans_=='N')?mL_:kL_+vwidth_;
|
||||||
size_t lnda = (A_trans_=='N')?kL_:mL_;
|
size_t lnda = (A_trans_=='N')?kL_:mL_;
|
||||||
size_t lldb = (B_trans_=='T')?nL_:kL_+1;
|
size_t lldb = (B_trans_=='T')?nL_:kL_+vwidth_;
|
||||||
size_t lndb = (B_trans_=='T')?kL_:nL_;
|
size_t lndb = (B_trans_=='T')?kL_:nL_;
|
||||||
stream << "$LOCAL " << sdtype << " lA[" << llda*lnda << "];" << std::endl;
|
stream << "$LOCAL " << sdtype << " lA[" << llda*lnda << "];" << std::endl;
|
||||||
stream << "$LOCAL " << sdtype << " lB[" << lldb*lndb << "];" << std::endl;
|
stream << "$LOCAL " << sdtype << " lB[" << lldb*lndb << "];" << std::endl;
|
||||||
|
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
@@ -82,7 +82,8 @@ def tree_of(template, sizes, context):
|
|||||||
M, N = sizes[::-1] if T else sizes
|
M, N = sizes[::-1] if T else sizes
|
||||||
A = sc.empty((M,N), context=context)
|
A = sc.empty((M,N), context=context)
|
||||||
x = sc.empty(N, context=context)
|
x = sc.empty(N, context=context)
|
||||||
return sc.dot(A.T, x) if T else sc.dot(A, x), (A, x)
|
y = sc.empty(M, context=context)
|
||||||
|
return sc.assign(x, sc.dot(A.T, y)) if T else sc.assign(y, sc.dot(A, x)), (A, x, y)
|
||||||
elif issubclass(template, sc.templates.gemm):
|
elif issubclass(template, sc.templates.gemm):
|
||||||
AT = template is sc.templates.gemm_tn or template is sc.templates.gemm_tt
|
AT = template is sc.templates.gemm_tn or template is sc.templates.gemm_tt
|
||||||
BT = template is sc.templates.gemm_nt or template is sc.templates.gemm_tt
|
BT = template is sc.templates.gemm_nt or template is sc.templates.gemm_tt
|
||||||
|
@@ -81,16 +81,15 @@ class Tuner:
|
|||||||
if level=='simple':
|
if level=='simple':
|
||||||
sizes = [(1536, 1536)]
|
sizes = [(1536, 1536)]
|
||||||
elif level=='intermediate':
|
elif level=='intermediate':
|
||||||
sizes = [(896,896),
|
sizes = []
|
||||||
(1536,1536),
|
#Square
|
||||||
(256, 256),
|
for N in [896, 1760, 2048, 2560]:
|
||||||
(1024,256),
|
sizes += [(N, N)]
|
||||||
(4096,256),
|
#Tall and Skinny
|
||||||
(16384,256),
|
for M in [16, 32, 64, 128]:
|
||||||
(256,1024),
|
for N in [1024, 4096, 16384, 65536, 262144]:
|
||||||
(256,4096),
|
sizes += [(M, N)]
|
||||||
(256,16384),
|
sizes += [(N, M)]
|
||||||
(3025,96)]
|
|
||||||
else:
|
else:
|
||||||
sizes = product(pow2range(4,17), pow2range(4,17))
|
sizes = product(pow2range(4,17), pow2range(4,17))
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user