Fixup
This commit is contained in:
@@ -202,28 +202,30 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
|
|||||||
|
|
||||||
unsigned int npA = p_.mL/(A_trans_=='N'?p_.local_fetch_0*p_.simd_width:p_.local_fetch_1);
|
unsigned int npA = p_.mL/(A_trans_=='N'?p_.local_fetch_0*p_.simd_width:p_.local_fetch_1);
|
||||||
unsigned int npB = p_.nL/(B_trans_=='T'?p_.local_fetch_0*p_.simd_width:p_.local_fetch_1);
|
unsigned int npB = p_.nL/(B_trans_=='T'?p_.local_fetch_0*p_.simd_width:p_.local_fetch_1);
|
||||||
if (A_trans_=='N')
|
|
||||||
stream << "__global " << vdtype << "* Ai[" << npA << "] = {A + (gidx*" << p_.mL/p_.simd_width << ")" << ASTRIDE1 << " + idyT*Ald + offz*Ald};" << std::endl;
|
|
||||||
else
|
|
||||||
stream << "__global " << vdtype << "* Ai[" << npA << "] = {A + idxT" << ASTRIDE1 << " + gidx*" << p_.mL/p_.simd_width << "*Ald + offz};" << std::endl;
|
|
||||||
|
|
||||||
|
stream << "__global " << vdtype << "* Ai[" << npA << "];" << std::endl;
|
||||||
|
for(unsigned int i = 0 ; i < npA ; ++i)
|
||||||
|
if (A_trans_=='N')
|
||||||
|
stream << "Ai[" << i << "] = A + (gidx*" << p_.mL/p_.simd_width << ")" << ASTRIDE1 << " + idyT*Ald + offz*Ald, A + (gidx*" << p_.mL/p_.simd_width << ")" << ASTRIDE1 << " + idyT*Ald + offz*Ald;" << std::endl;
|
||||||
|
else
|
||||||
|
stream << "Ai[" << i << "] = A + idxT" << ASTRIDE1 << " + gidx*" << p_.mL/p_.simd_width << "*Ald + offz;" << std::endl;
|
||||||
|
|
||||||
|
|
||||||
|
stream << "__global " << vdtype << "* Bi[" << npB << "];" << std::endl;
|
||||||
|
for(unsigned int i = 0 ; i < npB ; ++i)
|
||||||
if(B_trans_=='T')
|
if(B_trans_=='T')
|
||||||
stream << "__global " << vdtype << "* Bi[" << npB << "] = {B};" << std::endl;
|
stream << "Bi[" << i << "] = B + (gidy*" << p_.nL/p_.simd_width << ")" << BSTRIDE1 << " + idyT*Bld + offz*Bld, B + (gidy*" << p_.nL/p_.simd_width << ")" << BSTRIDE1 << " + idyT*Bld + offz*Bld;" << std::endl;
|
||||||
|
else
|
||||||
|
stream << "Bi[" << i << "] = B + idxT" << BSTRIDE1 << " + gidy*" << p_.nL/p_.simd_width << "*Bld + offz;" << std::endl;
|
||||||
|
|
||||||
switch (p_.A_fetching_policy)
|
switch (p_.A_fetching_policy)
|
||||||
{
|
{
|
||||||
case FETCH_FROM_LOCAL:
|
case FETCH_FROM_LOCAL:
|
||||||
for(unsigned int i = 0 ; i < npA ; i++ )
|
for(unsigned int i = 0 ; i < npA ; i++ )
|
||||||
if (A_trans_=='N')
|
if (A_trans_=='N')
|
||||||
{
|
stream << "if(gidx*" << p_.mL << " + idxT*" << p_.simd_width << " + " << i << "*" << p_.local_fetch_0*p_.simd_width << " < M) Ai[" << i << "] += (idxT + " << i*p_.local_fetch_0 << ")" << ASTRIDE1 << ";" << std::endl;
|
||||||
stream << "Ai[" << i << "] += (gidx*" << p_.mL/p_.simd_width << ") " << ASTRIDE1 << " + idyT*Ald + offz*Ald;" << std::endl;
|
|
||||||
stream << "if(gidx*" << p_.mL << " + idxT + " << i << "*" << p_.local_fetch_0*p_.simd_width << " < M) Ai[" << i << "] += (idxT + " << i*p_.local_fetch_0 << ")" << ASTRIDE1 << ";" << std::endl;
|
|
||||||
}
|
|
||||||
else
|
else
|
||||||
{
|
|
||||||
stream << "Ai[" << i << "] += idxT" << ASTRIDE1 << " + gidx*" << p_.mL/p_.simd_width << "*Ald + offz;" << std::endl;
|
|
||||||
stream << "if(gidx*" << p_.mL << " + idyT + " << i << "*" << p_.local_fetch_1 << " < M) Ai[" << i << "] += (idyT + " << i*p_.local_fetch_1 << ")*Ald;" << std::endl;
|
stream << "if(gidx*" << p_.mL << " + idyT + " << i << "*" << p_.local_fetch_1 << " < M) Ai[" << i << "] += (idyT + " << i*p_.local_fetch_1 << ")*Ald;" << std::endl;
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case FETCH_FROM_GLOBAL_CONTIGUOUS:
|
case FETCH_FROM_GLOBAL_CONTIGUOUS:
|
||||||
@@ -248,16 +250,9 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
|
|||||||
case FETCH_FROM_LOCAL:
|
case FETCH_FROM_LOCAL:
|
||||||
for(unsigned int i = 0 ; i < npB ; i++ )
|
for(unsigned int i = 0 ; i < npB ; i++ )
|
||||||
if (B_trans_=='T')
|
if (B_trans_=='T')
|
||||||
{
|
stream << "if(gidy*" << p_.nL << " + idxT* " << p_.simd_width << " + " << i << "*" << p_.local_fetch_0*p_.simd_width << " < N) Bi[" << i << "] += (idxT + " << i*p_.local_fetch_0 << ")" << BSTRIDE1 << ";" << std::endl;
|
||||||
stream << "Bi[" << i << "] += (gidy*" << p_.nL/p_.simd_width << ")" << BSTRIDE1 << " + idyT*Bld + offz*Bld;" << std::endl;
|
|
||||||
stream << "if(gidy*" << p_.nL << " + idxT + " << i << "*" << p_.local_fetch_0*p_.simd_width << " < N) Bi[" << i << "] += (idxT + " << i*p_.local_fetch_0 << ")" << BSTRIDE1 << ";" << std::endl;
|
|
||||||
}
|
|
||||||
else
|
else
|
||||||
{
|
|
||||||
stream << "Bi[" << i << "] += idxT" << BSTRIDE1 << " + gidy*" << p_.nL/p_.simd_width << "*Bld + offz;" << std::endl;
|
|
||||||
stream << "if(gidy*" << p_.nL << " + idyT + " << i << "*" << p_.local_fetch_1 << " < N) Bi[" << i << "] += (idyT + " << i*p_.local_fetch_1 << ")*Bld;" << std::endl;
|
stream << "if(gidy*" << p_.nL << " + idyT + " << i << "*" << p_.local_fetch_1 << " < N) Bi[" << i << "] += (idyT + " << i*p_.local_fetch_1 << ")*Bld;" << std::endl;
|
||||||
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case FETCH_FROM_GLOBAL_CONTIGUOUS:
|
case FETCH_FROM_GLOBAL_CONTIGUOUS:
|
||||||
@@ -309,7 +304,7 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
|
|||||||
for(int_t k = 0; k < p_.mL; k += p_.local_fetch_1)
|
for(int_t k = 0; k < p_.mL; k += p_.local_fetch_1)
|
||||||
for(int_t m = 0; m < p_.kL; m += p_.local_fetch_0*p_.simd_width)
|
for(int_t m = 0; m < p_.kL; m += p_.local_fetch_0*p_.simd_width)
|
||||||
{
|
{
|
||||||
string to_load = "Ai[" + to_string(k) + "][" + to_string(m/p_.simd_width) + ASTRIDE1 + "]";
|
string to_load = "Ai[" + to_string(k/p_.local_fetch_1) + "][" + to_string(m/p_.simd_width) + ASTRIDE1 + "]";
|
||||||
stream << VSTORE(to_load, "0", "lAstore + lAstart + " + to_string(m*lAld+k)) << ";" << std::endl;
|
stream << VSTORE(to_load, "0", "lAstore + lAstart + " + to_string(m*lAld+k)) << ";" << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -325,7 +320,7 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
|
|||||||
for(int_t k = 0; k < p_.nL; k += p_.local_fetch_1)
|
for(int_t k = 0; k < p_.nL; k += p_.local_fetch_1)
|
||||||
for(int_t n = 0; n < p_.kL; n += p_.local_fetch_0*p_.simd_width)
|
for(int_t n = 0; n < p_.kL; n += p_.local_fetch_0*p_.simd_width)
|
||||||
{
|
{
|
||||||
string to_load = "Bi[" + to_string(k) + "][" + to_string(n/p_.simd_width) + BSTRIDE1 + "]";
|
string to_load = "Bi[" + to_string(k/p_.local_fetch_1) + "][" + to_string(n/p_.simd_width) + BSTRIDE1 + "]";
|
||||||
stream << VSTORE(to_load, "0", "lBstore + lBstart + " + to_string(n*lBld+k)) << ";" << std::endl;
|
stream << VSTORE(to_load, "0", "lBstore + lBstart + " + to_string(n*lBld+k)) << ";" << std::endl;
|
||||||
}
|
}
|
||||||
stream << LocalBarrier(backend) << ";" << std::endl;
|
stream << LocalBarrier(backend) << ";" << std::endl;
|
||||||
@@ -513,7 +508,7 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
|
|||||||
stream << "}" << std::endl;
|
stream << "}" << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::cout << stream.str() << std::endl;
|
// std::cout << stream.str() << std::endl;
|
||||||
return stream.str();
|
return stream.str();
|
||||||
|
|
||||||
#undef HANDLE_BOUNDS
|
#undef HANDLE_BOUNDS
|
||||||
@@ -682,13 +677,13 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
|
|||||||
|
|
||||||
execution_options_type const & options = ctr.execution_options();
|
execution_options_type const & options = ctr.execution_options();
|
||||||
|
|
||||||
if (ldstrideA> 1 || ldstrideB > 1 || ldstrideC > 1
|
// if (ldstrideA> 1 || ldstrideB > 1 || ldstrideC > 1
|
||||||
|| (p_.simd_width>1 && (ldstartA % p_.simd_width > 0 || ldstartB % p_.simd_width > 0 || pA->ld()%p_.simd_width > 0 || pB->ld()%p_.simd_width > 0)))
|
// || (p_.simd_width>1 && (ldstartA % p_.simd_width > 0 || ldstartB % p_.simd_width > 0 || pA->ld()%p_.simd_width > 0 || pB->ld()%p_.simd_width > 0)))
|
||||||
{
|
// {
|
||||||
fallback.enqueue_block(queue, M, N, K, create_slice(*pA, 0, M, 0, K, swap_A), create_slice(*pB, 0, K, 0, N, swap_B),
|
// fallback.enqueue_block(queue, M, N, K, create_slice(*pA, 0, M, 0, K, swap_A), create_slice(*pB, 0, K, 0, N, swap_B),
|
||||||
create_slice(*pC, 0, M, 0, N, false), alpha, beta, program, "fallback", options);
|
// create_slice(*pC, 0, M, 0, N, false), alpha, beta, program, "fallback", options);
|
||||||
return;
|
// return;
|
||||||
}
|
// }
|
||||||
|
|
||||||
enqueue_block(queue, M, N, K, create_slice(*pA, 0, M, 0, K, swap_A), create_slice(*pB, 0, K, 0, N, swap_B), create_slice(*pC, 0, M, 0, N, false), alpha, beta, program, suffix, options);
|
enqueue_block(queue, M, N, K, create_slice(*pA, 0, M, 0, K, swap_A), create_slice(*pB, 0, K, 0, N, swap_B), create_slice(*pC, 0, M, 0, N, false), alpha, beta, program, suffix, options);
|
||||||
}
|
}
|
||||||
@@ -699,7 +694,7 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
|
|||||||
, int_t ms, int_t ks, int_t ns
|
, int_t ms, int_t ks, int_t ns
|
||||||
, fetching_policy_type Afetch , fetching_policy_type Bfetch
|
, fetching_policy_type Afetch , fetching_policy_type Bfetch
|
||||||
, int_t lfetch0, int_t lfetch1, bool check_bound) :
|
, int_t lfetch0, int_t lfetch1, bool check_bound) :
|
||||||
mproduct(mproduct_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), true, 'N', 'N')
|
mproduct(mproduct_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), false, 'N', 'N')
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
//
|
//
|
||||||
@@ -708,7 +703,7 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
|
|||||||
, int_t ms, int_t ks, int_t ns
|
, int_t ms, int_t ks, int_t ns
|
||||||
, fetching_policy_type Afetch , fetching_policy_type Bfetch
|
, fetching_policy_type Afetch , fetching_policy_type Bfetch
|
||||||
, int_t lfetch0, int_t lfetch1, bool check_bound) :
|
, int_t lfetch0, int_t lfetch1, bool check_bound) :
|
||||||
mproduct(mproduct_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), true, 'T', 'N')
|
mproduct(mproduct_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), false, 'T', 'N')
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
//
|
//
|
||||||
@@ -717,7 +712,7 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
|
|||||||
, int_t ms, int_t ks, int_t ns
|
, int_t ms, int_t ks, int_t ns
|
||||||
, fetching_policy_type Afetch , fetching_policy_type Bfetch
|
, fetching_policy_type Afetch , fetching_policy_type Bfetch
|
||||||
, int_t lfetch0, int_t lfetch1, bool check_bound) :
|
, int_t lfetch0, int_t lfetch1, bool check_bound) :
|
||||||
mproduct(mproduct_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), true, 'N', 'T')
|
mproduct(mproduct_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), false, 'N', 'T')
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
//
|
//
|
||||||
@@ -726,7 +721,7 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
|
|||||||
, int_t ms, int_t ks, int_t ns
|
, int_t ms, int_t ks, int_t ns
|
||||||
, fetching_policy_type Afetch , fetching_policy_type Bfetch
|
, fetching_policy_type Afetch , fetching_policy_type Bfetch
|
||||||
, int_t lfetch0, int_t lfetch1, bool check_bound) :
|
, int_t lfetch0, int_t lfetch1, bool check_bound) :
|
||||||
mproduct(mproduct_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), true, 'T', 'T')
|
mproduct(mproduct_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), false, 'T', 'T')
|
||||||
{ }
|
{ }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -258,7 +258,7 @@ std::map<std::pair<expression_type, numeric_type>, tools::shared_ptr<base> > ini
|
|||||||
res[std::make_pair(COL_WISE_REDUCTION_TYPE, DTYPE)] = ptr_t(new mreduction_cols(1, 8, 8, 64, 8, FETCH_FROM_GLOBAL_STRIDED));
|
res[std::make_pair(COL_WISE_REDUCTION_TYPE, DTYPE)] = ptr_t(new mreduction_cols(1, 8, 8, 64, 8, FETCH_FROM_GLOBAL_STRIDED));
|
||||||
res[std::make_pair(MATRIX_PRODUCT_NN_TYPE, DTYPE)] = ptr_t(new mproduct_nn(1, 8, 16, 8, 1, 8, 1, 8, FETCH_FROM_LOCAL, FETCH_FROM_LOCAL, 8, 8, true));
|
res[std::make_pair(MATRIX_PRODUCT_NN_TYPE, DTYPE)] = ptr_t(new mproduct_nn(1, 8, 16, 8, 1, 8, 1, 8, FETCH_FROM_LOCAL, FETCH_FROM_LOCAL, 8, 8, true));
|
||||||
res[std::make_pair(MATRIX_PRODUCT_TN_TYPE, DTYPE)] = ptr_t(new mproduct_tn(1, 8, 16, 8, 1, 8, 1, 8, FETCH_FROM_LOCAL, FETCH_FROM_LOCAL, 8, 8, true));
|
res[std::make_pair(MATRIX_PRODUCT_TN_TYPE, DTYPE)] = ptr_t(new mproduct_tn(1, 8, 16, 8, 1, 8, 1, 8, FETCH_FROM_LOCAL, FETCH_FROM_LOCAL, 8, 8, true));
|
||||||
res[std::make_pair(MATRIX_PRODUCT_NT_TYPE, DTYPE)] = ptr_t(new mproduct_nt(1, 8, 16, 8, 1, 8, 1, 8, FETCH_FROM_LOCAL, FETCH_FROM_LOCAL, 8, 8, true));
|
res[std::make_pair(MATRIX_PRODUCT_NT_TYPE, DTYPE)] = ptr_t(new mproduct_nt(4, 8, 16, 8, 1, 8, 2, 8, FETCH_FROM_LOCAL, FETCH_FROM_LOCAL, 8, 8, true));
|
||||||
res[std::make_pair(MATRIX_PRODUCT_TT_TYPE, DTYPE)] = ptr_t(new mproduct_tt(1, 8, 16, 8, 1, 8, 1, 8, FETCH_FROM_LOCAL, FETCH_FROM_LOCAL, 8, 8, true));
|
res[std::make_pair(MATRIX_PRODUCT_TT_TYPE, DTYPE)] = ptr_t(new mproduct_tt(1, 8, 16, 8, 1, 8, 1, 8, FETCH_FROM_LOCAL, FETCH_FROM_LOCAL, 8, 8, true));
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
|
@@ -115,7 +115,7 @@ def main():
|
|||||||
include =' src/include'.split() + ['external/boost/include', os.path.join(find_module("numpy")[1], "core", "include")]
|
include =' src/include'.split() + ['external/boost/include', os.path.join(find_module("numpy")[1], "core", "include")]
|
||||||
|
|
||||||
#Source files
|
#Source files
|
||||||
src = 'src/lib/value_scalar.cpp src/lib/array.cpp src/lib/wrap/clBLAS.cpp src/lib/symbolic/execute.cpp src/lib/symbolic/preset.cpp src/lib/symbolic/expression.cpp src/lib/symbolic/io.cpp src/lib/model/model.cpp src/lib/model/predictors/random_forest.cpp src/lib/exception/unknown_datatype.cpp src/lib/exception/operation_not_supported.cpp src/lib/driver/program.cpp src/lib/driver/context.cpp src/lib/driver/command_queue.cpp src/lib/driver/check.cpp src/lib/driver/buffer.cpp src/lib/driver/event.cpp src/lib/driver/device.cpp src/lib/driver/backend.cpp src/lib/driver/platform.cpp src/lib/driver/ndrange.cpp src/lib/driver/kernel.cpp src/lib/driver/handle.cpp src/lib/backend/parse.cpp src/lib/backend/mapped_object.cpp src/lib/backend/templates/mreduction.cpp src/lib/backend/templates/maxpy.cpp src/lib/backend/templates/base.cpp src/lib/backend/templates/mproduct.cpp src/lib/backend/templates/vaxpy.cpp src/lib/backend/templates/reduction.cpp src/lib/backend/stream.cpp src/lib/backend/keywords.cpp src/lib/backend/binder.cpp '.split() + [os.path.join('src', 'wrap', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'model.cpp', 'exceptions.cpp']]
|
src = 'src/lib/symbolic/preset.cpp src/lib/symbolic/execute.cpp src/lib/symbolic/io.cpp src/lib/symbolic/expression.cpp src/lib/model/model.cpp src/lib/model/predictors/random_forest.cpp src/lib/backend/templates/mreduction.cpp src/lib/backend/templates/reduction.cpp src/lib/backend/templates/mproduct.cpp src/lib/backend/templates/maxpy.cpp src/lib/backend/templates/base.cpp src/lib/backend/templates/vaxpy.cpp src/lib/backend/mapped_object.cpp src/lib/backend/stream.cpp src/lib/backend/parse.cpp src/lib/backend/keywords.cpp src/lib/backend/binder.cpp src/lib/array.cpp src/lib/value_scalar.cpp src/lib/driver/backend.cpp src/lib/driver/device.cpp src/lib/driver/kernel.cpp src/lib/driver/buffer.cpp src/lib/driver/platform.cpp src/lib/driver/check.cpp src/lib/driver/program.cpp src/lib/driver/command_queue.cpp src/lib/driver/context.cpp src/lib/driver/event.cpp src/lib/driver/ndrange.cpp src/lib/driver/handle.cpp src/lib/exception/unknown_datatype.cpp src/lib/exception/operation_not_supported.cpp src/lib/wrap/clBLAS.cpp '.split() + [os.path.join('src', 'wrap', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'model.cpp', 'exceptions.cpp']]
|
||||||
boostsrc = 'external/boost/libs/'
|
boostsrc = 'external/boost/libs/'
|
||||||
for s in ['numpy','python','smart_ptr','system','thread']:
|
for s in ['numpy','python','smart_ptr','system','thread']:
|
||||||
src = src + [x for x in recursive_glob('external/boost/libs/' + s + '/src/','.cpp') if 'win32' not in x and 'pthread' not in x]
|
src = src + [x for x in recursive_glob('external/boost/libs/' + s + '/src/','.cpp') if 'win32' not in x and 'pthread' not in x]
|
||||||
|
@@ -68,16 +68,16 @@ void test_impl(T epsilon, simple_matrix_base<T> & cC, simple_matrix_base<T> cons
|
|||||||
// CHANDLE(AT), OFF(AT), LD(AT), beta, CHANDLE(C), OFF(C), LD(C), 1, &clqueue, 0, NULL, NULL));
|
// CHANDLE(AT), OFF(AT), LD(AT), beta, CHANDLE(C), OFF(C), LD(C), 1, &clqueue, 0, NULL, NULL));
|
||||||
|
|
||||||
//Column-major
|
//Column-major
|
||||||
// RUN_TEST("GEMM(COL, N, N)", BLAS<T>::F(clblasSgemm,clblasDgemm)(clblasColumnMajor, clblasNoTrans, clblasNoTrans, M, N, K, alpha, CHANDLE(A), OFF(A), LD(A),
|
RUN_TEST("GEMM(COL, N, N)", BLAS<T>::F(clblasSgemm,clblasDgemm)(clblasColumnMajor, clblasNoTrans, clblasNoTrans, M, N, K, alpha, CHANDLE(A), OFF(A), LD(A),
|
||||||
// CHANDLE(B), OFF(B), LD(B), beta, CHANDLE(C), OFF(C), LD(C), 1, &clqueue, 0, NULL, NULL));
|
CHANDLE(B), OFF(B), LD(B), beta, CHANDLE(C), OFF(C), LD(C), 1, &clqueue, 0, NULL, NULL));
|
||||||
|
|
||||||
RUN_TEST("GEMM(COL, N, T)", BLAS<T>::F(clblasSgemm,clblasDgemm)(clblasColumnMajor, clblasNoTrans, clblasTrans, M, N, K, alpha, CHANDLE(A), OFF(A), LD(A),
|
RUN_TEST("GEMM(COL, N, T)", BLAS<T>::F(clblasSgemm,clblasDgemm)(clblasColumnMajor, clblasNoTrans, clblasTrans, M, N, K, alpha, CHANDLE(A), OFF(A), LD(A),
|
||||||
CHANDLE(BT), OFF(BT), LD(BT), beta, CHANDLE(C), OFF(C), LD(C), 1, &clqueue, 0, NULL, NULL));
|
CHANDLE(BT), OFF(BT), LD(BT), beta, CHANDLE(C), OFF(C), LD(C), 1, &clqueue, 0, NULL, NULL));
|
||||||
|
|
||||||
// RUN_TEST("GEMM(COL, T, N)", BLAS<T>::F(clblasSgemm,clblasDgemm)(clblasColumnMajor, clblasTrans, clblasNoTrans, M, N, K, alpha, CHANDLE(AT), OFF(AT), LD(AT),
|
RUN_TEST("GEMM(COL, T, N)", BLAS<T>::F(clblasSgemm,clblasDgemm)(clblasColumnMajor, clblasTrans, clblasNoTrans, M, N, K, alpha, CHANDLE(AT), OFF(AT), LD(AT),
|
||||||
// CHANDLE(B), OFF(B), LD(B), beta, CHANDLE(C), OFF(C), LD(C), 1, &clqueue, 0, NULL, NULL));
|
CHANDLE(B), OFF(B), LD(B), beta, CHANDLE(C), OFF(C), LD(C), 1, &clqueue, 0, NULL, NULL));
|
||||||
// RUN_TEST("GEMM(COL, T, T)", BLAS<T>::F(clblasSgemm,clblasDgemm)(clblasColumnMajor, clblasTrans, clblasTrans, M, N, K, alpha, CHANDLE(AT), OFF(AT), LD(AT),
|
RUN_TEST("GEMM(COL, T, T)", BLAS<T>::F(clblasSgemm,clblasDgemm)(clblasColumnMajor, clblasTrans, clblasTrans, M, N, K, alpha, CHANDLE(AT), OFF(AT), LD(AT),
|
||||||
// CHANDLE(BT), OFF(BT), LD(BT), beta, CHANDLE(C), OFF(C), LD(C), 1, &clqueue, 0, NULL, NULL));
|
CHANDLE(BT), OFF(BT), LD(BT), beta, CHANDLE(C), OFF(C), LD(C), 1, &clqueue, 0, NULL, NULL));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -94,9 +94,9 @@ void test_impl(T epsilon, simple_matrix_base<T> & cC, simple_matrix_base<T> cons
|
|||||||
template<typename T>
|
template<typename T>
|
||||||
void test_impl(T epsilon, ad::driver::Context const & ctx)
|
void test_impl(T epsilon, ad::driver::Context const & ctx)
|
||||||
{
|
{
|
||||||
int_t M = 512;
|
int_t M = 16;
|
||||||
int_t N = 512;
|
int_t N = 20;
|
||||||
int_t K = 512;
|
int_t K = 64;
|
||||||
|
|
||||||
int_t SUBM = 75;
|
int_t SUBM = 75;
|
||||||
int_t SUBN = 76;
|
int_t SUBN = 76;
|
||||||
|
Reference in New Issue
Block a user