Added GEMM benchmark
This commit is contained in:
@@ -17,7 +17,7 @@ include_directories(/opt/intel/system_studio_2015.1.045/mkl/include/)
|
|||||||
set(BLAS_DEF ${BLAS_DEF} "-DBENCH_CBLAS")
|
set(BLAS_DEF ${BLAS_DEF} "-DBENCH_CBLAS")
|
||||||
set(MKLROOT /opt/intel/system_studio_2015.1.045/mkl/)
|
set(MKLROOT /opt/intel/system_studio_2015.1.045/mkl/)
|
||||||
set(CCROOT /opt/intel/system_studio_2015.1.045/compiler/)
|
set(CCROOT /opt/intel/system_studio_2015.1.045/compiler/)
|
||||||
set(BLAS_LIBS ${MKLROOT}/lib/intel64/libmkl_intel_lp64.a ${MKLROOT}/lib/intel64/libmkl_intel_thread.a ${BLAS_LIBS} ${MKLROOT}/lib/intel64/libmkl_core.a ${CCROOT}/lib/intel64/libiomp5.so pthread dl m)
|
set(BLAS_LIBS ${MKLROOT}/lib/intel64/libmkl_intel_lp64.so ${MKLROOT}/lib/intel64/libmkl_avx.so ${MKLROOT}/lib/intel64/libmkl_intel_thread.so ${BLAS_LIBS} ${MKLROOT}/lib/intel64/libmkl_core.so ${CCROOT}/lib/intel64/libiomp5.so pthread dl m)
|
||||||
|
|
||||||
|
|
||||||
string(REPLACE ";" " " BLAS_DEF_STR "${BLAS_DEF}")
|
string(REPLACE ";" " " BLAS_DEF_STR "${BLAS_DEF}")
|
||||||
|
143
bench/blas.cpp
143
bench/blas.cpp
@@ -33,62 +33,119 @@ void bench(ad::numeric_type dtype)
|
|||||||
total_time += times.back();\
|
total_time += times.back();\
|
||||||
}\
|
}\
|
||||||
float tres = median(times);\
|
float tres = median(times);\
|
||||||
std::cout << " " << PERF(N, tres, dtsize) << std::flush;\
|
std::cout << " " << PERF << std::flush;\
|
||||||
}
|
}
|
||||||
|
|
||||||
/*---------*/
|
// /*---------*/
|
||||||
/*--BLAS1--*/
|
// /*--BLAS1--*/
|
||||||
/*---------*/
|
// /*---------*/
|
||||||
std::cout << "#AXPY" << std::endl;
|
// std::cout << "#AXPY" << std::endl;
|
||||||
for(std::vector<int_t>::const_iterator it = BLAS1_N.begin() ; it != BLAS1_N.end() ; ++it)
|
// for(std::vector<int_t>::const_iterator it = BLAS1_N.begin() ; it != BLAS1_N.end() ; ++it)
|
||||||
{
|
// {
|
||||||
int_t N = *it;
|
// int_t N = *it;
|
||||||
std::cout << N;
|
// std::cout << N;
|
||||||
/* ATIDLAS */
|
// /* ATIDLAS */
|
||||||
atidlas::array x(N, dtype), y(N, dtype);
|
// atidlas::array x(N, dtype), y(N, dtype);
|
||||||
BENCHMARK(y = x + y, bandwidth);
|
// BENCHMARK(y = x + y, bandwidth(3*N, tres, dtsize));
|
||||||
/* clAmdBlas */
|
// /* clAmdBlas */
|
||||||
#ifdef BENCH_CLAMDBLAS
|
//#ifdef BENCH_CLAMDBLAS
|
||||||
BENCHMARK(clAmdBlasSaxpy(N, 1, x.data()(), 0, 1, y.data()(), 0, 1, 1, &atidlas::cl::get_queue(x.context(), 0)(), 0, NULL, NULL), bandwidth)
|
// BENCHMARK(clAmdBlasSaxpy(N, 1, x.data()(), 0, 1, y.data()(), 0, 1, 1, &atidlas::cl::get_queue(x.context(), 0)(), 0, NULL, NULL), bandwidth(3*N, tres, dtsize))
|
||||||
#endif
|
//#endif
|
||||||
/* BLAS */
|
// /* BLAS */
|
||||||
#ifdef BENCH_CBLAS
|
//#ifdef BENCH_CBLAS
|
||||||
std::vector<float> cx(N), cy(N);
|
// std::vector<float> cx(N), cy(N);
|
||||||
atidlas::copy(x, cx);
|
// atidlas::copy(x, cx);
|
||||||
atidlas::copy(y, cy);
|
// atidlas::copy(y, cy);
|
||||||
BENCHMARK(cblas_saxpy(N, 1, cx.data(), 1, cy.data(), 1), bandwidth);
|
// BENCHMARK(cblas_saxpy(N, 1, cx.data(), 1, cy.data(), 1), bandwidth(3*N, tres, dtsize));
|
||||||
#endif
|
//#endif
|
||||||
std::cout << std::endl;
|
// std::cout << std::endl;
|
||||||
}
|
// }
|
||||||
std::cout << "\n\n" << std::flush;
|
// std::cout << "\n\n" << std::flush;
|
||||||
|
|
||||||
|
|
||||||
// //DOT
|
|
||||||
// BENCH(DECLARE(ad::pointed_scalar s(dtype)); DECLARE(ad::vector, x(*it, dtype), y(*it, dtype)), s = dot(x, y), BLAS1_N, bandwidth, 2*(*it), "dot");
|
|
||||||
|
|
||||||
|
// std::cout << "#DOT" << std::endl;
|
||||||
|
// for(std::vector<int_t>::const_iterator it = BLAS1_N.begin() ; it != BLAS1_N.end() ; ++it)
|
||||||
|
// {
|
||||||
|
// int_t N = *it;
|
||||||
|
// std::cout << N;
|
||||||
|
// /* ATIDLAS */
|
||||||
|
// atidlas::array x(N, dtype), y(N, dtype);
|
||||||
|
// atidlas::array scratch(N, dtype);
|
||||||
|
// atidlas::scalar s(dtype);
|
||||||
|
// BENCHMARK(s = dot(x,y), bandwidth(2*N, tres, dtsize));
|
||||||
|
// /* clAmdBlas */
|
||||||
|
//#ifdef BENCH_CLAMDBLAS
|
||||||
|
// BENCHMARK(clAmdBlasSdot(N, s.data()(), 0, x.data()(), 0, 1, y.data()(), 0, 1, scratch.data()(), 1, &atidlas::cl::get_queue(x.context(), 0)(), 0, NULL, NULL), bandwidth(2*N, tres, dtsize))
|
||||||
|
//#endif
|
||||||
|
// /* BLAS */
|
||||||
|
//#ifdef BENCH_CBLAS
|
||||||
|
// std::vector<float> cx(N), cy(N);
|
||||||
|
// atidlas::copy(x, cx);
|
||||||
|
// atidlas::copy(y, cy);
|
||||||
|
// BENCHMARK(cblas_sdot(N, cx.data(), 1, cy.data(), 1), bandwidth(2*N, tres, dtsize));
|
||||||
|
//#endif
|
||||||
|
// std::cout << std::endl;
|
||||||
|
// }
|
||||||
|
// std::cout << "\n\n" << std::flush;
|
||||||
|
|
||||||
// /*---------*/
|
// /*---------*/
|
||||||
// /*--BLAS2--*/
|
// /*--BLAS2--*/
|
||||||
// /*---------*/
|
// /*---------*/
|
||||||
|
|
||||||
// //N-layout
|
|
||||||
// for(std::vector<int>::const_iterator Mit = BLAS2_M.begin() ; Mit != BLAS2_M.end() ; ++Mit)
|
|
||||||
// {
|
|
||||||
// BENCH(DECLARE(atidlas::matrix, A(*Mit,*it)); DECLARE(atidlas::vector, y(*Mit), x(*it)),ARGS(y, viennacl::op_assign(), viennacl::linalg::prod(A,x)), BLAS2_N,
|
|
||||||
// bandwidth, (*Mit)*(*it), "row-wise-reductionN-float32");
|
|
||||||
// }
|
|
||||||
|
|
||||||
|
|
||||||
// //T-layout
|
// //T-layout
|
||||||
|
// std::cout << "#GEMV-T" << std::endl;
|
||||||
// for(std::vector<int>::const_iterator Mit = BLAS2_M.begin() ; Mit != BLAS2_M.end() ; ++Mit)
|
// for(std::vector<int>::const_iterator Mit = BLAS2_M.begin() ; Mit != BLAS2_M.end() ; ++Mit)
|
||||||
// {
|
// for(std::vector<int_t>::const_iterator Nit = BLAS2_N.begin() ; Nit != BLAS2_N.end() ; ++Nit)
|
||||||
// BENCH(DECLARE(atidlas::matrix, A(*it,*Mit)) ; DECLARE(atidlas::vector, y(*Mit), x(*it)), ARGS(y, viennacl::op_assign(), viennacl::linalg::prod(viennacl::trans(A),x)), BLAS2_N,
|
// {
|
||||||
// bandwidth, (*Mit)*(*it), "row-wise-reductionT-float32");
|
// int_t M = *Mit;
|
||||||
// }
|
// int_t N = *Nit;
|
||||||
|
// std::cout << M << "," << N;
|
||||||
|
// /* ATIDLAS */
|
||||||
|
// atidlas::array A(N, M, dtype), y(M, dtype), x(N, dtype);
|
||||||
|
// BENCHMARK(y = dot(trans(A),x), bandwidth(M*N + M + N, tres, dtsize));
|
||||||
|
// /* clAmdBlas */
|
||||||
|
// #ifdef BENCH_CLAMDBLAS
|
||||||
|
// BENCHMARK(clAmdBlasSgemv(clAmdBlasColumnMajor, clAmdBlasTrans, N, M, 1, A.data()(), A.ld(), x.data()(), 0, 1, 0, y.data()(), 0, 1, 1, &atidlas::cl::get_queue(x.context(), 0)(),0, NULL, NULL), bandwidth(M*N + M + N, tres, dtsize))
|
||||||
|
// #endif
|
||||||
|
// /* BLAS */
|
||||||
|
// #ifdef BENCH_CBLAS
|
||||||
|
// std::vector<float> cA(N*M), cx(N), cy(M);
|
||||||
|
// atidlas::copy(x, cx);
|
||||||
|
// atidlas::copy(y, cy);
|
||||||
|
// atidlas::copy(A, cA);
|
||||||
|
// BENCHMARK(cblas_sgemv(CblasColMajor, CblasTrans, N, M, 1, cA.data(), N, cx.data(), 1, 0, cy.data(), 1), bandwidth(M*N + M + N, tres, dtsize));
|
||||||
|
// #endif
|
||||||
|
// std::cout << std::endl;
|
||||||
|
// }
|
||||||
|
// std::cout << "\n\n" << std::flush;
|
||||||
|
|
||||||
// /*---------*/
|
// /*---------*/
|
||||||
// /*--BLAS3--*/
|
// /*--BLAS3--*/
|
||||||
// /*---------*/
|
// /*---------*/
|
||||||
|
std::cout << "#GEMM-NT" << std::endl;
|
||||||
|
for(std::vector<int_t>::const_iterator Mit = BLAS3_M.begin() ; Mit != BLAS3_M.end() ; ++Mit)
|
||||||
|
for(std::vector<int_t>::const_iterator Nit = BLAS3_N.begin() ; Nit != BLAS3_N.end() ; ++Nit)
|
||||||
|
for(std::vector<int_t>::const_iterator Kit = BLAS3_K.begin() ; Kit != BLAS3_K.end() ; ++Kit)
|
||||||
|
{
|
||||||
|
int_t M = *Mit, N = *Nit, K = *Kit;
|
||||||
|
std::cout << M << "," << N << "," << K;
|
||||||
|
/* ATIDLAS */
|
||||||
|
atidlas::array C(M, N, dtype), A(M, K, dtype), B(N, K, dtype);
|
||||||
|
BENCHMARK(C = dot(A,trans(B)), gflops((double)2*M*N*K, tres));
|
||||||
|
/* clAmdBlas */
|
||||||
|
#ifdef BENCH_CLAMDBLAS
|
||||||
|
BENCHMARK(clAmdBlasSgemm(clAmdBlasColumnMajor, clAmdBlasNoTrans, clAmdBlasTrans, M, N, K, 1, A.data()(), A.ld(), B.data()(), B.ld(),
|
||||||
|
0, C.data()(), C.ld(), 1, &atidlas::cl::get_queue(C.context(), 0)(),0, NULL, NULL), gflops((double)2*M*N*K, tres))
|
||||||
|
#endif
|
||||||
|
/* BLAS */
|
||||||
|
#ifdef BENCH_CBLAS
|
||||||
|
std::vector<float> cC(M*N), cA(M*K), cB(N*K);
|
||||||
|
atidlas::copy(C, cC);
|
||||||
|
atidlas::copy(A, cA);
|
||||||
|
atidlas::copy(B, cB);
|
||||||
|
BENCHMARK(cblas_sgemm(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, cA.data(), M, cB.data(), N, 1, cC.data(), M), gflops((double)2*M*N*K, tres));
|
||||||
|
#endif
|
||||||
|
std::cout << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char* argv[])
|
int main(int argc, char* argv[])
|
||||||
|
@@ -51,13 +51,16 @@ static const std::vector<int> BLAS2_M = make_vector<int>() << 256;
|
|||||||
static const std::vector<int> BLAS2_N = create_full_range(128, 5000, 64);
|
static const std::vector<int> BLAS2_N = create_full_range(128, 5000, 64);
|
||||||
|
|
||||||
// BLAS3 Sizes
|
// BLAS3 Sizes
|
||||||
static const std::vector<int> BLAS3_N = create_full_range(128, 5000, 64);
|
static const std::vector<int> BLAS3_M = make_vector<int>() << 128;
|
||||||
|
static const std::vector<int> BLAS3_N = make_vector<int>() << 128;
|
||||||
|
static const std::vector<int> BLAS3_K = create_full_range(128, 5000, 64);
|
||||||
|
|
||||||
|
|
||||||
float bandwidth(std::size_t N, float t, unsigned int dtsize)
|
float bandwidth(std::size_t N, float t, unsigned int dtsize)
|
||||||
{
|
{ return N * dtsize * 1e-9 / t; }
|
||||||
return N * dtsize * 1e-9 / t;
|
|
||||||
}
|
double gflops(double nops, double t)
|
||||||
|
{ return nops * 1e-9 / t; }
|
||||||
|
|
||||||
template<class T>
|
template<class T>
|
||||||
T median(std::vector<T> x)
|
T median(std::vector<T> x)
|
||||||
|
@@ -42,7 +42,6 @@ namespace atidlas
|
|||||||
|
|
||||||
typedef std::map<std::pair<expression_type, numeric_type>, tools::shared_ptr<model> > model_map_t;
|
typedef std::map<std::pair<expression_type, numeric_type>, tools::shared_ptr<model> > model_map_t;
|
||||||
|
|
||||||
model_map_t import(std::string const & fname);
|
|
||||||
model_map_t init_models(cl::CommandQueue const & queue);
|
model_map_t init_models(cl::CommandQueue const & queue);
|
||||||
model_map_t& get_model_map(cl::CommandQueue & queue);
|
model_map_t& get_model_map(cl::CommandQueue & queue);
|
||||||
model& get_model(cl::CommandQueue & queue, expression_type, numeric_type);
|
model& get_model(cl::CommandQueue & queue, expression_type, numeric_type);
|
||||||
|
@@ -49,7 +49,8 @@ std::vector<cl::CommandQueue> & get_queues(cl::Context const & ctx)
|
|||||||
for(queues_t::iterator it = queues.begin() ; it != queues.end() ; ++it)
|
for(queues_t::iterator it = queues.begin() ; it != queues.end() ; ++it)
|
||||||
if(it->first()==ctx())
|
if(it->first()==ctx())
|
||||||
return it->second;
|
return it->second;
|
||||||
throw std::out_of_range("The context provided is not registered");
|
queues.push_back(std::make_pair(ctx, std::vector<cl::CommandQueue>(1, cl::CommandQueue(ctx, ctx.getInfo<CL_CONTEXT_DEVICES>()[0]))));
|
||||||
|
return queues.back().second;
|
||||||
}
|
}
|
||||||
|
|
||||||
cl::CommandQueue & get_queue(cl::Context const & ctx, std::size_t idx)
|
cl::CommandQueue & get_queue(cl::Context const & ctx, std::size_t idx)
|
||||||
|
@@ -143,7 +143,7 @@ namespace detail
|
|||||||
{
|
{
|
||||||
if(name=="vaxpy") return VECTOR_AXPY_TYPE;
|
if(name=="vaxpy") return VECTOR_AXPY_TYPE;
|
||||||
if(name=="reduction") return REDUCTION_TYPE;
|
if(name=="reduction") return REDUCTION_TYPE;
|
||||||
if(name=="matrix-axpy") return MATRIX_AXPY_TYPE;
|
if(name=="maxpy") return MATRIX_AXPY_TYPE;
|
||||||
if(name=="row-wise-reductionN") return ROW_WISE_REDUCTION_TYPE;
|
if(name=="row-wise-reductionN") return ROW_WISE_REDUCTION_TYPE;
|
||||||
if(name=="row-wise-reductionT") return COL_WISE_REDUCTION_TYPE;
|
if(name=="row-wise-reductionT") return COL_WISE_REDUCTION_TYPE;
|
||||||
if(name=="matrix-productNN") return MATRIX_PRODUCT_NN_TYPE;
|
if(name=="matrix-productNN") return MATRIX_PRODUCT_NN_TYPE;
|
||||||
@@ -167,7 +167,7 @@ namespace detail
|
|||||||
return tools::shared_ptr<base>(new vaxpy( vaxpy_parameters(a[0], a[1], a[2], fetch[a[3]])));
|
return tools::shared_ptr<base>(new vaxpy( vaxpy_parameters(a[0], a[1], a[2], fetch[a[3]])));
|
||||||
else if(template_name=="reduction")
|
else if(template_name=="reduction")
|
||||||
return tools::shared_ptr<base>(new reduction( reduction_parameters(a[0], a[1], a[2], fetch[a[3]])));
|
return tools::shared_ptr<base>(new reduction( reduction_parameters(a[0], a[1], a[2], fetch[a[3]])));
|
||||||
else if(template_name=="matrix-axpy")
|
else if(template_name=="maxpy")
|
||||||
return tools::shared_ptr<base>(new maxpy( maxpy_parameters(a[0], a[1], a[2], a[3], a[4], fetch[a[5]])));
|
return tools::shared_ptr<base>(new maxpy( maxpy_parameters(a[0], a[1], a[2], a[3], a[4], fetch[a[5]])));
|
||||||
else if(template_name.find("row-wise-reduction")!=std::string::npos)
|
else if(template_name.find("row-wise-reduction")!=std::string::npos)
|
||||||
{
|
{
|
||||||
@@ -185,11 +185,9 @@ namespace detail
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
model_map_t import(std::string const & fname, cl::CommandQueue & queue)
|
void import(std::string const & fname, cl::CommandQueue & queue, model_map_t& result)
|
||||||
{
|
{
|
||||||
|
|
||||||
namespace js = rapidjson;
|
namespace js = rapidjson;
|
||||||
model_map_t result;
|
|
||||||
//Parse the JSON document
|
//Parse the JSON document
|
||||||
js::Document document;
|
js::Document document;
|
||||||
std::ifstream t(fname.c_str());
|
std::ifstream t(fname.c_str());
|
||||||
@@ -201,7 +199,7 @@ model_map_t import(std::string const & fname, cl::CommandQueue & queue)
|
|||||||
document.Parse<0>(str.c_str());
|
document.Parse<0>(str.c_str());
|
||||||
//Deserialize
|
//Deserialize
|
||||||
std::vector<std::string> operations = tools::make_vector<std::string>() << "vaxpy" << "reduction"
|
std::vector<std::string> operations = tools::make_vector<std::string>() << "vaxpy" << "reduction"
|
||||||
<< "matrix-axpy" << "row-wise-reductionN" << "row-wise-reductionT"
|
<< "maxpy" << "row-wise-reductionN" << "row-wise-reductionT"
|
||||||
<< "matrix-productNN" << "matrix-productTN" << "matrix-productNT" << "matrix-productTT";
|
<< "matrix-productNN" << "matrix-productTN" << "matrix-productNT" << "matrix-productTT";
|
||||||
std::vector<std::string> dtype = tools::make_vector<std::string>() << "float32" << "float64";
|
std::vector<std::string> dtype = tools::make_vector<std::string>() << "float32" << "float64";
|
||||||
for(std::vector<std::string>::iterator op = operations.begin() ; op != operations.end() ; ++op)
|
for(std::vector<std::string>::iterator op = operations.begin() ; op != operations.end() ; ++op)
|
||||||
@@ -222,19 +220,18 @@ model_map_t import(std::string const & fname, cl::CommandQueue & queue)
|
|||||||
js::Value const & profiles = document[opcstr][dtcstr]["profiles"];
|
js::Value const & profiles = document[opcstr][dtcstr]["profiles"];
|
||||||
for (js::SizeType id = 0 ; id < profiles.Size() ; ++id)
|
for (js::SizeType id = 0 ; id < profiles.Size() ; ++id)
|
||||||
templates.push_back(detail::create(*op, tools::to_int_array<int>(profiles[id])));
|
templates.push_back(detail::create(*op, tools::to_int_array<int>(profiles[id])));
|
||||||
if(templates.size()>1){
|
if(templates.size()>1)
|
||||||
|
{
|
||||||
// Get predictor
|
// Get predictor
|
||||||
predictors::random_forest predictor(document[opcstr][dtcstr]["predictor"]);
|
predictors::random_forest predictor(document[opcstr][dtcstr]["predictor"]);
|
||||||
result[std::make_pair(etype, dtype)] = tools::shared_ptr<model>(new model(predictor, templates, queue));
|
result[std::make_pair(etype, dtype)] = tools::shared_ptr<model>(new model(predictor, templates, queue));
|
||||||
}else{
|
|
||||||
result[std::make_pair(etype, dtype)] = tools::shared_ptr<model>(new model(templates, queue));
|
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
result[std::make_pair(etype, dtype)] = tools::shared_ptr<model>(new model(templates, queue));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
model_map_t init_models(cl::CommandQueue & queue)
|
model_map_t init_models(cl::CommandQueue & queue)
|
||||||
@@ -243,7 +240,7 @@ model_map_t init_models(cl::CommandQueue & queue)
|
|||||||
typedef tools::shared_ptr<model> ptr_t;
|
typedef tools::shared_ptr<model> ptr_t;
|
||||||
numeric_type types[] = {CHAR_TYPE, UCHAR_TYPE, SHORT_TYPE, USHORT_TYPE, INT_TYPE, UINT_TYPE, LONG_TYPE, ULONG_TYPE, FLOAT_TYPE, DOUBLE_TYPE};
|
numeric_type types[] = {CHAR_TYPE, UCHAR_TYPE, SHORT_TYPE, USHORT_TYPE, INT_TYPE, UINT_TYPE, LONG_TYPE, ULONG_TYPE, FLOAT_TYPE, DOUBLE_TYPE};
|
||||||
|
|
||||||
for(size_t i = 0 ; i < 1 ; ++i){
|
for(size_t i = 0 ; i < 10 ; ++i){
|
||||||
numeric_type DTYPE = types[i];
|
numeric_type DTYPE = types[i];
|
||||||
res[std::make_pair(SCALAR_AXPY_TYPE, DTYPE)] = ptr_t(new model(vaxpy(1,64,128,FETCH_FROM_GLOBAL_STRIDED), queue));
|
res[std::make_pair(SCALAR_AXPY_TYPE, DTYPE)] = ptr_t(new model(vaxpy(1,64,128,FETCH_FROM_GLOBAL_STRIDED), queue));
|
||||||
res[std::make_pair(VECTOR_AXPY_TYPE, DTYPE)] = ptr_t (new model(vaxpy(1,64,128,FETCH_FROM_GLOBAL_STRIDED), queue));
|
res[std::make_pair(VECTOR_AXPY_TYPE, DTYPE)] = ptr_t (new model(vaxpy(1,64,128,FETCH_FROM_GLOBAL_STRIDED), queue));
|
||||||
@@ -257,7 +254,7 @@ model_map_t init_models(cl::CommandQueue & queue)
|
|||||||
res[std::make_pair(MATRIX_PRODUCT_TT_TYPE, DTYPE)] = ptr_t(new model(mproduct_tt(1, 8, 8, 8, 4, 1, 4, FETCH_FROM_LOCAL, FETCH_FROM_LOCAL, 8, 8), queue));
|
res[std::make_pair(MATRIX_PRODUCT_TT_TYPE, DTYPE)] = ptr_t(new model(mproduct_tt(1, 8, 8, 8, 4, 1, 4, FETCH_FROM_LOCAL, FETCH_FROM_LOCAL, 8, 8), queue));
|
||||||
}
|
}
|
||||||
if(const char * cmodel_file = std::getenv("ATIDLAS_MODEL_DEVICE_0"))
|
if(const char * cmodel_file = std::getenv("ATIDLAS_MODEL_DEVICE_0"))
|
||||||
return import(std::string(cmodel_file), queue);
|
import(std::string(cmodel_file), queue, res);
|
||||||
return res;
|
return res;
|
||||||
|
|
||||||
|
|
||||||
|
@@ -77,17 +77,17 @@ bp::tuple get_shape(atd::array const & x)
|
|||||||
// x.reshape(size1, size2);
|
// x.reshape(size1, size2);
|
||||||
//}
|
//}
|
||||||
|
|
||||||
boost::python::dict create_queues(atd::cl::queues_t queues)
|
//boost::python::dict create_queues(atd::cl::queues_t queues)
|
||||||
{
|
//{
|
||||||
boost::python::dict dictionary;
|
// boost::python::dict dictionary;
|
||||||
for (atd::cl::queues_t::iterator it = queues.begin(); it != queues.end(); ++it) {
|
// for (atd::cl::queues_t::iterator it = queues.begin(); it != queues.end(); ++it) {
|
||||||
bp::list list;
|
// bp::list list;
|
||||||
for (atd::cl::queues_t::mapped_type::iterator itt = it->second.begin(); itt != it->second.end(); ++itt)
|
// for (atd::cl::queues_t::mapped_type::iterator itt = it->second.begin(); itt != it->second.end(); ++itt)
|
||||||
list.append(*itt);
|
// list.append(*itt);
|
||||||
dictionary[it->first] = list;
|
// dictionary[it->first] = list;
|
||||||
}
|
// }
|
||||||
return dictionary;
|
// return dictionary;
|
||||||
}
|
//}
|
||||||
|
|
||||||
template<class T>
|
template<class T>
|
||||||
struct datatype : public atd::value_scalar
|
struct datatype : public atd::value_scalar
|
||||||
@@ -198,7 +198,7 @@ namespace detail
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::vector<atd::cl::CommandQueue> & get_queue(atd::cl::Context const & ctx)
|
std::vector<atd::cl::CommandQueue> & get_queue(atd::cl::Context const & ctx)
|
||||||
{ return atd::cl::queues[ctx]; }
|
{ return atd::cl::get_queues(ctx); }
|
||||||
|
|
||||||
atd::numeric_type extract_dtype(bp::object const & odtype)
|
atd::numeric_type extract_dtype(bp::object const & odtype)
|
||||||
{
|
{
|
||||||
|
Reference in New Issue
Block a user