Various fixes

Philippe Tillet
2015-01-27 02:41:27 -05:00
parent 909e4b16a0
commit a96c897cb3
11 changed files with 141 additions and 139 deletions

View File

@@ -36,86 +36,86 @@ void bench(ad::numeric_type dtype)
std::cout << " " << PERF << std::flush;\
}
// /*---------*/
// /*--BLAS1--*/
// /*---------*/
// std::cout << "#AXPY" << std::endl;
// for(std::vector<int_t>::const_iterator it = BLAS1_N.begin() ; it != BLAS1_N.end() ; ++it)
// {
// int_t N = *it;
// std::cout << N;
// /* ATIDLAS */
// atidlas::array x(N, dtype), y(N, dtype);
// BENCHMARK(y = x + y, bandwidth(3*N, tres, dtsize));
// /* clAmdBlas */
//#ifdef BENCH_CLAMDBLAS
// BENCHMARK(clAmdBlasSaxpy(N, 1, x.data()(), 0, 1, y.data()(), 0, 1, 1, &atidlas::cl::get_queue(x.context(), 0)(), 0, NULL, NULL), bandwidth(3*N, tres, dtsize))
//#endif
// /* BLAS */
//#ifdef BENCH_CBLAS
// std::vector<float> cx(N), cy(N);
// atidlas::copy(x, cx);
// atidlas::copy(y, cy);
// BENCHMARK(cblas_saxpy(N, 1, cx.data(), 1, cy.data(), 1), bandwidth(3*N, tres, dtsize));
//#endif
// std::cout << std::endl;
// }
// std::cout << "\n\n" << std::flush;
/*---------*/
/*--BLAS1--*/
/*---------*/
std::cout << "#AXPY" << std::endl;
for(std::vector<int_t>::const_iterator it = BLAS1_N.begin() ; it != BLAS1_N.end() ; ++it)
{
int_t N = *it;
std::cout << N;
/* ATIDLAS */
atidlas::array x(N, dtype), y(N, dtype);
BENCHMARK(y = x + y, bandwidth(3*N, tres, dtsize));
/* clAmdBlas */
#ifdef BENCH_CLAMDBLAS
BENCHMARK(clAmdBlasSaxpy(N, 1, x.data()(), 0, 1, y.data()(), 0, 1, 1, &atidlas::cl::get_queue(x.context(), 0)(), 0, NULL, NULL), bandwidth(3*N, tres, dtsize))
#endif
/* BLAS */
#ifdef BENCH_CBLAS
std::vector<float> cx(N), cy(N);
atidlas::copy(x, cx);
atidlas::copy(y, cy);
BENCHMARK(cblas_saxpy(N, 1, cx.data(), 1, cy.data(), 1), bandwidth(3*N, tres, dtsize));
#endif
std::cout << std::endl;
}
std::cout << "\n\n" << std::flush;
// std::cout << "#DOT" << std::endl;
// for(std::vector<int_t>::const_iterator it = BLAS1_N.begin() ; it != BLAS1_N.end() ; ++it)
// {
// int_t N = *it;
// std::cout << N;
// /* ATIDLAS */
// atidlas::array x(N, dtype), y(N, dtype);
// atidlas::array scratch(N, dtype);
// atidlas::scalar s(dtype);
// BENCHMARK(s = dot(x,y), bandwidth(2*N, tres, dtsize));
// /* clAmdBlas */
//#ifdef BENCH_CLAMDBLAS
// BENCHMARK(clAmdBlasSdot(N, s.data()(), 0, x.data()(), 0, 1, y.data()(), 0, 1, scratch.data()(), 1, &atidlas::cl::get_queue(x.context(), 0)(), 0, NULL, NULL), bandwidth(2*N, tres, dtsize))
//#endif
// /* BLAS */
//#ifdef BENCH_CBLAS
// std::vector<float> cx(N), cy(N);
// atidlas::copy(x, cx);
// atidlas::copy(y, cy);
// BENCHMARK(cblas_sdot(N, cx.data(), 1, cy.data(), 1), bandwidth(2*N, tres, dtsize));
//#endif
// std::cout << std::endl;
// }
// std::cout << "\n\n" << std::flush;
std::cout << "#DOT" << std::endl;
for(std::vector<int_t>::const_iterator it = BLAS1_N.begin() ; it != BLAS1_N.end() ; ++it)
{
int_t N = *it;
std::cout << N;
/* ATIDLAS */
atidlas::array x(N, dtype), y(N, dtype);
atidlas::array scratch(N, dtype);
atidlas::scalar s(dtype);
BENCHMARK(s = dot(x,y), bandwidth(2*N, tres, dtsize));
/* clAmdBlas */
#ifdef BENCH_CLAMDBLAS
BENCHMARK(clAmdBlasSdot(N, s.data()(), 0, x.data()(), 0, 1, y.data()(), 0, 1, scratch.data()(), 1, &atidlas::cl::get_queue(x.context(), 0)(), 0, NULL, NULL), bandwidth(2*N, tres, dtsize))
#endif
/* BLAS */
#ifdef BENCH_CBLAS
std::vector<float> cx(N), cy(N);
atidlas::copy(x, cx);
atidlas::copy(y, cy);
BENCHMARK(cblas_sdot(N, cx.data(), 1, cy.data(), 1), bandwidth(2*N, tres, dtsize));
#endif
std::cout << std::endl;
}
std::cout << "\n\n" << std::flush;
// /*---------*/
// /*--BLAS2--*/
// /*---------*/
// //T-layout
// std::cout << "#GEMV-T" << std::endl;
// for(std::vector<int>::const_iterator Mit = BLAS2_M.begin() ; Mit != BLAS2_M.end() ; ++Mit)
// for(std::vector<int_t>::const_iterator Nit = BLAS2_N.begin() ; Nit != BLAS2_N.end() ; ++Nit)
// {
// int_t M = *Mit;
// int_t N = *Nit;
// std::cout << M << "," << N;
// /* ATIDLAS */
// atidlas::array A(N, M, dtype), y(M, dtype), x(N, dtype);
// BENCHMARK(y = dot(trans(A),x), bandwidth(M*N + M + N, tres, dtsize));
// /* clAmdBlas */
// #ifdef BENCH_CLAMDBLAS
// BENCHMARK(clAmdBlasSgemv(clAmdBlasColumnMajor, clAmdBlasTrans, N, M, 1, A.data()(), A.ld(), x.data()(), 0, 1, 0, y.data()(), 0, 1, 1, &atidlas::cl::get_queue(x.context(), 0)(),0, NULL, NULL), bandwidth(M*N + M + N, tres, dtsize))
// #endif
// /* BLAS */
// #ifdef BENCH_CBLAS
// std::vector<float> cA(N*M), cx(N), cy(M);
// atidlas::copy(x, cx);
// atidlas::copy(y, cy);
// atidlas::copy(A, cA);
// BENCHMARK(cblas_sgemv(CblasColMajor, CblasTrans, N, M, 1, cA.data(), N, cx.data(), 1, 0, cy.data(), 1), bandwidth(M*N + M + N, tres, dtsize));
// #endif
// std::cout << std::endl;
// }
// std::cout << "\n\n" << std::flush;
/*---------*/
/*--BLAS2--*/
/*---------*/
//T-layout
std::cout << "#GEMV-T" << std::endl;
for(std::vector<int>::const_iterator Mit = BLAS2_M.begin() ; Mit != BLAS2_M.end() ; ++Mit)
for(std::vector<int_t>::const_iterator Nit = BLAS2_N.begin() ; Nit != BLAS2_N.end() ; ++Nit)
{
int_t M = *Mit;
int_t N = *Nit;
std::cout << M << "," << N;
/* ATIDLAS */
atidlas::array A(N, M, dtype), y(M, dtype), x(N, dtype);
BENCHMARK(y = dot(trans(A),x), bandwidth(M*N + M + N, tres, dtsize));
/* clAmdBlas */
#ifdef BENCH_CLAMDBLAS
BENCHMARK(clAmdBlasSgemv(clAmdBlasColumnMajor, clAmdBlasTrans, N, M, 1, A.data()(), A.ld(), x.data()(), 0, 1, 0, y.data()(), 0, 1, 1, &atidlas::cl::get_queue(x.context(), 0)(),0, NULL, NULL), bandwidth(M*N + M + N, tres, dtsize))
#endif
/* BLAS */
#ifdef BENCH_CBLAS
std::vector<float> cA(N*M), cx(N), cy(M);
atidlas::copy(x, cx);
atidlas::copy(y, cy);
atidlas::copy(A, cA);
BENCHMARK(cblas_sgemv(CblasColMajor, CblasTrans, N, M, 1, cA.data(), N, cx.data(), 1, 0, cy.data(), 1), bandwidth(M*N + M + N, tres, dtsize));
#endif
std::cout << std::endl;
}
std::cout << "\n\n" << std::flush;
// /*---------*/
// /*--BLAS3--*/

View File

@@ -47,8 +47,8 @@ private:
static const std::vector<int> BLAS1_N = create_log_range(1e3, 2e7, 50, 64);
// BLAS2 Sizes
static const std::vector<int> BLAS2_M = make_vector<int>() << 256;
static const std::vector<int> BLAS2_N = create_full_range(128, 5000, 64);
static const std::vector<int> BLAS2_N = make_vector<int>() << 64;
static const std::vector<int> BLAS2_M = create_full_range(128, 10000, 64);
// BLAS3 Sizes
static const std::vector<int> BLAS3_M = make_vector<int>() << 1024;
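Note: the size lists above are built with helpers (make_vector, create_full_range, create_log_range) whose definitions are not part of this diff. Purely as an assumption inferred from the call sites, make_vector presumably follows the usual operator<< accumulator idiom; a minimal hypothetical sketch:

#include <vector>

// Hypothetical helper (not in this commit): mirrors the "make_vector<int>() << 256"
// usage above. Each operator<< appends one element and returns the builder so calls
// can chain; the builder then converts implicitly to std::vector<T>.
template<class T>
struct vector_builder
{
  std::vector<T> values;
  vector_builder & operator<<(T const & x) { values.push_back(x); return *this; }
  operator std::vector<T>() const { return values; }
};

template<class T>
vector_builder<T> make_vector() { return vector_builder<T>(); }

// Example: std::vector<int> sizes = make_vector<int>() << 256 << 512;

create_full_range(128, 10000, 64) and create_log_range(1e3, 2e7, 50, 64) are likewise assumed to enumerate linearly spaced and log-spaced sizes, respectively; their actual signatures are not shown here.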

View File

@@ -10,10 +10,7 @@ namespace atidlas
class maxpy_parameters : public base::parameters_type
{
public:
maxpy_parameters(unsigned int _simd_width,
unsigned int _local_size_0, unsigned int _local_size_1,
unsigned int _num_groups_0, unsigned int _num_groups_1,
fetching_policy_type _fetching_policy);
maxpy_parameters(unsigned int _simd_width, unsigned int _local_size_0, unsigned int _local_size_1, unsigned int _num_groups_0, unsigned int _num_groups_1, fetching_policy_type _fetching_policy);
unsigned int num_groups_0;
unsigned int num_groups_1;
@@ -28,14 +25,9 @@ private:
std::vector<std::string> generate_impl(unsigned int label, symbolic_expressions_container const & symbolic_expressions, std::vector<mapping_type> const & mappings) const;
public:
maxpy(parameters_type const & parameters, binding_policy_t binding_policy = BIND_ALL_UNIQUE);
maxpy(unsigned int simd, unsigned int ls1, unsigned int ls2,
unsigned int ng1, unsigned int ng2, fetching_policy_type fetch,
binding_policy_t bind = BIND_ALL_UNIQUE);
maxpy(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2, fetching_policy_type fetch, binding_policy_t bind = BIND_ALL_UNIQUE);
std::vector<int_t> input_sizes(symbolic_expressions_container const & symbolic_expressions);
void enqueue(cl::CommandQueue & queue,
std::vector<cl::lazy_compiler> & programs,
unsigned int label,
symbolic_expressions_container const & symbolic_expressions);
void enqueue(cl::CommandQueue & queue, std::vector<cl::lazy_compiler> & programs, unsigned int label, symbolic_expressions_container const & symbolic_expressions);
};
}

View File

@@ -36,7 +36,7 @@ private:
unsigned int lmem_usage(symbolic_expressions_container const & symbolic_expressions) const;
unsigned int registers_usage(symbolic_expressions_container const & symbolic_expressions) const;
int check_invalid_impl(cl::Device const &, symbolic_expressions_container const &) const;
std::string generate_impl(unsigned int label, char id, const symbolic_expressions_container &symbolic_expressions, const std::vector<mapping_type> &, bool fallback) const;
std::string generate_impl(unsigned int label, const char * id, const symbolic_expressions_container &symbolic_expressions, const std::vector<mapping_type> &, bool fallback) const;
std::vector<std::string> generate_impl(unsigned int label, symbolic_expressions_container const & symbolic_expressions, std::vector<mapping_type> const & mappings) const;
void enqueue_block(cl::CommandQueue & queue, int_t M, int_t N, int_t K,
array_infos const & A, array_infos const & B, array_infos const & C,

View File

@@ -22,7 +22,7 @@ private:
int check_invalid_impl(cl::Device const &, symbolic_expressions_container const &) const;
inline void reduce_1d_local_memory(kernel_generation_stream & stream, unsigned int size, std::vector<mapped_scalar_reduction*> exprs,
std::string const & buf_str, std::string const & buf_value_str) const;
std::string generate_impl(unsigned int label, char type, symbolic_expressions_container const & symbolic_expressions, std::vector<mapping_type> const & mappings, unsigned int simd_width) const;
std::string generate_impl(unsigned int label, const char * type, symbolic_expressions_container const & symbolic_expressions, std::vector<mapping_type> const & mappings, unsigned int simd_width) const;
std::vector<std::string> generate_impl(unsigned int label, symbolic_expressions_container const & symbolic_expressions, std::vector<mapping_type> const & mappings) const;
public:

View File

@@ -5,15 +5,12 @@ add_library(atidlas SHARED ${LIBATIDLAS_SRC})
set_target_properties(atidlas PROPERTIES
COMPILE_FLAGS "-Wno-sign-compare -D__CL_ENABLE_EXCEPTIONS -Wall -Wextra -pedantic")
set(PREINSTALL_PY "${CMAKE_CURRENT_BINARY_DIR}/preinstall.py")
#install(TARGETS atidlas LIBRARY DESTINATION lib)
#set(INSTALL_INCLUDE_DIR /usr/local/include)
#install(DIRECTORY atidlas "${PROJECT_SOURCE_DIR}/include/atidlas"
# DESTINATION "${INSTALL_INCLUDE_DIR}" FILES_MATCHING PATTERN "*.h" PATTERN "*.hpp")
set(POSTINSTALL_PY "${CMAKE_CURRENT_BINARY_DIR}/postinstall.py")
configure_file("${PROJECT_SOURCE_DIR}/python/preinstall.py" ${PREINSTALL_PY})
configure_file("${PROJECT_SOURCE_DIR}/python/postinstall.py" ${POSTINSTALL_PY})
install(CODE "execute_process(COMMAND ${PYTHON} ${PREINSTALL_PY})")
install(TARGETS atidlas LIBRARY DESTINATION lib)
set(INSTALL_INCLUDE_DIR /usr/local/include)
install(DIRECTORY atidlas "${PROJECT_SOURCE_DIR}/include/atidlas"
DESTINATION "${INSTALL_INCLUDE_DIR}" FILES_MATCHING PATTERN "*.h" PATTERN "*.hpp")
install(CODE "execute_process(COMMAND ${PYTHON} ${POST_INSTALL_SCRIPT_PY})")
install(CODE "execute_process(COMMAND ${PYTHON} ${POSTINSTALL_PY})")

View File

@@ -29,8 +29,11 @@ std::string maxpy::generate_impl(unsigned int label, symbolic_expressions_contai
std::string init0, upper_bound0, inc0, init1, upper_bound1, inc1;
char kprefix[10];
fill_kernel_name(kprefix, label, "d");
stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl;
stream << "__kernel void " << "k" << label << "d" << "(unsigned int M, unsigned int N, " << generate_arguments("#scalartype", mappings, symbolic_expressions) << ")" << std::endl;
stream << "__kernel void " << kprefix << "(unsigned int M, unsigned int N, " << generate_arguments("#scalartype", mappings, symbolic_expressions) << ")" << std::endl;
stream << "{" << std::endl;
stream.inc_tab();
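fill_kernel_name is introduced here and reused in the mproduct, mreduction and reduction hunks below, but its definition is not part of this diff. From the call sites alone (a char[10] buffer, an unsigned label, and a short id such as "d", "o" or "f"), it presumably just formats the "k<label><id>" prefix that was previously assembled inline via stream << "k" << label << id; a hypothetical sketch under that assumption:

#include <cstdio>

// Hypothetical definition, inferred only from the call sites in this commit:
// writes the kernel-name prefix "k<label><id>" into a small caller-provided
// buffer, e.g. label = 7, id = "d"  ->  "k7d".
inline void fill_kernel_name(char * buf, unsigned int label, const char * id)
{
  std::sprintf(buf, "k%u%s", label, id);
}

The reduction kernels then append a trailing "0"/"1" through the stream, which is consistent with this reading.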

View File

@@ -87,7 +87,7 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
return TEMPLATE_VALID;
}
std::string mproduct::generate_impl(unsigned int label, char id, const symbolic_expressions_container &symbolic_expressions, const std::vector<mapping_type> &, bool fallback) const
std::string mproduct::generate_impl(unsigned int label, const char * id, const symbolic_expressions_container &symbolic_expressions, const std::vector<mapping_type> &, bool fallback) const
{
using std::string;
using tools::to_string;
@@ -121,7 +121,10 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
/// //////////////
std::string widthdtype = append_width("#scalartype", p.simd_width);
stream << " __attribute__((reqd_work_group_size(" << p.local_size_0 << "," << p.local_size_1 << ",1)))" << std::endl;
stream << "__kernel void " << "k" << label << id << "(unsigned int M, unsigned int N, unsigned int K, "
char kprefix[10];
fill_kernel_name(kprefix, label, id);
stream << "__kernel void " << kprefix << "(unsigned int M, unsigned int N, unsigned int K, "
<< C.process("__global #scalartype* #pointer, uint #ld, uint #start1, uint #start2, uint #stride1, uint #stride2,")
<< alpha.process("#scalartype #name,")
<< A.process("__global " + widthdtype + "* #pointer, uint #ld, uint #start1, uint #start2, uint #stride1, uint #stride2,")
@@ -557,8 +560,8 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
std::vector<std::string> mproduct::generate_impl(unsigned int label, symbolic_expressions_container const & symbolic_expressions, std::vector<mapping_type> const & mappings) const
{
std::vector<std::string> res;
res.push_back(generate_impl(label, 'o', symbolic_expressions, mappings, false));
res.push_back(generate_impl(label, 'f', symbolic_expressions, mappings, true));
res.push_back(generate_impl(label, "o", symbolic_expressions, mappings, false));
res.push_back(generate_impl(label, "f", symbolic_expressions, mappings, true));
return res;
}

View File

@@ -36,8 +36,11 @@ std::string mreduction::generate_impl(unsigned int label, symbolic_expressions_c
kernel_generation_stream stream;
char kprefix[10];
fill_kernel_name(kprefix, label, "d");
stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl;
stream << "__kernel void " << "k" << label << "d" << "(unsigned int M, unsigned int N, " << generate_arguments("#scalartype", mappings, symbolic_expressions) << ")" << std::endl;
stream << "__kernel void " << kprefix << "(unsigned int M, unsigned int N, " << generate_arguments("#scalartype", mappings, symbolic_expressions) << ")" << std::endl;
stream << "{" << std::endl;
stream.inc_tab();

View File

@@ -56,7 +56,7 @@ inline void reduction::reduce_1d_local_memory(kernel_generation_stream & stream,
stream << "}" << std::endl;
}
std::string reduction::generate_impl(unsigned int label, char type, symbolic_expressions_container const & symbolic_expressions, std::vector<mapping_type> const & mappings, unsigned int simd_width) const
std::string reduction::generate_impl(unsigned int label, const char * type, symbolic_expressions_container const & symbolic_expressions, std::vector<mapping_type> const & mappings, unsigned int simd_width) const
{
kernel_generation_stream stream;
@@ -85,8 +85,11 @@ std::string reduction::generate_impl(unsigned int label, char type, symbolic_exp
/* ------------------------
* First Kernel
* -----------------------*/
char kprefix[10];
fill_kernel_name(kprefix, label, type);
stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << ",1,1)))" << std::endl;
stream << "__kernel void " << "k" << label << type << "0" << "(" << arguments << generate_arguments("#scalartype", mappings, symbolic_expressions) << ")" << std::endl;
stream << "__kernel void " << kprefix << "0" << "(" << arguments << generate_arguments("#scalartype", mappings, symbolic_expressions) << ")" << std::endl;
stream << "{" << std::endl;
stream.inc_tab();
@@ -191,7 +194,7 @@ std::string reduction::generate_impl(unsigned int label, char type, symbolic_exp
* Second kernel
* -----------------------*/
stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << ",1,1)))" << std::endl;
stream << "__kernel void " << "k" << label << type << "1" << "(" << arguments << generate_arguments("#scalartype", mappings, symbolic_expressions) << ")" << std::endl;
stream << "__kernel void " << kprefix << "1" << "(" << arguments << generate_arguments("#scalartype", mappings, symbolic_expressions) << ")" << std::endl;
stream << "{" << std::endl;
stream.inc_tab();
@@ -256,8 +259,8 @@ std::string reduction::generate_impl(unsigned int label, char type, symbolic_exp
std::vector<std::string> reduction::generate_impl(unsigned int label, symbolic_expressions_container const & symbolic_expressions, std::vector<mapping_type> const & mappings) const
{
std::vector<std::string> result;
result.push_back(generate_impl(label, 'f', symbolic_expressions, mappings, 1));
result.push_back(generate_impl(label, 'o', symbolic_expressions, mappings, p_.simd_width));
result.push_back(generate_impl(label, "f", symbolic_expressions, mappings, 1));
result.push_back(generate_impl(label, "o", symbolic_expressions, mappings, p_.simd_width));
return result;
}

View File

@@ -142,14 +142,14 @@ namespace detail
static expression_type get_expression_type(std::string const & name)
{
if(name=="vaxpy") return VECTOR_AXPY_TYPE;
if(name=="reduction") return REDUCTION_TYPE;
if(name=="dot") return REDUCTION_TYPE;
if(name=="maxpy") return MATRIX_AXPY_TYPE;
if(name=="row-wise-reductionN") return ROW_WISE_REDUCTION_TYPE;
if(name=="row-wise-reductionT") return COL_WISE_REDUCTION_TYPE;
if(name=="matrix-productNN") return MATRIX_PRODUCT_NN_TYPE;
if(name=="matrix-productNT") return MATRIX_PRODUCT_NT_TYPE;
if(name=="matrix-productTN") return MATRIX_PRODUCT_TN_TYPE;
if(name=="matrix-productTT") return MATRIX_PRODUCT_TT_TYPE;
if(name=="gemvN") return ROW_WISE_REDUCTION_TYPE;
if(name=="gemvT") return COL_WISE_REDUCTION_TYPE;
if(name=="gemmNN") return MATRIX_PRODUCT_NN_TYPE;
if(name=="gemmNT") return MATRIX_PRODUCT_NT_TYPE;
if(name=="gemmTN") return MATRIX_PRODUCT_TN_TYPE;
if(name=="gemmTT") return MATRIX_PRODUCT_TT_TYPE;
throw ;
}
@@ -164,22 +164,23 @@ namespace detail
{
fetching_policy_type fetch[] = {FETCH_FROM_LOCAL, FETCH_FROM_GLOBAL_STRIDED, FETCH_FROM_GLOBAL_CONTIGUOUS};
if(template_name=="vaxpy")
return tools::shared_ptr<base>(new vaxpy( vaxpy_parameters(a[0], a[1], a[2], fetch[a[3]])));
else if(template_name=="reduction")
return tools::shared_ptr<base>(new reduction( reduction_parameters(a[0], a[1], a[2], fetch[a[3]])));
return tools::shared_ptr<base>(new vaxpy(a[0], a[1], a[2], fetch[a[3]]));
else if(template_name=="dot")
return tools::shared_ptr<base>(new reduction(a[0], a[1], a[2], fetch[a[3]]));
else if(template_name=="maxpy")
return tools::shared_ptr<base>(new maxpy( maxpy_parameters(a[0], a[1], a[2], a[3], a[4], fetch[a[5]])));
else if(template_name.find("row-wise-reduction")!=std::string::npos)
{
return tools::shared_ptr<base>(new mreduction_rows( mreduction_parameters(a[0], a[1], a[2], a[3], fetch[a[4]])));
}
else if(template_name.find("matrix-product")!=std::string::npos)
{
char A_trans = template_name[15];
char B_trans = template_name[16];
return tools::shared_ptr<base>(new mproduct( mproduct_parameters(a[0], a[1], a[2], a[3], a[4], a[5], a[6],
fetch[a[7]], fetch[a[8]], a[9], a[10]), A_trans, B_trans));
}
return tools::shared_ptr<base>(new maxpy(a[0], a[1], a[2], a[3], a[4], fetch[a[5]]));
else if(template_name.find("gemvN")!=std::string::npos)
return tools::shared_ptr<base>(new mreduction_rows(a[0], a[1], a[2], a[3], fetch[a[4]]));
else if(template_name.find("gemvT")!=std::string::npos)
return tools::shared_ptr<base>(new mreduction_cols(a[0], a[1], a[2], a[3], fetch[a[4]]));
else if(template_name.find("gemmNN")!=std::string::npos)
return tools::shared_ptr<base>(new mproduct_nn(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
else if(template_name.find("gemmTN")!=std::string::npos)
return tools::shared_ptr<base>(new mproduct_tn(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
else if(template_name.find("gemmNT")!=std::string::npos)
return tools::shared_ptr<base>(new mproduct_nt(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
else if(template_name.find("gemmTT")!=std::string::npos)
return tools::shared_ptr<base>(new mproduct_tt(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
else
throw operation_not_supported_exception("Cannot create the given operation");
}
@@ -198,9 +199,9 @@ void import(std::string const & fname, cl::CommandQueue & queue, model_map_t& re
str.assign((std::istreambuf_iterator<char>(t)), std::istreambuf_iterator<char>());
document.Parse<0>(str.c_str());
//Deserialize
std::vector<std::string> operations = tools::make_vector<std::string>() << "vaxpy" << "reduction"
<< "maxpy" << "row-wise-reductionN" << "row-wise-reductionT"
<< "matrix-productNN" << "matrix-productTN" << "matrix-productNT" << "matrix-productTT";
std::vector<std::string> operations = tools::make_vector<std::string>() << "vaxpy" << "dot"
<< "maxpy" << "gemvN" << "gemvT"
<< "gemmNN" << "gemmTN" << "gemmTT";
std::vector<std::string> dtype = tools::make_vector<std::string>() << "float32" << "float64";
for(std::vector<std::string>::iterator op = operations.begin() ; op != operations.end() ; ++op)
{