diff --git a/bench/blas.cpp b/bench/blas.cpp index 2aa5d7cc3..e4127113f 100644 --- a/bench/blas.cpp +++ b/bench/blas.cpp @@ -36,86 +36,86 @@ void bench(ad::numeric_type dtype) std::cout << " " << PERF << std::flush;\ } -// /*---------*/ -// /*--BLAS1--*/ -// /*---------*/ -// std::cout << "#AXPY" << std::endl; -// for(std::vector::const_iterator it = BLAS1_N.begin() ; it != BLAS1_N.end() ; ++it) -// { -// int_t N = *it; -// std::cout << N; -// /* ATIDLAS */ -// atidlas::array x(N, dtype), y(N, dtype); -// BENCHMARK(y = x + y, bandwidth(3*N, tres, dtsize)); -// /* clAmdBlas */ -//#ifdef BENCH_CLAMDBLAS -// BENCHMARK(clAmdBlasSaxpy(N, 1, x.data()(), 0, 1, y.data()(), 0, 1, 1, &atidlas::cl::get_queue(x.context(), 0)(), 0, NULL, NULL), bandwidth(3*N, tres, dtsize)) -//#endif -// /* BLAS */ -//#ifdef BENCH_CBLAS -// std::vector cx(N), cy(N); -// atidlas::copy(x, cx); -// atidlas::copy(y, cy); -// BENCHMARK(cblas_saxpy(N, 1, cx.data(), 1, cy.data(), 1), bandwidth(3*N, tres, dtsize)); -//#endif -// std::cout << std::endl; -// } -// std::cout << "\n\n" << std::flush; + /*---------*/ + /*--BLAS1--*/ + /*---------*/ + std::cout << "#AXPY" << std::endl; + for(std::vector::const_iterator it = BLAS1_N.begin() ; it != BLAS1_N.end() ; ++it) + { + int_t N = *it; + std::cout << N; + /* ATIDLAS */ + atidlas::array x(N, dtype), y(N, dtype); + BENCHMARK(y = x + y, bandwidth(3*N, tres, dtsize)); + /* clAmdBlas */ +#ifdef BENCH_CLAMDBLAS + BENCHMARK(clAmdBlasSaxpy(N, 1, x.data()(), 0, 1, y.data()(), 0, 1, 1, &atidlas::cl::get_queue(x.context(), 0)(), 0, NULL, NULL), bandwidth(3*N, tres, dtsize)) +#endif + /* BLAS */ +#ifdef BENCH_CBLAS + std::vector cx(N), cy(N); + atidlas::copy(x, cx); + atidlas::copy(y, cy); + BENCHMARK(cblas_saxpy(N, 1, cx.data(), 1, cy.data(), 1), bandwidth(3*N, tres, dtsize)); +#endif + std::cout << std::endl; + } + std::cout << "\n\n" << std::flush; -// std::cout << "#DOT" << std::endl; -// for(std::vector::const_iterator it = BLAS1_N.begin() ; it != BLAS1_N.end() 
; ++it) -// { -// int_t N = *it; -// std::cout << N; -// /* ATIDLAS */ -// atidlas::array x(N, dtype), y(N, dtype); -// atidlas::array scratch(N, dtype); -// atidlas::scalar s(dtype); -// BENCHMARK(s = dot(x,y), bandwidth(2*N, tres, dtsize)); -// /* clAmdBlas */ -//#ifdef BENCH_CLAMDBLAS -// BENCHMARK(clAmdBlasSdot(N, s.data()(), 0, x.data()(), 0, 1, y.data()(), 0, 1, scratch.data()(), 1, &atidlas::cl::get_queue(x.context(), 0)(), 0, NULL, NULL), bandwidth(2*N, tres, dtsize)) -//#endif -// /* BLAS */ -//#ifdef BENCH_CBLAS -// std::vector cx(N), cy(N); -// atidlas::copy(x, cx); -// atidlas::copy(y, cy); -// BENCHMARK(cblas_sdot(N, cx.data(), 1, cy.data(), 1), bandwidth(2*N, tres, dtsize)); -//#endif -// std::cout << std::endl; -// } -// std::cout << "\n\n" << std::flush; + std::cout << "#DOT" << std::endl; + for(std::vector::const_iterator it = BLAS1_N.begin() ; it != BLAS1_N.end() ; ++it) + { + int_t N = *it; + std::cout << N; + /* ATIDLAS */ + atidlas::array x(N, dtype), y(N, dtype); + atidlas::array scratch(N, dtype); + atidlas::scalar s(dtype); + BENCHMARK(s = dot(x,y), bandwidth(2*N, tres, dtsize)); + /* clAmdBlas */ +#ifdef BENCH_CLAMDBLAS + BENCHMARK(clAmdBlasSdot(N, s.data()(), 0, x.data()(), 0, 1, y.data()(), 0, 1, scratch.data()(), 1, &atidlas::cl::get_queue(x.context(), 0)(), 0, NULL, NULL), bandwidth(2*N, tres, dtsize)) +#endif + /* BLAS */ +#ifdef BENCH_CBLAS + std::vector cx(N), cy(N); + atidlas::copy(x, cx); + atidlas::copy(y, cy); + BENCHMARK(cblas_sdot(N, cx.data(), 1, cy.data(), 1), bandwidth(2*N, tres, dtsize)); +#endif + std::cout << std::endl; + } + std::cout << "\n\n" << std::flush; -// /*---------*/ -// /*--BLAS2--*/ -// /*---------*/ -// //T-layout -// std::cout << "#GEMV-T" << std::endl; -// for(std::vector::const_iterator Mit = BLAS2_M.begin() ; Mit != BLAS2_M.end() ; ++Mit) -// for(std::vector::const_iterator Nit = BLAS2_N.begin() ; Nit != BLAS2_N.end() ; ++Nit) -// { -// int_t M = *Mit; -// int_t N = *Nit; -// std::cout << M << "," << N; 
-// /* ATIDLAS */ -// atidlas::array A(N, M, dtype), y(M, dtype), x(N, dtype); -// BENCHMARK(y = dot(trans(A),x), bandwidth(M*N + M + N, tres, dtsize)); -// /* clAmdBlas */ -// #ifdef BENCH_CLAMDBLAS -// BENCHMARK(clAmdBlasSgemv(clAmdBlasColumnMajor, clAmdBlasTrans, N, M, 1, A.data()(), A.ld(), x.data()(), 0, 1, 0, y.data()(), 0, 1, 1, &atidlas::cl::get_queue(x.context(), 0)(),0, NULL, NULL), bandwidth(M*N + M + N, tres, dtsize)) -// #endif -// /* BLAS */ -// #ifdef BENCH_CBLAS -// std::vector cA(N*M), cx(N), cy(M); -// atidlas::copy(x, cx); -// atidlas::copy(y, cy); -// atidlas::copy(A, cA); -// BENCHMARK(cblas_sgemv(CblasColMajor, CblasTrans, N, M, 1, cA.data(), N, cx.data(), 1, 0, cy.data(), 1), bandwidth(M*N + M + N, tres, dtsize)); -// #endif -// std::cout << std::endl; -// } -// std::cout << "\n\n" << std::flush; + /*---------*/ + /*--BLAS2--*/ + /*---------*/ + //T-layout + std::cout << "#GEMV-T" << std::endl; + for(std::vector::const_iterator Mit = BLAS2_M.begin() ; Mit != BLAS2_M.end() ; ++Mit) + for(std::vector::const_iterator Nit = BLAS2_N.begin() ; Nit != BLAS2_N.end() ; ++Nit) + { + int_t M = *Mit; + int_t N = *Nit; + std::cout << M << "," << N; + /* ATIDLAS */ + atidlas::array A(N, M, dtype), y(M, dtype), x(N, dtype); + BENCHMARK(y = dot(trans(A),x), bandwidth(M*N + M + N, tres, dtsize)); + /* clAmdBlas */ + #ifdef BENCH_CLAMDBLAS + BENCHMARK(clAmdBlasSgemv(clAmdBlasColumnMajor, clAmdBlasTrans, N, M, 1, A.data()(), A.ld(), x.data()(), 0, 1, 0, y.data()(), 0, 1, 1, &atidlas::cl::get_queue(x.context(), 0)(),0, NULL, NULL), bandwidth(M*N + M + N, tres, dtsize)) + #endif + /* BLAS */ + #ifdef BENCH_CBLAS + std::vector cA(N*M), cx(N), cy(M); + atidlas::copy(x, cx); + atidlas::copy(y, cy); + atidlas::copy(A, cA); + BENCHMARK(cblas_sgemv(CblasColMajor, CblasTrans, N, M, 1, cA.data(), N, cx.data(), 1, 0, cy.data(), 1), bandwidth(M*N + M + N, tres, dtsize)); + #endif + std::cout << std::endl; + } + std::cout << "\n\n" << std::flush; // /*---------*/ // 
/*--BLAS3--*/ diff --git a/bench/common.hpp b/bench/common.hpp index 64c71cece..8889e442e 100644 --- a/bench/common.hpp +++ b/bench/common.hpp @@ -47,8 +47,8 @@ private: static const std::vector BLAS1_N = create_log_range(1e3, 2e7, 50, 64); // BLAS2 Sizes -static const std::vector BLAS2_M = make_vector() << 256; -static const std::vector BLAS2_N = create_full_range(128, 5000, 64); +static const std::vector BLAS2_N = make_vector() << 64; +static const std::vector BLAS2_M = create_full_range(128, 10000, 64); // BLAS3 Sizes static const std::vector BLAS3_M = make_vector() << 1024; diff --git a/include/atidlas/backend/templates/maxpy.h b/include/atidlas/backend/templates/maxpy.h index 3fc172141..4e877f1ad 100644 --- a/include/atidlas/backend/templates/maxpy.h +++ b/include/atidlas/backend/templates/maxpy.h @@ -10,10 +10,7 @@ namespace atidlas class maxpy_parameters : public base::parameters_type { public: - maxpy_parameters(unsigned int _simd_width, - unsigned int _local_size_0, unsigned int _local_size_1, - unsigned int _num_groups_0, unsigned int _num_groups_1, - fetching_policy_type _fetching_policy); + maxpy_parameters(unsigned int _simd_width, unsigned int _local_size_0, unsigned int _local_size_1, unsigned int _num_groups_0, unsigned int _num_groups_1, fetching_policy_type _fetching_policy); unsigned int num_groups_0; unsigned int num_groups_1; @@ -28,14 +25,9 @@ private: std::vector generate_impl(unsigned int label, symbolic_expressions_container const & symbolic_expressions, std::vector const & mappings) const; public: maxpy(parameters_type const & parameters, binding_policy_t binding_policy = BIND_ALL_UNIQUE); - maxpy(unsigned int simd, unsigned int ls1, unsigned int ls2, - unsigned int ng1, unsigned int ng2, fetching_policy_type fetch, - binding_policy_t bind = BIND_ALL_UNIQUE); + maxpy(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2, fetching_policy_type fetch, binding_policy_t bind = BIND_ALL_UNIQUE); std::vector 
input_sizes(symbolic_expressions_container const & symbolic_expressions); - void enqueue(cl::CommandQueue & queue, - std::vector & programs, - unsigned int label, - symbolic_expressions_container const & symbolic_expressions); + void enqueue(cl::CommandQueue & queue, std::vector & programs, unsigned int label, symbolic_expressions_container const & symbolic_expressions); }; } diff --git a/include/atidlas/backend/templates/mproduct.h b/include/atidlas/backend/templates/mproduct.h index 6b35a347f..e022a0d14 100644 --- a/include/atidlas/backend/templates/mproduct.h +++ b/include/atidlas/backend/templates/mproduct.h @@ -36,7 +36,7 @@ private: unsigned int lmem_usage(symbolic_expressions_container const & symbolic_expressions) const; unsigned int registers_usage(symbolic_expressions_container const & symbolic_expressions) const; int check_invalid_impl(cl::Device const &, symbolic_expressions_container const &) const; - std::string generate_impl(unsigned int label, char id, const symbolic_expressions_container &symbolic_expressions, const std::vector &, bool fallback) const; + std::string generate_impl(unsigned int label, const char * id, const symbolic_expressions_container &symbolic_expressions, const std::vector &, bool fallback) const; std::vector generate_impl(unsigned int label, symbolic_expressions_container const & symbolic_expressions, std::vector const & mappings) const; void enqueue_block(cl::CommandQueue & queue, int_t M, int_t N, int_t K, array_infos const & A, array_infos const & B, array_infos const & C, diff --git a/include/atidlas/backend/templates/reduction.h b/include/atidlas/backend/templates/reduction.h index 5a1bde6d5..9a0282a4b 100644 --- a/include/atidlas/backend/templates/reduction.h +++ b/include/atidlas/backend/templates/reduction.h @@ -22,7 +22,7 @@ private: int check_invalid_impl(cl::Device const &, symbolic_expressions_container const &) const; inline void reduce_1d_local_memory(kernel_generation_stream & stream, unsigned int size, 
std::vector exprs, std::string const & buf_str, std::string const & buf_value_str) const; - std::string generate_impl(unsigned int label, char type, symbolic_expressions_container const & symbolic_expressions, std::vector const & mappings, unsigned int simd_width) const; + std::string generate_impl(unsigned int label, const char * type, symbolic_expressions_container const & symbolic_expressions, std::vector const & mappings, unsigned int simd_width) const; std::vector generate_impl(unsigned int label, symbolic_expressions_container const & symbolic_expressions, std::vector const & mappings) const; public: diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 92091273a..c34004dc9 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -5,15 +5,12 @@ add_library(atidlas SHARED ${LIBATIDLAS_SRC}) set_target_properties(atidlas PROPERTIES COMPILE_FLAGS "-Wno-sign-compare -D__CL_ENABLE_EXCEPTIONS -Wall -Wextra -pedantic") -set(PREINSTALL_PY "${CMAKE_CURRENT_BINARY_DIR}/preinstall.py") + +#install(TARGETS atidlas LIBRARY DESTINATION lib) +#set(INSTALL_INCLUDE_DIR /usr/local/include) +#install(DIRECTORY atidlas "${PROJECT_SOURCE_DIR}/include/atidlas" +# DESTINATION "${INSTALL_INCLUDE_DIR}" FILES_MATCHING PATTERN "*.h" PATTERN "*.hpp") + set(POSTINSTALL_PY "${CMAKE_CURRENT_BINARY_DIR}/postinstall.py") - -configure_file("${PROJECT_SOURCE_DIR}/python/preinstall.py" ${PREINSTALL_PY}) configure_file("${PROJECT_SOURCE_DIR}/python/postinstall.py" ${POSTINSTALL_PY}) - -install(CODE "execute_process(COMMAND ${PYTHON} ${PREINSTALL_PY})") -install(TARGETS atidlas LIBRARY DESTINATION lib) -set(INSTALL_INCLUDE_DIR /usr/local/include) -install(DIRECTORY atidlas "${PROJECT_SOURCE_DIR}/include/atidlas" - DESTINATION "${INSTALL_INCLUDE_DIR}" FILES_MATCHING PATTERN "*.h" PATTERN "*.hpp") -install(CODE "execute_process(COMMAND ${PYTHON} ${POST_INSTALL_SCRIPT_PY})") +install(CODE "execute_process(COMMAND ${PYTHON} ${POSTINSTALL_PY})") diff --git a/lib/backend/templates/maxpy.cpp 
b/lib/backend/templates/maxpy.cpp index 1ff2fc703..4f14fac9e 100644 --- a/lib/backend/templates/maxpy.cpp +++ b/lib/backend/templates/maxpy.cpp @@ -29,8 +29,11 @@ std::string maxpy::generate_impl(unsigned int label, symbolic_expressions_contai std::string init0, upper_bound0, inc0, init1, upper_bound1, inc1; + char kprefix[10]; + fill_kernel_name(kprefix, label, "d"); + stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; - stream << "__kernel void " << "k" << label << "d" << "(unsigned int M, unsigned int N, " << generate_arguments("#scalartype", mappings, symbolic_expressions) << ")" << std::endl; + stream << "__kernel void " << kprefix << "(unsigned int M, unsigned int N, " << generate_arguments("#scalartype", mappings, symbolic_expressions) << ")" << std::endl; stream << "{" << std::endl; stream.inc_tab(); diff --git a/lib/backend/templates/mproduct.cpp b/lib/backend/templates/mproduct.cpp index 47833fd79..d3de1293d 100644 --- a/lib/backend/templates/mproduct.cpp +++ b/lib/backend/templates/mproduct.cpp @@ -87,7 +87,7 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width return TEMPLATE_VALID; } - std::string mproduct::generate_impl(unsigned int label, char id, const symbolic_expressions_container &symbolic_expressions, const std::vector &, bool fallback) const + std::string mproduct::generate_impl(unsigned int label, const char * id, const symbolic_expressions_container &symbolic_expressions, const std::vector &, bool fallback) const { using std::string; using tools::to_string; @@ -121,7 +121,10 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width /// ////////////// std::string widthdtype = append_width("#scalartype", p.simd_width); stream << " __attribute__((reqd_work_group_size(" << p.local_size_0 << "," << p.local_size_1 << ",1)))" << std::endl; - stream << "__kernel void " << "k" << label << id << "(unsigned int M, unsigned int N, unsigned int K, " + char 
kprefix[10]; + fill_kernel_name(kprefix, label, id); + + stream << "__kernel void " << kprefix << "(unsigned int M, unsigned int N, unsigned int K, " << C.process("__global #scalartype* #pointer, uint #ld, uint #start1, uint #start2, uint #stride1, uint #stride2,") << alpha.process("#scalartype #name,") << A.process("__global " + widthdtype + "* #pointer, uint #ld, uint #start1, uint #start2, uint #stride1, uint #stride2,") @@ -557,8 +560,8 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width std::vector mproduct::generate_impl(unsigned int label, symbolic_expressions_container const & symbolic_expressions, std::vector const & mappings) const { std::vector res; - res.push_back(generate_impl(label, 'o', symbolic_expressions, mappings, false)); - res.push_back(generate_impl(label, 'f', symbolic_expressions, mappings, true)); + res.push_back(generate_impl(label, "o", symbolic_expressions, mappings, false)); + res.push_back(generate_impl(label, "f", symbolic_expressions, mappings, true)); return res; } diff --git a/lib/backend/templates/mreduction.cpp b/lib/backend/templates/mreduction.cpp index 8c98c26ea..33a6e8f22 100644 --- a/lib/backend/templates/mreduction.cpp +++ b/lib/backend/templates/mreduction.cpp @@ -36,8 +36,11 @@ std::string mreduction::generate_impl(unsigned int label, symbolic_expressions_c kernel_generation_stream stream; + char kprefix[10]; + fill_kernel_name(kprefix, label, "d"); + stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; - stream << "__kernel void " << "k" << label << "d" << "(unsigned int M, unsigned int N, " << generate_arguments("#scalartype", mappings, symbolic_expressions) << ")" << std::endl; + stream << "__kernel void " << kprefix << "(unsigned int M, unsigned int N, " << generate_arguments("#scalartype", mappings, symbolic_expressions) << ")" << std::endl; stream << "{" << std::endl; stream.inc_tab(); diff --git a/lib/backend/templates/reduction.cpp 
b/lib/backend/templates/reduction.cpp index 59da66b90..fe79bb310 100644 --- a/lib/backend/templates/reduction.cpp +++ b/lib/backend/templates/reduction.cpp @@ -56,7 +56,7 @@ inline void reduction::reduce_1d_local_memory(kernel_generation_stream & stream, stream << "}" << std::endl; } -std::string reduction::generate_impl(unsigned int label, char type, symbolic_expressions_container const & symbolic_expressions, std::vector const & mappings, unsigned int simd_width) const +std::string reduction::generate_impl(unsigned int label, const char * type, symbolic_expressions_container const & symbolic_expressions, std::vector const & mappings, unsigned int simd_width) const { kernel_generation_stream stream; @@ -85,8 +85,11 @@ std::string reduction::generate_impl(unsigned int label, char type, symbolic_exp /* ------------------------ * First Kernel * -----------------------*/ + char kprefix[10]; + fill_kernel_name(kprefix, label, type); + stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << ",1,1)))" << std::endl; - stream << "__kernel void " << "k" << label << type << "0" << "(" << arguments << generate_arguments("#scalartype", mappings, symbolic_expressions) << ")" << std::endl; + stream << "__kernel void " << kprefix << "0" << "(" << arguments << generate_arguments("#scalartype", mappings, symbolic_expressions) << ")" << std::endl; stream << "{" << std::endl; stream.inc_tab(); @@ -191,7 +194,7 @@ std::string reduction::generate_impl(unsigned int label, char type, symbolic_exp * Second kernel * -----------------------*/ stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << ",1,1)))" << std::endl; - stream << "__kernel void " << "k" << label << type << "1" << "(" << arguments << generate_arguments("#scalartype", mappings, symbolic_expressions) << ")" << std::endl; + stream << "__kernel void " << kprefix << "1" << "(" << arguments << generate_arguments("#scalartype", mappings, symbolic_expressions) << ")" << std::endl; stream << "{" << 
std::endl; stream.inc_tab(); @@ -256,8 +259,8 @@ std::string reduction::generate_impl(unsigned int label, char type, symbolic_exp std::vector reduction::generate_impl(unsigned int label, symbolic_expressions_container const & symbolic_expressions, std::vector const & mappings) const { std::vector result; - result.push_back(generate_impl(label, 'f', symbolic_expressions, mappings, 1)); - result.push_back(generate_impl(label, 'o', symbolic_expressions, mappings, p_.simd_width)); + result.push_back(generate_impl(label, "f", symbolic_expressions, mappings, 1)); + result.push_back(generate_impl(label, "o", symbolic_expressions, mappings, p_.simd_width)); return result; } diff --git a/lib/model/model.cpp b/lib/model/model.cpp index 4936d8a7a..0eff6959e 100644 --- a/lib/model/model.cpp +++ b/lib/model/model.cpp @@ -142,14 +142,14 @@ namespace detail static expression_type get_expression_type(std::string const & name) { if(name=="vaxpy") return VECTOR_AXPY_TYPE; - if(name=="reduction") return REDUCTION_TYPE; + if(name=="dot") return REDUCTION_TYPE; if(name=="maxpy") return MATRIX_AXPY_TYPE; - if(name=="row-wise-reductionN") return ROW_WISE_REDUCTION_TYPE; - if(name=="row-wise-reductionT") return COL_WISE_REDUCTION_TYPE; - if(name=="matrix-productNN") return MATRIX_PRODUCT_NN_TYPE; - if(name=="matrix-productNT") return MATRIX_PRODUCT_NT_TYPE; - if(name=="matrix-productTN") return MATRIX_PRODUCT_TN_TYPE; - if(name=="matrix-productTT") return MATRIX_PRODUCT_TT_TYPE; + if(name=="gemvN") return ROW_WISE_REDUCTION_TYPE; + if(name=="gemvT") return COL_WISE_REDUCTION_TYPE; + if(name=="gemmNN") return MATRIX_PRODUCT_NN_TYPE; + if(name=="gemmNT") return MATRIX_PRODUCT_NT_TYPE; + if(name=="gemmTN") return MATRIX_PRODUCT_TN_TYPE; + if(name=="gemmTT") return MATRIX_PRODUCT_TT_TYPE; throw ; } @@ -164,22 +164,23 @@ namespace detail { fetching_policy_type fetch[] = {FETCH_FROM_LOCAL, FETCH_FROM_GLOBAL_STRIDED, FETCH_FROM_GLOBAL_CONTIGUOUS}; if(template_name=="vaxpy") - return 
tools::shared_ptr(new vaxpy( vaxpy_parameters(a[0], a[1], a[2], fetch[a[3]]))); - else if(template_name=="reduction") - return tools::shared_ptr(new reduction( reduction_parameters(a[0], a[1], a[2], fetch[a[3]]))); + return tools::shared_ptr(new vaxpy(a[0], a[1], a[2], fetch[a[3]])); + else if(template_name=="dot") + return tools::shared_ptr(new reduction(a[0], a[1], a[2], fetch[a[3]])); else if(template_name=="maxpy") - return tools::shared_ptr(new maxpy( maxpy_parameters(a[0], a[1], a[2], a[3], a[4], fetch[a[5]]))); - else if(template_name.find("row-wise-reduction")!=std::string::npos) - { - return tools::shared_ptr(new mreduction_rows( mreduction_parameters(a[0], a[1], a[2], a[3], fetch[a[4]]))); - } - else if(template_name.find("matrix-product")!=std::string::npos) - { - char A_trans = template_name[15]; - char B_trans = template_name[16]; - return tools::shared_ptr(new mproduct( mproduct_parameters(a[0], a[1], a[2], a[3], a[4], a[5], a[6], - fetch[a[7]], fetch[a[8]], a[9], a[10]), A_trans, B_trans)); - } + return tools::shared_ptr(new maxpy(a[0], a[1], a[2], a[3], a[4], fetch[a[5]])); + else if(template_name.find("gemvN")!=std::string::npos) + return tools::shared_ptr(new mreduction_rows(a[0], a[1], a[2], a[3], fetch[a[4]])); + else if(template_name.find("gemvT")!=std::string::npos) + return tools::shared_ptr(new mreduction_cols(a[0], a[1], a[2], a[3], fetch[a[4]])); + else if(template_name.find("gemmNN")!=std::string::npos) + return tools::shared_ptr(new mproduct_nn(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10])); + else if(template_name.find("gemmTN")!=std::string::npos) + return tools::shared_ptr(new mproduct_tn(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10])); + else if(template_name.find("gemmNT")!=std::string::npos) + return tools::shared_ptr(new mproduct_nt(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10])); + else 
if(template_name.find("gemmTT")!=std::string::npos) + return tools::shared_ptr(new mproduct_tt(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10])); else throw operation_not_supported_exception("Cannot create the given operation"); } @@ -198,9 +199,9 @@ void import(std::string const & fname, cl::CommandQueue & queue, model_map_t& re str.assign((std::istreambuf_iterator(t)), std::istreambuf_iterator()); document.Parse<0>(str.c_str()); //Deserialize - std::vector operations = tools::make_vector() << "vaxpy" << "reduction" - << "maxpy" << "row-wise-reductionN" << "row-wise-reductionT" - << "matrix-productNN" << "matrix-productTN" << "matrix-productNT" << "matrix-productTT"; + std::vector operations = tools::make_vector() << "vaxpy" << "dot" + << "maxpy" << "gemvN" << "gemvT" + << "gemmNN" << "gemmTN" << "gemmNT" << "gemmTT"; std::vector dtype = tools::make_vector() << "float32" << "float64"; for(std::vector::iterator op = operations.begin() ; op != operations.end() ; ++op) {