Various fixes
This commit is contained in:
156
bench/blas.cpp
156
bench/blas.cpp
@@ -36,86 +36,86 @@ void bench(ad::numeric_type dtype)
|
||||
std::cout << " " << PERF << std::flush;\
|
||||
}
|
||||
|
||||
// /*---------*/
|
||||
// /*--BLAS1--*/
|
||||
// /*---------*/
|
||||
// std::cout << "#AXPY" << std::endl;
|
||||
// for(std::vector<int_t>::const_iterator it = BLAS1_N.begin() ; it != BLAS1_N.end() ; ++it)
|
||||
// {
|
||||
// int_t N = *it;
|
||||
// std::cout << N;
|
||||
// /* ATIDLAS */
|
||||
// atidlas::array x(N, dtype), y(N, dtype);
|
||||
// BENCHMARK(y = x + y, bandwidth(3*N, tres, dtsize));
|
||||
// /* clAmdBlas */
|
||||
//#ifdef BENCH_CLAMDBLAS
|
||||
// BENCHMARK(clAmdBlasSaxpy(N, 1, x.data()(), 0, 1, y.data()(), 0, 1, 1, &atidlas::cl::get_queue(x.context(), 0)(), 0, NULL, NULL), bandwidth(3*N, tres, dtsize))
|
||||
//#endif
|
||||
// /* BLAS */
|
||||
//#ifdef BENCH_CBLAS
|
||||
// std::vector<float> cx(N), cy(N);
|
||||
// atidlas::copy(x, cx);
|
||||
// atidlas::copy(y, cy);
|
||||
// BENCHMARK(cblas_saxpy(N, 1, cx.data(), 1, cy.data(), 1), bandwidth(3*N, tres, dtsize));
|
||||
//#endif
|
||||
// std::cout << std::endl;
|
||||
// }
|
||||
// std::cout << "\n\n" << std::flush;
|
||||
/*---------*/
|
||||
/*--BLAS1--*/
|
||||
/*---------*/
|
||||
std::cout << "#AXPY" << std::endl;
|
||||
for(std::vector<int_t>::const_iterator it = BLAS1_N.begin() ; it != BLAS1_N.end() ; ++it)
|
||||
{
|
||||
int_t N = *it;
|
||||
std::cout << N;
|
||||
/* ATIDLAS */
|
||||
atidlas::array x(N, dtype), y(N, dtype);
|
||||
BENCHMARK(y = x + y, bandwidth(3*N, tres, dtsize));
|
||||
/* clAmdBlas */
|
||||
#ifdef BENCH_CLAMDBLAS
|
||||
BENCHMARK(clAmdBlasSaxpy(N, 1, x.data()(), 0, 1, y.data()(), 0, 1, 1, &atidlas::cl::get_queue(x.context(), 0)(), 0, NULL, NULL), bandwidth(3*N, tres, dtsize))
|
||||
#endif
|
||||
/* BLAS */
|
||||
#ifdef BENCH_CBLAS
|
||||
std::vector<float> cx(N), cy(N);
|
||||
atidlas::copy(x, cx);
|
||||
atidlas::copy(y, cy);
|
||||
BENCHMARK(cblas_saxpy(N, 1, cx.data(), 1, cy.data(), 1), bandwidth(3*N, tres, dtsize));
|
||||
#endif
|
||||
std::cout << std::endl;
|
||||
}
|
||||
std::cout << "\n\n" << std::flush;
|
||||
|
||||
// std::cout << "#DOT" << std::endl;
|
||||
// for(std::vector<int_t>::const_iterator it = BLAS1_N.begin() ; it != BLAS1_N.end() ; ++it)
|
||||
// {
|
||||
// int_t N = *it;
|
||||
// std::cout << N;
|
||||
// /* ATIDLAS */
|
||||
// atidlas::array x(N, dtype), y(N, dtype);
|
||||
// atidlas::array scratch(N, dtype);
|
||||
// atidlas::scalar s(dtype);
|
||||
// BENCHMARK(s = dot(x,y), bandwidth(2*N, tres, dtsize));
|
||||
// /* clAmdBlas */
|
||||
//#ifdef BENCH_CLAMDBLAS
|
||||
// BENCHMARK(clAmdBlasSdot(N, s.data()(), 0, x.data()(), 0, 1, y.data()(), 0, 1, scratch.data()(), 1, &atidlas::cl::get_queue(x.context(), 0)(), 0, NULL, NULL), bandwidth(2*N, tres, dtsize))
|
||||
//#endif
|
||||
// /* BLAS */
|
||||
//#ifdef BENCH_CBLAS
|
||||
// std::vector<float> cx(N), cy(N);
|
||||
// atidlas::copy(x, cx);
|
||||
// atidlas::copy(y, cy);
|
||||
// BENCHMARK(cblas_sdot(N, cx.data(), 1, cy.data(), 1), bandwidth(2*N, tres, dtsize));
|
||||
//#endif
|
||||
// std::cout << std::endl;
|
||||
// }
|
||||
// std::cout << "\n\n" << std::flush;
|
||||
std::cout << "#DOT" << std::endl;
|
||||
for(std::vector<int_t>::const_iterator it = BLAS1_N.begin() ; it != BLAS1_N.end() ; ++it)
|
||||
{
|
||||
int_t N = *it;
|
||||
std::cout << N;
|
||||
/* ATIDLAS */
|
||||
atidlas::array x(N, dtype), y(N, dtype);
|
||||
atidlas::array scratch(N, dtype);
|
||||
atidlas::scalar s(dtype);
|
||||
BENCHMARK(s = dot(x,y), bandwidth(2*N, tres, dtsize));
|
||||
/* clAmdBlas */
|
||||
#ifdef BENCH_CLAMDBLAS
|
||||
BENCHMARK(clAmdBlasSdot(N, s.data()(), 0, x.data()(), 0, 1, y.data()(), 0, 1, scratch.data()(), 1, &atidlas::cl::get_queue(x.context(), 0)(), 0, NULL, NULL), bandwidth(2*N, tres, dtsize))
|
||||
#endif
|
||||
/* BLAS */
|
||||
#ifdef BENCH_CBLAS
|
||||
std::vector<float> cx(N), cy(N);
|
||||
atidlas::copy(x, cx);
|
||||
atidlas::copy(y, cy);
|
||||
BENCHMARK(cblas_sdot(N, cx.data(), 1, cy.data(), 1), bandwidth(2*N, tres, dtsize));
|
||||
#endif
|
||||
std::cout << std::endl;
|
||||
}
|
||||
std::cout << "\n\n" << std::flush;
|
||||
|
||||
// /*---------*/
|
||||
// /*--BLAS2--*/
|
||||
// /*---------*/
|
||||
// //T-layout
|
||||
// std::cout << "#GEMV-T" << std::endl;
|
||||
// for(std::vector<int>::const_iterator Mit = BLAS2_M.begin() ; Mit != BLAS2_M.end() ; ++Mit)
|
||||
// for(std::vector<int_t>::const_iterator Nit = BLAS2_N.begin() ; Nit != BLAS2_N.end() ; ++Nit)
|
||||
// {
|
||||
// int_t M = *Mit;
|
||||
// int_t N = *Nit;
|
||||
// std::cout << M << "," << N;
|
||||
// /* ATIDLAS */
|
||||
// atidlas::array A(N, M, dtype), y(M, dtype), x(N, dtype);
|
||||
// BENCHMARK(y = dot(trans(A),x), bandwidth(M*N + M + N, tres, dtsize));
|
||||
// /* clAmdBlas */
|
||||
// #ifdef BENCH_CLAMDBLAS
|
||||
// BENCHMARK(clAmdBlasSgemv(clAmdBlasColumnMajor, clAmdBlasTrans, N, M, 1, A.data()(), A.ld(), x.data()(), 0, 1, 0, y.data()(), 0, 1, 1, &atidlas::cl::get_queue(x.context(), 0)(),0, NULL, NULL), bandwidth(M*N + M + N, tres, dtsize))
|
||||
// #endif
|
||||
// /* BLAS */
|
||||
// #ifdef BENCH_CBLAS
|
||||
// std::vector<float> cA(N*M), cx(N), cy(M);
|
||||
// atidlas::copy(x, cx);
|
||||
// atidlas::copy(y, cy);
|
||||
// atidlas::copy(A, cA);
|
||||
// BENCHMARK(cblas_sgemv(CblasColMajor, CblasTrans, N, M, 1, cA.data(), N, cx.data(), 1, 0, cy.data(), 1), bandwidth(M*N + M + N, tres, dtsize));
|
||||
// #endif
|
||||
// std::cout << std::endl;
|
||||
// }
|
||||
// std::cout << "\n\n" << std::flush;
|
||||
/*---------*/
|
||||
/*--BLAS2--*/
|
||||
/*---------*/
|
||||
//T-layout
|
||||
std::cout << "#GEMV-T" << std::endl;
|
||||
for(std::vector<int>::const_iterator Mit = BLAS2_M.begin() ; Mit != BLAS2_M.end() ; ++Mit)
|
||||
for(std::vector<int_t>::const_iterator Nit = BLAS2_N.begin() ; Nit != BLAS2_N.end() ; ++Nit)
|
||||
{
|
||||
int_t M = *Mit;
|
||||
int_t N = *Nit;
|
||||
std::cout << M << "," << N;
|
||||
/* ATIDLAS */
|
||||
atidlas::array A(N, M, dtype), y(M, dtype), x(N, dtype);
|
||||
BENCHMARK(y = dot(trans(A),x), bandwidth(M*N + M + N, tres, dtsize));
|
||||
/* clAmdBlas */
|
||||
#ifdef BENCH_CLAMDBLAS
|
||||
BENCHMARK(clAmdBlasSgemv(clAmdBlasColumnMajor, clAmdBlasTrans, N, M, 1, A.data()(), A.ld(), x.data()(), 0, 1, 0, y.data()(), 0, 1, 1, &atidlas::cl::get_queue(x.context(), 0)(),0, NULL, NULL), bandwidth(M*N + M + N, tres, dtsize))
|
||||
#endif
|
||||
/* BLAS */
|
||||
#ifdef BENCH_CBLAS
|
||||
std::vector<float> cA(N*M), cx(N), cy(M);
|
||||
atidlas::copy(x, cx);
|
||||
atidlas::copy(y, cy);
|
||||
atidlas::copy(A, cA);
|
||||
BENCHMARK(cblas_sgemv(CblasColMajor, CblasTrans, N, M, 1, cA.data(), N, cx.data(), 1, 0, cy.data(), 1), bandwidth(M*N + M + N, tres, dtsize));
|
||||
#endif
|
||||
std::cout << std::endl;
|
||||
}
|
||||
std::cout << "\n\n" << std::flush;
|
||||
|
||||
// /*---------*/
|
||||
// /*--BLAS3--*/
|
||||
|
@@ -47,8 +47,8 @@ private:
|
||||
static const std::vector<int> BLAS1_N = create_log_range(1e3, 2e7, 50, 64);
|
||||
|
||||
// BLAS2 Sizes
|
||||
static const std::vector<int> BLAS2_M = make_vector<int>() << 256;
|
||||
static const std::vector<int> BLAS2_N = create_full_range(128, 5000, 64);
|
||||
static const std::vector<int> BLAS2_N = make_vector<int>() << 64;
|
||||
static const std::vector<int> BLAS2_M = create_full_range(128, 10000, 64);
|
||||
|
||||
// BLAS3 Sizes
|
||||
static const std::vector<int> BLAS3_M = make_vector<int>() << 1024;
|
||||
|
@@ -10,10 +10,7 @@ namespace atidlas
|
||||
class maxpy_parameters : public base::parameters_type
|
||||
{
|
||||
public:
|
||||
maxpy_parameters(unsigned int _simd_width,
|
||||
unsigned int _local_size_0, unsigned int _local_size_1,
|
||||
unsigned int _num_groups_0, unsigned int _num_groups_1,
|
||||
fetching_policy_type _fetching_policy);
|
||||
maxpy_parameters(unsigned int _simd_width, unsigned int _local_size_0, unsigned int _local_size_1, unsigned int _num_groups_0, unsigned int _num_groups_1, fetching_policy_type _fetching_policy);
|
||||
|
||||
unsigned int num_groups_0;
|
||||
unsigned int num_groups_1;
|
||||
@@ -28,14 +25,9 @@ private:
|
||||
std::vector<std::string> generate_impl(unsigned int label, symbolic_expressions_container const & symbolic_expressions, std::vector<mapping_type> const & mappings) const;
|
||||
public:
|
||||
maxpy(parameters_type const & parameters, binding_policy_t binding_policy = BIND_ALL_UNIQUE);
|
||||
maxpy(unsigned int simd, unsigned int ls1, unsigned int ls2,
|
||||
unsigned int ng1, unsigned int ng2, fetching_policy_type fetch,
|
||||
binding_policy_t bind = BIND_ALL_UNIQUE);
|
||||
maxpy(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2, fetching_policy_type fetch, binding_policy_t bind = BIND_ALL_UNIQUE);
|
||||
std::vector<int_t> input_sizes(symbolic_expressions_container const & symbolic_expressions);
|
||||
void enqueue(cl::CommandQueue & queue,
|
||||
std::vector<cl::lazy_compiler> & programs,
|
||||
unsigned int label,
|
||||
symbolic_expressions_container const & symbolic_expressions);
|
||||
void enqueue(cl::CommandQueue & queue, std::vector<cl::lazy_compiler> & programs, unsigned int label, symbolic_expressions_container const & symbolic_expressions);
|
||||
};
|
||||
|
||||
}
|
||||
|
@@ -36,7 +36,7 @@ private:
|
||||
unsigned int lmem_usage(symbolic_expressions_container const & symbolic_expressions) const;
|
||||
unsigned int registers_usage(symbolic_expressions_container const & symbolic_expressions) const;
|
||||
int check_invalid_impl(cl::Device const &, symbolic_expressions_container const &) const;
|
||||
std::string generate_impl(unsigned int label, char id, const symbolic_expressions_container &symbolic_expressions, const std::vector<mapping_type> &, bool fallback) const;
|
||||
std::string generate_impl(unsigned int label, const char * id, const symbolic_expressions_container &symbolic_expressions, const std::vector<mapping_type> &, bool fallback) const;
|
||||
std::vector<std::string> generate_impl(unsigned int label, symbolic_expressions_container const & symbolic_expressions, std::vector<mapping_type> const & mappings) const;
|
||||
void enqueue_block(cl::CommandQueue & queue, int_t M, int_t N, int_t K,
|
||||
array_infos const & A, array_infos const & B, array_infos const & C,
|
||||
|
@@ -22,7 +22,7 @@ private:
|
||||
int check_invalid_impl(cl::Device const &, symbolic_expressions_container const &) const;
|
||||
inline void reduce_1d_local_memory(kernel_generation_stream & stream, unsigned int size, std::vector<mapped_scalar_reduction*> exprs,
|
||||
std::string const & buf_str, std::string const & buf_value_str) const;
|
||||
std::string generate_impl(unsigned int label, char type, symbolic_expressions_container const & symbolic_expressions, std::vector<mapping_type> const & mappings, unsigned int simd_width) const;
|
||||
std::string generate_impl(unsigned int label, const char * type, symbolic_expressions_container const & symbolic_expressions, std::vector<mapping_type> const & mappings, unsigned int simd_width) const;
|
||||
std::vector<std::string> generate_impl(unsigned int label, symbolic_expressions_container const & symbolic_expressions, std::vector<mapping_type> const & mappings) const;
|
||||
|
||||
public:
|
||||
|
@@ -5,15 +5,12 @@ add_library(atidlas SHARED ${LIBATIDLAS_SRC})
|
||||
set_target_properties(atidlas PROPERTIES
|
||||
COMPILE_FLAGS "-Wno-sign-compare -D__CL_ENABLE_EXCEPTIONS -Wall -Wextra -pedantic")
|
||||
|
||||
set(PREINSTALL_PY "${CMAKE_CURRENT_BINARY_DIR}/preinstall.py")
|
||||
|
||||
#install(TARGETS atidlas LIBRARY DESTINATION lib)
|
||||
#set(INSTALL_INCLUDE_DIR /usr/local/include)
|
||||
#install(DIRECTORY atidlas "${PROJECT_SOURCE_DIR}/include/atidlas"
|
||||
# DESTINATION "${INSTALL_INCLUDE_DIR}" FILES_MATCHING PATTERN "*.h" PATTERN "*.hpp")
|
||||
|
||||
set(POSTINSTALL_PY "${CMAKE_CURRENT_BINARY_DIR}/postinstall.py")
|
||||
|
||||
configure_file("${PROJECT_SOURCE_DIR}/python/preinstall.py" ${PREINSTALL_PY})
|
||||
configure_file("${PROJECT_SOURCE_DIR}/python/postinstall.py" ${POSTINSTALL_PY})
|
||||
|
||||
install(CODE "execute_process(COMMAND ${PYTHON} ${PREINSTALL_PY})")
|
||||
install(TARGETS atidlas LIBRARY DESTINATION lib)
|
||||
set(INSTALL_INCLUDE_DIR /usr/local/include)
|
||||
install(DIRECTORY atidlas "${PROJECT_SOURCE_DIR}/include/atidlas"
|
||||
DESTINATION "${INSTALL_INCLUDE_DIR}" FILES_MATCHING PATTERN "*.h" PATTERN "*.hpp")
|
||||
install(CODE "execute_process(COMMAND ${PYTHON} ${POST_INSTALL_SCRIPT_PY})")
|
||||
install(CODE "execute_process(COMMAND ${PYTHON} ${POSTINSTALL_PY})")
|
||||
|
@@ -29,8 +29,11 @@ std::string maxpy::generate_impl(unsigned int label, symbolic_expressions_contai
|
||||
|
||||
std::string init0, upper_bound0, inc0, init1, upper_bound1, inc1;
|
||||
|
||||
char kprefix[10];
|
||||
fill_kernel_name(kprefix, label, "d");
|
||||
|
||||
stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl;
|
||||
stream << "__kernel void " << "k" << label << "d" << "(unsigned int M, unsigned int N, " << generate_arguments("#scalartype", mappings, symbolic_expressions) << ")" << std::endl;
|
||||
stream << "__kernel void " << kprefix << "(unsigned int M, unsigned int N, " << generate_arguments("#scalartype", mappings, symbolic_expressions) << ")" << std::endl;
|
||||
stream << "{" << std::endl;
|
||||
stream.inc_tab();
|
||||
|
||||
|
@@ -87,7 +87,7 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
|
||||
return TEMPLATE_VALID;
|
||||
}
|
||||
|
||||
std::string mproduct::generate_impl(unsigned int label, char id, const symbolic_expressions_container &symbolic_expressions, const std::vector<mapping_type> &, bool fallback) const
|
||||
std::string mproduct::generate_impl(unsigned int label, const char * id, const symbolic_expressions_container &symbolic_expressions, const std::vector<mapping_type> &, bool fallback) const
|
||||
{
|
||||
using std::string;
|
||||
using tools::to_string;
|
||||
@@ -121,7 +121,10 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
|
||||
/// //////////////
|
||||
std::string widthdtype = append_width("#scalartype", p.simd_width);
|
||||
stream << " __attribute__((reqd_work_group_size(" << p.local_size_0 << "," << p.local_size_1 << ",1)))" << std::endl;
|
||||
stream << "__kernel void " << "k" << label << id << "(unsigned int M, unsigned int N, unsigned int K, "
|
||||
char kprefix[10];
|
||||
fill_kernel_name(kprefix, label, id);
|
||||
|
||||
stream << "__kernel void " << kprefix << "(unsigned int M, unsigned int N, unsigned int K, "
|
||||
<< C.process("__global #scalartype* #pointer, uint #ld, uint #start1, uint #start2, uint #stride1, uint #stride2,")
|
||||
<< alpha.process("#scalartype #name,")
|
||||
<< A.process("__global " + widthdtype + "* #pointer, uint #ld, uint #start1, uint #start2, uint #stride1, uint #stride2,")
|
||||
@@ -557,8 +560,8 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
|
||||
std::vector<std::string> mproduct::generate_impl(unsigned int label, symbolic_expressions_container const & symbolic_expressions, std::vector<mapping_type> const & mappings) const
|
||||
{
|
||||
std::vector<std::string> res;
|
||||
res.push_back(generate_impl(label, 'o', symbolic_expressions, mappings, false));
|
||||
res.push_back(generate_impl(label, 'f', symbolic_expressions, mappings, true));
|
||||
res.push_back(generate_impl(label, "o", symbolic_expressions, mappings, false));
|
||||
res.push_back(generate_impl(label, "f", symbolic_expressions, mappings, true));
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@@ -36,8 +36,11 @@ std::string mreduction::generate_impl(unsigned int label, symbolic_expressions_c
|
||||
|
||||
kernel_generation_stream stream;
|
||||
|
||||
char kprefix[10];
|
||||
fill_kernel_name(kprefix, label, "d");
|
||||
|
||||
stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl;
|
||||
stream << "__kernel void " << "k" << label << "d" << "(unsigned int M, unsigned int N, " << generate_arguments("#scalartype", mappings, symbolic_expressions) << ")" << std::endl;
|
||||
stream << "__kernel void " << kprefix << "(unsigned int M, unsigned int N, " << generate_arguments("#scalartype", mappings, symbolic_expressions) << ")" << std::endl;
|
||||
stream << "{" << std::endl;
|
||||
stream.inc_tab();
|
||||
|
||||
|
@@ -56,7 +56,7 @@ inline void reduction::reduce_1d_local_memory(kernel_generation_stream & stream,
|
||||
stream << "}" << std::endl;
|
||||
}
|
||||
|
||||
std::string reduction::generate_impl(unsigned int label, char type, symbolic_expressions_container const & symbolic_expressions, std::vector<mapping_type> const & mappings, unsigned int simd_width) const
|
||||
std::string reduction::generate_impl(unsigned int label, const char * type, symbolic_expressions_container const & symbolic_expressions, std::vector<mapping_type> const & mappings, unsigned int simd_width) const
|
||||
{
|
||||
kernel_generation_stream stream;
|
||||
|
||||
@@ -85,8 +85,11 @@ std::string reduction::generate_impl(unsigned int label, char type, symbolic_exp
|
||||
/* ------------------------
|
||||
* First Kernel
|
||||
* -----------------------*/
|
||||
char kprefix[10];
|
||||
fill_kernel_name(kprefix, label, type);
|
||||
|
||||
stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << ",1,1)))" << std::endl;
|
||||
stream << "__kernel void " << "k" << label << type << "0" << "(" << arguments << generate_arguments("#scalartype", mappings, symbolic_expressions) << ")" << std::endl;
|
||||
stream << "__kernel void " << kprefix << "0" << "(" << arguments << generate_arguments("#scalartype", mappings, symbolic_expressions) << ")" << std::endl;
|
||||
stream << "{" << std::endl;
|
||||
stream.inc_tab();
|
||||
|
||||
@@ -191,7 +194,7 @@ std::string reduction::generate_impl(unsigned int label, char type, symbolic_exp
|
||||
* Second kernel
|
||||
* -----------------------*/
|
||||
stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << ",1,1)))" << std::endl;
|
||||
stream << "__kernel void " << "k" << label << type << "1" << "(" << arguments << generate_arguments("#scalartype", mappings, symbolic_expressions) << ")" << std::endl;
|
||||
stream << "__kernel void " << kprefix << "1" << "(" << arguments << generate_arguments("#scalartype", mappings, symbolic_expressions) << ")" << std::endl;
|
||||
stream << "{" << std::endl;
|
||||
stream.inc_tab();
|
||||
|
||||
@@ -256,8 +259,8 @@ std::string reduction::generate_impl(unsigned int label, char type, symbolic_exp
|
||||
std::vector<std::string> reduction::generate_impl(unsigned int label, symbolic_expressions_container const & symbolic_expressions, std::vector<mapping_type> const & mappings) const
|
||||
{
|
||||
std::vector<std::string> result;
|
||||
result.push_back(generate_impl(label, 'f', symbolic_expressions, mappings, 1));
|
||||
result.push_back(generate_impl(label, 'o', symbolic_expressions, mappings, p_.simd_width));
|
||||
result.push_back(generate_impl(label, "f", symbolic_expressions, mappings, 1));
|
||||
result.push_back(generate_impl(label, "o", symbolic_expressions, mappings, p_.simd_width));
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@@ -142,14 +142,14 @@ namespace detail
|
||||
static expression_type get_expression_type(std::string const & name)
|
||||
{
|
||||
if(name=="vaxpy") return VECTOR_AXPY_TYPE;
|
||||
if(name=="reduction") return REDUCTION_TYPE;
|
||||
if(name=="dot") return REDUCTION_TYPE;
|
||||
if(name=="maxpy") return MATRIX_AXPY_TYPE;
|
||||
if(name=="row-wise-reductionN") return ROW_WISE_REDUCTION_TYPE;
|
||||
if(name=="row-wise-reductionT") return COL_WISE_REDUCTION_TYPE;
|
||||
if(name=="matrix-productNN") return MATRIX_PRODUCT_NN_TYPE;
|
||||
if(name=="matrix-productNT") return MATRIX_PRODUCT_NT_TYPE;
|
||||
if(name=="matrix-productTN") return MATRIX_PRODUCT_TN_TYPE;
|
||||
if(name=="matrix-productTT") return MATRIX_PRODUCT_TT_TYPE;
|
||||
if(name=="gemvN") return ROW_WISE_REDUCTION_TYPE;
|
||||
if(name=="gemvT") return COL_WISE_REDUCTION_TYPE;
|
||||
if(name=="gemmNN") return MATRIX_PRODUCT_NN_TYPE;
|
||||
if(name=="gemmNT") return MATRIX_PRODUCT_NT_TYPE;
|
||||
if(name=="gemmTN") return MATRIX_PRODUCT_TN_TYPE;
|
||||
if(name=="gemmTT") return MATRIX_PRODUCT_TT_TYPE;
|
||||
throw ;
|
||||
}
|
||||
|
||||
@@ -164,22 +164,23 @@ namespace detail
|
||||
{
|
||||
fetching_policy_type fetch[] = {FETCH_FROM_LOCAL, FETCH_FROM_GLOBAL_STRIDED, FETCH_FROM_GLOBAL_CONTIGUOUS};
|
||||
if(template_name=="vaxpy")
|
||||
return tools::shared_ptr<base>(new vaxpy( vaxpy_parameters(a[0], a[1], a[2], fetch[a[3]])));
|
||||
else if(template_name=="reduction")
|
||||
return tools::shared_ptr<base>(new reduction( reduction_parameters(a[0], a[1], a[2], fetch[a[3]])));
|
||||
return tools::shared_ptr<base>(new vaxpy(a[0], a[1], a[2], fetch[a[3]]));
|
||||
else if(template_name=="dot")
|
||||
return tools::shared_ptr<base>(new reduction(a[0], a[1], a[2], fetch[a[3]]));
|
||||
else if(template_name=="maxpy")
|
||||
return tools::shared_ptr<base>(new maxpy( maxpy_parameters(a[0], a[1], a[2], a[3], a[4], fetch[a[5]])));
|
||||
else if(template_name.find("row-wise-reduction")!=std::string::npos)
|
||||
{
|
||||
return tools::shared_ptr<base>(new mreduction_rows( mreduction_parameters(a[0], a[1], a[2], a[3], fetch[a[4]])));
|
||||
}
|
||||
else if(template_name.find("matrix-product")!=std::string::npos)
|
||||
{
|
||||
char A_trans = template_name[15];
|
||||
char B_trans = template_name[16];
|
||||
return tools::shared_ptr<base>(new mproduct( mproduct_parameters(a[0], a[1], a[2], a[3], a[4], a[5], a[6],
|
||||
fetch[a[7]], fetch[a[8]], a[9], a[10]), A_trans, B_trans));
|
||||
}
|
||||
return tools::shared_ptr<base>(new maxpy(a[0], a[1], a[2], a[3], a[4], fetch[a[5]]));
|
||||
else if(template_name.find("gemvN")!=std::string::npos)
|
||||
return tools::shared_ptr<base>(new mreduction_rows(a[0], a[1], a[2], a[3], fetch[a[4]]));
|
||||
else if(template_name.find("gemvT")!=std::string::npos)
|
||||
return tools::shared_ptr<base>(new mreduction_cols(a[0], a[1], a[2], a[3], fetch[a[4]]));
|
||||
else if(template_name.find("gemmNN")!=std::string::npos)
|
||||
return tools::shared_ptr<base>(new mproduct_nn(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
|
||||
else if(template_name.find("gemmTN")!=std::string::npos)
|
||||
return tools::shared_ptr<base>(new mproduct_tn(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
|
||||
else if(template_name.find("gemmNT")!=std::string::npos)
|
||||
return tools::shared_ptr<base>(new mproduct_nt(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
|
||||
else if(template_name.find("gemmTT")!=std::string::npos)
|
||||
return tools::shared_ptr<base>(new mproduct_tt(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
|
||||
else
|
||||
throw operation_not_supported_exception("Cannot create the given operation");
|
||||
}
|
||||
@@ -198,9 +199,9 @@ void import(std::string const & fname, cl::CommandQueue & queue, model_map_t& re
|
||||
str.assign((std::istreambuf_iterator<char>(t)), std::istreambuf_iterator<char>());
|
||||
document.Parse<0>(str.c_str());
|
||||
//Deserialize
|
||||
std::vector<std::string> operations = tools::make_vector<std::string>() << "vaxpy" << "reduction"
|
||||
<< "maxpy" << "row-wise-reductionN" << "row-wise-reductionT"
|
||||
<< "matrix-productNN" << "matrix-productTN" << "matrix-productNT" << "matrix-productTT";
|
||||
std::vector<std::string> operations = tools::make_vector<std::string>() << "vaxpy" << "dot"
|
||||
<< "maxpy" << "gemvN" << "gemvT"
|
||||
<< "gemmNN" << "gemmTN" << "gemmTT";
|
||||
std::vector<std::string> dtype = tools::make_vector<std::string>() << "float32" << "float64";
|
||||
for(std::vector<std::string>::iterator op = operations.begin() ; op != operations.end() ; ++op)
|
||||
{
|
||||
|
Reference in New Issue
Block a user