removing C++11 interface
This commit is contained in:
@@ -33,7 +33,7 @@ else()
|
|||||||
endif()
|
endif()
|
||||||
|
|
||||||
string(REPLACE ";" " " BLAS_DEF_STR "${BLAS_DEF}")
|
string(REPLACE ";" " " BLAS_DEF_STR "${BLAS_DEF}")
|
||||||
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} " ${BLAS_DEF_STR} -std=c++11")
|
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} " ${BLAS_DEF_STR}")
|
||||||
foreach(PROG blas overhead)
|
foreach(PROG blas overhead)
|
||||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
||||||
if(CUDA_FOUND)
|
if(CUDA_FOUND)
|
||||||
@@ -44,7 +44,7 @@ foreach(PROG blas overhead)
|
|||||||
cuda_add_cublas_to_target(${PROG}-bench)
|
cuda_add_cublas_to_target(${PROG}-bench)
|
||||||
else()
|
else()
|
||||||
add_executable(${PROG}-bench ${PROG}.cpp)
|
add_executable(${PROG}-bench ${PROG}.cpp)
|
||||||
set_target_properties(${PROG}-bench PROPERTIES COMPILE_FLAGS "-Wall -Wextra ${BLAS_DEF_STR} -std=c++11")
|
set_target_properties(${PROG}-bench PROPERTIES COMPILE_FLAGS "-Wall -Wextra ${BLAS_DEF_STR}")
|
||||||
endif()
|
endif()
|
||||||
target_link_libraries(${PROG}-bench ${BLAS_LIBS})
|
target_link_libraries(${PROG}-bench ${BLAS_LIBS})
|
||||||
endforeach(PROG)
|
endforeach(PROG)
|
||||||
|
211
bench/blas.cpp
211
bench/blas.cpp
@@ -1,5 +1,6 @@
|
|||||||
#include "atidlas/array.h"
|
#include "atidlas/array.h"
|
||||||
#include "atidlas/symbolic/execute.h"
|
#include "atidlas/symbolic/execute.h"
|
||||||
|
#include "atidlas/tools/timer.hpp"
|
||||||
#include "common.hpp"
|
#include "common.hpp"
|
||||||
#ifdef BENCH_CLAMDBLAS
|
#ifdef BENCH_CLAMDBLAS
|
||||||
#include "clAmdBlas.h"
|
#include "clAmdBlas.h"
|
||||||
@@ -13,27 +14,89 @@
|
|||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <chrono>
|
#include <numeric>
|
||||||
|
|
||||||
namespace ad = atidlas;
|
namespace ad = atidlas;
|
||||||
typedef ad::int_t int_t;
|
typedef ad::int_t int_t;
|
||||||
|
|
||||||
template<class T>
|
int ceil(int N, int pad)
|
||||||
void bench(ad::numeric_type dtype)
|
|
||||||
{
|
{
|
||||||
unsigned int dtsize = ad::size_of(dtype);
|
return (N%pad==0)?N:(N+pad-1)/pad*pad;
|
||||||
cl::CommandQueue & queue = ad::cl_ext::queues[ad::cl_ext::default_context()][0];
|
}
|
||||||
|
|
||||||
|
std::vector<int> create_log_range(int min, int max, int N, int pad)
|
||||||
|
{
|
||||||
|
std::vector<int> res(N);
|
||||||
|
for(int i = 0 ; i < N ; ++i)
|
||||||
|
{
|
||||||
|
res[i] = std::exp(std::log(min) + (float)(std::log(max) - std::log(min))*i/N);
|
||||||
|
res[i] = ceil(res[i], pad);
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<int> create_full_range(int min, int max, int pad)
|
||||||
|
{
|
||||||
|
std::vector<int> N;
|
||||||
|
for(int i = ceil(min, pad) ; i < ceil(max, pad) ; i+=pad)
|
||||||
|
N.push_back(i);
|
||||||
|
return N;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
class make_vector {
|
||||||
|
public:
|
||||||
|
typedef make_vector<T> my_type;
|
||||||
|
my_type& operator<< (const T& val) {
|
||||||
|
data_.push_back(val);
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
operator std::vector<T>() const {
|
||||||
|
return data_;
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
std::vector<T> data_;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
template<class T>
|
||||||
|
T median(std::vector<T> x)
|
||||||
|
{
|
||||||
|
size_t size = x.size();
|
||||||
|
std::sort(x.begin(), x.end());
|
||||||
|
if (size % 2 == 0)
|
||||||
|
return (x[size / 2 - 1] + x[size / 2]) / 2;
|
||||||
|
else
|
||||||
|
return x[size / 2];
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class T>
|
||||||
|
T mean(std::vector<T> x)
|
||||||
|
{
|
||||||
|
T res = 0;
|
||||||
|
int N = x.size();
|
||||||
|
for(int i = 0 ; i < N ; ++i)
|
||||||
|
res += x[i];
|
||||||
|
return res/N;
|
||||||
|
}
|
||||||
|
|
||||||
|
static double time_event(unsigned long sum, cl::Event const & e)
|
||||||
|
{ return sum + e.getProfilingInfo<CL_PROFILING_COMMAND_END>() - e.getProfilingInfo<CL_PROFILING_COMMAND_START>();}
|
||||||
|
|
||||||
|
template<class T>
|
||||||
|
void bench(ad::numeric_type dtype){
|
||||||
|
|
||||||
#define BENCHMARK_ATIDLAS(OP, PERF) \
|
#define BENCHMARK_ATIDLAS(OP, PERF) \
|
||||||
{\
|
{\
|
||||||
std::vector<long> times;\
|
std::vector<long> times;\
|
||||||
double total_time = 0;\
|
double total_time = 0;\
|
||||||
while(total_time*1e-9 < 1e-1){\
|
while(total_time*1e-9 < 1e-2){\
|
||||||
std::list<cl::Event> events;\
|
std::list<cl::Event> events;\
|
||||||
OP;\
|
OP;\
|
||||||
queue.finish();\
|
queue.finish();\
|
||||||
times.push_back(std::accumulate(events.begin(), events.end(), 0, \
|
times.push_back(std::accumulate(events.begin(), events.end(), 0, &time_event));\
|
||||||
[](unsigned long sum, cl::Event const & e){ return sum + e.getProfilingInfo<CL_PROFILING_COMMAND_END>() - e.getProfilingInfo<CL_PROFILING_COMMAND_SUBMIT>();}));\
|
|
||||||
total_time+=times.back();\
|
total_time+=times.back();\
|
||||||
}\
|
}\
|
||||||
double t = median(times);\
|
double t = median(times);\
|
||||||
@@ -44,11 +107,11 @@ void bench(ad::numeric_type dtype)
|
|||||||
{\
|
{\
|
||||||
std::vector<long> times;\
|
std::vector<long> times;\
|
||||||
double total_time = 0;\
|
double total_time = 0;\
|
||||||
while(total_time*1e-9 < 1e-1){\
|
while(total_time*1e-9 < 1e-2){\
|
||||||
cl::Event event;\
|
cl::Event event;\
|
||||||
OP;\
|
OP;\
|
||||||
queue.finish();\
|
queue.finish();\
|
||||||
times.push_back(event.getProfilingInfo<CL_PROFILING_COMMAND_END>() - event.getProfilingInfo<CL_PROFILING_COMMAND_SUBMIT>());\
|
times.push_back(event.getProfilingInfo<CL_PROFILING_COMMAND_END>() - event.getProfilingInfo<CL_PROFILING_COMMAND_START>());\
|
||||||
total_time+=times.back();\
|
total_time+=times.back();\
|
||||||
}\
|
}\
|
||||||
double t = median(times);\
|
double t = median(times);\
|
||||||
@@ -57,11 +120,11 @@ void bench(ad::numeric_type dtype)
|
|||||||
|
|
||||||
#define BENCHMARK_HOST(OP, PERF) \
|
#define BENCHMARK_HOST(OP, PERF) \
|
||||||
{\
|
{\
|
||||||
|
ad::tools::timer tmr;\
|
||||||
std::vector<int> cache_flusher(10000000, 0);\
|
std::vector<int> cache_flusher(10000000, 0);\
|
||||||
auto start = std::chrono::steady_clock::now();\
|
tmr.start();\
|
||||||
OP;\
|
OP;\
|
||||||
auto end = std::chrono::steady_clock::now();\
|
double t = 1e9*tmr.get();\
|
||||||
double t = std::chrono::duration<double, std::nano>(end - start).count();\
|
|
||||||
std::cout << " " << PERF << std::flush;\
|
std::cout << " " << PERF << std::flush;\
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -86,68 +149,49 @@ void bench(ad::numeric_type dtype)
|
|||||||
std::cout << " " << PERF << std::flush;\
|
std::cout << " " << PERF << std::flush;\
|
||||||
}
|
}
|
||||||
|
|
||||||
/*---------*/
|
unsigned int dtsize = ad::size_of(dtype);
|
||||||
/*--BLAS1--*/
|
cl::CommandQueue & queue = ad::cl_ext::queues[ad::cl_ext::default_context()][0];
|
||||||
/*---------*/
|
|
||||||
std::cout << "#AXPY" << std::endl;
|
|
||||||
for(int_t N : create_log_range(1e3, 2e7, 50, 64))
|
|
||||||
{
|
|
||||||
std::cout << N;
|
|
||||||
ad::array x(N, dtype), y(N, dtype);
|
|
||||||
/* ATIDLAS */
|
|
||||||
y = x + y; queue.flush(); queue.finish();
|
|
||||||
BENCHMARK_ATIDLAS(y = ad::control(x + y, ad::execution_options_type(0, &events), ad::dispatcher_options_type(true)), 3*N*dtsize/t)
|
|
||||||
/* clAmdBlas */
|
|
||||||
#ifdef BENCH_CLAMDBLAS
|
|
||||||
BENCHMARK_CLAMDBLAS(clAmdBlasSaxpy(N, 1, x.data()(), 0, 1, y.data()(), 0, 1, 1, &queue(), 0, NULL, &event()), 3*N*dtsize/t)
|
|
||||||
#endif
|
|
||||||
/* BLAS */
|
|
||||||
#ifdef BENCH_CBLAS
|
|
||||||
std::vector<float> cx(N), cy(N);
|
|
||||||
ad::copy(x, cx);
|
|
||||||
ad::copy(y, cy);
|
|
||||||
BENCHMARK_HOST(cblas_saxpy(N, 1, cx.data(), 1, cy.data(), 1), 3*N*dtsize/t);
|
|
||||||
#endif
|
|
||||||
/* CuBLAS */
|
|
||||||
#ifdef BENCH_CUBLAS
|
|
||||||
T *cux, *cuy;
|
|
||||||
cudaMalloc((void**) &cux, N * sizeof(T));
|
|
||||||
cudaMalloc((void**) &cuy, N * sizeof(T));
|
|
||||||
BENCHMARK_CUDA(cublasSaxpy(N, 2, cux, 1, cuy, 1), 3*N*dtsize/t)
|
|
||||||
cudaFree(cux);
|
|
||||||
cudaFree(cuy);
|
|
||||||
#endif
|
|
||||||
std::cout << std::endl;
|
|
||||||
}
|
|
||||||
std::cout << "\n\n" << std::flush;
|
|
||||||
|
|
||||||
// std::cout << "#DOT" << std::endl;
|
// BLAS1 Sizes
|
||||||
|
static const std::vector<int> BLAS1_N = create_log_range(1e3, 2e7, 50, 64);
|
||||||
|
|
||||||
|
// BLAS2 Sizes
|
||||||
|
static const std::vector<int> BLAS2_N = make_vector<int>() << 64;
|
||||||
|
static const std::vector<int> BLAS2_M = create_full_range(128, 10000, 64);
|
||||||
|
|
||||||
|
// BLAS3 Sizes
|
||||||
|
static const std::vector<int> BLAS3_M = make_vector<int>() << 1024;
|
||||||
|
static const std::vector<int> BLAS3_N = make_vector<int>() << 128;
|
||||||
|
static const std::vector<int> BLAS3_K = create_full_range(128, 5000, 64);
|
||||||
|
|
||||||
|
// /*---------*/
|
||||||
|
// /*--BLAS1--*/
|
||||||
|
// /*---------*/
|
||||||
|
// std::cout << "#AXPY" << std::endl;
|
||||||
// for(int_t N : create_log_range(1e3, 2e7, 50, 64))
|
// for(int_t N : create_log_range(1e3, 2e7, 50, 64))
|
||||||
// {
|
// {
|
||||||
// std::cout << N;
|
// std::cout << N;
|
||||||
// /* ATIDLAS */
|
|
||||||
// ad::array x(N, dtype), y(N, dtype);
|
// ad::array x(N, dtype), y(N, dtype);
|
||||||
// ad::array scratch(N, dtype);
|
// /* ATIDLAS */
|
||||||
// ad::scalar s(dtype);
|
// y = x + y; queue.finish();
|
||||||
// s = dot(x,y); queue.flush(); queue.finish();
|
// BENCHMARK_ATIDLAS(y = ad::control(x + y, ad::execution_options_type(0, &events), ad::dispatcher_options_type(true)), 3*N*dtsize/t)
|
||||||
// BENCHMARK_OPENCL(s = ad::controller<atidlas::array_expression>(dot(x,y), ad::execution_options_type(0, &event)), 2*N*dtsize/t)
|
|
||||||
// /* clAmdBlas */
|
// /* clAmdBlas */
|
||||||
//#ifdef BENCH_CLAMDBLAS
|
//#ifdef BENCH_CLAMDBLAS
|
||||||
// BENCHMARK_OPENCL(clAmdBlasSdot(N, s.data()(), 0, x.data()(), 0, 1, y.data()(), 0, 1, scratch.data()(), 1, &queue(), 0, NULL, &event()), 2*N*dtsize/t)
|
// BENCHMARK_CLAMDBLAS(clAmdBlasSaxpy(N, 1, x.data()(), 0, 1, y.data()(), 0, 1, 1, &queue(), 0, NULL, &event()), 3*N*dtsize/t)
|
||||||
//#endif
|
//#endif
|
||||||
// /* BLAS */
|
// /* BLAS */
|
||||||
//#ifdef BENCH_CBLAS
|
//#ifdef BENCH_CBLAS
|
||||||
// std::vector<float> cx(N), cy(N);
|
// std::vector<float> cx(N), cy(N);
|
||||||
// ad::copy(x, cx);
|
// ad::copy(x, cx);
|
||||||
// ad::copy(y, cy);
|
// ad::copy(y, cy);
|
||||||
// BENCHMARK_HOST(cblas_sdot(N, cx.data(), 1, cy.data(), 1), 2*N*dtsize/t);
|
// BENCHMARK_HOST(cblas_saxpy(N, 1, cx.data(), 1, cy.data(), 1), 3*N*dtsize/t);
|
||||||
//#endif
|
//#endif
|
||||||
|
// /* CuBLAS */
|
||||||
//#ifdef BENCH_CUBLAS
|
//#ifdef BENCH_CUBLAS
|
||||||
// T *cux, *cuy;
|
// T *cux, *cuy;
|
||||||
// T result;
|
|
||||||
// cudaMalloc((void**) &cux, N * sizeof(T));
|
// cudaMalloc((void**) &cux, N * sizeof(T));
|
||||||
// cudaMalloc((void**) &cuy, N * sizeof(T));
|
// cudaMalloc((void**) &cuy, N * sizeof(T));
|
||||||
// BENCHMARK_CUDA(cublasSdot(N, cux, 1, cuy, 1, &result), 2*N*dtsize/t)
|
// BENCHMARK_CUDA(cublasSaxpy(N, 2, cux, 1, cuy, 1), 3*N*dtsize/t)
|
||||||
// cudaFree(cux);
|
// cudaFree(cux);
|
||||||
// cudaFree(cuy);
|
// cudaFree(cuy);
|
||||||
//#endif
|
//#endif
|
||||||
@@ -155,21 +199,56 @@ void bench(ad::numeric_type dtype)
|
|||||||
// }
|
// }
|
||||||
// std::cout << "\n\n" << std::flush;
|
// std::cout << "\n\n" << std::flush;
|
||||||
|
|
||||||
|
std::cout << "#DOT" << std::endl;
|
||||||
|
for(int_t i = 0 ; i < BLAS1_N.size() ; ++i)
|
||||||
|
{
|
||||||
|
int_t N = BLAS1_N[i];
|
||||||
|
std::cout << N;
|
||||||
|
/* ATIDLAS */
|
||||||
|
ad::array x(N, dtype), y(N, dtype);
|
||||||
|
ad::array scratch(N, dtype);
|
||||||
|
ad::scalar s(dtype);
|
||||||
|
s = dot(x,y); queue.finish();
|
||||||
|
BENCHMARK_ATIDLAS(s = ad::control(dot(x,y), ad::execution_options_type(0, &events), ad::dispatcher_options_type(true)), 2*N*dtsize/t)
|
||||||
|
/* clAmdBlas */
|
||||||
|
#ifdef BENCH_CLAMDBLAS
|
||||||
|
BENCHMARK_CLAMDBLAS(clAmdBlasSdot(N, s.data()(), 0, x.data()(), 0, 1, y.data()(), 0, 1, scratch.data()(), 1, &queue(), 0, NULL, &event()), 2*N*dtsize/t)
|
||||||
|
#endif
|
||||||
|
/* BLAS */
|
||||||
|
#ifdef BENCH_CBLAS
|
||||||
|
std::vector<float> cx(N), cy(N);
|
||||||
|
ad::copy(x, cx);
|
||||||
|
ad::copy(y, cy);
|
||||||
|
BENCHMARK_HOST(cblas_sdot(N, cx.data(), 1, cy.data(), 1), 2*N*dtsize/t);
|
||||||
|
#endif
|
||||||
|
#ifdef BENCH_CUBLAS
|
||||||
|
T *cux, *cuy;
|
||||||
|
T result;
|
||||||
|
cudaMalloc((void**) &cux, N * sizeof(T));
|
||||||
|
cudaMalloc((void**) &cuy, N * sizeof(T));
|
||||||
|
BENCHMARK_CUDA(cublasSdot(N, cux, 1, cuy, 1, &result), 2*N*dtsize/t)
|
||||||
|
cudaFree(cux);
|
||||||
|
cudaFree(cuy);
|
||||||
|
#endif
|
||||||
|
std::cout << std::endl;
|
||||||
|
}
|
||||||
|
std::cout << "\n\n" << std::flush;
|
||||||
|
|
||||||
// /*---------*/
|
// /*---------*/
|
||||||
// /*--BLAS2--*/
|
// /*--BLAS2--*/
|
||||||
// /*---------*/
|
// /*---------*/
|
||||||
// //T-layout
|
// //T-layout
|
||||||
// std::cout << "#GEMV-T" << std::endl;
|
// std::cout << "#GEMV-T" << std::endl;
|
||||||
// for(int_t N: std::vector<int>{64})
|
// for(int_t N: std::vector<int>{128})
|
||||||
// for(int_t M: create_full_range(128, 10000, 64))
|
// for(int_t M: create_full_range(128, 10000, 64))
|
||||||
// {
|
// {
|
||||||
// std::cout << M << "," << N;
|
// std::cout << M << "," << N;
|
||||||
// /* ATIDLAS */
|
// /* ATIDLAS */
|
||||||
// ad::array A(N, M, dtype), y(M, dtype), x(N, dtype);
|
// ad::array A(N, M, dtype), y(M, dtype), x(N, dtype);
|
||||||
// y = dot(trans(A),x); queue.flush(); queue.finish();
|
// y = dot(trans(A),x); queue.finish();
|
||||||
// BENCHMARK_OPENCL(y = ad::controller<atidlas::array_expression>(dot(trans(A),x), ad::execution_options_type(0, &event)),(M*N + M + N)*dtsize/t);
|
// BENCHMARK_ATIDLAS(y = ad::control(dot(trans(A),x), ad::execution_options_type(0, &events), ad::dispatcher_options_type(true)),(M*N + M + N)*dtsize/t);
|
||||||
// #ifdef BENCH_CLAMDBLAS
|
// #ifdef BENCH_CLAMDBLAS
|
||||||
// BENCHMARK_OPENCL(clAmdBlasSgemv(clAmdBlasColumnMajor, clAmdBlasTrans, N, M, 1, A.data()(), A.ld(), x.data()(), 0, 1, 0, y.data()(), 0, 1, 1, &queue(),0, NULL, &event()), (M*N + M + N)*dtsize/t)
|
// BENCHMARK_CLAMDBLAS(clAmdBlasSgemv(clAmdBlasColumnMajor, clAmdBlasTrans, N, M, 1, A.data()(), A.ld(), x.data()(), 0, 1, 0, y.data()(), 0, 1, 1, &queue(),0, NULL, &event()), (M*N + M + N)*dtsize/t)
|
||||||
// #endif
|
// #endif
|
||||||
// #ifdef BENCH_CBLAS
|
// #ifdef BENCH_CBLAS
|
||||||
// std::vector<float> cA(N*M), cx(N), cy(M);
|
// std::vector<float> cA(N*M), cx(N), cy(M);
|
||||||
@@ -192,9 +271,9 @@ void bench(ad::numeric_type dtype)
|
|||||||
// }
|
// }
|
||||||
// std::cout << "\n\n" << std::flush;
|
// std::cout << "\n\n" << std::flush;
|
||||||
|
|
||||||
//// /*---------*/
|
// /*---------*/
|
||||||
//// /*--BLAS3--*/
|
// /*--BLAS3--*/
|
||||||
//// /*---------*/
|
// /*---------*/
|
||||||
// std::cout << "#GEMM-NT" << std::endl;
|
// std::cout << "#GEMM-NT" << std::endl;
|
||||||
// for(std::vector<int_t>::const_iterator Mit = BLAS3_M.begin() ; Mit != BLAS3_M.end() ; ++Mit)
|
// for(std::vector<int_t>::const_iterator Mit = BLAS3_M.begin() ; Mit != BLAS3_M.end() ; ++Mit)
|
||||||
// for(std::vector<int_t>::const_iterator Nit = BLAS3_N.begin() ; Nit != BLAS3_N.end() ; ++Nit)
|
// for(std::vector<int_t>::const_iterator Nit = BLAS3_N.begin() ; Nit != BLAS3_N.end() ; ++Nit)
|
||||||
@@ -240,8 +319,8 @@ int main(int argc, char* argv[])
|
|||||||
std::cerr << "usage : blas-bench [DEVICE_IDX]" << std::endl;
|
std::cerr << "usage : blas-bench [DEVICE_IDX]" << std::endl;
|
||||||
std::cout << "Devices available: " << std::endl;
|
std::cout << "Devices available: " << std::endl;
|
||||||
unsigned int current=0;
|
unsigned int current=0;
|
||||||
for(const auto & queue : queues){
|
for(ad::cl_ext::queues_type::data_type::const_iterator it = queues.begin() ; it != queues.end() ; ++it){
|
||||||
cl::Device device = queue.first.getInfo<CL_CONTEXT_DEVICES>()[0];
|
cl::Device device = it->first.getInfo<CL_CONTEXT_DEVICES>()[0];
|
||||||
std::cout << current++ << ": " << device.getInfo<CL_DEVICE_NAME>() << "(" << cl::Platform(device.getInfo<CL_DEVICE_PLATFORM>()).getInfo<CL_PLATFORM_NAME>() << ")" << std::endl;
|
std::cout << current++ << ": " << device.getInfo<CL_DEVICE_NAME>() << "(" << cl::Platform(device.getInfo<CL_DEVICE_PLATFORM>()).getInfo<CL_PLATFORM_NAME>() << ")" << std::endl;
|
||||||
}
|
}
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
|
@@ -5,83 +5,6 @@
|
|||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
|
||||||
int ceil(int N, int pad)
|
|
||||||
{
|
|
||||||
return (N%pad==0)?N:(N+pad-1)/pad*pad;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<int> create_log_range(int min, int max, int N, int pad)
|
|
||||||
{
|
|
||||||
std::vector<int> res(N);
|
|
||||||
for(int i = 0 ; i < N ; ++i)
|
|
||||||
{
|
|
||||||
res[i] = std::exp(std::log(min) + (float)(std::log(max) - std::log(min))*i/N);
|
|
||||||
res[i] = ceil(res[i], pad);
|
|
||||||
}
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<int> create_full_range(int min, int max, int pad)
|
|
||||||
{
|
|
||||||
std::vector<int> N;
|
|
||||||
for(int i = ceil(min, pad) ; i < ceil(max, pad) ; i+=pad)
|
|
||||||
N.push_back(i);
|
|
||||||
return N;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
class make_vector {
|
|
||||||
public:
|
|
||||||
typedef make_vector<T> my_type;
|
|
||||||
my_type& operator<< (const T& val) {
|
|
||||||
data_.push_back(val);
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
operator std::vector<T>() const {
|
|
||||||
return data_;
|
|
||||||
}
|
|
||||||
private:
|
|
||||||
std::vector<T> data_;
|
|
||||||
};
|
|
||||||
|
|
||||||
// BLAS1 Sizes
|
|
||||||
static const std::vector<int> BLAS1_N = create_log_range(1e3, 2e7, 50, 64);
|
|
||||||
|
|
||||||
// BLAS2 Sizes
|
|
||||||
static const std::vector<int> BLAS2_N = make_vector<int>() << 64;
|
|
||||||
static const std::vector<int> BLAS2_M = create_full_range(128, 10000, 64);
|
|
||||||
|
|
||||||
// BLAS3 Sizes
|
|
||||||
static const std::vector<int> BLAS3_M = make_vector<int>() << 1024;
|
|
||||||
static const std::vector<int> BLAS3_N = make_vector<int>() << 128;
|
|
||||||
static const std::vector<int> BLAS3_K = create_full_range(128, 5000, 64);
|
|
||||||
|
|
||||||
|
|
||||||
double bandwidth(std::size_t N, double t, unsigned int dtsize)
|
|
||||||
{ return N * dtsize * 1e-9 / t; }
|
|
||||||
|
|
||||||
double gflops(double nops, double t)
|
|
||||||
{ return nops * 1e-9 / t; }
|
|
||||||
|
|
||||||
template<class T>
|
|
||||||
T median(std::vector<T> x)
|
|
||||||
{
|
|
||||||
size_t size = x.size();
|
|
||||||
std::sort(x.begin(), x.end());
|
|
||||||
if (size % 2 == 0)
|
|
||||||
return (x[size / 2 - 1] + x[size / 2]) / 2;
|
|
||||||
else
|
|
||||||
return x[size / 2];
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class T>
|
|
||||||
T mean(std::vector<T> x)
|
|
||||||
{
|
|
||||||
T res = 0;
|
|
||||||
int N = x.size();
|
|
||||||
for(int i = 0 ; i < N ; ++i)
|
|
||||||
res += x[i];
|
|
||||||
return res/N;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -160,26 +160,21 @@
|
|||||||
|
|
||||||
#pragma push_macro("max")
|
#pragma push_macro("max")
|
||||||
#undef max
|
#undef max
|
||||||
#if defined(USE_DX_INTEROP)
|
|
||||||
#include <CL/cl_d3d10.h>
|
|
||||||
#include <CL/cl_dx9_media_sharing.h>
|
|
||||||
#endif
|
|
||||||
#endif // _WIN32
|
#endif // _WIN32
|
||||||
|
|
||||||
|
#if defined(__APPLE__) || defined(__MACOSX)
|
||||||
|
#include <OpenCL/opencl.h>
|
||||||
|
#include <libkern/OSAtomic.h>
|
||||||
|
#else
|
||||||
|
#include <CL/opencl.h>
|
||||||
|
#endif // !__APPLE__
|
||||||
|
|
||||||
//
|
//
|
||||||
#if defined(USE_CL_DEVICE_FISSION)
|
#if defined(USE_CL_DEVICE_FISSION)
|
||||||
#include <CL/cl_ext.h>
|
#include <CL/cl_ext.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__APPLE__) || defined(__MACOSX)
|
|
||||||
#include <OpenGL/OpenGL.h>
|
|
||||||
#include <OpenCL/opencl.h>
|
|
||||||
#include <libkern/OSAtomic.h>
|
|
||||||
#else
|
|
||||||
#include <GL/gl.h>
|
|
||||||
#include <CL/opencl.h>
|
|
||||||
#endif // !__APPLE__
|
|
||||||
|
|
||||||
// To avoid accidentally taking ownership of core OpenCL types
|
// To avoid accidentally taking ownership of core OpenCL types
|
||||||
// such as cl_kernel constructors are made explicit
|
// such as cl_kernel constructors are made explicit
|
||||||
// under OpenCL 1.2
|
// under OpenCL 1.2
|
||||||
@@ -367,9 +362,6 @@ static inline cl_int errHandler (cl_int err, const char * errStr = NULL)
|
|||||||
#define __CREATE_BUFFER_ERR __ERR_STR(clCreateBuffer)
|
#define __CREATE_BUFFER_ERR __ERR_STR(clCreateBuffer)
|
||||||
#define __COPY_ERR __ERR_STR(cl::copy)
|
#define __COPY_ERR __ERR_STR(cl::copy)
|
||||||
#define __CREATE_SUBBUFFER_ERR __ERR_STR(clCreateSubBuffer)
|
#define __CREATE_SUBBUFFER_ERR __ERR_STR(clCreateSubBuffer)
|
||||||
#define __CREATE_GL_BUFFER_ERR __ERR_STR(clCreateFromGLBuffer)
|
|
||||||
#define __CREATE_GL_RENDER_BUFFER_ERR __ERR_STR(clCreateFromGLBuffer)
|
|
||||||
#define __GET_GL_OBJECT_INFO_ERR __ERR_STR(clGetGLObjectInfo)
|
|
||||||
#if defined(CL_VERSION_1_2)
|
#if defined(CL_VERSION_1_2)
|
||||||
#define __CREATE_IMAGE_ERR __ERR_STR(clCreateImage)
|
#define __CREATE_IMAGE_ERR __ERR_STR(clCreateImage)
|
||||||
#define __CREATE_GL_TEXTURE_ERR __ERR_STR(clCreateFromGLTexture)
|
#define __CREATE_GL_TEXTURE_ERR __ERR_STR(clCreateFromGLTexture)
|
||||||
@@ -3219,266 +3211,6 @@ public:
|
|||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
#if defined (USE_DX_INTEROP)
|
|
||||||
/*! \brief Class interface for creating OpenCL buffers from ID3D10Buffer's.
|
|
||||||
*
|
|
||||||
* This is provided to facilitate interoperability with Direct3D.
|
|
||||||
*
|
|
||||||
* See Memory for details about copy semantics, etc.
|
|
||||||
*
|
|
||||||
* \see Memory
|
|
||||||
*/
|
|
||||||
class BufferD3D10 : public Buffer
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *PFN_clCreateFromD3D10BufferKHR)(
|
|
||||||
cl_context context, cl_mem_flags flags, ID3D10Buffer* buffer,
|
|
||||||
cl_int* errcode_ret);
|
|
||||||
|
|
||||||
/*! \brief Constructs a BufferD3D10, in a specified context, from a
|
|
||||||
* given ID3D10Buffer.
|
|
||||||
*
|
|
||||||
* Wraps clCreateFromD3D10BufferKHR().
|
|
||||||
*/
|
|
||||||
BufferD3D10(
|
|
||||||
const Context& context,
|
|
||||||
cl_mem_flags flags,
|
|
||||||
ID3D10Buffer* bufobj,
|
|
||||||
cl_int * err = NULL)
|
|
||||||
{
|
|
||||||
static PFN_clCreateFromD3D10BufferKHR pfn_clCreateFromD3D10BufferKHR = NULL;
|
|
||||||
|
|
||||||
#if defined(CL_VERSION_1_2)
|
|
||||||
vector<cl_context_properties> props = context.getInfo<CL_CONTEXT_PROPERTIES>();
|
|
||||||
cl_platform platform = -1;
|
|
||||||
for( int i = 0; i < props.size(); ++i ) {
|
|
||||||
if( props[i] == CL_CONTEXT_PLATFORM ) {
|
|
||||||
platform = props[i+1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
__INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clCreateFromD3D10BufferKHR);
|
|
||||||
#endif
|
|
||||||
#if defined(CL_VERSION_1_1)
|
|
||||||
__INIT_CL_EXT_FCN_PTR(clCreateFromD3D10BufferKHR);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
cl_int error;
|
|
||||||
object_ = pfn_clCreateFromD3D10BufferKHR(
|
|
||||||
context(),
|
|
||||||
flags,
|
|
||||||
bufobj,
|
|
||||||
&error);
|
|
||||||
|
|
||||||
detail::errHandler(error, __CREATE_GL_BUFFER_ERR);
|
|
||||||
if (err != NULL) {
|
|
||||||
*err = error;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//! \brief Default constructor - initializes to NULL.
|
|
||||||
BufferD3D10() : Buffer() { }
|
|
||||||
|
|
||||||
/*! \brief Copy constructor - performs shallow copy.
|
|
||||||
*
|
|
||||||
* See Memory for further details.
|
|
||||||
*/
|
|
||||||
BufferD3D10(const BufferD3D10& buffer) : Buffer(buffer) { }
|
|
||||||
|
|
||||||
/*! \brief Constructor from cl_mem - takes ownership.
|
|
||||||
*
|
|
||||||
* See Memory for further details.
|
|
||||||
*/
|
|
||||||
__CL_EXPLICIT_CONSTRUCTORS BufferD3D10(const cl_mem& buffer) : Buffer(buffer) { }
|
|
||||||
|
|
||||||
/*! \brief Assignment from BufferD3D10 - performs shallow copy.
|
|
||||||
*
|
|
||||||
* See Memory for further details.
|
|
||||||
*/
|
|
||||||
BufferD3D10& operator = (const BufferD3D10& rhs)
|
|
||||||
{
|
|
||||||
if (this != &rhs) {
|
|
||||||
Buffer::operator=(rhs);
|
|
||||||
}
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*! \brief Assignment from cl_mem - performs shallow copy.
|
|
||||||
*
|
|
||||||
* See Memory for further details.
|
|
||||||
*/
|
|
||||||
BufferD3D10& operator = (const cl_mem& rhs)
|
|
||||||
{
|
|
||||||
Buffer::operator=(rhs);
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/*! \brief Class interface for GL Buffer Memory Objects.
|
|
||||||
*
|
|
||||||
* This is provided to facilitate interoperability with OpenGL.
|
|
||||||
*
|
|
||||||
* See Memory for details about copy semantics, etc.
|
|
||||||
*
|
|
||||||
* \see Memory
|
|
||||||
*/
|
|
||||||
class BufferGL : public Buffer
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
/*! \brief Constructs a BufferGL in a specified context, from a given
|
|
||||||
* GL buffer.
|
|
||||||
*
|
|
||||||
* Wraps clCreateFromGLBuffer().
|
|
||||||
*/
|
|
||||||
BufferGL(
|
|
||||||
const Context& context,
|
|
||||||
cl_mem_flags flags,
|
|
||||||
GLuint bufobj,
|
|
||||||
cl_int * err = NULL)
|
|
||||||
{
|
|
||||||
cl_int error;
|
|
||||||
object_ = ::clCreateFromGLBuffer(
|
|
||||||
context(),
|
|
||||||
flags,
|
|
||||||
bufobj,
|
|
||||||
&error);
|
|
||||||
|
|
||||||
detail::errHandler(error, __CREATE_GL_BUFFER_ERR);
|
|
||||||
if (err != NULL) {
|
|
||||||
*err = error;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//! \brief Default constructor - initializes to NULL.
|
|
||||||
BufferGL() : Buffer() { }
|
|
||||||
|
|
||||||
/*! \brief Copy constructor - performs shallow copy.
|
|
||||||
*
|
|
||||||
* See Memory for further details.
|
|
||||||
*/
|
|
||||||
BufferGL(const BufferGL& buffer) : Buffer(buffer) { }
|
|
||||||
|
|
||||||
/*! \brief Constructor from cl_mem - takes ownership.
|
|
||||||
*
|
|
||||||
* See Memory for further details.
|
|
||||||
*/
|
|
||||||
__CL_EXPLICIT_CONSTRUCTORS BufferGL(const cl_mem& buffer) : Buffer(buffer) { }
|
|
||||||
|
|
||||||
/*! \brief Assignment from BufferGL - performs shallow copy.
|
|
||||||
*
|
|
||||||
* See Memory for further details.
|
|
||||||
*/
|
|
||||||
BufferGL& operator = (const BufferGL& rhs)
|
|
||||||
{
|
|
||||||
if (this != &rhs) {
|
|
||||||
Buffer::operator=(rhs);
|
|
||||||
}
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*! \brief Assignment from cl_mem - performs shallow copy.
|
|
||||||
*
|
|
||||||
* See Memory for further details.
|
|
||||||
*/
|
|
||||||
BufferGL& operator = (const cl_mem& rhs)
|
|
||||||
{
|
|
||||||
Buffer::operator=(rhs);
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
//! \brief Wrapper for clGetGLObjectInfo().
|
|
||||||
cl_int getObjectInfo(
|
|
||||||
cl_gl_object_type *type,
|
|
||||||
GLuint * gl_object_name)
|
|
||||||
{
|
|
||||||
return detail::errHandler(
|
|
||||||
::clGetGLObjectInfo(object_,type,gl_object_name),
|
|
||||||
__GET_GL_OBJECT_INFO_ERR);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
/*! \brief Class interface for GL Render Buffer Memory Objects.
|
|
||||||
*
|
|
||||||
* This is provided to facilitate interoperability with OpenGL.
|
|
||||||
*
|
|
||||||
* See Memory for details about copy semantics, etc.
|
|
||||||
*
|
|
||||||
* \see Memory
|
|
||||||
*/
|
|
||||||
class BufferRenderGL : public Buffer
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
/*! \brief Constructs a BufferRenderGL in a specified context, from a given
|
|
||||||
* GL Renderbuffer.
|
|
||||||
*
|
|
||||||
* Wraps clCreateFromGLRenderbuffer().
|
|
||||||
*/
|
|
||||||
BufferRenderGL(
|
|
||||||
const Context& context,
|
|
||||||
cl_mem_flags flags,
|
|
||||||
GLuint bufobj,
|
|
||||||
cl_int * err = NULL)
|
|
||||||
{
|
|
||||||
cl_int error;
|
|
||||||
object_ = ::clCreateFromGLRenderbuffer(
|
|
||||||
context(),
|
|
||||||
flags,
|
|
||||||
bufobj,
|
|
||||||
&error);
|
|
||||||
|
|
||||||
detail::errHandler(error, __CREATE_GL_RENDER_BUFFER_ERR);
|
|
||||||
if (err != NULL) {
|
|
||||||
*err = error;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//! \brief Default constructor - initializes to NULL.
|
|
||||||
BufferRenderGL() : Buffer() { }
|
|
||||||
|
|
||||||
/*! \brief Copy constructor - performs shallow copy.
|
|
||||||
*
|
|
||||||
* See Memory for further details.
|
|
||||||
*/
|
|
||||||
BufferRenderGL(const BufferGL& buffer) : Buffer(buffer) { }
|
|
||||||
|
|
||||||
/*! \brief Constructor from cl_mem - takes ownership.
|
|
||||||
*
|
|
||||||
* See Memory for further details.
|
|
||||||
*/
|
|
||||||
__CL_EXPLICIT_CONSTRUCTORS BufferRenderGL(const cl_mem& buffer) : Buffer(buffer) { }
|
|
||||||
|
|
||||||
/*! \brief Assignment from BufferGL - performs shallow copy.
|
|
||||||
*
|
|
||||||
* See Memory for further details.
|
|
||||||
*/
|
|
||||||
BufferRenderGL& operator = (const BufferRenderGL& rhs)
|
|
||||||
{
|
|
||||||
if (this != &rhs) {
|
|
||||||
Buffer::operator=(rhs);
|
|
||||||
}
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*! \brief Assignment from cl_mem - performs shallow copy.
|
|
||||||
*
|
|
||||||
* See Memory for further details.
|
|
||||||
*/
|
|
||||||
BufferRenderGL& operator = (const cl_mem& rhs)
|
|
||||||
{
|
|
||||||
Buffer::operator=(rhs);
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
//! \brief Wrapper for clGetGLObjectInfo().
|
|
||||||
cl_int getObjectInfo(
|
|
||||||
cl_gl_object_type *type,
|
|
||||||
GLuint * gl_object_name)
|
|
||||||
{
|
|
||||||
return detail::errHandler(
|
|
||||||
::clGetGLObjectInfo(object_,type,gl_object_name),
|
|
||||||
__GET_GL_OBJECT_INFO_ERR);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
/*! \brief C++ base class for Image Memory objects.
|
/*! \brief C++ base class for Image Memory objects.
|
||||||
*
|
*
|
||||||
@@ -3869,86 +3601,6 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
#if !defined(CL_VERSION_1_2)
|
|
||||||
/*! \brief Class interface for GL 2D Image Memory objects.
|
|
||||||
*
|
|
||||||
* This is provided to facilitate interoperability with OpenGL.
|
|
||||||
*
|
|
||||||
* See Memory for details about copy semantics, etc.
|
|
||||||
*
|
|
||||||
* \see Memory
|
|
||||||
* \note Deprecated for OpenCL 1.2. Please use ImageGL instead.
|
|
||||||
*/
|
|
||||||
class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED Image2DGL CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED : public Image2D
{
public:
    /*! \brief Builds an Image2DGL in \a context from an existing GL texture.
     *
     *  Wraps clCreateFromGLTexture2D(). The creation status is handed to
     *  detail::errHandler and, when \a err is non-NULL, also stored in *err.
     */
    Image2DGL(
        const Context& context,
        cl_mem_flags flags,
        GLenum target,
        GLint  miplevel,
        GLuint texobj,
        cl_int * err = NULL)
    {
        cl_int status;
        object_ = ::clCreateFromGLTexture2D(
            context(), flags, target, miplevel, texobj, &status);

        detail::errHandler(status, __CREATE_GL_TEXTURE_2D_ERR);
        if (err != NULL)
            *err = status;
    }

    //! \brief Default constructor - delegates to the default Image2D.
    Image2DGL() : Image2D() { }

    /*! \brief Copy constructor - delegates to the Image2D copy constructor.
     *
     *  See Memory for further details.
     */
    Image2DGL(const Image2DGL& image) : Image2D(image) { }

    /*! \brief Constructor from a raw cl_mem handle.
     *
     *  See Memory for further details.
     */
    __CL_EXPLICIT_CONSTRUCTORS Image2DGL(const cl_mem& image) : Image2D(image) { }

    /*! \brief Assignment from another Image2DGL; self-assignment is a no-op.
     *
     *  See Memory for further details.
     */
    Image2DGL& operator = (const Image2DGL& rhs)
    {
        if (this == &rhs)
            return *this;

        Image2D::operator=(rhs);
        return *this;
    }

    /*! \brief Assignment from a raw cl_mem handle.
     *
     *  See Memory for further details.
     */
    Image2DGL& operator = (const cl_mem& rhs)
    {
        Image2D::operator=(rhs);
        return *this;
    }
};
|
|
||||||
#endif // #if !defined(CL_VERSION_1_2)
|
|
||||||
|
|
||||||
#if defined(CL_VERSION_1_2)
|
#if defined(CL_VERSION_1_2)
|
||||||
/*! \class Image2DArray
|
/*! \class Image2DArray
|
||||||
@@ -4138,246 +3790,6 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
#if !defined(CL_VERSION_1_2)
|
|
||||||
/*! \brief Class interface for GL 3D Image Memory objects.
|
|
||||||
*
|
|
||||||
* This is provided to facilitate interoperability with OpenGL.
|
|
||||||
*
|
|
||||||
* See Memory for details about copy semantics, etc.
|
|
||||||
*
|
|
||||||
* \see Memory
|
|
||||||
*/
|
|
||||||
class Image3DGL : public Image3D
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
/*! \brief Constructs an Image3DGL in a specified context, from a given
|
|
||||||
* GL Texture.
|
|
||||||
*
|
|
||||||
* Wraps clCreateFromGLTexture3D().
|
|
||||||
*/
|
|
||||||
Image3DGL(
|
|
||||||
const Context& context,
|
|
||||||
cl_mem_flags flags,
|
|
||||||
GLenum target,
|
|
||||||
GLint miplevel,
|
|
||||||
GLuint texobj,
|
|
||||||
cl_int * err = NULL)
|
|
||||||
{
|
|
||||||
cl_int error;
|
|
||||||
object_ = ::clCreateFromGLTexture3D(
|
|
||||||
context(),
|
|
||||||
flags,
|
|
||||||
target,
|
|
||||||
miplevel,
|
|
||||||
texobj,
|
|
||||||
&error);
|
|
||||||
|
|
||||||
detail::errHandler(error, __CREATE_GL_TEXTURE_3D_ERR);
|
|
||||||
if (err != NULL) {
|
|
||||||
*err = error;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//! \brief Default constructor - initializes to NULL.
|
|
||||||
Image3DGL() : Image3D() { }
|
|
||||||
|
|
||||||
/*! \brief Copy constructor - performs shallow copy.
|
|
||||||
*
|
|
||||||
* See Memory for further details.
|
|
||||||
*/
|
|
||||||
Image3DGL(const Image3DGL& image) : Image3D(image) { }
|
|
||||||
|
|
||||||
/*! \brief Constructor from cl_mem - takes ownership.
|
|
||||||
*
|
|
||||||
* See Memory for further details.
|
|
||||||
*/
|
|
||||||
__CL_EXPLICIT_CONSTRUCTORS Image3DGL(const cl_mem& image) : Image3D(image) { }
|
|
||||||
|
|
||||||
/*! \brief Assignment from Image3DGL - performs shallow copy.
|
|
||||||
*
|
|
||||||
* See Memory for further details.
|
|
||||||
*/
|
|
||||||
Image3DGL& operator = (const Image3DGL& rhs)
|
|
||||||
{
|
|
||||||
if (this != &rhs) {
|
|
||||||
Image3D::operator=(rhs);
|
|
||||||
}
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*! \brief Assignment from cl_mem - performs shallow copy.
|
|
||||||
*
|
|
||||||
* See Memory for further details.
|
|
||||||
*/
|
|
||||||
Image3DGL& operator = (const cl_mem& rhs)
|
|
||||||
{
|
|
||||||
Image3D::operator=(rhs);
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
#endif // #if !defined(CL_VERSION_1_2)
|
|
||||||
|
|
||||||
#if defined(CL_VERSION_1_2)
|
|
||||||
/*! \class ImageGL
|
|
||||||
* \brief general image interface for GL interop.
|
|
||||||
* We abstract the 2D and 3D GL images into a single instance here
|
|
||||||
* that wraps all GL sourced images on the grounds that setup information
|
|
||||||
* was performed by OpenCL anyway.
|
|
||||||
*/
|
|
||||||
class ImageGL : public Image
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
ImageGL(
|
|
||||||
const Context& context,
|
|
||||||
cl_mem_flags flags,
|
|
||||||
GLenum target,
|
|
||||||
GLint miplevel,
|
|
||||||
GLuint texobj,
|
|
||||||
cl_int * err = NULL)
|
|
||||||
{
|
|
||||||
cl_int error;
|
|
||||||
object_ = ::clCreateFromGLTexture(
|
|
||||||
context(),
|
|
||||||
flags,
|
|
||||||
target,
|
|
||||||
miplevel,
|
|
||||||
texobj,
|
|
||||||
&error);
|
|
||||||
|
|
||||||
detail::errHandler(error, __CREATE_GL_TEXTURE_ERR);
|
|
||||||
if (err != NULL) {
|
|
||||||
*err = error;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ImageGL() : Image() { }
|
|
||||||
|
|
||||||
ImageGL(const ImageGL& image) : Image(image) { }
|
|
||||||
|
|
||||||
__CL_EXPLICIT_CONSTRUCTORS ImageGL(const cl_mem& image) : Image(image) { }
|
|
||||||
|
|
||||||
ImageGL& operator = (const ImageGL& rhs)
|
|
||||||
{
|
|
||||||
if (this != &rhs) {
|
|
||||||
Image::operator=(rhs);
|
|
||||||
}
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
ImageGL& operator = (const cl_mem& rhs)
|
|
||||||
{
|
|
||||||
Image::operator=(rhs);
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
#endif // #if defined(CL_VERSION_1_2)
|
|
||||||
|
|
||||||
/*! \brief Class interface for cl_sampler.
|
|
||||||
*
|
|
||||||
* \note Copies of these objects are shallow, meaning that the copy will refer
|
|
||||||
* to the same underlying cl_sampler as the original. For details, see
|
|
||||||
* clRetainSampler() and clReleaseSampler().
|
|
||||||
*
|
|
||||||
* \see cl_sampler
|
|
||||||
*/
|
|
||||||
class Sampler : public detail::Wrapper<cl_sampler>
{
public:
    /*! \brief Destructor.
     *
     *  This calls clReleaseSampler() on the value held by this instance.
     */
    ~Sampler() { }

    //! \brief Default constructor - initializes to NULL.
    Sampler() { }

    /*! \brief Constructs a Sampler in a specified context.
     *
     *  Wraps clCreateSampler(). The creation status is handed to
     *  detail::errHandler and, when \a err is non-NULL, also stored in *err.
     */
    Sampler(
        const Context& context,
        cl_bool normalized_coords,
        cl_addressing_mode addressing_mode,
        cl_filter_mode filter_mode,
        cl_int* err = NULL)
    {
        cl_int error;
        object_ = ::clCreateSampler(
            context(),
            normalized_coords,
            addressing_mode,
            filter_mode,
            &error);

        detail::errHandler(error, __CREATE_SAMPLER_ERR);
        if (err != NULL) {
            *err = error;
        }
    }

    /*! \brief Copy constructor - performs shallow copy.
     *
     *  This calls clRetainSampler() on the parameter's cl_sampler.
     */
    Sampler(const Sampler& sampler) : detail::Wrapper<cl_type>(sampler) { }

    /*! \brief Constructor from cl_sampler - takes ownership.
     *
     *  This effectively transfers ownership of a refcount on the cl_sampler
     *  into the new Sampler object.
     */
    Sampler(const cl_sampler& sampler) : detail::Wrapper<cl_type>(sampler) { }

    /*! \brief Assignment operator from Sampler.
     *
     *  This calls clRetainSampler() on the parameter and clReleaseSampler()
     *  on the previous value held by this instance.
     */
    Sampler& operator = (const Sampler& rhs)
    {
        if (this != &rhs) {
            detail::Wrapper<cl_type>::operator=(rhs);
        }
        return *this;
    }

    /*! \brief Assignment operator from cl_sampler - takes ownership.
     *
     *  This effectively transfers ownership of a refcount on the rhs and calls
     *  clReleaseSampler() on the value previously held by this instance.
     */
    Sampler& operator = (const cl_sampler& rhs)
    {
        detail::Wrapper<cl_type>::operator=(rhs);
        return *this;
    }

    /*! \brief Wrapper for clGetSamplerInfo().
     *
     *  \param name  the cl_sampler_info item to query.
     *  \param param destination for the queried value.
     *  \return the query status after it has passed through detail::errHandler.
     */
    template <typename T>
    cl_int getInfo(cl_sampler_info name, T* param) const
    {
        return detail::errHandler(
            detail::getInfo(&::clGetSamplerInfo, object_, name, param),
            __GET_SAMPLER_INFO_ERR);
    }

    /*! \brief Wrapper for clGetSamplerInfo() that returns by value.
     *
     *  \param err optional out-parameter receiving the query status.
     *  \return the queried value, typed through detail::param_traits.
     */
    template <cl_int name> typename
    detail::param_traits<detail::cl_sampler_info, name>::param_type
    getInfo(cl_int* err = NULL) const
    {
        typename detail::param_traits<
            detail::cl_sampler_info, name>::param_type param;
        // Pass the address of the local so the pointer overload can fill it in.
        cl_int result = getInfo(name, &param);
        if (err != NULL) {
            *err = result;
        }
        return param;
    }
};
|
|
||||||
|
|
||||||
class Program;
|
class Program;
|
||||||
class CommandQueue;
|
class CommandQueue;
|
||||||
class Kernel;
|
class Kernel;
|
||||||
@@ -6012,126 +5424,8 @@ public:
|
|||||||
}
|
}
|
||||||
#endif // #if defined(CL_VERSION_1_1)
|
#endif // #if defined(CL_VERSION_1_1)
|
||||||
|
|
||||||
/*! \brief Wrapper for clEnqueueAcquireGLObjects().
 *
 *  \param mem_objects optional list of memory objects to acquire.
 *  \param events      optional wait list.
 *  \param event       optional out-parameter; assigned only on CL_SUCCESS.
 *  \return the status after it has passed through detail::errHandler.
 *
 *  A NULL or empty list is forwarded as (0, NULL). front() is never called
 *  on an empty vector, and no non-NULL pointer is paired with a zero count.
 */
cl_int enqueueAcquireGLObjects(
    const VECTOR_CLASS<Memory>* mem_objects = NULL,
    const VECTOR_CLASS<Event>* events = NULL,
    Event* event = NULL) const
{
    const bool have_objects = (mem_objects != NULL) && !mem_objects->empty();
    const bool have_events  = (events != NULL) && !events->empty();

    cl_event tmp;
    cl_int err = detail::errHandler(
        ::clEnqueueAcquireGLObjects(
            object_,
            have_objects ? (cl_uint) mem_objects->size() : 0,
            have_objects ? (const cl_mem *) &mem_objects->front() : NULL,
            have_events ? (cl_uint) events->size() : 0,
            have_events ? (cl_event*) &events->front() : NULL,
            (event != NULL) ? &tmp : NULL),
        __ENQUEUE_ACQUIRE_GL_ERR);

    if (event != NULL && err == CL_SUCCESS)
        *event = tmp;

    return err;
}
|
|
||||||
|
|
||||||
/*! \brief Wrapper for clEnqueueReleaseGLObjects().
 *
 *  \param mem_objects optional list of memory objects to release.
 *  \param events      optional wait list.
 *  \param event       optional out-parameter; assigned only on CL_SUCCESS.
 *  \return the status after it has passed through detail::errHandler.
 *
 *  A NULL or empty list is forwarded as (0, NULL). front() is never called
 *  on an empty vector, and no non-NULL pointer is paired with a zero count.
 */
cl_int enqueueReleaseGLObjects(
    const VECTOR_CLASS<Memory>* mem_objects = NULL,
    const VECTOR_CLASS<Event>* events = NULL,
    Event* event = NULL) const
{
    const bool have_objects = (mem_objects != NULL) && !mem_objects->empty();
    const bool have_events  = (events != NULL) && !events->empty();

    cl_event tmp;
    cl_int err = detail::errHandler(
        ::clEnqueueReleaseGLObjects(
            object_,
            have_objects ? (cl_uint) mem_objects->size() : 0,
            have_objects ? (const cl_mem *) &mem_objects->front() : NULL,
            have_events ? (cl_uint) events->size() : 0,
            have_events ? (cl_event*) &events->front() : NULL,
            (event != NULL) ? &tmp : NULL),
        __ENQUEUE_RELEASE_GL_ERR);

    if (event != NULL && err == CL_SUCCESS)
        *event = tmp;

    return err;
}
|
|
||||||
|
|
||||||
#if defined (USE_DX_INTEROP)
|
|
||||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueAcquireD3D10ObjectsKHR)(
|
|
||||||
cl_command_queue command_queue, cl_uint num_objects,
|
|
||||||
const cl_mem* mem_objects, cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event* event_wait_list, cl_event* event);
|
|
||||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)(
|
|
||||||
cl_command_queue command_queue, cl_uint num_objects,
|
|
||||||
const cl_mem* mem_objects, cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event* event_wait_list, cl_event* event);
|
|
||||||
|
|
||||||
/*! \brief Enqueues an acquire of D3D10 objects through the
 *         clEnqueueAcquireD3D10ObjectsKHR extension entry point.
 *
 *  \param mem_objects optional list of memory objects to acquire.
 *  \param events      optional wait list.
 *  \param event       optional out-parameter; assigned only on CL_SUCCESS.
 *  \return the status after it has passed through detail::errHandler.
 *
 *  A NULL or empty list is forwarded as (0, NULL), so front() is never
 *  called on an empty vector.
 */
cl_int enqueueAcquireD3D10Objects(
    const VECTOR_CLASS<Memory>* mem_objects = NULL,
    const VECTOR_CLASS<Event>* events = NULL,
    Event* event = NULL) const
{
    // Extension function pointer, resolved by the __INIT_CL_EXT_FCN_PTR* macros.
    static PFN_clEnqueueAcquireD3D10ObjectsKHR pfn_clEnqueueAcquireD3D10ObjectsKHR = NULL;
#if defined(CL_VERSION_1_2)
    cl_context context = getInfo<CL_QUEUE_CONTEXT>();
    cl::Device device(getInfo<CL_QUEUE_DEVICE>());
    cl_platform_id platform = device.getInfo<CL_DEVICE_PLATFORM>();
    __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clEnqueueAcquireD3D10ObjectsKHR);
#endif
#if defined(CL_VERSION_1_1)
    __INIT_CL_EXT_FCN_PTR(clEnqueueAcquireD3D10ObjectsKHR);
#endif

    const bool have_objects = (mem_objects != NULL) && !mem_objects->empty();
    const bool have_events  = (events != NULL) && !events->empty();

    cl_event tmp;
    cl_int err = detail::errHandler(
        pfn_clEnqueueAcquireD3D10ObjectsKHR(
            object_,
            have_objects ? (cl_uint) mem_objects->size() : 0,
            have_objects ? (const cl_mem *) &mem_objects->front() : NULL,
            have_events ? (cl_uint) events->size() : 0,
            have_events ? (cl_event*) &events->front() : NULL,
            (event != NULL) ? &tmp : NULL),
        __ENQUEUE_ACQUIRE_GL_ERR);

    if (event != NULL && err == CL_SUCCESS)
        *event = tmp;

    return err;
}
|
|
||||||
|
|
||||||
/*! \brief Enqueues a release of D3D10 objects through the
 *         clEnqueueReleaseD3D10ObjectsKHR extension entry point.
 *
 *  \param mem_objects optional list of memory objects to release.
 *  \param events      optional wait list.
 *  \param event       optional out-parameter; assigned only on CL_SUCCESS.
 *  \return the status after it has passed through detail::errHandler.
 *
 *  A NULL or empty list is forwarded as (0, NULL), so front() is never
 *  called on an empty vector.
 */
cl_int enqueueReleaseD3D10Objects(
    const VECTOR_CLASS<Memory>* mem_objects = NULL,
    const VECTOR_CLASS<Event>* events = NULL,
    Event* event = NULL) const
{
    // Extension function pointer, resolved by the __INIT_CL_EXT_FCN_PTR* macros.
    static PFN_clEnqueueReleaseD3D10ObjectsKHR pfn_clEnqueueReleaseD3D10ObjectsKHR = NULL;
#if defined(CL_VERSION_1_2)
    cl_context context = getInfo<CL_QUEUE_CONTEXT>();
    cl::Device device(getInfo<CL_QUEUE_DEVICE>());
    cl_platform_id platform = device.getInfo<CL_DEVICE_PLATFORM>();
    __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clEnqueueReleaseD3D10ObjectsKHR);
#endif // #if defined(CL_VERSION_1_2)
#if defined(CL_VERSION_1_1)
    __INIT_CL_EXT_FCN_PTR(clEnqueueReleaseD3D10ObjectsKHR);
#endif // #if defined(CL_VERSION_1_1)

    const bool have_objects = (mem_objects != NULL) && !mem_objects->empty();
    const bool have_events  = (events != NULL) && !events->empty();

    cl_event tmp;
    cl_int err = detail::errHandler(
        pfn_clEnqueueReleaseD3D10ObjectsKHR(
            object_,
            have_objects ? (cl_uint) mem_objects->size() : 0,
            have_objects ? (const cl_mem *) &mem_objects->front() : NULL,
            have_events ? (cl_uint) events->size() : 0,
            have_events ? (cl_event*) &events->front() : NULL,
            (event != NULL) ? &tmp : NULL),
        __ENQUEUE_RELEASE_GL_ERR);

    if (event != NULL && err == CL_SUCCESS)
        *event = tmp;

    return err;
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Deprecated APIs for 1.2
|
* Deprecated APIs for 1.2
|
||||||
|
@@ -2,7 +2,6 @@
|
|||||||
#define ATIDLAS_ARRAY_H_
|
#define ATIDLAS_ARRAY_H_
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <type_traits>
|
|
||||||
#include <CL/cl.hpp>
|
#include <CL/cl.hpp>
|
||||||
#include "atidlas/types.h"
|
#include "atidlas/types.h"
|
||||||
#include "atidlas/cl_ext/backend.h"
|
#include "atidlas/cl_ext/backend.h"
|
||||||
@@ -17,8 +16,7 @@ class scalar;
|
|||||||
class array: public array_base
|
class array: public array_base
|
||||||
{
|
{
|
||||||
friend array reshape(array const &, int_t, int_t);
|
friend array reshape(array const &, int_t, int_t);
|
||||||
template<class T>
|
|
||||||
struct is_array { enum{ value = std::is_same<T, array>::value || std::is_same<T, array_expression>::value}; };
|
|
||||||
public:
|
public:
|
||||||
//1D Constructors
|
//1D Constructors
|
||||||
array(int_t size1, numeric_type dtype, cl::Context context = cl_ext::default_context());
|
array(int_t size1, numeric_type dtype, cl::Context context = cl_ext::default_context());
|
||||||
|
@@ -20,7 +20,7 @@ enum leaf_t
|
|||||||
class mapped_object;
|
class mapped_object;
|
||||||
|
|
||||||
typedef std::pair<int_t, leaf_t> mapping_key;
|
typedef std::pair<int_t, leaf_t> mapping_key;
|
||||||
typedef std::map<mapping_key, std::shared_ptr<mapped_object> > mapping_type;
|
typedef std::map<mapping_key, tools::shared_ptr<mapped_object> > mapping_type;
|
||||||
|
|
||||||
/** @brief Mapped Object
|
/** @brief Mapped Object
|
||||||
*
|
*
|
||||||
|
@@ -4,11 +4,11 @@
|
|||||||
|
|
||||||
#include <list>
|
#include <list>
|
||||||
#include <set>
|
#include <set>
|
||||||
|
#include <CL/cl.hpp>
|
||||||
|
|
||||||
#include "atidlas/types.h"
|
#include "atidlas/types.h"
|
||||||
#include "atidlas/backend/parse.h"
|
#include "atidlas/backend/parse.h"
|
||||||
#include "atidlas/backend/stream.h"
|
#include "atidlas/backend/stream.h"
|
||||||
#include <CL/cl.hpp>
|
|
||||||
#include "atidlas/cl_ext/lazy_compiler.h"
|
#include "atidlas/cl_ext/lazy_compiler.h"
|
||||||
#include "atidlas/symbolic/expression.h"
|
#include "atidlas/symbolic/expression.h"
|
||||||
|
|
||||||
@@ -75,15 +75,15 @@ protected:
|
|||||||
/** @brief Accessor for the numeric type */
|
/** @brief Accessor for the numeric type */
|
||||||
numeric_type get_numeric_type(atidlas::array_expression const * array_expression, int_t root_idx) const;
|
numeric_type get_numeric_type(atidlas::array_expression const * array_expression, int_t root_idx) const;
|
||||||
/** @brief Creates a binary leaf */
|
/** @brief Creates a binary leaf */
|
||||||
template<class T> std::shared_ptr<mapped_object> binary_leaf(atidlas::array_expression const * array_expression, int_t root_idx, mapping_type const * mapping) const;
|
template<class T> tools::shared_ptr<mapped_object> binary_leaf(atidlas::array_expression const * array_expression, int_t root_idx, mapping_type const * mapping) const;
|
||||||
/** @brief Creates a value scalar mapping */
|
/** @brief Creates a value scalar mapping */
|
||||||
std::shared_ptr<mapped_object> create(numeric_type dtype, values_holder) const;
|
tools::shared_ptr<mapped_object> create(numeric_type dtype, values_holder) const;
|
||||||
/** @brief Creates a vector mapping */
|
/** @brief Creates a vector mapping */
|
||||||
std::shared_ptr<mapped_object> create(array_infos const &) const;
|
tools::shared_ptr<mapped_object> create(array_infos const &) const;
|
||||||
/** @brief Creates a tuple mapping */
|
/** @brief Creates a tuple mapping */
|
||||||
std::shared_ptr<mapped_object> create(repeat_infos const &) const;
|
tools::shared_ptr<mapped_object> create(repeat_infos const &) const;
|
||||||
/** @brief Creates a mapping */
|
/** @brief Creates a mapping */
|
||||||
std::shared_ptr<mapped_object> create(lhs_rhs_element const &) const;
|
tools::shared_ptr<mapped_object> create(lhs_rhs_element const &) const;
|
||||||
public:
|
public:
|
||||||
map_functor(symbolic_binder & binder, mapping_type & mapping);
|
map_functor(symbolic_binder & binder, mapping_type & mapping);
|
||||||
/** @brief Functor for traversing the tree */
|
/** @brief Functor for traversing the tree */
|
||||||
@@ -143,7 +143,7 @@ protected:
|
|||||||
static bool is_reduction(array_expression::node const & node);
|
static bool is_reduction(array_expression::node const & node);
|
||||||
static bool is_index_reduction(op_element const & op);
|
static bool is_index_reduction(op_element const & op);
|
||||||
|
|
||||||
std::shared_ptr<symbolic_binder> make_binder();
|
tools::shared_ptr<symbolic_binder> make_binder();
|
||||||
static std::string vstore(unsigned int simd_width, std::string const & value, std::string const & offset, std::string const & ptr);
|
static std::string vstore(unsigned int simd_width, std::string const & value, std::string const & offset, std::string const & ptr);
|
||||||
static std::string vload(unsigned int simd_width, std::string const & offset, std::string const & ptr);
|
static std::string vload(unsigned int simd_width, std::string const & offset, std::string const & ptr);
|
||||||
static std::string append_width(std::string const & str, unsigned int width);
|
static std::string append_width(std::string const & str, unsigned int width);
|
||||||
@@ -164,7 +164,7 @@ public:
|
|||||||
virtual int check_invalid(expressions_tuple const & expressions, cl::Device const & device) const = 0;
|
virtual int check_invalid(expressions_tuple const & expressions, cl::Device const & device) const = 0;
|
||||||
virtual void enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs,
|
virtual void enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs,
|
||||||
unsigned int label, controller<expressions_tuple> const & expressions) = 0;
|
unsigned int label, controller<expressions_tuple> const & expressions) = 0;
|
||||||
virtual std::shared_ptr<base> clone() const = 0;
|
virtual tools::shared_ptr<base> clone() const = 0;
|
||||||
private:
|
private:
|
||||||
binding_policy_t binding_policy_;
|
binding_policy_t binding_policy_;
|
||||||
};
|
};
|
||||||
@@ -180,7 +180,7 @@ public:
|
|||||||
base_impl(parameters_type const & parameters, binding_policy_t binding_policy);
|
base_impl(parameters_type const & parameters, binding_policy_t binding_policy);
|
||||||
int_t local_size_0() const;
|
int_t local_size_0() const;
|
||||||
int_t local_size_1() const;
|
int_t local_size_1() const;
|
||||||
std::shared_ptr<base> clone() const;
|
tools::shared_ptr<base> clone() const;
|
||||||
/** @brief returns whether or not the profile has undefined behavior on particular device */
|
/** @brief returns whether or not the profile has undefined behavior on particular device */
|
||||||
int check_invalid(expressions_tuple const & expressions, cl::Device const & device) const;
|
int check_invalid(expressions_tuple const & expressions, cl::Device const & device) const;
|
||||||
protected:
|
protected:
|
||||||
|
@@ -16,7 +16,7 @@ namespace atidlas
|
|||||||
|
|
||||||
class model
|
class model
|
||||||
{
|
{
|
||||||
typedef std::vector< std::shared_ptr<base> > templates_container;
|
typedef std::vector< tools::shared_ptr<base> > templates_container;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::string define_extension(std::string const & extensions, std::string const & ext);
|
std::string define_extension(std::string const & extensions, std::string const & ext);
|
||||||
@@ -24,21 +24,21 @@ namespace atidlas
|
|||||||
std::vector<cl_ext::lazy_compiler>& init(controller<expressions_tuple> const &);
|
std::vector<cl_ext::lazy_compiler>& init(controller<expressions_tuple> const &);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
model(predictors::random_forest const &, std::vector< std::shared_ptr<base> > const &, cl::CommandQueue &);
|
model(predictors::random_forest const &, std::vector< tools::shared_ptr<base> > const &, cl::CommandQueue &);
|
||||||
model(std::vector< std::shared_ptr<base> > const &, cl::CommandQueue &);
|
model(std::vector< tools::shared_ptr<base> > const &, cl::CommandQueue &);
|
||||||
model(base const &, cl::CommandQueue &);
|
model(base const &, cl::CommandQueue &);
|
||||||
|
|
||||||
void execute(controller<expressions_tuple> const &);
|
void execute(controller<expressions_tuple> const &);
|
||||||
templates_container const & templates() const;
|
templates_container const & templates() const;
|
||||||
private:
|
private:
|
||||||
templates_container templates_;
|
templates_container templates_;
|
||||||
std::shared_ptr<predictors::random_forest> predictor_;
|
tools::shared_ptr<predictors::random_forest> predictor_;
|
||||||
std::map<std::vector<int_t>, int> hardcoded_;
|
std::map<std::vector<int_t>, int> hardcoded_;
|
||||||
std::map<cl_context, std::map<std::string, std::vector<cl_ext::lazy_compiler> > > lazy_programs_;
|
std::map<cl_context, std::map<std::string, std::vector<cl_ext::lazy_compiler> > > lazy_programs_;
|
||||||
cl::CommandQueue & queue_;
|
cl::CommandQueue & queue_;
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef std::map<std::pair<expression_type, numeric_type>, std::shared_ptr<model> > model_map_t;
|
typedef std::map<std::pair<expression_type, numeric_type>, tools::shared_ptr<model> > model_map_t;
|
||||||
|
|
||||||
model_map_t init_models(cl::CommandQueue const & queue);
|
model_map_t init_models(cl::CommandQueue const & queue);
|
||||||
model_map_t& get_model_map(cl::CommandQueue & queue);
|
model_map_t& get_model_map(cl::CommandQueue & queue);
|
||||||
|
@@ -6,7 +6,8 @@
|
|||||||
#include <CL/cl.hpp>
|
#include <CL/cl.hpp>
|
||||||
#include "atidlas/types.h"
|
#include "atidlas/types.h"
|
||||||
#include "atidlas/value_scalar.h"
|
#include "atidlas/value_scalar.h"
|
||||||
#include <memory>
|
#include "atidlas/tools/shared_ptr.hpp"
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
namespace atidlas
|
namespace atidlas
|
||||||
{
|
{
|
||||||
@@ -221,21 +222,14 @@ class operation_cache
|
|||||||
};
|
};
|
||||||
|
|
||||||
public:
|
public:
|
||||||
void push_back(cl::CommandQueue & queue, cl::Kernel const & kernel, cl::NDRange const & offset, cl::NDRange const & global, cl::NDRange const & local, std::vector<cl::Event>* dependencies)
|
void push_back(cl::CommandQueue & queue, cl::Kernel const & kernel, cl::NDRange const & offset, cl::NDRange const & global, cl::NDRange const & local, std::vector<cl::Event>* dependencies);
|
||||||
{ l_.push_back({queue, kernel, offset, global, local, dependencies}); }
|
void enqueue(std::list<cl::Event>* events = NULL);
|
||||||
|
|
||||||
void enqueue(std::list<cl::Event>* events = NULL)
|
|
||||||
{
|
|
||||||
for(infos & i : l_){
|
|
||||||
events->push_back(cl::Event());
|
|
||||||
i.queue.enqueueNDRangeKernel(i.kernel, i.offset, i.global, i.local, i.dependencies, &events->back());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::list<infos> l_;
|
std::list<infos> l_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
struct execution_options_type
|
struct execution_options_type
|
||||||
{
|
{
|
||||||
execution_options_type(unsigned int _queue_id = 0, std::list<cl::Event>* _events = NULL, operation_cache* _cache = NULL, std::vector<cl::Event>* _dependencies = NULL) : queue_id(_queue_id), events(_events), cache(_cache), dependencies(_dependencies){}
|
execution_options_type(unsigned int _queue_id = 0, std::list<cl::Event>* _events = NULL, operation_cache* _cache = NULL, std::vector<cl::Event>* _dependencies = NULL) : queue_id(_queue_id), events(_events), cache(_cache), dependencies(_dependencies){}
|
||||||
@@ -299,9 +293,9 @@ controller<TYPE> control(TYPE const & x, execution_options_type const& execution
|
|||||||
class expressions_tuple
|
class expressions_tuple
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
std::shared_ptr<array_expression> create(array_expression const & s);
|
tools::shared_ptr<array_expression> create(array_expression const & s);
|
||||||
public:
|
public:
|
||||||
typedef std::list<std::shared_ptr<array_expression> > data_type;
|
typedef std::list<tools::shared_ptr<array_expression> > data_type;
|
||||||
enum order_type { SEQUENTIAL, INDEPENDENT };
|
enum order_type { SEQUENTIAL, INDEPENDENT };
|
||||||
|
|
||||||
expressions_tuple(array_expression const & s0);
|
expressions_tuple(array_expression const & s0);
|
||||||
|
162
include/atidlas/tools/shared_ptr.hpp
Normal file
162
include/atidlas/tools/shared_ptr.hpp
Normal file
@@ -0,0 +1,162 @@
|
|||||||
|
#ifndef ATIDLAS_TOOLS_SHARED_PTR_HPP
|
||||||
|
#define ATIDLAS_TOOLS_SHARED_PTR_HPP
|
||||||
|
|
||||||
|
/* =========================================================================
|
||||||
|
Copyright (c) 2010-2012, Institute for Microelectronics,
|
||||||
|
Institute for Analysis and Scientific Computing,
|
||||||
|
TU Wien.
|
||||||
|
Portions of this software are copyright by UChicago Argonne, LLC.
|
||||||
|
|
||||||
|
-----------------
|
||||||
|
ViennaCL - The Vienna Computing Library
|
||||||
|
-----------------
|
||||||
|
|
||||||
|
Project Head: Karl Rupp rupp@iue.tuwien.ac.at
|
||||||
|
|
||||||
|
(A list of authors and contributors can be found in the PDF manual)
|
||||||
|
|
||||||
|
License: MIT (X11), see file LICENSE in the base directory
|
||||||
|
============================================================================= */
|
||||||
|
|
||||||
|
/** @file tools/shared_ptr.hpp
|
||||||
|
@brief Implementation of a shared pointer class (cf. std::shared_ptr, boost::shared_ptr). Will be used until C++11 is widely available.
|
||||||
|
|
||||||
|
Contributed by Philippe Tillet.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
namespace atidlas
{
namespace tools
{
namespace detail
{

  /** @brief Reference counting class for the shared_ptr implementation */
  class count
  {
  public:
    count(unsigned int val) : val_(val){ }
    void dec(){ --val_; }
    void inc(){ ++val_; }
    bool is_null() const { return val_ == 0; }
    unsigned int val() const { return val_; }
  private:
    unsigned int val_;
  };

  /** @brief Interface for the reference counter inside the shared_ptr */
  struct aux
  {
    detail::count count;

    aux() :count(1) {}
    virtual void destroy()=0;
    virtual ~aux() {}
  };

  /** @brief Implementation helper for the reference counting mechanism inside shared_ptr. */
  template<class U, class Deleter>
  struct auximpl: public detail::aux
  {
    U* p;
    Deleter d;

    auximpl(U* pu, Deleter x) :p(pu), d(x) {}
    virtual void destroy() { d(p); }
  };

  /** @brief Default deleter class for a pointer. The default is to just call 'delete' on the pointer. Provide your own implementations for 'delete[]' and 'free'. */
  template<class U>
  struct default_deleter
  {
    void operator()(U* p) const { delete p; }
  };

}

template<class T>
class shared_ptr;

/** @brief Non-template base of shared_ptr: owns the pointer to the shared control block and exposes the current reference count. */
class shared_ptr_base
{
  // Every shared_ptr<U> needs the control block of every other instantiation
  // (converting copy constructor).
  template<class U>
  friend class shared_ptr;

protected:
  /** Shared control block; NULL when nothing is owned. */
  detail::aux* pa;

  explicit shared_ptr_base(detail::aux* a) : pa(a) {}

public:
  shared_ptr_base() : pa(NULL) {}

  /** @brief Number of shared_ptr instances sharing the pointee; 0 when nothing is owned. */
  unsigned int count() const { return pa ? pa->count.val() : 0; }
};

/** @brief A shared pointer class similar to boost::shared_ptr. Reimplemented in order to avoid a Boost-dependency. Will be replaced by std::shared_ptr as soon as C++11 is widely available.
*
* The control block pointer lives in shared_ptr_base only, so that count()
* always reports on the block this instance actually uses.
*/
template<class T>
class shared_ptr : public shared_ptr_base
{
  template<class U>
  friend class shared_ptr;

  /** Pointer handed out by get(), operator-> and operator*. */
  T* pt;

public:

  /** @brief Creates an empty pointer that owns nothing. */
  shared_ptr() : shared_ptr_base(), pt(NULL) {}

  /** @brief Takes ownership of pu; d(pu) is invoked when the last owner goes away. */
  template<class U, class Deleter>
  shared_ptr(U* pu, Deleter d) : shared_ptr_base(new detail::auximpl<U, Deleter>(pu, d)), pt(pu) {}

  /** @brief Takes ownership of pu; 'delete pu' is invoked when the last owner goes away. */
  template<class U>
  explicit shared_ptr(U* pu) : shared_ptr_base(new detail::auximpl<U, detail::default_deleter<U> >(pu, detail::default_deleter<U>())), pt(pu) {}

  /** @brief Shares ownership with a pointer to a convertible type. */
  template<class U>
  shared_ptr(const shared_ptr<U>& s) : shared_ptr_base(s.pa), pt(s.pt) { inc(); }

  /** @brief Shares ownership with s. */
  shared_ptr(const shared_ptr& s) : shared_ptr_base(s.pa), pt(s.pt) { inc(); }

  ~shared_ptr() { dec(); }

  T* get() const { return pt; }
  T* operator->() const { return pt; }
  T& operator*() const { return *pt; }

  /** @brief Drops ownership; the pointer is empty afterwards. */
  void reset() { shared_ptr<T>().swap(*this); }
  /** @brief Drops ownership of the current pointee and takes ownership of ptr. */
  void reset(T * ptr) { shared_ptr<T>(ptr).swap(*this); }

  void swap(shared_ptr<T> & other)
  {
    std::swap(pt, other.pt);
    std::swap(pa, other.pa);
  }

  /** @brief Shares ownership with s and releases the previous pointee.
  *
  * Implemented as copy-and-swap: the reference to the new pointee is taken
  * before the old one is released, so an assignment such as 'p = p->next',
  * where s is destroyed together with the old pointee, stays valid.
  */
  shared_ptr& operator=(const shared_ptr& s)
  {
    shared_ptr<T>(s).swap(*this);
    return *this;
  }

  /** @brief Increments the reference count; no-op when nothing is owned. */
  void inc()
  {
    if (pa) pa->count.inc();
  }

  /** @brief Decrements the reference count and destroys the pointee and the control block once it reaches zero. */
  void dec()
  {
    if (!pa)
      return;

    pa->count.dec();
    if (pa->count.is_null())
    {
      // Detach first so this instance never exposes a dangling pointer.
      detail::aux* last = pa;
      pa = NULL;
      pt = NULL;
      last->destroy();
      delete last;
    }
  }
};

}
}
|
||||||
|
|
||||||
|
#endif
|
@@ -30,44 +30,44 @@ numeric_type base::map_functor::get_numeric_type(atidlas::array_expression const
|
|||||||
|
|
||||||
/** @brief Binary leaf */
|
/** @brief Binary leaf */
|
||||||
template<class T>
|
template<class T>
|
||||||
std::shared_ptr<mapped_object> base::map_functor::binary_leaf(atidlas::array_expression const * array_expression, int_t root_idx, mapping_type const * mapping) const
|
tools::shared_ptr<mapped_object> base::map_functor::binary_leaf(atidlas::array_expression const * array_expression, int_t root_idx, mapping_type const * mapping) const
|
||||||
{
|
{
|
||||||
return std::shared_ptr<mapped_object>(new T(numeric_type_to_string(array_expression->dtype()), binder_.get(NULL), mapped_object::node_info(mapping, array_expression, root_idx)));
|
return tools::shared_ptr<mapped_object>(new T(numeric_type_to_string(array_expression->dtype()), binder_.get(NULL), mapped_object::node_info(mapping, array_expression, root_idx)));
|
||||||
}
|
}
|
||||||
|
|
||||||
/** @brief Scalar mapping */
|
/** @brief Scalar mapping */
|
||||||
std::shared_ptr<mapped_object> base::map_functor::create(numeric_type dtype, values_holder) const
|
tools::shared_ptr<mapped_object> base::map_functor::create(numeric_type dtype, values_holder) const
|
||||||
{
|
{
|
||||||
std::string strdtype = numeric_type_to_string(dtype);
|
std::string strdtype = numeric_type_to_string(dtype);
|
||||||
return std::shared_ptr<mapped_object>(new mapped_host_scalar(strdtype, binder_.get(NULL)));
|
return tools::shared_ptr<mapped_object>(new mapped_host_scalar(strdtype, binder_.get(NULL)));
|
||||||
}
|
}
|
||||||
|
|
||||||
/** @brief Vector mapping */
|
/** @brief Vector mapping */
|
||||||
std::shared_ptr<mapped_object> base::map_functor::create(array_infos const & a) const
|
tools::shared_ptr<mapped_object> base::map_functor::create(array_infos const & a) const
|
||||||
{
|
{
|
||||||
std::string dtype = numeric_type_to_string(a.dtype);
|
std::string dtype = numeric_type_to_string(a.dtype);
|
||||||
unsigned int id = binder_.get(a.data);
|
unsigned int id = binder_.get(a.data);
|
||||||
//Scalar
|
//Scalar
|
||||||
if(a.shape1==1 && a.shape2==1)
|
if(a.shape1==1 && a.shape2==1)
|
||||||
return std::shared_ptr<mapped_object>(new mapped_array(dtype, id, 's'));
|
return tools::shared_ptr<mapped_object>(new mapped_array(dtype, id, 's'));
|
||||||
//Column vector
|
//Column vector
|
||||||
else if(a.shape1>1 && a.shape2==1)
|
else if(a.shape1>1 && a.shape2==1)
|
||||||
return std::shared_ptr<mapped_object>(new mapped_array(dtype, id, 'c'));
|
return tools::shared_ptr<mapped_object>(new mapped_array(dtype, id, 'c'));
|
||||||
//Row vector
|
//Row vector
|
||||||
else if(a.shape1==1 && a.shape2>1)
|
else if(a.shape1==1 && a.shape2>1)
|
||||||
return std::shared_ptr<mapped_object>(new mapped_array(dtype, id, 'r'));
|
return tools::shared_ptr<mapped_object>(new mapped_array(dtype, id, 'r'));
|
||||||
//Matrix
|
//Matrix
|
||||||
else
|
else
|
||||||
return std::shared_ptr<mapped_object>(new mapped_array(dtype, id, 'm'));
|
return tools::shared_ptr<mapped_object>(new mapped_array(dtype, id, 'm'));
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<mapped_object> base::map_functor::create(repeat_infos const &) const
|
tools::shared_ptr<mapped_object> base::map_functor::create(repeat_infos const &) const
|
||||||
{
|
{
|
||||||
//TODO: Make it less specific!
|
//TODO: Make it less specific!
|
||||||
return std::shared_ptr<mapped_object>(new mapped_tuple("int",binder_.get(NULL),4));
|
return tools::shared_ptr<mapped_object>(new mapped_tuple("int",binder_.get(NULL),4));
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<mapped_object> base::map_functor::create(lhs_rhs_element const & lhs_rhs) const
|
tools::shared_ptr<mapped_object> base::map_functor::create(lhs_rhs_element const & lhs_rhs) const
|
||||||
{
|
{
|
||||||
switch(lhs_rhs.type_family)
|
switch(lhs_rhs.type_family)
|
||||||
{
|
{
|
||||||
@@ -111,7 +111,7 @@ void base::map_functor::operator()(atidlas::array_expression const & array_expre
|
|||||||
else if (root_node.op.type == OPERATOR_OUTER_PROD_TYPE)
|
else if (root_node.op.type == OPERATOR_OUTER_PROD_TYPE)
|
||||||
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_outer>(&array_expression, root_idx, &mapping_)));
|
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_outer>(&array_expression, root_idx, &mapping_)));
|
||||||
else if (detail::is_cast(root_node.op))
|
else if (detail::is_cast(root_node.op))
|
||||||
mapping_.insert(mapping_type::value_type(key, std::shared_ptr<mapped_object>(new mapped_cast(root_node.op.type, binder_.get(NULL)))));
|
mapping_.insert(mapping_type::value_type(key, tools::shared_ptr<mapped_object>(new mapped_cast(root_node.op.type, binder_.get(NULL)))));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -280,7 +280,7 @@ std::string base::generate_arguments(std::string const & data_type, std::vector<
|
|||||||
|
|
||||||
void base::set_arguments(expressions_tuple const & expressions, cl::Kernel & kernel, unsigned int & current_arg)
|
void base::set_arguments(expressions_tuple const & expressions, cl::Kernel & kernel, unsigned int & current_arg)
|
||||||
{
|
{
|
||||||
std::shared_ptr<symbolic_binder> binder = make_binder();
|
tools::shared_ptr<symbolic_binder> binder = make_binder();
|
||||||
for (const auto & elem : expressions.data())
|
for (const auto & elem : expressions.data())
|
||||||
traverse(*elem, (elem)->root(), set_arguments_functor(*binder, current_arg, kernel), true);
|
traverse(*elem, (elem)->root(), set_arguments_functor(*binder, current_arg, kernel), true);
|
||||||
}
|
}
|
||||||
@@ -478,12 +478,12 @@ unsigned int base::align(unsigned int to_round, unsigned int base)
|
|||||||
return (to_round + base - 1)/base * base;
|
return (to_round + base - 1)/base * base;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<symbolic_binder> base::make_binder()
|
tools::shared_ptr<symbolic_binder> base::make_binder()
|
||||||
{
|
{
|
||||||
if (binding_policy_==BIND_TO_HANDLE)
|
if (binding_policy_==BIND_TO_HANDLE)
|
||||||
return std::shared_ptr<symbolic_binder>(new bind_to_handle());
|
return tools::shared_ptr<symbolic_binder>(new bind_to_handle());
|
||||||
else
|
else
|
||||||
return std::shared_ptr<symbolic_binder>(new bind_all_unique());
|
return tools::shared_ptr<symbolic_binder>(new bind_all_unique());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -509,7 +509,7 @@ std::vector<std::string> base::generate(unsigned int label, expressions_tuple co
|
|||||||
|
|
||||||
//Create mapping
|
//Create mapping
|
||||||
std::vector<mapping_type> mappings(expressions.data().size());
|
std::vector<mapping_type> mappings(expressions.data().size());
|
||||||
std::shared_ptr<symbolic_binder> binder = make_binder();
|
tools::shared_ptr<symbolic_binder> binder = make_binder();
|
||||||
for (mit = mappings.begin(), sit = expressions.data().begin(); sit != expressions.data().end(); ++sit, ++mit)
|
for (mit = mappings.begin(), sit = expressions.data().begin(); sit != expressions.data().end(); ++sit, ++mit)
|
||||||
traverse(**sit, (*sit)->root(), map_functor(*binder,*mit), true);
|
traverse(**sit, (*sit)->root(), map_functor(*binder,*mit), true);
|
||||||
|
|
||||||
@@ -533,8 +533,8 @@ int_t base_impl<TType, PType>::local_size_1() const
|
|||||||
{ return p_.local_size_1; }
|
{ return p_.local_size_1; }
|
||||||
|
|
||||||
template<class TType, class PType>
|
template<class TType, class PType>
|
||||||
std::shared_ptr<base> base_impl<TType, PType>::clone() const
|
tools::shared_ptr<base> base_impl<TType, PType>::clone() const
|
||||||
{ return std::shared_ptr<base>(new TType(*dynamic_cast<TType const *>(this))); }
|
{ return tools::shared_ptr<base>(new TType(*dynamic_cast<TType const *>(this))); }
|
||||||
|
|
||||||
template<class TType, class PType>
|
template<class TType, class PType>
|
||||||
int base_impl<TType, PType>::check_invalid(expressions_tuple const & expressions, cl::Device const & device) const
|
int base_impl<TType, PType>::check_invalid(expressions_tuple const & expressions, cl::Device const & device) const
|
||||||
|
@@ -587,7 +587,7 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
|
|||||||
kernel.setArg(current_arg++, cl_uint(N));
|
kernel.setArg(current_arg++, cl_uint(N));
|
||||||
kernel.setArg(current_arg++, cl_uint(K));
|
kernel.setArg(current_arg++, cl_uint(K));
|
||||||
|
|
||||||
std::shared_ptr<symbolic_binder> binder = make_binder();
|
tools::shared_ptr<symbolic_binder> binder = make_binder();
|
||||||
set_arguments_functor fun(*binder, current_arg, kernel);
|
set_arguments_functor fun(*binder, current_arg, kernel);
|
||||||
fun.set_arguments(C);
|
fun.set_arguments(C);
|
||||||
fun.set_arguments(alpha.dtype(), alpha.values());
|
fun.set_arguments(alpha.dtype(), alpha.values());
|
||||||
|
@@ -82,11 +82,11 @@ std::vector<cl_ext::lazy_compiler>& model::init(controller<expressions_tuple> co
|
|||||||
return to_init;
|
return to_init;
|
||||||
}
|
}
|
||||||
|
|
||||||
model::model(predictors::random_forest const & predictor, std::vector< std::shared_ptr<base> > const & templates, cl::CommandQueue & queue) :
|
model::model(predictors::random_forest const & predictor, std::vector< tools::shared_ptr<base> > const & templates, cl::CommandQueue & queue) :
|
||||||
templates_(templates), predictor_(new predictors::random_forest(predictor)), queue_(queue)
|
templates_(templates), predictor_(new predictors::random_forest(predictor)), queue_(queue)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
model::model(std::vector< std::shared_ptr<base> > const & templates, cl::CommandQueue & queue) : templates_(templates), queue_(queue)
|
model::model(std::vector< tools::shared_ptr<base> > const & templates, cl::CommandQueue & queue) : templates_(templates), queue_(queue)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
model::model(base const & tp, cl::CommandQueue & queue) : templates_(1,tp.clone()), queue_(queue)
|
model::model(base const & tp, cl::CommandQueue & queue) : templates_(1,tp.clone()), queue_(queue)
|
||||||
@@ -158,27 +158,27 @@ namespace detail
|
|||||||
throw std::invalid_argument("Invalid datatype: " + name);
|
throw std::invalid_argument("Invalid datatype: " + name);
|
||||||
}
|
}
|
||||||
|
|
||||||
static std::shared_ptr<base> create(std::string const & template_name, std::vector<int> const & a)
|
static tools::shared_ptr<base> create(std::string const & template_name, std::vector<int> const & a)
|
||||||
{
|
{
|
||||||
fetching_policy_type fetch[] = {FETCH_FROM_LOCAL, FETCH_FROM_GLOBAL_STRIDED, FETCH_FROM_GLOBAL_CONTIGUOUS};
|
fetching_policy_type fetch[] = {FETCH_FROM_LOCAL, FETCH_FROM_GLOBAL_STRIDED, FETCH_FROM_GLOBAL_CONTIGUOUS};
|
||||||
if(template_name=="vaxpy")
|
if(template_name=="vaxpy")
|
||||||
return std::shared_ptr<base>(new vaxpy(a[0], a[1], a[2], fetch[a[3]]));
|
return tools::shared_ptr<base>(new vaxpy(a[0], a[1], a[2], fetch[a[3]]));
|
||||||
else if(template_name=="dot")
|
else if(template_name=="dot")
|
||||||
return std::shared_ptr<base>(new reduction(a[0], a[1], a[2], fetch[a[3]]));
|
return tools::shared_ptr<base>(new reduction(a[0], a[1], a[2], fetch[a[3]]));
|
||||||
else if(template_name=="maxpy")
|
else if(template_name=="maxpy")
|
||||||
return std::shared_ptr<base>(new maxpy(a[0], a[1], a[2], a[3], a[4], fetch[a[5]]));
|
return tools::shared_ptr<base>(new maxpy(a[0], a[1], a[2], a[3], a[4], fetch[a[5]]));
|
||||||
else if(template_name.find("gemvN")!=std::string::npos)
|
else if(template_name.find("gemvN")!=std::string::npos)
|
||||||
return std::shared_ptr<base>(new mreduction_rows(a[0], a[1], a[2], a[3], fetch[a[4]]));
|
return tools::shared_ptr<base>(new mreduction_rows(a[0], a[1], a[2], a[3], fetch[a[4]]));
|
||||||
else if(template_name.find("gemvT")!=std::string::npos)
|
else if(template_name.find("gemvT")!=std::string::npos)
|
||||||
return std::shared_ptr<base>(new mreduction_cols(a[0], a[1], a[2], a[3], fetch[a[4]]));
|
return tools::shared_ptr<base>(new mreduction_cols(a[0], a[1], a[2], a[3], fetch[a[4]]));
|
||||||
else if(template_name.find("gemmNN")!=std::string::npos)
|
else if(template_name.find("gemmNN")!=std::string::npos)
|
||||||
return std::shared_ptr<base>(new mproduct_nn(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
|
return tools::shared_ptr<base>(new mproduct_nn(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
|
||||||
else if(template_name.find("gemmTN")!=std::string::npos)
|
else if(template_name.find("gemmTN")!=std::string::npos)
|
||||||
return std::shared_ptr<base>(new mproduct_tn(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
|
return tools::shared_ptr<base>(new mproduct_tn(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
|
||||||
else if(template_name.find("gemmNT")!=std::string::npos)
|
else if(template_name.find("gemmNT")!=std::string::npos)
|
||||||
return std::shared_ptr<base>(new mproduct_nt(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
|
return tools::shared_ptr<base>(new mproduct_nt(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
|
||||||
else if(template_name.find("gemmTT")!=std::string::npos)
|
else if(template_name.find("gemmTT")!=std::string::npos)
|
||||||
return std::shared_ptr<base>(new mproduct_tt(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
|
return tools::shared_ptr<base>(new mproduct_tt(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
|
||||||
else
|
else
|
||||||
throw std::invalid_argument("Invalid expression: " + template_name);
|
throw std::invalid_argument("Invalid expression: " + template_name);
|
||||||
}
|
}
|
||||||
@@ -214,7 +214,7 @@ void import(std::string const & fname, cl::CommandQueue & queue, model_map_t& re
|
|||||||
numeric_type dtype = detail::get_dtype(elem);
|
numeric_type dtype = detail::get_dtype(elem);
|
||||||
|
|
||||||
// Get profiles
|
// Get profiles
|
||||||
std::vector<std::shared_ptr<base> > templates;
|
std::vector<tools::shared_ptr<base> > templates;
|
||||||
js::Value const & profiles = document[opcstr][dtcstr]["profiles"];
|
js::Value const & profiles = document[opcstr][dtcstr]["profiles"];
|
||||||
for (js::SizeType id = 0 ; id < profiles.Size() ; ++id)
|
for (js::SizeType id = 0 ; id < profiles.Size() ; ++id)
|
||||||
templates.push_back(detail::create(operation, tools::to_int_array<int>(profiles[id])));
|
templates.push_back(detail::create(operation, tools::to_int_array<int>(profiles[id])));
|
||||||
@@ -222,10 +222,10 @@ void import(std::string const & fname, cl::CommandQueue & queue, model_map_t& re
|
|||||||
{
|
{
|
||||||
// Get predictor
|
// Get predictor
|
||||||
predictors::random_forest predictor(document[opcstr][dtcstr]["predictor"]);
|
predictors::random_forest predictor(document[opcstr][dtcstr]["predictor"]);
|
||||||
result[std::make_pair(etype, dtype)] = std::shared_ptr<model>(new model(predictor, templates, queue));
|
result[std::make_pair(etype, dtype)] = tools::shared_ptr<model>(new model(predictor, templates, queue));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
result[std::make_pair(etype, dtype)] = std::shared_ptr<model>(new model(templates, queue));
|
result[std::make_pair(etype, dtype)] = tools::shared_ptr<model>(new model(templates, queue));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -235,7 +235,7 @@ void import(std::string const & fname, cl::CommandQueue & queue, model_map_t& re
|
|||||||
model_map_t init_models(cl::CommandQueue & queue)
|
model_map_t init_models(cl::CommandQueue & queue)
|
||||||
{
|
{
|
||||||
model_map_t res;
|
model_map_t res;
|
||||||
typedef std::shared_ptr<model> ptr_t;
|
typedef tools::shared_ptr<model> ptr_t;
|
||||||
numeric_type types[] = {CHAR_TYPE, UCHAR_TYPE, SHORT_TYPE, USHORT_TYPE, INT_TYPE, UINT_TYPE, LONG_TYPE, ULONG_TYPE, FLOAT_TYPE, DOUBLE_TYPE};
|
numeric_type types[] = {CHAR_TYPE, UCHAR_TYPE, SHORT_TYPE, USHORT_TYPE, INT_TYPE, UINT_TYPE, LONG_TYPE, ULONG_TYPE, FLOAT_TYPE, DOUBLE_TYPE};
|
||||||
|
|
||||||
for(auto DTYPE : types){
|
for(auto DTYPE : types){
|
||||||
|
@@ -173,30 +173,30 @@ namespace atidlas
|
|||||||
|
|
||||||
/*----Parse required temporaries-----*/
|
/*----Parse required temporaries-----*/
|
||||||
detail::parse(tree, rootidx, current_type, breakpoints, final_type);
|
detail::parse(tree, rootidx, current_type, breakpoints, final_type);
|
||||||
std::vector<std::shared_ptr<array> > temporaries_;
|
std::vector<tools::shared_ptr<array> > temporaries_;
|
||||||
|
|
||||||
/*----Compute required temporaries----*/
|
/*----Compute required temporaries----*/
|
||||||
for(detail::breakpoints_t::reverse_iterator rit = breakpoints.rbegin() ; rit != breakpoints.rend() ; ++rit)
|
for(detail::breakpoints_t::reverse_iterator rit = breakpoints.rbegin() ; rit != breakpoints.rend() ; ++rit)
|
||||||
{
|
{
|
||||||
std::shared_ptr<model> const & pmodel = models[std::make_pair(rit->first, dtype)];
|
tools::shared_ptr<model> const & pmodel = models[std::make_pair(rit->first, dtype)];
|
||||||
array_expression::node const & node = tree[rit->second->node_index];
|
array_expression::node const & node = tree[rit->second->node_index];
|
||||||
array_expression::node const & lmost = lhs_most(tree, node);
|
array_expression::node const & lmost = lhs_most(tree, node);
|
||||||
|
|
||||||
//Creates temporary
|
//Creates temporary
|
||||||
std::shared_ptr<array> tmp;
|
tools::shared_ptr<array> tmp;
|
||||||
switch(rit->first){
|
switch(rit->first){
|
||||||
case SCALAR_AXPY_TYPE:
|
case SCALAR_AXPY_TYPE:
|
||||||
case REDUCTION_TYPE: tmp = std::shared_ptr<array>(new array(1, dtype, context)); break;
|
case REDUCTION_TYPE: tmp = tools::shared_ptr<array>(new array(1, dtype, context)); break;
|
||||||
|
|
||||||
case VECTOR_AXPY_TYPE: tmp = std::shared_ptr<array>(new array(lmost.lhs.array.shape1, dtype, context)); break;
|
case VECTOR_AXPY_TYPE: tmp = tools::shared_ptr<array>(new array(lmost.lhs.array.shape1, dtype, context)); break;
|
||||||
case ROW_WISE_REDUCTION_TYPE: tmp = std::shared_ptr<array>(new array(lmost.lhs.array.shape1, dtype, context)); break;
|
case ROW_WISE_REDUCTION_TYPE: tmp = tools::shared_ptr<array>(new array(lmost.lhs.array.shape1, dtype, context)); break;
|
||||||
case COL_WISE_REDUCTION_TYPE: tmp = std::shared_ptr<array>(new array(lmost.lhs.array.shape2, dtype, context)); break;
|
case COL_WISE_REDUCTION_TYPE: tmp = tools::shared_ptr<array>(new array(lmost.lhs.array.shape2, dtype, context)); break;
|
||||||
|
|
||||||
case MATRIX_AXPY_TYPE: tmp = std::shared_ptr<array>(new array(lmost.lhs.array.shape1, lmost.lhs.array.shape2, dtype, context)); break;
|
case MATRIX_AXPY_TYPE: tmp = tools::shared_ptr<array>(new array(lmost.lhs.array.shape1, lmost.lhs.array.shape2, dtype, context)); break;
|
||||||
case MATRIX_PRODUCT_NN_TYPE: tmp = std::shared_ptr<array>(new array(node.lhs.array.shape1, node.rhs.array.shape2, dtype, context)); break;
|
case MATRIX_PRODUCT_NN_TYPE: tmp = tools::shared_ptr<array>(new array(node.lhs.array.shape1, node.rhs.array.shape2, dtype, context)); break;
|
||||||
case MATRIX_PRODUCT_NT_TYPE: tmp = std::shared_ptr<array>(new array(node.lhs.array.shape1, node.rhs.array.shape1, dtype, context)); break;
|
case MATRIX_PRODUCT_NT_TYPE: tmp = tools::shared_ptr<array>(new array(node.lhs.array.shape1, node.rhs.array.shape1, dtype, context)); break;
|
||||||
case MATRIX_PRODUCT_TN_TYPE: tmp = std::shared_ptr<array>(new array(node.lhs.array.shape2, node.rhs.array.shape2, dtype, context)); break;
|
case MATRIX_PRODUCT_TN_TYPE: tmp = tools::shared_ptr<array>(new array(node.lhs.array.shape2, node.rhs.array.shape2, dtype, context)); break;
|
||||||
case MATRIX_PRODUCT_TT_TYPE: tmp = std::shared_ptr<array>(new array(node.lhs.array.shape2, node.rhs.array.shape1, dtype, context)); break;
|
case MATRIX_PRODUCT_TT_TYPE: tmp = tools::shared_ptr<array>(new array(node.lhs.array.shape2, node.rhs.array.shape1, dtype, context)); break;
|
||||||
|
|
||||||
default: throw std::invalid_argument("Unrecognized operation");
|
default: throw std::invalid_argument("Unrecognized operation");
|
||||||
}
|
}
|
||||||
|
@@ -176,11 +176,24 @@ array_expression array_expression::operator-()
|
|||||||
array_expression array_expression::operator!()
|
array_expression array_expression::operator!()
|
||||||
{ return array_expression(*this, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_NEGATE_TYPE), context_, INT_TYPE, shape_); }
|
{ return array_expression(*this, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_NEGATE_TYPE), context_, INT_TYPE, shape_); }
|
||||||
|
|
||||||
|
//
|
||||||
|
void operation_cache::push_back(cl::CommandQueue & queue, cl::Kernel const & kernel, cl::NDRange const & offset, cl::NDRange const & global, cl::NDRange const & local, std::vector<cl::Event>* dependencies)
|
||||||
|
{
|
||||||
|
l_.push_back({queue, kernel, offset, global, local, dependencies});
|
||||||
|
}
|
||||||
|
|
||||||
|
void operation_cache::enqueue(std::list<cl::Event>* events)
|
||||||
|
{
|
||||||
|
for(infos & i : l_){
|
||||||
|
events->push_back(cl::Event());
|
||||||
|
i.queue.enqueueNDRangeKernel(i.kernel, i.offset, i.global, i.local, i.dependencies, &events->back());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
std::shared_ptr<array_expression> expressions_tuple::create(array_expression const & s)
|
tools::shared_ptr<array_expression> expressions_tuple::create(array_expression const & s)
|
||||||
{
|
{
|
||||||
return std::shared_ptr<array_expression>(new array_expression(static_cast<array_expression const &>(s)));
|
return tools::shared_ptr<array_expression>(new array_expression(static_cast<array_expression const &>(s)));
|
||||||
}
|
}
|
||||||
|
|
||||||
expressions_tuple::expressions_tuple(data_type const & data, order_type order) : data_(data), order_(order)
|
expressions_tuple::expressions_tuple(data_type const & data, order_type order) : data_(data), order_(order)
|
||||||
|
@@ -143,7 +143,7 @@ def do_tuning(args):
|
|||||||
A = atd.empty(sizes, datatype, context=context)
|
A = atd.empty(sizes, datatype, context=context)
|
||||||
C = atd.empty(sizes, datatype, context=context)
|
C = atd.empty(sizes, datatype, context=context)
|
||||||
return execute(A + C, sizes, Template, parameters, fname)
|
return execute(A + C, sizes, Template, parameters, fname)
|
||||||
tune(execution_handler, 100, 5000, 2, (),'log', 'log')
|
tune(execution_handler, 64, 5000, 2, (),'log', 'log')
|
||||||
#Row-wise dot
|
#Row-wise dot
|
||||||
if operation=='gemv':
|
if operation=='gemv':
|
||||||
for A_trans in args.gemv_layouts:
|
for A_trans in args.gemv_layouts:
|
||||||
@@ -152,7 +152,7 @@ def do_tuning(args):
|
|||||||
x = atd.empty(sizes[1], datatype, context=context)
|
x = atd.empty(sizes[1], datatype, context=context)
|
||||||
LHS = A if A_trans=='N' else A.T
|
LHS = A if A_trans=='N' else A.T
|
||||||
return execute(atd.dot(LHS, x), sizes, Template[A_trans], parameters, fname)
|
return execute(atd.dot(LHS, x), sizes, Template[A_trans], parameters, fname)
|
||||||
tune(execution_handler, 100, 5000, 2, (A_trans,),'log', 'log')
|
tune(execution_handler, 64, 6000, 2, (A_trans,),'log', 'log')
|
||||||
#Matrix Product
|
#Matrix Product
|
||||||
if operation=='gemm':
|
if operation=='gemm':
|
||||||
for L in args.gemm_layouts:
|
for L in args.gemm_layouts:
|
||||||
@@ -194,7 +194,7 @@ class ArgumentsHandler:
|
|||||||
|
|
||||||
full_parser = tune_subparsers.add_parser('full', help = 'Tune each operation for randomly chosen sizes')
|
full_parser = tune_subparsers.add_parser('full', help = 'Tune each operation for randomly chosen sizes')
|
||||||
full_parser.add_argument("--build-model", default=True, type=bool)
|
full_parser.add_argument("--build-model", default=True, type=bool)
|
||||||
full_parser.add_argument("--sample-size", default=30, type=int)
|
full_parser.add_argument("--sample-size", default=60, type=int)
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
self.__dict__ = args.__dict__.copy()
|
self.__dict__ = args.__dict__.copy()
|
||||||
|
@@ -218,11 +218,16 @@ def benchmark(template, symbolic):
|
|||||||
raise ValueError("Template has too low occupancy")
|
raise ValueError("Template has too low occupancy")
|
||||||
else:
|
else:
|
||||||
queue.models[template, atd.float32] = atd.model(template, queue)
|
queue.models[template, atd.float32] = atd.model(template, queue)
|
||||||
x = atd.array(symbolic)
|
|
||||||
atd.synchronize(symbolic.context)
|
|
||||||
x, events, cache = atd.flush(symbolic)
|
x, events, cache = atd.flush(symbolic)
|
||||||
atd.synchronize(symbolic.context)
|
atd.synchronize(symbolic.context)
|
||||||
return 1e-9*sum([e.end - e.start for e in events])
|
timings = []
|
||||||
|
current_time = 0
|
||||||
|
while current_time < 1e-3:
|
||||||
|
x, events, cache = atd.flush(symbolic)
|
||||||
|
atd.synchronize(symbolic.context)
|
||||||
|
timings.append(1e-9*sum([e.end - e.start for e in events]))
|
||||||
|
current_time = current_time + timings[-1]
|
||||||
|
return np.median(timings)
|
||||||
|
|
||||||
|
|
||||||
def sanitize_string(string, keep_chars = ['_']):
|
def sanitize_string(string, keep_chars = ['_']):
|
||||||
|
@@ -193,7 +193,7 @@ namespace boost
|
|||||||
|
|
||||||
#if !defined(BOOST_NO_CXX11_SMART_PTR)
|
#if !defined(BOOST_NO_CXX11_SMART_PTR)
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline std::size_t hash_value(std::shared_ptr<T> const& x) {
|
inline std::size_t hash_value(tools::shared_ptr<T> const& x) {
|
||||||
return boost::hash_value(x.get());
|
return boost::hash_value(x.get());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -36,7 +36,7 @@ template<class T> T * get_pointer( std::unique_ptr<T> const& p )
|
|||||||
return p.get();
|
return p.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class T> T * get_pointer( std::shared_ptr<T> const& p )
|
template<class T> T * get_pointer( tools::shared_ptr<T> const& p )
|
||||||
{
|
{
|
||||||
return p.get();
|
return p.get();
|
||||||
}
|
}
|
||||||
|
@@ -315,15 +315,14 @@ namespace detail
|
|||||||
boost::shared_ptr<cl::Context> make_context(cl::Device const & dev)
|
boost::shared_ptr<cl::Context> make_context(cl::Device const & dev)
|
||||||
{ return boost::shared_ptr<cl::Context>(new cl::Context(std::vector<cl::Device>(1, dev))); }
|
{ return boost::shared_ptr<cl::Context>(new cl::Context(std::vector<cl::Device>(1, dev))); }
|
||||||
|
|
||||||
bp::tuple flush(atd::array_expression const & expression, unsigned int queue_id, bp::list dependencies, int label, std::string const & program_name, bool force_recompile)
|
bp::tuple flush(atd::array_expression const & expression, unsigned int queue_id, bp::list dependencies, bool tune, int label, std::string const & program_name, bool force_recompile)
|
||||||
{
|
{
|
||||||
std::list<cl::Event> events;
|
std::list<cl::Event> events;
|
||||||
atd::operation_cache cache;
|
atd::operation_cache cache;
|
||||||
std::vector<cl::Event> cdependencies = to_vector<cl::Event>(dependencies);
|
std::vector<cl::Event> cdependencies = to_vector<cl::Event>(dependencies);
|
||||||
boost::shared_ptr<atd::array> parray(new atd::array(atd::control(expression, atd::execution_options_type(queue_id, &events, &cache, &cdependencies),
|
boost::shared_ptr<atd::array> parray(new atd::array(atd::control(expression, atd::execution_options_type(queue_id, &events, &cache, &cdependencies),
|
||||||
atd::dispatcher_options_type(label), atd::compilation_options_type(program_name, force_recompile))));
|
atd::dispatcher_options_type(tune, label), atd::compilation_options_type(program_name, force_recompile))));
|
||||||
|
return bp::make_tuple(parray, to_list(events.begin(), events.end()), cache);
|
||||||
return bp::make_tuple(*parray, to_list(events.begin(), events.end()), cache);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -402,7 +401,7 @@ void export_cl()
|
|||||||
bp::def("synchronize", &atd::cl_ext::synchronize);
|
bp::def("synchronize", &atd::cl_ext::synchronize);
|
||||||
bp::def("get_platforms", &detail::get_platforms);
|
bp::def("get_platforms", &detail::get_platforms);
|
||||||
|
|
||||||
bp::def("flush", &detail::flush, (bp::arg("expression"), bp::arg("queue_id") = 0, bp::arg("dependencies")=bp::list(), bp::arg("label")=-1, bp::arg("program_name")="", bp::arg("recompile") = false));
|
bp::def("flush", &detail::flush, (bp::arg("expression"), bp::arg("queue_id") = 0, bp::arg("dependencies")=bp::list(), bp::arg("tune") = false, bp::arg("label")=-1, bp::arg("program_name")="", bp::arg("recompile") = false));
|
||||||
|
|
||||||
bp::class_<state_type>("state_type")
|
bp::class_<state_type>("state_type")
|
||||||
.def_readwrite("queue_properties",&atd::cl_ext::queue_properties)
|
.def_readwrite("queue_properties",&atd::cl_ext::queue_properties)
|
||||||
|
@@ -10,7 +10,7 @@ namespace ad = atidlas;
|
|||||||
int main()
|
int main()
|
||||||
{
|
{
|
||||||
viennacl::vector<float> x(10000), y(10000), z(10000);
|
viennacl::vector<float> x(10000), y(10000), z(10000);
|
||||||
std::map<std::string, ad::std::shared_ptr<ad::model> > models = ad::import("geforce_gt_540m.json");
|
std::map<std::string, ad::tools::shared_ptr<ad::model> > models = ad::import("geforce_gt_540m.json");
|
||||||
models["vector-axpy-float32"]->tune(viennacl::symbolic_expression(z, viennacl::op_assign(), x));
|
models["vector-axpy-float32"]->tune(viennacl::symbolic_expression(z, viennacl::op_assign(), x));
|
||||||
models["vector-axpy-float32"]->execute(viennacl::symbolic_expression(z, viennacl::op_assign(), x));
|
models["vector-axpy-float32"]->execute(viennacl::symbolic_expression(z, viennacl::op_assign(), x));
|
||||||
return EXIT_SUCCESS;
|
return EXIT_SUCCESS;
|
||||||
|
Reference in New Issue
Block a user