removing C++11 interface

This commit is contained in:
Philippe Tillet
2015-02-08 23:19:38 -05:00
parent 85fb438806
commit a6d7671831
21 changed files with 423 additions and 956 deletions

View File

@@ -33,7 +33,7 @@ else()
endif()
string(REPLACE ";" " " BLAS_DEF_STR "${BLAS_DEF}")
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} " ${BLAS_DEF_STR} -std=c++11")
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} " ${BLAS_DEF_STR}")
foreach(PROG blas overhead)
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
if(CUDA_FOUND)
@@ -44,7 +44,7 @@ foreach(PROG blas overhead)
cuda_add_cublas_to_target(${PROG}-bench)
else()
add_executable(${PROG}-bench ${PROG}.cpp)
set_target_properties(${PROG}-bench PROPERTIES COMPILE_FLAGS "-Wall -Wextra ${BLAS_DEF_STR} -std=c++11")
set_target_properties(${PROG}-bench PROPERTIES COMPILE_FLAGS "-Wall -Wextra ${BLAS_DEF_STR}")
endif()
target_link_libraries(${PROG}-bench ${BLAS_LIBS})
endforeach(PROG)

View File

@@ -1,5 +1,6 @@
#include "atidlas/array.h"
#include "atidlas/symbolic/execute.h"
#include "atidlas/tools/timer.hpp"
#include "common.hpp"
#ifdef BENCH_CLAMDBLAS
#include "clAmdBlas.h"
@@ -13,27 +14,89 @@
#include <iomanip>
#include <stdlib.h>
#include <cmath>
#include <chrono>
#include <numeric>
namespace ad = atidlas;
typedef ad::int_t int_t;
template<class T>
void bench(ad::numeric_type dtype)
int ceil(int N, int pad)
{
unsigned int dtsize = ad::size_of(dtype);
cl::CommandQueue & queue = ad::cl_ext::queues[ad::cl_ext::default_context()][0];
return (N%pad==0)?N:(N+pad-1)/pad*pad;
}
/** @brief Builds N sizes spread logarithmically over [min, max), each padded.
 *
 *  Entry k is exp(log(min) + (log(max) - log(min)) * k / N), truncated to an
 *  integer and then rounded up to a multiple of pad through ceil(int, int).
 *  Because k stops at N-1, max itself is never produced.
 *
 *  @param min  lower end of the interval
 *  @param max  upper end of the interval
 *  @param N    number of entries to generate
 *  @param pad  granularity every entry is rounded up to
 *  @return     the N generated sizes, in increasing k order
 */
std::vector<int> create_log_range(int min, int max, int N, int pad)
{
  const double log_lo = std::log(min);
  // The width of the interval is intentionally narrowed to float so that the
  // interpolation below is carried out in single precision.
  const float log_width = (float)(std::log(max) - log_lo);

  std::vector<int> sizes(N);
  for(int k = 0 ; k < N ; ++k)
  {
    const int unpadded = std::exp(log_lo + log_width*k/N);
    sizes[k] = ceil(unpadded, pad);
  }
  return sizes;
}
/** @brief Lists every multiple-of-pad step between two padded bounds.
 *
 *  Both bounds are first rounded up to a multiple of pad with ceil(int, int).
 *  The padded lower bound is included, the padded upper bound is excluded.
 *
 *  @param min  lower bound before padding
 *  @param max  upper bound before padding
 *  @param pad  step between consecutive entries
 *  @return     the generated values in increasing order
 */
std::vector<int> create_full_range(int min, int max, int pad)
{
  const int first = ceil(min, pad);
  const int stop = ceil(max, pad);

  std::vector<int> values;
  int current = first;
  while(current < stop)
  {
    values.push_back(current);
    current += pad;
  }
  return values;
}
/** @brief Small builder that collects values with operator<< and converts
 *         to std::vector<T>.
 *
 *  Typical use: std::vector<int> v = make_vector<int>() << 1 << 2;
 */
template <typename T>
class make_vector {
private:
  std::vector<T> data_;

public:
  typedef make_vector<T> my_type;

  /** @brief Appends val and returns the builder so calls can be chained. */
  my_type& operator<< (const T& val) {
    data_.insert(data_.end(), val);
    return *this;
  }

  /** @brief Yields a copy of the values collected so far. */
  operator std::vector<T>() const {
    return std::vector<T>(data_);
  }
};
/** @brief Median of a sample.
 *
 *  The argument is taken by value because it is sorted internally; the
 *  caller's vector is left untouched.
 *
 *  @param x  sample values, in any order
 *  @return   the middle element for an odd count, (lower + upper) / 2 of the
 *            two middle elements for an even count (integer division when T
 *            is integral), and T() when x is empty
 */
template<class T>
T median(std::vector<T> x)
{
  // Guard: without it an empty sample would index x[size/2 - 1] with size == 0.
  if (x.empty())
    return T();

  std::sort(x.begin(), x.end());

  const typename std::vector<T>::size_type size = x.size();
  const typename std::vector<T>::size_type mid = size / 2;
  if (size % 2 == 0)
    return (x[mid - 1] + x[mid]) / 2;
  return x[mid];
}
/** @brief Arithmetic mean of a sample.
 *
 *  The sum is accumulated in T and divided by the element count, so the
 *  division is an integer division when T is integral.
 *
 *  @param x  sample values
 *  @return   sum(x) / x.size(), or T() when x is empty
 */
template<class T>
T mean(std::vector<T> x)
{
  // Guard: an empty sample would otherwise divide by zero.
  if (x.empty())
    return T();

  T res = 0;
  for(typename std::vector<T>::const_iterator it = x.begin() ; it != x.end() ; ++it)
    res += *it;

  const int N = x.size();
  return res/N;
}
/** @brief Adds the execution time of one event to a running total.
 *
 *  Shaped as the binary operation handed to std::accumulate over a list of
 *  events. The time of an event is the difference between its
 *  CL_PROFILING_COMMAND_END and CL_PROFILING_COMMAND_START counters.
 *
 *  @param sum  total accumulated so far
 *  @param e    event whose profiling counters are read
 *  @return     the updated total, converted to double
 */
static double time_event(unsigned long sum, cl::Event const & e)
{
  const double updated = sum
                       + e.getProfilingInfo<CL_PROFILING_COMMAND_END>()
                       - e.getProfilingInfo<CL_PROFILING_COMMAND_START>();
  return updated;
}
template<class T>
void bench(ad::numeric_type dtype){
#define BENCHMARK_ATIDLAS(OP, PERF) \
{\
std::vector<long> times;\
double total_time = 0;\
while(total_time*1e-9 < 1e-1){\
while(total_time*1e-9 < 1e-2){\
std::list<cl::Event> events;\
OP;\
queue.finish();\
times.push_back(std::accumulate(events.begin(), events.end(), 0, \
[](unsigned long sum, cl::Event const & e){ return sum + e.getProfilingInfo<CL_PROFILING_COMMAND_END>() - e.getProfilingInfo<CL_PROFILING_COMMAND_SUBMIT>();}));\
times.push_back(std::accumulate(events.begin(), events.end(), 0, &time_event));\
total_time+=times.back();\
}\
double t = median(times);\
@@ -44,11 +107,11 @@ void bench(ad::numeric_type dtype)
{\
std::vector<long> times;\
double total_time = 0;\
while(total_time*1e-9 < 1e-1){\
while(total_time*1e-9 < 1e-2){\
cl::Event event;\
OP;\
queue.finish();\
times.push_back(event.getProfilingInfo<CL_PROFILING_COMMAND_END>() - event.getProfilingInfo<CL_PROFILING_COMMAND_SUBMIT>());\
times.push_back(event.getProfilingInfo<CL_PROFILING_COMMAND_END>() - event.getProfilingInfo<CL_PROFILING_COMMAND_START>());\
total_time+=times.back();\
}\
double t = median(times);\
@@ -57,11 +120,11 @@ void bench(ad::numeric_type dtype)
#define BENCHMARK_HOST(OP, PERF) \
{\
ad::tools::timer tmr;\
std::vector<int> cache_flusher(10000000, 0);\
auto start = std::chrono::steady_clock::now();\
tmr.start();\
OP;\
auto end = std::chrono::steady_clock::now();\
double t = std::chrono::duration<double, std::nano>(end - start).count();\
double t = 1e9*tmr.get();\
std::cout << " " << PERF << std::flush;\
}
@@ -86,68 +149,49 @@ void bench(ad::numeric_type dtype)
std::cout << " " << PERF << std::flush;\
}
/*---------*/
/*--BLAS1--*/
/*---------*/
std::cout << "#AXPY" << std::endl;
for(int_t N : create_log_range(1e3, 2e7, 50, 64))
{
std::cout << N;
ad::array x(N, dtype), y(N, dtype);
/* ATIDLAS */
y = x + y; queue.flush(); queue.finish();
BENCHMARK_ATIDLAS(y = ad::control(x + y, ad::execution_options_type(0, &events), ad::dispatcher_options_type(true)), 3*N*dtsize/t)
/* clAmdBlas */
#ifdef BENCH_CLAMDBLAS
BENCHMARK_CLAMDBLAS(clAmdBlasSaxpy(N, 1, x.data()(), 0, 1, y.data()(), 0, 1, 1, &queue(), 0, NULL, &event()), 3*N*dtsize/t)
#endif
/* BLAS */
#ifdef BENCH_CBLAS
std::vector<float> cx(N), cy(N);
ad::copy(x, cx);
ad::copy(y, cy);
BENCHMARK_HOST(cblas_saxpy(N, 1, cx.data(), 1, cy.data(), 1), 3*N*dtsize/t);
#endif
/* CuBLAS */
#ifdef BENCH_CUBLAS
T *cux, *cuy;
cudaMalloc((void**) &cux, N * sizeof(T));
cudaMalloc((void**) &cuy, N * sizeof(T));
BENCHMARK_CUDA(cublasSaxpy(N, 2, cux, 1, cuy, 1), 3*N*dtsize/t)
cudaFree(cux);
cudaFree(cuy);
#endif
std::cout << std::endl;
}
std::cout << "\n\n" << std::flush;
unsigned int dtsize = ad::size_of(dtype);
cl::CommandQueue & queue = ad::cl_ext::queues[ad::cl_ext::default_context()][0];
// std::cout << "#DOT" << std::endl;
// BLAS1 Sizes
static const std::vector<int> BLAS1_N = create_log_range(1e3, 2e7, 50, 64);
// BLAS2 Sizes
static const std::vector<int> BLAS2_N = make_vector<int>() << 64;
static const std::vector<int> BLAS2_M = create_full_range(128, 10000, 64);
// BLAS3 Sizes
static const std::vector<int> BLAS3_M = make_vector<int>() << 1024;
static const std::vector<int> BLAS3_N = make_vector<int>() << 128;
static const std::vector<int> BLAS3_K = create_full_range(128, 5000, 64);
// /*---------*/
// /*--BLAS1--*/
// /*---------*/
// std::cout << "#AXPY" << std::endl;
// for(int_t N : create_log_range(1e3, 2e7, 50, 64))
// {
// std::cout << N;
// /* ATIDLAS */
// ad::array x(N, dtype), y(N, dtype);
// ad::array scratch(N, dtype);
// ad::scalar s(dtype);
// s = dot(x,y); queue.flush(); queue.finish();
// BENCHMARK_OPENCL(s = ad::controller<atidlas::array_expression>(dot(x,y), ad::execution_options_type(0, &event)), 2*N*dtsize/t)
// /* ATIDLAS */
// y = x + y; queue.finish();
// BENCHMARK_ATIDLAS(y = ad::control(x + y, ad::execution_options_type(0, &events), ad::dispatcher_options_type(true)), 3*N*dtsize/t)
// /* clAmdBlas */
//#ifdef BENCH_CLAMDBLAS
// BENCHMARK_OPENCL(clAmdBlasSdot(N, s.data()(), 0, x.data()(), 0, 1, y.data()(), 0, 1, scratch.data()(), 1, &queue(), 0, NULL, &event()), 2*N*dtsize/t)
// BENCHMARK_CLAMDBLAS(clAmdBlasSaxpy(N, 1, x.data()(), 0, 1, y.data()(), 0, 1, 1, &queue(), 0, NULL, &event()), 3*N*dtsize/t)
//#endif
// /* BLAS */
//#ifdef BENCH_CBLAS
// std::vector<float> cx(N), cy(N);
// ad::copy(x, cx);
// ad::copy(y, cy);
// BENCHMARK_HOST(cblas_sdot(N, cx.data(), 1, cy.data(), 1), 2*N*dtsize/t);
// BENCHMARK_HOST(cblas_saxpy(N, 1, cx.data(), 1, cy.data(), 1), 3*N*dtsize/t);
//#endif
// /* CuBLAS */
//#ifdef BENCH_CUBLAS
// T *cux, *cuy;
// T result;
// cudaMalloc((void**) &cux, N * sizeof(T));
// cudaMalloc((void**) &cuy, N * sizeof(T));
// BENCHMARK_CUDA(cublasSdot(N, cux, 1, cuy, 1, &result), 2*N*dtsize/t)
// BENCHMARK_CUDA(cublasSaxpy(N, 2, cux, 1, cuy, 1), 3*N*dtsize/t)
// cudaFree(cux);
// cudaFree(cuy);
//#endif
@@ -155,21 +199,56 @@ void bench(ad::numeric_type dtype)
// }
// std::cout << "\n\n" << std::flush;
std::cout << "#DOT" << std::endl;
for(int_t i = 0 ; i < BLAS1_N.size() ; ++i)
{
int_t N = BLAS1_N[i];
std::cout << N;
/* ATIDLAS */
ad::array x(N, dtype), y(N, dtype);
ad::array scratch(N, dtype);
ad::scalar s(dtype);
s = dot(x,y); queue.finish();
BENCHMARK_ATIDLAS(s = ad::control(dot(x,y), ad::execution_options_type(0, &events), ad::dispatcher_options_type(true)), 2*N*dtsize/t)
/* clAmdBlas */
#ifdef BENCH_CLAMDBLAS
BENCHMARK_CLAMDBLAS(clAmdBlasSdot(N, s.data()(), 0, x.data()(), 0, 1, y.data()(), 0, 1, scratch.data()(), 1, &queue(), 0, NULL, &event()), 2*N*dtsize/t)
#endif
/* BLAS */
#ifdef BENCH_CBLAS
std::vector<float> cx(N), cy(N);
ad::copy(x, cx);
ad::copy(y, cy);
BENCHMARK_HOST(cblas_sdot(N, cx.data(), 1, cy.data(), 1), 2*N*dtsize/t);
#endif
#ifdef BENCH_CUBLAS
T *cux, *cuy;
T result;
cudaMalloc((void**) &cux, N * sizeof(T));
cudaMalloc((void**) &cuy, N * sizeof(T));
BENCHMARK_CUDA(cublasSdot(N, cux, 1, cuy, 1, &result), 2*N*dtsize/t)
cudaFree(cux);
cudaFree(cuy);
#endif
std::cout << std::endl;
}
std::cout << "\n\n" << std::flush;
// /*---------*/
// /*--BLAS2--*/
// /*---------*/
// //T-layout
// std::cout << "#GEMV-T" << std::endl;
// for(int_t N: std::vector<int>{64})
// for(int_t N: std::vector<int>{128})
// for(int_t M: create_full_range(128, 10000, 64))
// {
// std::cout << M << "," << N;
// /* ATIDLAS */
// ad::array A(N, M, dtype), y(M, dtype), x(N, dtype);
// y = dot(trans(A),x); queue.flush(); queue.finish();
// BENCHMARK_OPENCL(y = ad::controller<atidlas::array_expression>(dot(trans(A),x), ad::execution_options_type(0, &event)),(M*N + M + N)*dtsize/t);
// y = dot(trans(A),x); queue.finish();
// BENCHMARK_ATIDLAS(y = ad::control(dot(trans(A),x), ad::execution_options_type(0, &events), ad::dispatcher_options_type(true)),(M*N + M + N)*dtsize/t);
// #ifdef BENCH_CLAMDBLAS
// BENCHMARK_OPENCL(clAmdBlasSgemv(clAmdBlasColumnMajor, clAmdBlasTrans, N, M, 1, A.data()(), A.ld(), x.data()(), 0, 1, 0, y.data()(), 0, 1, 1, &queue(),0, NULL, &event()), (M*N + M + N)*dtsize/t)
// BENCHMARK_CLAMDBLAS(clAmdBlasSgemv(clAmdBlasColumnMajor, clAmdBlasTrans, N, M, 1, A.data()(), A.ld(), x.data()(), 0, 1, 0, y.data()(), 0, 1, 1, &queue(),0, NULL, &event()), (M*N + M + N)*dtsize/t)
// #endif
// #ifdef BENCH_CBLAS
// std::vector<float> cA(N*M), cx(N), cy(M);
@@ -192,9 +271,9 @@ void bench(ad::numeric_type dtype)
// }
// std::cout << "\n\n" << std::flush;
//// /*---------*/
//// /*--BLAS3--*/
//// /*---------*/
// /*---------*/
// /*--BLAS3--*/
// /*---------*/
// std::cout << "#GEMM-NT" << std::endl;
// for(std::vector<int_t>::const_iterator Mit = BLAS3_M.begin() ; Mit != BLAS3_M.end() ; ++Mit)
// for(std::vector<int_t>::const_iterator Nit = BLAS3_N.begin() ; Nit != BLAS3_N.end() ; ++Nit)
@@ -240,8 +319,8 @@ int main(int argc, char* argv[])
std::cerr << "usage : blas-bench [DEVICE_IDX]" << std::endl;
std::cout << "Devices available: " << std::endl;
unsigned int current=0;
for(const auto & queue : queues){
cl::Device device = queue.first.getInfo<CL_CONTEXT_DEVICES>()[0];
for(ad::cl_ext::queues_type::data_type::const_iterator it = queues.begin() ; it != queues.end() ; ++it){
cl::Device device = it->first.getInfo<CL_CONTEXT_DEVICES>()[0];
std::cout << current++ << ": " << device.getInfo<CL_DEVICE_NAME>() << "(" << cl::Platform(device.getInfo<CL_DEVICE_PLATFORM>()).getInfo<CL_PLATFORM_NAME>() << ")" << std::endl;
}
exit(EXIT_FAILURE);

View File

@@ -5,83 +5,6 @@
#include <cmath>
#include <algorithm>
/** @brief Rounds N up to the next multiple of pad.
 *
 *  @param N    value to round
 *  @param pad  granularity; must be non-zero
 *  @return     N itself when it is already a multiple of pad, otherwise
 *              (N + pad - 1) / pad * pad
 */
int ceil(int N, int pad)
{
  const int remainder = N % pad;
  if(remainder == 0)
    return N;

  const int blocks = (N + pad - 1) / pad;
  return blocks * pad;
}
/** @brief Builds N sizes spread logarithmically over [min, max), each padded.
 *
 *  Entry k is exp(log(min) + (log(max) - log(min)) * k / N), truncated to an
 *  integer and then rounded up to a multiple of pad through ceil(int, int).
 *  Because k stops at N-1, max itself is never produced.
 *
 *  @param min  lower end of the interval
 *  @param max  upper end of the interval
 *  @param N    number of entries to generate
 *  @param pad  granularity every entry is rounded up to
 *  @return     the N generated sizes, in increasing k order
 */
std::vector<int> create_log_range(int min, int max, int N, int pad)
{
  const double log_lo = std::log(min);
  // The width of the interval is intentionally narrowed to float so that the
  // interpolation below is carried out in single precision.
  const float log_width = (float)(std::log(max) - log_lo);

  std::vector<int> sizes(N);
  for(int k = 0 ; k < N ; ++k)
  {
    const int unpadded = std::exp(log_lo + log_width*k/N);
    sizes[k] = ceil(unpadded, pad);
  }
  return sizes;
}
/** @brief Lists every multiple-of-pad step between two padded bounds.
 *
 *  Both bounds are first rounded up to a multiple of pad with ceil(int, int).
 *  The padded lower bound is included, the padded upper bound is excluded.
 *
 *  @param min  lower bound before padding
 *  @param max  upper bound before padding
 *  @param pad  step between consecutive entries
 *  @return     the generated values in increasing order
 */
std::vector<int> create_full_range(int min, int max, int pad)
{
  const int first = ceil(min, pad);
  const int stop = ceil(max, pad);

  std::vector<int> values;
  int current = first;
  while(current < stop)
  {
    values.push_back(current);
    current += pad;
  }
  return values;
}
/** @brief Small builder that collects values with operator<< and converts
 *         to std::vector<T>.
 *
 *  Typical use: std::vector<int> v = make_vector<int>() << 1 << 2;
 */
template <typename T>
class make_vector {
private:
  std::vector<T> data_;

public:
  typedef make_vector<T> my_type;

  /** @brief Appends val and returns the builder so calls can be chained. */
  my_type& operator<< (const T& val) {
    data_.insert(data_.end(), val);
    return *this;
  }

  /** @brief Yields a copy of the values collected so far. */
  operator std::vector<T>() const {
    return std::vector<T>(data_);
  }
};
// BLAS1 Sizes
// 50 vector lengths, log-spaced starting at 1e3 towards 2e7, each rounded up
// to a multiple of 64. The floating-point literals are converted to the int
// parameters of create_log_range.
static const std::vector<int> BLAS1_N = create_log_range(1e3, 2e7, 50, 64);
// BLAS2 Sizes
// A single value for N; M sweeps the multiples of 64 from 128 up to (but not
// including) 10000 rounded up to a multiple of 64.
static const std::vector<int> BLAS2_N = make_vector<int>() << 64;
static const std::vector<int> BLAS2_M = create_full_range(128, 10000, 64);
// BLAS3 Sizes
// M and N each hold a single value; K sweeps the multiples of 64 from 128 up
// to (but not including) 5000 rounded up to a multiple of 64.
static const std::vector<int> BLAS3_M = make_vector<int>() << 1024;
static const std::vector<int> BLAS3_N = make_vector<int>() << 128;
static const std::vector<int> BLAS3_K = create_full_range(128, 5000, 64);
/** @brief Converts an element count and a duration into a throughput figure.
 *
 *  @param N       number of elements transferred
 *  @param t       elapsed time
 *  @param dtsize  size of one element
 *  @return        N * dtsize * 1e-9 / t
 */
double bandwidth(std::size_t N, double t, unsigned int dtsize)
{
  const std::size_t total = N * dtsize;
  const double scaled = total * 1e-9;
  return scaled / t;
}
/** @brief Converts an operation count and a duration into a rate.
 *
 *  @param nops  number of operations performed
 *  @param t     elapsed time
 *  @return      nops * 1e-9 / t
 */
double gflops(double nops, double t)
{
  const double scaled = nops * 1e-9;
  return scaled / t;
}
/** @brief Median of a sample.
 *
 *  The argument is taken by value because it is sorted internally; the
 *  caller's vector is left untouched.
 *
 *  @param x  sample values, in any order
 *  @return   the middle element for an odd count, (lower + upper) / 2 of the
 *            two middle elements for an even count (integer division when T
 *            is integral), and T() when x is empty
 */
template<class T>
T median(std::vector<T> x)
{
  // Guard: without it an empty sample would index x[size/2 - 1] with size == 0.
  if (x.empty())
    return T();

  std::sort(x.begin(), x.end());

  const typename std::vector<T>::size_type size = x.size();
  const typename std::vector<T>::size_type mid = size / 2;
  if (size % 2 == 0)
    return (x[mid - 1] + x[mid]) / 2;
  return x[mid];
}
/** @brief Arithmetic mean of a sample.
 *
 *  The sum is accumulated in T and divided by the element count, so the
 *  division is an integer division when T is integral.
 *
 *  @param x  sample values
 *  @return   sum(x) / x.size(), or T() when x is empty
 */
template<class T>
T mean(std::vector<T> x)
{
  // Guard: an empty sample would otherwise divide by zero.
  if (x.empty())
    return T();

  T res = 0;
  for(typename std::vector<T>::const_iterator it = x.begin() ; it != x.end() ; ++it)
    res += *it;

  const int N = x.size();
  return res/N;
}
#endif

View File

@@ -160,26 +160,21 @@
#pragma push_macro("max")
#undef max
#if defined(USE_DX_INTEROP)
#include <CL/cl_d3d10.h>
#include <CL/cl_dx9_media_sharing.h>
#endif
#endif // _WIN32
#if defined(__APPLE__) || defined(__MACOSX)
#include <OpenCL/opencl.h>
#include <libkern/OSAtomic.h>
#else
#include <CL/opencl.h>
#endif // !__APPLE__
//
#if defined(USE_CL_DEVICE_FISSION)
#include <CL/cl_ext.h>
#endif
#if defined(__APPLE__) || defined(__MACOSX)
#include <OpenGL/OpenGL.h>
#include <OpenCL/opencl.h>
#include <libkern/OSAtomic.h>
#else
#include <GL/gl.h>
#include <CL/opencl.h>
#endif // !__APPLE__
// To avoid accidentally taking ownership of core OpenCL types
// such as cl_kernel constructors are made explicit
// under OpenCL 1.2
@@ -367,9 +362,6 @@ static inline cl_int errHandler (cl_int err, const char * errStr = NULL)
#define __CREATE_BUFFER_ERR __ERR_STR(clCreateBuffer)
#define __COPY_ERR __ERR_STR(cl::copy)
#define __CREATE_SUBBUFFER_ERR __ERR_STR(clCreateSubBuffer)
#define __CREATE_GL_BUFFER_ERR __ERR_STR(clCreateFromGLBuffer)
#define __CREATE_GL_RENDER_BUFFER_ERR __ERR_STR(clCreateFromGLBuffer)
#define __GET_GL_OBJECT_INFO_ERR __ERR_STR(clGetGLObjectInfo)
#if defined(CL_VERSION_1_2)
#define __CREATE_IMAGE_ERR __ERR_STR(clCreateImage)
#define __CREATE_GL_TEXTURE_ERR __ERR_STR(clCreateFromGLTexture)
@@ -3219,266 +3211,6 @@ public:
#endif
};
#if defined (USE_DX_INTEROP)
/*! \brief Class interface for creating OpenCL buffers from ID3D10Buffer's.
*
* This is provided to facilitate interoperability with Direct3D.
*
* See Memory for details about copy semantics, etc.
*
* The creation entry point is an extension function: it is reached through a
* function pointer that is looked up inside the constructor rather than
* called directly.
*
* \see Memory
*/
class BufferD3D10 : public Buffer
{
public:
//! \brief Signature of the clCreateFromD3D10BufferKHR extension function.
typedef CL_API_ENTRY cl_mem (CL_API_CALL *PFN_clCreateFromD3D10BufferKHR)(
cl_context context, cl_mem_flags flags, ID3D10Buffer* buffer,
cl_int* errcode_ret);
/*! \brief Constructs a BufferD3D10, in a specified context, from a
* given ID3D10Buffer.
*
* Wraps clCreateFromD3D10BufferKHR().
*
* The status returned by the extension call is passed to
* detail::errHandler() and, when \a err is non-NULL, stored in \a *err.
*/
BufferD3D10(
const Context& context,
cl_mem_flags flags,
ID3D10Buffer* bufobj,
cl_int * err = NULL)
{
// Function pointer shared by all calls; starts NULL and is presumably
// filled in by the __INIT_CL_EXT_FCN_PTR* macros below (their expansion
// is not visible here - TODO confirm).
static PFN_clCreateFromD3D10BufferKHR pfn_clCreateFromD3D10BufferKHR = NULL;
#if defined(CL_VERSION_1_2)
// Scan the context properties for the CL_CONTEXT_PLATFORM key; the value
// that follows it is taken as the platform.
vector<cl_context_properties> props = context.getInfo<CL_CONTEXT_PROPERTIES>();
// NOTE(review): no type named cl_platform is declared in the visible code
// (the D3D10 enqueue wrappers use cl_platform_id) - confirm this branch
// compiles when USE_DX_INTEROP and CL_VERSION_1_2 are both defined.
cl_platform platform = -1;
// NOTE(review): signed/unsigned comparison, and props[i+1] is read without
// checking that i+1 is inside the vector.
for( int i = 0; i < props.size(); ++i ) {
if( props[i] == CL_CONTEXT_PLATFORM ) {
platform = props[i+1];
}
}
__INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clCreateFromD3D10BufferKHR);
#endif
// NOTE(review): this is a second independent #if, not an #else/#elif, so
// both initialisation macros are emitted whenever both version macros are
// defined - confirm that is intended.
#if defined(CL_VERSION_1_1)
__INIT_CL_EXT_FCN_PTR(clCreateFromD3D10BufferKHR);
#endif
cl_int error;
object_ = pfn_clCreateFromD3D10BufferKHR(
context(),
flags,
bufobj,
&error);
// NOTE(review): the GL buffer error string is reused for the D3D10 call.
detail::errHandler(error, __CREATE_GL_BUFFER_ERR);
if (err != NULL) {
*err = error;
}
}
//! \brief Default constructor - initializes to NULL.
BufferD3D10() : Buffer() { }
/*! \brief Copy constructor - performs shallow copy.
*
* See Memory for further details.
*/
BufferD3D10(const BufferD3D10& buffer) : Buffer(buffer) { }
/*! \brief Constructor from cl_mem - takes ownership.
*
* See Memory for further details.
*/
__CL_EXPLICIT_CONSTRUCTORS BufferD3D10(const cl_mem& buffer) : Buffer(buffer) { }
/*! \brief Assignment from BufferD3D10 - performs shallow copy.
*
* Self-assignment is detected and skipped.
*
* See Memory for further details.
*/
BufferD3D10& operator = (const BufferD3D10& rhs)
{
if (this != &rhs) {
Buffer::operator=(rhs);
}
return *this;
}
/*! \brief Assignment from cl_mem - performs shallow copy.
*
* See Memory for further details.
*/
BufferD3D10& operator = (const cl_mem& rhs)
{
Buffer::operator=(rhs);
return *this;
}
};
#endif
/*! \brief Class interface for GL Buffer Memory Objects.
*
* This is provided to facilitate interoperability with OpenGL.
*
* See Memory for details about copy semantics, etc.
*
* \see Memory
*/
class BufferGL : public Buffer
{
public:
/*! \brief Constructs a BufferGL in a specified context, from a given
* GL buffer.
*
* Wraps clCreateFromGLBuffer().
*/
BufferGL(
const Context& context,
cl_mem_flags flags,
GLuint bufobj,
cl_int * err = NULL)
{
cl_int error;
object_ = ::clCreateFromGLBuffer(
context(),
flags,
bufobj,
&error);
detail::errHandler(error, __CREATE_GL_BUFFER_ERR);
if (err != NULL) {
*err = error;
}
}
//! \brief Default constructor - initializes to NULL.
BufferGL() : Buffer() { }
/*! \brief Copy constructor - performs shallow copy.
*
* See Memory for further details.
*/
BufferGL(const BufferGL& buffer) : Buffer(buffer) { }
/*! \brief Constructor from cl_mem - takes ownership.
*
* See Memory for further details.
*/
__CL_EXPLICIT_CONSTRUCTORS BufferGL(const cl_mem& buffer) : Buffer(buffer) { }
/*! \brief Assignment from BufferGL - performs shallow copy.
*
* See Memory for further details.
*/
BufferGL& operator = (const BufferGL& rhs)
{
if (this != &rhs) {
Buffer::operator=(rhs);
}
return *this;
}
/*! \brief Assignment from cl_mem - performs shallow copy.
*
* See Memory for further details.
*/
BufferGL& operator = (const cl_mem& rhs)
{
Buffer::operator=(rhs);
return *this;
}
//! \brief Wrapper for clGetGLObjectInfo().
cl_int getObjectInfo(
cl_gl_object_type *type,
GLuint * gl_object_name)
{
return detail::errHandler(
::clGetGLObjectInfo(object_,type,gl_object_name),
__GET_GL_OBJECT_INFO_ERR);
}
};
/*! \brief Class interface for GL Render Buffer Memory Objects.
 *
 *  This is provided to facilitate interoperability with OpenGL.
 *
 *  See Memory for details about copy semantics, etc.
 *
 *  \see Memory
 */
class BufferRenderGL : public Buffer
{
public:
    /*! \brief Constructs a BufferRenderGL in a specified context, from a given
     *  GL Renderbuffer.
     *
     *  Wraps clCreateFromGLRenderbuffer(). The resulting status is handed to
     *  detail::errHandler() and, when \a err is non-NULL, written to \a *err.
     */
    BufferRenderGL(
        const Context& context,
        cl_mem_flags flags,
        GLuint bufobj,
        cl_int * err = NULL)
    {
        cl_int error;
        object_ = ::clCreateFromGLRenderbuffer(
            context(),
            flags,
            bufobj,
            &error);

        detail::errHandler(error, __CREATE_GL_RENDER_BUFFER_ERR);
        if (err != NULL) {
            *err = error;
        }
    }

    //! \brief Default constructor - initializes to NULL.
    BufferRenderGL() : Buffer() { }

    /*! \brief Copy constructor - performs shallow copy.
     *
     *  Previously only a constructor taking a BufferGL was declared under this
     *  description, so copies went through the implicitly generated copy
     *  constructor. This spells the copy constructor out; like the implicit
     *  one it copies through Buffer's copy constructor.
     *
     *  See Memory for further details.
     */
    BufferRenderGL(const BufferRenderGL& buffer) : Buffer(buffer) { }

    /*! \brief Construction from a BufferGL - performs shallow copy.
     *
     *  Kept so that existing code constructing a BufferRenderGL from a
     *  BufferGL continues to compile.
     *
     *  See Memory for further details.
     */
    BufferRenderGL(const BufferGL& buffer) : Buffer(buffer) { }

    /*! \brief Constructor from cl_mem - takes ownership.
     *
     *  See Memory for further details.
     */
    __CL_EXPLICIT_CONSTRUCTORS BufferRenderGL(const cl_mem& buffer) : Buffer(buffer) { }

    /*! \brief Assignment from BufferRenderGL - performs shallow copy.
     *
     *  Self-assignment is detected and skipped.
     *
     *  See Memory for further details.
     */
    BufferRenderGL& operator = (const BufferRenderGL& rhs)
    {
        if (this != &rhs) {
            Buffer::operator=(rhs);
        }
        return *this;
    }

    /*! \brief Assignment from cl_mem - performs shallow copy.
     *
     *  See Memory for further details.
     */
    BufferRenderGL& operator = (const cl_mem& rhs)
    {
        Buffer::operator=(rhs);
        return *this;
    }

    //! \brief Wrapper for clGetGLObjectInfo().
    cl_int getObjectInfo(
        cl_gl_object_type *type,
        GLuint * gl_object_name)
    {
        return detail::errHandler(
            ::clGetGLObjectInfo(object_,type,gl_object_name),
            __GET_GL_OBJECT_INFO_ERR);
    }
};
/*! \brief C++ base class for Image Memory objects.
*
@@ -3869,86 +3601,6 @@ public:
};
#if !defined(CL_VERSION_1_2)
/*! \brief 2D image memory object created from an OpenGL texture.
 *
 *  Exists for OpenCL/OpenGL interoperability.
 *
 *  Copy and ownership semantics are the ones documented for Memory.
 *
 *  \see Memory
 *  \note Deprecated for OpenCL 1.2. Please use ImageGL instead.
 */
class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED Image2DGL CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED : public Image2D
{
public:
    //! \brief Default constructor - the wrapped handle is initialized to NULL.
    Image2DGL() : Image2D() { }

    /*! \brief Creates an image in \a context from the GL texture \a texobj.
     *
     *  Wraps clCreateFromGLTexture2D(). The resulting status is handed to
     *  detail::errHandler() and, when \a err is non-NULL, written to \a *err.
     */
    Image2DGL(
        const Context& context,
        cl_mem_flags flags,
        GLenum target,
        GLint  miplevel,
        GLuint texobj,
        cl_int * err = NULL)
    {
        cl_int status;
        object_ = ::clCreateFromGLTexture2D(
            context(), flags, target, miplevel, texobj, &status);

        detail::errHandler(status, __CREATE_GL_TEXTURE_2D_ERR);
        if (err != NULL) {
            *err = status;
        }
    }

    /*! \brief Copy constructor - shallow copy.
     *
     *  See Memory for further details.
     */
    Image2DGL(const Image2DGL& image) : Image2D(image) { }

    /*! \brief Constructor from a raw cl_mem - takes ownership.
     *
     *  See Memory for further details.
     */
    __CL_EXPLICIT_CONSTRUCTORS Image2DGL(const cl_mem& image) : Image2D(image) { }

    /*! \brief Assignment from another Image2DGL - shallow copy.
     *
     *  Assigning an object to itself is a no-op.
     *  See Memory for further details.
     */
    Image2DGL& operator = (const Image2DGL& rhs)
    {
        if (this == &rhs) {
            return *this;
        }
        Image2D::operator=(rhs);
        return *this;
    }

    /*! \brief Assignment from a raw cl_mem - shallow copy.
     *
     *  See Memory for further details.
     */
    Image2DGL& operator = (const cl_mem& rhs)
    {
        Image2D::operator=(rhs);
        return *this;
    }
};
#endif // #if !defined(CL_VERSION_1_2)
#if defined(CL_VERSION_1_2)
/*! \class Image2DArray
@@ -4138,246 +3790,6 @@ public:
}
};
#if !defined(CL_VERSION_1_2)
/*! \brief Class interface for GL 3D Image Memory objects.
*
* This is provided to facilitate interoperability with OpenGL.
*
* See Memory for details about copy semantics, etc.
*
* \see Memory
*/
class Image3DGL : public Image3D
{
public:
/*! \brief Constructs an Image3DGL in a specified context, from a given
* GL Texture.
*
* Wraps clCreateFromGLTexture3D().
*/
Image3DGL(
const Context& context,
cl_mem_flags flags,
GLenum target,
GLint miplevel,
GLuint texobj,
cl_int * err = NULL)
{
cl_int error;
object_ = ::clCreateFromGLTexture3D(
context(),
flags,
target,
miplevel,
texobj,
&error);
detail::errHandler(error, __CREATE_GL_TEXTURE_3D_ERR);
if (err != NULL) {
*err = error;
}
}
//! \brief Default constructor - initializes to NULL.
Image3DGL() : Image3D() { }
/*! \brief Copy constructor - performs shallow copy.
*
* See Memory for further details.
*/
Image3DGL(const Image3DGL& image) : Image3D(image) { }
/*! \brief Constructor from cl_mem - takes ownership.
*
* See Memory for further details.
*/
__CL_EXPLICIT_CONSTRUCTORS Image3DGL(const cl_mem& image) : Image3D(image) { }
/*! \brief Assignment from Image3DGL - performs shallow copy.
*
* See Memory for further details.
*/
Image3DGL& operator = (const Image3DGL& rhs)
{
if (this != &rhs) {
Image3D::operator=(rhs);
}
return *this;
}
/*! \brief Assignment from cl_mem - performs shallow copy.
*
* See Memory for further details.
*/
Image3DGL& operator = (const cl_mem& rhs)
{
Image3D::operator=(rhs);
return *this;
}
};
#endif // #if !defined(CL_VERSION_1_2)
#if defined(CL_VERSION_1_2)
/*! \class ImageGL
* \brief general image interface for GL interop.
* We abstract the 2D and 3D GL images into a single instance here
* that wraps all GL sourced images on the grounds that setup information
* was performed by OpenCL anyway.
*/
class ImageGL : public Image
{
public:
ImageGL(
const Context& context,
cl_mem_flags flags,
GLenum target,
GLint miplevel,
GLuint texobj,
cl_int * err = NULL)
{
cl_int error;
object_ = ::clCreateFromGLTexture(
context(),
flags,
target,
miplevel,
texobj,
&error);
detail::errHandler(error, __CREATE_GL_TEXTURE_ERR);
if (err != NULL) {
*err = error;
}
}
ImageGL() : Image() { }
ImageGL(const ImageGL& image) : Image(image) { }
__CL_EXPLICIT_CONSTRUCTORS ImageGL(const cl_mem& image) : Image(image) { }
ImageGL& operator = (const ImageGL& rhs)
{
if (this != &rhs) {
Image::operator=(rhs);
}
return *this;
}
ImageGL& operator = (const cl_mem& rhs)
{
Image::operator=(rhs);
return *this;
}
};
#endif // #if defined(CL_VERSION_1_2)
/*! \brief Wrapper class for cl_sampler.
 *
 *  \note Copies are shallow: a copy refers to the same underlying cl_sampler
 *        as the object it was copied from. For details, see
 *        clRetainSampler() and clReleaseSampler().
 *
 *  \see cl_sampler
 */
class Sampler : public detail::Wrapper<cl_sampler>
{
public:
    //! \brief Default constructor - initializes to NULL.
    Sampler() { }

    /*! \brief Creates a sampler in \a context.
     *
     *  Wraps clCreateSampler(). The resulting status is handed to
     *  detail::errHandler() and, when \a err is non-NULL, written to \a *err.
     */
    Sampler(
        const Context& context,
        cl_bool normalized_coords,
        cl_addressing_mode addressing_mode,
        cl_filter_mode filter_mode,
        cl_int* err = NULL)
    {
        cl_int status;
        object_ = ::clCreateSampler(
            context(), normalized_coords, addressing_mode, filter_mode, &status);

        detail::errHandler(status, __CREATE_SAMPLER_ERR);
        if (err != NULL) {
            *err = status;
        }
    }

    /*! \brief Copy constructor - shallow copy.
     *
     *  This calls clRetainSampler() on the parameter's cl_sampler.
     */
    Sampler(const Sampler& sampler) : detail::Wrapper<cl_type>(sampler) { }

    /*! \brief Constructor from a raw cl_sampler - takes ownership.
     *
     *  Ownership of one refcount on the cl_sampler is transferred into the
     *  new Sampler object.
     */
    Sampler(const cl_sampler& sampler) : detail::Wrapper<cl_type>(sampler) { }

    /*! \brief Destructor.
     *
     *  This calls clReleaseSampler() on the value held by this instance.
     */
    ~Sampler() { }

    /*! \brief Assignment from another Sampler.
     *
     *  This calls clRetainSampler() on the parameter and clReleaseSampler()
     *  on the value previously held. Self-assignment is a no-op.
     */
    Sampler& operator = (const Sampler& rhs)
    {
        if (this == &rhs) {
            return *this;
        }
        detail::Wrapper<cl_type>::operator=(rhs);
        return *this;
    }

    /*! \brief Assignment from a raw cl_sampler - takes ownership.
     *
     *  Ownership of one refcount on rhs is transferred, and
     *  clReleaseSampler() is called on the value previously held.
     */
    Sampler& operator = (const cl_sampler& rhs)
    {
        detail::Wrapper<cl_type>::operator=(rhs);
        return *this;
    }

    //! \brief Wrapper for clGetSamplerInfo().
    template <typename T>
    cl_int getInfo(cl_sampler_info name, T* param) const
    {
        const cl_int status =
            detail::getInfo(&::clGetSamplerInfo, object_, name, param);
        return detail::errHandler(status, __GET_SAMPLER_INFO_ERR);
    }

    //! \brief Wrapper for clGetSamplerInfo() that returns by value.
    template <cl_int name> typename
    detail::param_traits<detail::cl_sampler_info, name>::param_type
    getInfo(cl_int* err = NULL) const
    {
        typedef typename detail::param_traits<
            detail::cl_sampler_info, name>::param_type value_type;

        value_type value;
        const cl_int status = getInfo(name, &value);
        if (err != NULL) {
            *err = status;
        }
        return value;
    }
};
class Program;
class CommandQueue;
class Kernel;
@@ -6012,126 +5424,8 @@ public:
}
#endif // #if defined(CL_VERSION_1_1)
/*! \brief Wrapper for clEnqueueAcquireGLObjects().
 *
 *  \param mem_objects  objects to acquire; NULL or an empty vector passes a
 *                      count of 0 together with a NULL pointer
 *  \param events       events to wait for; NULL or an empty vector passes an
 *                      empty wait list
 *  \param event        when non-NULL and the call succeeds, receives the
 *                      event created for this command
 *  \return             the status after it has gone through detail::errHandler()
 */
cl_int enqueueAcquireGLObjects(
    const VECTOR_CLASS<Memory>* mem_objects = NULL,
    const VECTOR_CLASS<Event>* events = NULL,
    Event* event = NULL) const
{
    // front() must not be evaluated on an empty vector, and a zero count has
    // to be paired with a NULL pointer, so both lists get the same guard.
    const bool has_objects = (mem_objects != NULL && mem_objects->size() > 0);
    const bool has_events = (events != NULL && events->size() > 0);

    cl_event tmp;
    cl_int err = detail::errHandler(
        ::clEnqueueAcquireGLObjects(
            object_,
            has_objects ? (cl_uint) mem_objects->size() : 0,
            has_objects ? (const cl_mem *) &mem_objects->front() : NULL,
            has_events ? (cl_uint) events->size() : 0,
            has_events ? (cl_event*) &events->front() : NULL,
            (event != NULL) ? &tmp : NULL),
        __ENQUEUE_ACQUIRE_GL_ERR);

    if (event != NULL && err == CL_SUCCESS)
        *event = tmp;

    return err;
}
/*! \brief Wrapper for clEnqueueReleaseGLObjects().
 *
 *  \param mem_objects  objects to release; NULL or an empty vector passes a
 *                      count of 0 together with a NULL pointer
 *  \param events       events to wait for; NULL or an empty vector passes an
 *                      empty wait list
 *  \param event        when non-NULL and the call succeeds, receives the
 *                      event created for this command
 *  \return             the status after it has gone through detail::errHandler()
 */
cl_int enqueueReleaseGLObjects(
    const VECTOR_CLASS<Memory>* mem_objects = NULL,
    const VECTOR_CLASS<Event>* events = NULL,
    Event* event = NULL) const
{
    // front() must not be evaluated on an empty vector, and a zero count has
    // to be paired with a NULL pointer, so both lists get the same guard.
    const bool has_objects = (mem_objects != NULL && mem_objects->size() > 0);
    const bool has_events = (events != NULL && events->size() > 0);

    cl_event tmp;
    cl_int err = detail::errHandler(
        ::clEnqueueReleaseGLObjects(
            object_,
            has_objects ? (cl_uint) mem_objects->size() : 0,
            has_objects ? (const cl_mem *) &mem_objects->front() : NULL,
            has_events ? (cl_uint) events->size() : 0,
            has_events ? (cl_event*) &events->front() : NULL,
            (event != NULL) ? &tmp : NULL),
        __ENQUEUE_RELEASE_GL_ERR);

    if (event != NULL && err == CL_SUCCESS)
        *event = tmp;

    return err;
}
#if defined (USE_DX_INTEROP)
typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueAcquireD3D10ObjectsKHR)(
cl_command_queue command_queue, cl_uint num_objects,
const cl_mem* mem_objects, cl_uint num_events_in_wait_list,
const cl_event* event_wait_list, cl_event* event);
typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)(
cl_command_queue command_queue, cl_uint num_objects,
const cl_mem* mem_objects, cl_uint num_events_in_wait_list,
const cl_event* event_wait_list, cl_event* event);
/*! \brief Wrapper for the clEnqueueAcquireD3D10ObjectsKHR() extension call.
 *
 *  The extension entry point is reached through a function pointer that is
 *  looked up on first use.
 *
 *  \param mem_objects  objects to acquire; NULL or an empty vector passes a
 *                      count of 0 together with a NULL pointer
 *  \param events       events to wait for; NULL or an empty vector passes an
 *                      empty wait list
 *  \param event        when non-NULL and the call succeeds, receives the
 *                      event created for this command
 *  \return             the status after it has gone through detail::errHandler()
 */
cl_int enqueueAcquireD3D10Objects(
    const VECTOR_CLASS<Memory>* mem_objects = NULL,
    const VECTOR_CLASS<Event>* events = NULL,
    Event* event = NULL) const
{
    static PFN_clEnqueueAcquireD3D10ObjectsKHR pfn_clEnqueueAcquireD3D10ObjectsKHR = NULL;
#if defined(CL_VERSION_1_2)
    // NOTE(review): `context` is not used after this line - confirm whether
    // the query is still needed.
    cl_context context = getInfo<CL_QUEUE_CONTEXT>();
    cl::Device device(getInfo<CL_QUEUE_DEVICE>());
    cl_platform_id platform = device.getInfo<CL_DEVICE_PLATFORM>();
    __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clEnqueueAcquireD3D10ObjectsKHR);
#endif
#if defined(CL_VERSION_1_1)
    __INIT_CL_EXT_FCN_PTR(clEnqueueAcquireD3D10ObjectsKHR);
#endif

    // Same guards as the GL acquire/release wrappers: never call front() on
    // an empty vector and never pair a zero count with a non-NULL pointer.
    const bool has_objects = (mem_objects != NULL && mem_objects->size() > 0);
    const bool has_events = (events != NULL && events->size() > 0);

    cl_event tmp;
    cl_int err = detail::errHandler(
        pfn_clEnqueueAcquireD3D10ObjectsKHR(
            object_,
            has_objects ? (cl_uint) mem_objects->size() : 0,
            has_objects ? (const cl_mem *) &mem_objects->front() : NULL,
            has_events ? (cl_uint) events->size() : 0,
            has_events ? (cl_event*) &events->front() : NULL,
            (event != NULL) ? &tmp : NULL),
        __ENQUEUE_ACQUIRE_GL_ERR);

    if (event != NULL && err == CL_SUCCESS)
        *event = tmp;

    return err;
}
/*! \brief Wrapper for the clEnqueueReleaseD3D10ObjectsKHR() extension call.
 *
 *  The extension entry point is reached through a function pointer that is
 *  looked up on first use.
 *
 *  \param mem_objects  objects to release; NULL or an empty vector passes a
 *                      count of 0 together with a NULL pointer
 *  \param events       events to wait for; NULL or an empty vector passes an
 *                      empty wait list
 *  \param event        when non-NULL and the call succeeds, receives the
 *                      event created for this command
 *  \return             the status after it has gone through detail::errHandler()
 */
cl_int enqueueReleaseD3D10Objects(
    const VECTOR_CLASS<Memory>* mem_objects = NULL,
    const VECTOR_CLASS<Event>* events = NULL,
    Event* event = NULL) const
{
    static PFN_clEnqueueReleaseD3D10ObjectsKHR pfn_clEnqueueReleaseD3D10ObjectsKHR = NULL;
#if defined(CL_VERSION_1_2)
    // NOTE(review): `context` is not used after this line - confirm whether
    // the query is still needed.
    cl_context context = getInfo<CL_QUEUE_CONTEXT>();
    cl::Device device(getInfo<CL_QUEUE_DEVICE>());
    cl_platform_id platform = device.getInfo<CL_DEVICE_PLATFORM>();
    __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clEnqueueReleaseD3D10ObjectsKHR);
#endif // #if defined(CL_VERSION_1_2)
#if defined(CL_VERSION_1_1)
    __INIT_CL_EXT_FCN_PTR(clEnqueueReleaseD3D10ObjectsKHR);
#endif // #if defined(CL_VERSION_1_1)

    // Same guards as the GL acquire/release wrappers: never call front() on
    // an empty vector and never pair a zero count with a non-NULL pointer.
    const bool has_objects = (mem_objects != NULL && mem_objects->size() > 0);
    const bool has_events = (events != NULL && events->size() > 0);

    cl_event tmp;
    cl_int err = detail::errHandler(
        pfn_clEnqueueReleaseD3D10ObjectsKHR(
            object_,
            has_objects ? (cl_uint) mem_objects->size() : 0,
            has_objects ? (const cl_mem *) &mem_objects->front() : NULL,
            has_events ? (cl_uint) events->size() : 0,
            has_events ? (cl_event*) &events->front() : NULL,
            (event != NULL) ? &tmp : NULL),
        __ENQUEUE_RELEASE_GL_ERR);

    if (event != NULL && err == CL_SUCCESS)
        *event = tmp;

    return err;
}
#endif
/**
* Deprecated APIs for 1.2

View File

@@ -2,7 +2,6 @@
#define ATIDLAS_ARRAY_H_
#include <iostream>
#include <type_traits>
#include <CL/cl.hpp>
#include "atidlas/types.h"
#include "atidlas/cl_ext/backend.h"
@@ -17,8 +16,7 @@ class scalar;
class array: public array_base
{
friend array reshape(array const &, int_t, int_t);
/** @brief Compile-time predicate: value is non-zero exactly when T is array or array_expression. */
template<class T>
struct is_array { enum{ value = std::is_same<T, array>::value || std::is_same<T, array_expression>::value}; };
public:
//1D Constructors
array(int_t size1, numeric_type dtype, cl::Context context = cl_ext::default_context());

View File

@@ -20,7 +20,7 @@ enum leaf_t
class mapped_object;
typedef std::pair<int_t, leaf_t> mapping_key;
typedef std::map<mapping_key, std::shared_ptr<mapped_object> > mapping_type;
typedef std::map<mapping_key, tools::shared_ptr<mapped_object> > mapping_type;
/** @brief Mapped Object
*

View File

@@ -4,11 +4,11 @@
#include <list>
#include <set>
#include <CL/cl.hpp>
#include "atidlas/types.h"
#include "atidlas/backend/parse.h"
#include "atidlas/backend/stream.h"
#include <CL/cl.hpp>
#include "atidlas/cl_ext/lazy_compiler.h"
#include "atidlas/symbolic/expression.h"
@@ -75,15 +75,15 @@ protected:
/** @brief Accessor for the numeric type */
numeric_type get_numeric_type(atidlas::array_expression const * array_expression, int_t root_idx) const;
/** @brief Creates a binary leaf */
template<class T> std::shared_ptr<mapped_object> binary_leaf(atidlas::array_expression const * array_expression, int_t root_idx, mapping_type const * mapping) const;
template<class T> tools::shared_ptr<mapped_object> binary_leaf(atidlas::array_expression const * array_expression, int_t root_idx, mapping_type const * mapping) const;
/** @brief Creates a value scalar mapping */
std::shared_ptr<mapped_object> create(numeric_type dtype, values_holder) const;
tools::shared_ptr<mapped_object> create(numeric_type dtype, values_holder) const;
/** @brief Creates a vector mapping */
std::shared_ptr<mapped_object> create(array_infos const &) const;
tools::shared_ptr<mapped_object> create(array_infos const &) const;
/** @brief Creates a tuple mapping */
std::shared_ptr<mapped_object> create(repeat_infos const &) const;
tools::shared_ptr<mapped_object> create(repeat_infos const &) const;
/** @brief Creates a mapping */
std::shared_ptr<mapped_object> create(lhs_rhs_element const &) const;
tools::shared_ptr<mapped_object> create(lhs_rhs_element const &) const;
public:
map_functor(symbolic_binder & binder, mapping_type & mapping);
/** @brief Functor for traversing the tree */
@@ -143,7 +143,7 @@ protected:
static bool is_reduction(array_expression::node const & node);
static bool is_index_reduction(op_element const & op);
std::shared_ptr<symbolic_binder> make_binder();
tools::shared_ptr<symbolic_binder> make_binder();
static std::string vstore(unsigned int simd_width, std::string const & value, std::string const & offset, std::string const & ptr);
static std::string vload(unsigned int simd_width, std::string const & offset, std::string const & ptr);
static std::string append_width(std::string const & str, unsigned int width);
@@ -164,7 +164,7 @@ public:
virtual int check_invalid(expressions_tuple const & expressions, cl::Device const & device) const = 0;
virtual void enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs,
unsigned int label, controller<expressions_tuple> const & expressions) = 0;
virtual std::shared_ptr<base> clone() const = 0;
virtual tools::shared_ptr<base> clone() const = 0;
private:
binding_policy_t binding_policy_;
};
@@ -180,7 +180,7 @@ public:
base_impl(parameters_type const & parameters, binding_policy_t binding_policy);
int_t local_size_0() const;
int_t local_size_1() const;
std::shared_ptr<base> clone() const;
tools::shared_ptr<base> clone() const;
/** @brief returns whether or not the profile has undefined behavior on particular device */
int check_invalid(expressions_tuple const & expressions, cl::Device const & device) const;
protected:

View File

@@ -16,7 +16,7 @@ namespace atidlas
class model
{
typedef std::vector< std::shared_ptr<base> > templates_container;
typedef std::vector< tools::shared_ptr<base> > templates_container;
private:
std::string define_extension(std::string const & extensions, std::string const & ext);
@@ -24,21 +24,21 @@ namespace atidlas
std::vector<cl_ext::lazy_compiler>& init(controller<expressions_tuple> const &);
public:
model(predictors::random_forest const &, std::vector< std::shared_ptr<base> > const &, cl::CommandQueue &);
model(std::vector< std::shared_ptr<base> > const &, cl::CommandQueue &);
model(predictors::random_forest const &, std::vector< tools::shared_ptr<base> > const &, cl::CommandQueue &);
model(std::vector< tools::shared_ptr<base> > const &, cl::CommandQueue &);
model(base const &, cl::CommandQueue &);
void execute(controller<expressions_tuple> const &);
templates_container const & templates() const;
private:
templates_container templates_;
std::shared_ptr<predictors::random_forest> predictor_;
tools::shared_ptr<predictors::random_forest> predictor_;
std::map<std::vector<int_t>, int> hardcoded_;
std::map<cl_context, std::map<std::string, std::vector<cl_ext::lazy_compiler> > > lazy_programs_;
cl::CommandQueue & queue_;
};
typedef std::map<std::pair<expression_type, numeric_type>, std::shared_ptr<model> > model_map_t;
typedef std::map<std::pair<expression_type, numeric_type>, tools::shared_ptr<model> > model_map_t;
model_map_t init_models(cl::CommandQueue const & queue);
model_map_t& get_model_map(cl::CommandQueue & queue);

View File

@@ -6,7 +6,8 @@
#include <CL/cl.hpp>
#include "atidlas/types.h"
#include "atidlas/value_scalar.h"
#include <memory>
#include "atidlas/tools/shared_ptr.hpp"
#include <iostream>
namespace atidlas
{
@@ -221,21 +222,14 @@ class operation_cache
};
public:
void push_back(cl::CommandQueue & queue, cl::Kernel const & kernel, cl::NDRange const & offset, cl::NDRange const & global, cl::NDRange const & local, std::vector<cl::Event>* dependencies)
{ l_.push_back({queue, kernel, offset, global, local, dependencies}); }
void enqueue(std::list<cl::Event>* events = NULL)
{
for(infos & i : l_){
events->push_back(cl::Event());
i.queue.enqueueNDRangeKernel(i.kernel, i.offset, i.global, i.local, i.dependencies, &events->back());
}
}
void push_back(cl::CommandQueue & queue, cl::Kernel const & kernel, cl::NDRange const & offset, cl::NDRange const & global, cl::NDRange const & local, std::vector<cl::Event>* dependencies);
void enqueue(std::list<cl::Event>* events = NULL);
private:
std::list<infos> l_;
};
struct execution_options_type
{
execution_options_type(unsigned int _queue_id = 0, std::list<cl::Event>* _events = NULL, operation_cache* _cache = NULL, std::vector<cl::Event>* _dependencies = NULL) : queue_id(_queue_id), events(_events), cache(_cache), dependencies(_dependencies){}
@@ -299,9 +293,9 @@ controller<TYPE> control(TYPE const & x, execution_options_type const& execution
class expressions_tuple
{
private:
std::shared_ptr<array_expression> create(array_expression const & s);
tools::shared_ptr<array_expression> create(array_expression const & s);
public:
typedef std::list<std::shared_ptr<array_expression> > data_type;
typedef std::list<tools::shared_ptr<array_expression> > data_type;
enum order_type { SEQUENTIAL, INDEPENDENT };
expressions_tuple(array_expression const & s0);

View File

@@ -0,0 +1,162 @@
#ifndef ATIDLAS_TOOLS_SHARED_PTR_HPP
#define ATIDLAS_TOOLS_SHARED_PTR_HPP
/* =========================================================================
Copyright (c) 2010-2012, Institute for Microelectronics,
Institute for Analysis and Scientific Computing,
TU Wien.
Portions of this software are copyright by UChicago Argonne, LLC.
-----------------
ViennaCL - The Vienna Computing Library
-----------------
Project Head: Karl Rupp rupp@iue.tuwien.ac.at
(A list of authors and contributors can be found in the PDF manual)
License: MIT (X11), see file LICENSE in the base directory
============================================================================= */
/** @file tools/shared_ptr.hpp
@brief Implementation of a shared pointer class (cf. std::shared_ptr, boost::shared_ptr). Will be used until C++11 is widely available.
Contributed by Philippe Tillet.
*/
#include <cstdlib>
#include <algorithm>
namespace atidlas
{
namespace tools
{

namespace detail
{

  /** @brief Reference counting class for the shared_ptr implementation */
  class count
  {
  public:
    count(unsigned int val) : val_(val){ }
    void dec(){ --val_; }
    void inc(){ ++val_; }
    /** @brief Returns true once the counter has dropped to zero */
    bool is_null() const { return val_ == 0; }
    /** @brief Returns the current counter value */
    unsigned int val() const { return val_; }
  private:
    unsigned int val_;
  };

  /** @brief Interface for the reference counter inside the shared_ptr
  *
  * One aux object is shared by all shared_ptr instances that own the same pointee.
  * The counter starts at 1 (the shared_ptr that created the block).
  */
  struct aux
  {
    detail::count count;

    aux() :count(1) {}
    /** @brief Releases the managed object (type-erased call to the stored deleter) */
    virtual void destroy()=0;
    virtual ~aux() {}
  };

  /** @brief Implementation helper for the reference counting mechanism inside shared_ptr. */
  template<class U, class Deleter>
  struct auximpl: public detail::aux
  {
    U* p;
    Deleter d;

    auximpl(U* pu, Deleter x) :p(pu), d(x) {}
    virtual void destroy() { d(p); }
  };

  /** @brief Default deleter class for a pointer. The default is to just call 'delete' on the pointer. Provide your own implementations for 'delete[]' and 'free'. */
  template<class U>
  struct default_deleter
  {
    void operator()(U* p) const { delete p; }
  };

}

/** @brief Non-template base of shared_ptr holding the (possibly NULL) control block.
*
* The control block pointer lives here, and only here, so that count() always
* inspects the very same block that the derived shared_ptr manipulates.
*/
class shared_ptr_base
{
protected:
  detail::aux* pa;

  explicit shared_ptr_base(detail::aux* p) : pa(p) {}
public:
  shared_ptr_base() : pa(NULL) {}

  /** @brief Returns the number of owners of the managed object, or 0 for an empty pointer */
  unsigned int count() const { return pa ? pa->count.val() : 0; }
};

/** @brief A shared pointer class similar to boost::shared_ptr. Reimplemented in order to avoid a Boost-dependency. Will be replaced by std::shared_ptr as soon as C++11 is widely available. */
template<class T>
class shared_ptr : public shared_ptr_base
{
  // Needed so that the converting constructor can read pa/pt of shared_ptr<U>.
  template<class U>
  friend class shared_ptr;

  // NOTE: the control block is shared_ptr_base::pa; do not redeclare it here,
  // otherwise shared_ptr_base::count() would read a different (uninitialized) pointer.
  T* pt;

public:
  /** @brief Creates an empty pointer that owns nothing */
  shared_ptr() : shared_ptr_base(), pt(NULL) {}

  /** @brief Takes ownership of pu; d(pu) is invoked when the last owner goes away */
  template<class U, class Deleter>
  shared_ptr(U* pu, Deleter d) : shared_ptr_base(new detail::auximpl<U, Deleter>(pu, d)), pt(pu) {}

  /** @brief Takes ownership of pu; 'delete pu' is invoked when the last owner goes away */
  template<class U>
  explicit shared_ptr(U* pu) : shared_ptr_base(new detail::auximpl<U, detail::default_deleter<U> >(pu, detail::default_deleter<U>())), pt(pu) {}

  /** @brief Shares ownership with a pointer to a convertible type (e.g. derived-to-base) */
  template<class U>
  shared_ptr(const shared_ptr<U>& s) : shared_ptr_base(s.pa), pt(s.pt) { inc(); }

  /** @brief Shares ownership with s */
  shared_ptr(const shared_ptr& s) : shared_ptr_base(s.pa), pt(s.pt) { inc(); }

  ~shared_ptr() { dec(); }

  T* get() const { return pt; }
  T* operator->() const { return pt; }
  T& operator*() const { return *pt; }

  /** @brief Gives up ownership; the pointer is empty afterwards */
  void reset() { shared_ptr<T>().swap(*this); }
  /** @brief Gives up ownership of the current object and takes ownership of ptr */
  void reset(T * ptr) { shared_ptr<T>(ptr).swap(*this); }

  /** @brief Exchanges pointee and control block with other; no counter is touched */
  void swap(shared_ptr<T> & other)
  {
    std::swap(pt, other.pt);
    std::swap(pa, other.pa);
  }

  /** @brief Shares ownership with s, releasing the previously owned object.
  *
  * Implemented as copy-and-swap: the new reference is acquired before the old
  * one is dropped, so the assignment is safe for self-assignment and also when
  * s lives inside the object currently owned by *this.
  */
  shared_ptr& operator=(const shared_ptr& s)
  {
    shared_ptr<T>(s).swap(*this);
    return *this;
  }

  /** @brief Increments the owner count (no-op for an empty pointer) */
  void inc()
  {
    if (pa) pa->count.inc();
  }

  /** @brief Decrements the owner count; destroys pointee and control block when it reaches zero */
  void dec()
  {
    if (pa)
    {
      pa->count.dec();

      if (pa->count.is_null())
      {
        pa->destroy();
        delete pa;
        pa = NULL;
      }
    }
  }

};

}
}
#endif

View File

@@ -30,44 +30,44 @@ numeric_type base::map_functor::get_numeric_type(atidlas::array_expression const
/** @brief Binary leaf */
template<class T>
std::shared_ptr<mapped_object> base::map_functor::binary_leaf(atidlas::array_expression const * array_expression, int_t root_idx, mapping_type const * mapping) const
tools::shared_ptr<mapped_object> base::map_functor::binary_leaf(atidlas::array_expression const * array_expression, int_t root_idx, mapping_type const * mapping) const
{
return std::shared_ptr<mapped_object>(new T(numeric_type_to_string(array_expression->dtype()), binder_.get(NULL), mapped_object::node_info(mapping, array_expression, root_idx)));
return tools::shared_ptr<mapped_object>(new T(numeric_type_to_string(array_expression->dtype()), binder_.get(NULL), mapped_object::node_info(mapping, array_expression, root_idx)));
}
/** @brief Scalar mapping */
std::shared_ptr<mapped_object> base::map_functor::create(numeric_type dtype, values_holder) const
tools::shared_ptr<mapped_object> base::map_functor::create(numeric_type dtype, values_holder) const
{
std::string strdtype = numeric_type_to_string(dtype);
return std::shared_ptr<mapped_object>(new mapped_host_scalar(strdtype, binder_.get(NULL)));
return tools::shared_ptr<mapped_object>(new mapped_host_scalar(strdtype, binder_.get(NULL)));
}
/** @brief Vector mapping */
std::shared_ptr<mapped_object> base::map_functor::create(array_infos const & a) const
tools::shared_ptr<mapped_object> base::map_functor::create(array_infos const & a) const
{
std::string dtype = numeric_type_to_string(a.dtype);
unsigned int id = binder_.get(a.data);
//Scalar
if(a.shape1==1 && a.shape2==1)
return std::shared_ptr<mapped_object>(new mapped_array(dtype, id, 's'));
return tools::shared_ptr<mapped_object>(new mapped_array(dtype, id, 's'));
//Column vector
else if(a.shape1>1 && a.shape2==1)
return std::shared_ptr<mapped_object>(new mapped_array(dtype, id, 'c'));
return tools::shared_ptr<mapped_object>(new mapped_array(dtype, id, 'c'));
//Row vector
else if(a.shape1==1 && a.shape2>1)
return std::shared_ptr<mapped_object>(new mapped_array(dtype, id, 'r'));
return tools::shared_ptr<mapped_object>(new mapped_array(dtype, id, 'r'));
//Matrix
else
return std::shared_ptr<mapped_object>(new mapped_array(dtype, id, 'm'));
return tools::shared_ptr<mapped_object>(new mapped_array(dtype, id, 'm'));
}
std::shared_ptr<mapped_object> base::map_functor::create(repeat_infos const &) const
tools::shared_ptr<mapped_object> base::map_functor::create(repeat_infos const &) const
{
//TODO: Make it less specific!
return std::shared_ptr<mapped_object>(new mapped_tuple("int",binder_.get(NULL),4));
return tools::shared_ptr<mapped_object>(new mapped_tuple("int",binder_.get(NULL),4));
}
std::shared_ptr<mapped_object> base::map_functor::create(lhs_rhs_element const & lhs_rhs) const
tools::shared_ptr<mapped_object> base::map_functor::create(lhs_rhs_element const & lhs_rhs) const
{
switch(lhs_rhs.type_family)
{
@@ -111,7 +111,7 @@ void base::map_functor::operator()(atidlas::array_expression const & array_expre
else if (root_node.op.type == OPERATOR_OUTER_PROD_TYPE)
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_outer>(&array_expression, root_idx, &mapping_)));
else if (detail::is_cast(root_node.op))
mapping_.insert(mapping_type::value_type(key, std::shared_ptr<mapped_object>(new mapped_cast(root_node.op.type, binder_.get(NULL)))));
mapping_.insert(mapping_type::value_type(key, tools::shared_ptr<mapped_object>(new mapped_cast(root_node.op.type, binder_.get(NULL)))));
}
}
@@ -280,7 +280,7 @@ std::string base::generate_arguments(std::string const & data_type, std::vector<
void base::set_arguments(expressions_tuple const & expressions, cl::Kernel & kernel, unsigned int & current_arg)
{
std::shared_ptr<symbolic_binder> binder = make_binder();
tools::shared_ptr<symbolic_binder> binder = make_binder();
for (const auto & elem : expressions.data())
traverse(*elem, (elem)->root(), set_arguments_functor(*binder, current_arg, kernel), true);
}
@@ -478,12 +478,12 @@ unsigned int base::align(unsigned int to_round, unsigned int base)
return (to_round + base - 1)/base * base;
}
std::shared_ptr<symbolic_binder> base::make_binder()
tools::shared_ptr<symbolic_binder> base::make_binder()
{
if (binding_policy_==BIND_TO_HANDLE)
return std::shared_ptr<symbolic_binder>(new bind_to_handle());
return tools::shared_ptr<symbolic_binder>(new bind_to_handle());
else
return std::shared_ptr<symbolic_binder>(new bind_all_unique());
return tools::shared_ptr<symbolic_binder>(new bind_all_unique());
}
@@ -509,7 +509,7 @@ std::vector<std::string> base::generate(unsigned int label, expressions_tuple co
//Create mapping
std::vector<mapping_type> mappings(expressions.data().size());
std::shared_ptr<symbolic_binder> binder = make_binder();
tools::shared_ptr<symbolic_binder> binder = make_binder();
for (mit = mappings.begin(), sit = expressions.data().begin(); sit != expressions.data().end(); ++sit, ++mit)
traverse(**sit, (*sit)->root(), map_functor(*binder,*mit), true);
@@ -533,8 +533,8 @@ int_t base_impl<TType, PType>::local_size_1() const
{ return p_.local_size_1; }
template<class TType, class PType>
std::shared_ptr<base> base_impl<TType, PType>::clone() const
{ return std::shared_ptr<base>(new TType(*dynamic_cast<TType const *>(this))); }
tools::shared_ptr<base> base_impl<TType, PType>::clone() const
{ return tools::shared_ptr<base>(new TType(*dynamic_cast<TType const *>(this))); }
template<class TType, class PType>
int base_impl<TType, PType>::check_invalid(expressions_tuple const & expressions, cl::Device const & device) const

View File

@@ -587,7 +587,7 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
kernel.setArg(current_arg++, cl_uint(N));
kernel.setArg(current_arg++, cl_uint(K));
std::shared_ptr<symbolic_binder> binder = make_binder();
tools::shared_ptr<symbolic_binder> binder = make_binder();
set_arguments_functor fun(*binder, current_arg, kernel);
fun.set_arguments(C);
fun.set_arguments(alpha.dtype(), alpha.values());

View File

@@ -82,11 +82,11 @@ std::vector<cl_ext::lazy_compiler>& model::init(controller<expressions_tuple> co
return to_init;
}
model::model(predictors::random_forest const & predictor, std::vector< std::shared_ptr<base> > const & templates, cl::CommandQueue & queue) :
model::model(predictors::random_forest const & predictor, std::vector< tools::shared_ptr<base> > const & templates, cl::CommandQueue & queue) :
templates_(templates), predictor_(new predictors::random_forest(predictor)), queue_(queue)
{}
model::model(std::vector< std::shared_ptr<base> > const & templates, cl::CommandQueue & queue) : templates_(templates), queue_(queue)
model::model(std::vector< tools::shared_ptr<base> > const & templates, cl::CommandQueue & queue) : templates_(templates), queue_(queue)
{}
model::model(base const & tp, cl::CommandQueue & queue) : templates_(1,tp.clone()), queue_(queue)
@@ -158,27 +158,27 @@ namespace detail
throw std::invalid_argument("Invalid datatype: " + name);
}
static std::shared_ptr<base> create(std::string const & template_name, std::vector<int> const & a)
static tools::shared_ptr<base> create(std::string const & template_name, std::vector<int> const & a)
{
fetching_policy_type fetch[] = {FETCH_FROM_LOCAL, FETCH_FROM_GLOBAL_STRIDED, FETCH_FROM_GLOBAL_CONTIGUOUS};
if(template_name=="vaxpy")
return std::shared_ptr<base>(new vaxpy(a[0], a[1], a[2], fetch[a[3]]));
return tools::shared_ptr<base>(new vaxpy(a[0], a[1], a[2], fetch[a[3]]));
else if(template_name=="dot")
return std::shared_ptr<base>(new reduction(a[0], a[1], a[2], fetch[a[3]]));
return tools::shared_ptr<base>(new reduction(a[0], a[1], a[2], fetch[a[3]]));
else if(template_name=="maxpy")
return std::shared_ptr<base>(new maxpy(a[0], a[1], a[2], a[3], a[4], fetch[a[5]]));
return tools::shared_ptr<base>(new maxpy(a[0], a[1], a[2], a[3], a[4], fetch[a[5]]));
else if(template_name.find("gemvN")!=std::string::npos)
return std::shared_ptr<base>(new mreduction_rows(a[0], a[1], a[2], a[3], fetch[a[4]]));
return tools::shared_ptr<base>(new mreduction_rows(a[0], a[1], a[2], a[3], fetch[a[4]]));
else if(template_name.find("gemvT")!=std::string::npos)
return std::shared_ptr<base>(new mreduction_cols(a[0], a[1], a[2], a[3], fetch[a[4]]));
return tools::shared_ptr<base>(new mreduction_cols(a[0], a[1], a[2], a[3], fetch[a[4]]));
else if(template_name.find("gemmNN")!=std::string::npos)
return std::shared_ptr<base>(new mproduct_nn(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
return tools::shared_ptr<base>(new mproduct_nn(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
else if(template_name.find("gemmTN")!=std::string::npos)
return std::shared_ptr<base>(new mproduct_tn(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
return tools::shared_ptr<base>(new mproduct_tn(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
else if(template_name.find("gemmNT")!=std::string::npos)
return std::shared_ptr<base>(new mproduct_nt(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
return tools::shared_ptr<base>(new mproduct_nt(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
else if(template_name.find("gemmTT")!=std::string::npos)
return std::shared_ptr<base>(new mproduct_tt(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
return tools::shared_ptr<base>(new mproduct_tt(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
else
throw std::invalid_argument("Invalid expression: " + template_name);
}
@@ -214,7 +214,7 @@ void import(std::string const & fname, cl::CommandQueue & queue, model_map_t& re
numeric_type dtype = detail::get_dtype(elem);
// Get profiles
std::vector<std::shared_ptr<base> > templates;
std::vector<tools::shared_ptr<base> > templates;
js::Value const & profiles = document[opcstr][dtcstr]["profiles"];
for (js::SizeType id = 0 ; id < profiles.Size() ; ++id)
templates.push_back(detail::create(operation, tools::to_int_array<int>(profiles[id])));
@@ -222,10 +222,10 @@ void import(std::string const & fname, cl::CommandQueue & queue, model_map_t& re
{
// Get predictor
predictors::random_forest predictor(document[opcstr][dtcstr]["predictor"]);
result[std::make_pair(etype, dtype)] = std::shared_ptr<model>(new model(predictor, templates, queue));
result[std::make_pair(etype, dtype)] = tools::shared_ptr<model>(new model(predictor, templates, queue));
}
else
result[std::make_pair(etype, dtype)] = std::shared_ptr<model>(new model(templates, queue));
result[std::make_pair(etype, dtype)] = tools::shared_ptr<model>(new model(templates, queue));
}
}
}
@@ -235,7 +235,7 @@ void import(std::string const & fname, cl::CommandQueue & queue, model_map_t& re
model_map_t init_models(cl::CommandQueue & queue)
{
model_map_t res;
typedef std::shared_ptr<model> ptr_t;
typedef tools::shared_ptr<model> ptr_t;
numeric_type types[] = {CHAR_TYPE, UCHAR_TYPE, SHORT_TYPE, USHORT_TYPE, INT_TYPE, UINT_TYPE, LONG_TYPE, ULONG_TYPE, FLOAT_TYPE, DOUBLE_TYPE};
for(auto DTYPE : types){

View File

@@ -173,30 +173,30 @@ namespace atidlas
/*----Parse required temporaries-----*/
detail::parse(tree, rootidx, current_type, breakpoints, final_type);
std::vector<std::shared_ptr<array> > temporaries_;
std::vector<tools::shared_ptr<array> > temporaries_;
/*----Compute required temporaries----*/
for(detail::breakpoints_t::reverse_iterator rit = breakpoints.rbegin() ; rit != breakpoints.rend() ; ++rit)
{
std::shared_ptr<model> const & pmodel = models[std::make_pair(rit->first, dtype)];
tools::shared_ptr<model> const & pmodel = models[std::make_pair(rit->first, dtype)];
array_expression::node const & node = tree[rit->second->node_index];
array_expression::node const & lmost = lhs_most(tree, node);
//Creates temporary
std::shared_ptr<array> tmp;
tools::shared_ptr<array> tmp;
switch(rit->first){
case SCALAR_AXPY_TYPE:
case REDUCTION_TYPE: tmp = std::shared_ptr<array>(new array(1, dtype, context)); break;
case REDUCTION_TYPE: tmp = tools::shared_ptr<array>(new array(1, dtype, context)); break;
case VECTOR_AXPY_TYPE: tmp = std::shared_ptr<array>(new array(lmost.lhs.array.shape1, dtype, context)); break;
case ROW_WISE_REDUCTION_TYPE: tmp = std::shared_ptr<array>(new array(lmost.lhs.array.shape1, dtype, context)); break;
case COL_WISE_REDUCTION_TYPE: tmp = std::shared_ptr<array>(new array(lmost.lhs.array.shape2, dtype, context)); break;
case VECTOR_AXPY_TYPE: tmp = tools::shared_ptr<array>(new array(lmost.lhs.array.shape1, dtype, context)); break;
case ROW_WISE_REDUCTION_TYPE: tmp = tools::shared_ptr<array>(new array(lmost.lhs.array.shape1, dtype, context)); break;
case COL_WISE_REDUCTION_TYPE: tmp = tools::shared_ptr<array>(new array(lmost.lhs.array.shape2, dtype, context)); break;
case MATRIX_AXPY_TYPE: tmp = std::shared_ptr<array>(new array(lmost.lhs.array.shape1, lmost.lhs.array.shape2, dtype, context)); break;
case MATRIX_PRODUCT_NN_TYPE: tmp = std::shared_ptr<array>(new array(node.lhs.array.shape1, node.rhs.array.shape2, dtype, context)); break;
case MATRIX_PRODUCT_NT_TYPE: tmp = std::shared_ptr<array>(new array(node.lhs.array.shape1, node.rhs.array.shape1, dtype, context)); break;
case MATRIX_PRODUCT_TN_TYPE: tmp = std::shared_ptr<array>(new array(node.lhs.array.shape2, node.rhs.array.shape2, dtype, context)); break;
case MATRIX_PRODUCT_TT_TYPE: tmp = std::shared_ptr<array>(new array(node.lhs.array.shape2, node.rhs.array.shape1, dtype, context)); break;
case MATRIX_AXPY_TYPE: tmp = tools::shared_ptr<array>(new array(lmost.lhs.array.shape1, lmost.lhs.array.shape2, dtype, context)); break;
case MATRIX_PRODUCT_NN_TYPE: tmp = tools::shared_ptr<array>(new array(node.lhs.array.shape1, node.rhs.array.shape2, dtype, context)); break;
case MATRIX_PRODUCT_NT_TYPE: tmp = tools::shared_ptr<array>(new array(node.lhs.array.shape1, node.rhs.array.shape1, dtype, context)); break;
case MATRIX_PRODUCT_TN_TYPE: tmp = tools::shared_ptr<array>(new array(node.lhs.array.shape2, node.rhs.array.shape2, dtype, context)); break;
case MATRIX_PRODUCT_TT_TYPE: tmp = tools::shared_ptr<array>(new array(node.lhs.array.shape2, node.rhs.array.shape1, dtype, context)); break;
default: throw std::invalid_argument("Unrecognized operation");
}

View File

@@ -176,11 +176,24 @@ array_expression array_expression::operator-()
array_expression array_expression::operator!()
{ return array_expression(*this, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_NEGATE_TYPE), context_, INT_TYPE, shape_); }
//
/** @brief Records one kernel launch in the cache without executing it.
*
* An entry built from the six arguments is appended to the internal list l_;
* operation_cache::enqueue() later walks that list in insertion order and submits each entry.
*
* @param queue         command queue the kernel is to be submitted to
* @param kernel        kernel to launch
* @param offset        global work offset of the launch
* @param global        global work size of the launch
* @param local         work-group size of the launch
* @param dependencies  events the launch has to wait for; forwarded unchanged at enqueue time.
*                      NOTE(review): presumably only the pointer is stored, so the vector
*                      must outlive the cache - confirm against the definition of infos.
*/
void operation_cache::push_back(cl::CommandQueue & queue, cl::Kernel const & kernel, cl::NDRange const & offset, cl::NDRange const & global, cl::NDRange const & local, std::vector<cl::Event>* dependencies)
{
l_.push_back({queue, kernel, offset, global, local, dependencies});
}
/** @brief Submits every cached kernel launch, in the order it was recorded.
*
* @param events  optional output list. When non-NULL, one cl::Event per launch is
*                appended to it and bound to that launch. When NULL (the default
*                declared in the header) the launches are submitted without
*                requesting an event, instead of dereferencing a null pointer.
*/
void operation_cache::enqueue(std::list<cl::Event>* events)
{
  for(infos & i : l_){
    // Only materialize an event slot when the caller actually asked for events.
    cl::Event* event = NULL;
    if(events != NULL){
      events->push_back(cl::Event());
      event = &events->back();
    }
    i.queue.enqueueNDRangeKernel(i.kernel, i.offset, i.global, i.local, i.dependencies, event);
  }
}
//
std::shared_ptr<array_expression> expressions_tuple::create(array_expression const & s)
tools::shared_ptr<array_expression> expressions_tuple::create(array_expression const & s)
{
return std::shared_ptr<array_expression>(new array_expression(static_cast<array_expression const &>(s)));
return tools::shared_ptr<array_expression>(new array_expression(static_cast<array_expression const &>(s)));
}
expressions_tuple::expressions_tuple(data_type const & data, order_type order) : data_(data), order_(order)

View File

@@ -143,7 +143,7 @@ def do_tuning(args):
A = atd.empty(sizes, datatype, context=context)
C = atd.empty(sizes, datatype, context=context)
return execute(A + C, sizes, Template, parameters, fname)
tune(execution_handler, 100, 5000, 2, (),'log', 'log')
tune(execution_handler, 64, 5000, 2, (),'log', 'log')
#Row-wise dot
if operation=='gemv':
for A_trans in args.gemv_layouts:
@@ -152,7 +152,7 @@ def do_tuning(args):
x = atd.empty(sizes[1], datatype, context=context)
LHS = A if A_trans=='N' else A.T
return execute(atd.dot(LHS, x), sizes, Template[A_trans], parameters, fname)
tune(execution_handler, 100, 5000, 2, (A_trans,),'log', 'log')
tune(execution_handler, 64, 6000, 2, (A_trans,),'log', 'log')
#Matrix Product
if operation=='gemm':
for L in args.gemm_layouts:
@@ -194,7 +194,7 @@ class ArgumentsHandler:
full_parser = tune_subparsers.add_parser('full', help = 'Tune each operation for randomly chosen sizes')
full_parser.add_argument("--build-model", default=True, type=bool)
full_parser.add_argument("--sample-size", default=30, type=int)
full_parser.add_argument("--sample-size", default=60, type=int)
args = parser.parse_args()
self.__dict__ = args.__dict__.copy()

View File

@@ -218,11 +218,16 @@ def benchmark(template, symbolic):
raise ValueError("Template has too low occupancy")
else:
queue.models[template, atd.float32] = atd.model(template, queue)
x = atd.array(symbolic)
atd.synchronize(symbolic.context)
x, events, cache = atd.flush(symbolic)
atd.synchronize(symbolic.context)
return 1e-9*sum([e.end - e.start for e in events])
timings = []
current_time = 0
while current_time < 1e-3:
x, events, cache = atd.flush(symbolic)
atd.synchronize(symbolic.context)
timings.append(1e-9*sum([e.end - e.start for e in events]))
current_time = current_time + timings[-1]
return np.median(timings)
def sanitize_string(string, keep_chars = ['_']):

View File

@@ -193,7 +193,7 @@ namespace boost
#if !defined(BOOST_NO_CXX11_SMART_PTR)
template <typename T>
inline std::size_t hash_value(std::shared_ptr<T> const& x) {
inline std::size_t hash_value(tools::shared_ptr<T> const& x) {
return boost::hash_value(x.get());
}

View File

@@ -36,7 +36,7 @@ template<class T> T * get_pointer( std::unique_ptr<T> const& p )
return p.get();
}
template<class T> T * get_pointer( std::shared_ptr<T> const& p )
template<class T> T * get_pointer( tools::shared_ptr<T> const& p )
{
return p.get();
}

View File

@@ -315,15 +315,14 @@ namespace detail
boost::shared_ptr<cl::Context> make_context(cl::Device const & dev)
{ return boost::shared_ptr<cl::Context>(new cl::Context(std::vector<cl::Device>(1, dev))); }
bp::tuple flush(atd::array_expression const & expression, unsigned int queue_id, bp::list dependencies, int label, std::string const & program_name, bool force_recompile)
bp::tuple flush(atd::array_expression const & expression, unsigned int queue_id, bp::list dependencies, bool tune, int label, std::string const & program_name, bool force_recompile)
{
std::list<cl::Event> events;
atd::operation_cache cache;
std::vector<cl::Event> cdependencies = to_vector<cl::Event>(dependencies);
boost::shared_ptr<atd::array> parray(new atd::array(atd::control(expression, atd::execution_options_type(queue_id, &events, &cache, &cdependencies),
atd::dispatcher_options_type(label), atd::compilation_options_type(program_name, force_recompile))));
return bp::make_tuple(*parray, to_list(events.begin(), events.end()), cache);
atd::dispatcher_options_type(tune, label), atd::compilation_options_type(program_name, force_recompile))));
return bp::make_tuple(parray, to_list(events.begin(), events.end()), cache);
}
}
@@ -402,7 +401,7 @@ void export_cl()
bp::def("synchronize", &atd::cl_ext::synchronize);
bp::def("get_platforms", &detail::get_platforms);
bp::def("flush", &detail::flush, (bp::arg("expression"), bp::arg("queue_id") = 0, bp::arg("dependencies")=bp::list(), bp::arg("label")=-1, bp::arg("program_name")="", bp::arg("recompile") = false));
bp::def("flush", &detail::flush, (bp::arg("expression"), bp::arg("queue_id") = 0, bp::arg("dependencies")=bp::list(), bp::arg("tune") = false, bp::arg("label")=-1, bp::arg("program_name")="", bp::arg("recompile") = false));
bp::class_<state_type>("state_type")
.def_readwrite("queue_properties",&atd::cl_ext::queue_properties)

View File

@@ -10,7 +10,7 @@ namespace ad = atidlas;
int main()
{
viennacl::vector<float> x(10000), y(10000), z(10000);
std::map<std::string, ad::std::shared_ptr<ad::model> > models = ad::import("geforce_gt_540m.json");
std::map<std::string, ad::tools::shared_ptr<ad::model> > models = ad::import("geforce_gt_540m.json");
models["vector-axpy-float32"]->tune(viennacl::symbolic_expression(z, viennacl::op_assign(), x));
models["vector-axpy-float32"]->execute(viennacl::symbolic_expression(z, viennacl::op_assign(), x));
return EXIT_SUCCESS;