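Now using the system OpenCL C++ header (<CL/cl.hpp>) instead of "atidlas/cl/cl.hpp"; atidlas::cl helpers renamed to atidlas::cl_ext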

This commit is contained in:
Philippe Tillet
2015-01-27 16:14:02 -05:00
parent 53c9bef85d
commit c37d8a2a81
39 changed files with 154 additions and 12608 deletions

View File

@@ -16,7 +16,7 @@
namespace ad = atidlas;
typedef atidlas::int_t int_t;
typedef ad::int_t int_t;
template<class T>
void bench(ad::numeric_type dtype)
@@ -31,11 +31,11 @@ void bench(ad::numeric_type dtype)
times.clear();\
total_time = 0;\
OP;\
ad::cl::synchronize(ad::cl::default_context());\
ad::cl_ext::synchronize(ad::cl_ext::default_context());\
while(total_time < 1e-2){\
timer.start(); \
OP;\
ad::cl::synchronize(ad::cl::default_context());\
ad::cl_ext::synchronize(ad::cl_ext::default_context());\
times.push_back(timer.get());\
total_time += times.back();\
}\
@@ -52,17 +52,17 @@ void bench(ad::numeric_type dtype)
int_t N = *it;
std::cout << N;
/* ATIDLAS */
atidlas::array x(N, dtype), y(N, dtype);
ad::array x(N, dtype), y(N, dtype);
BENCHMARK(y = x + y, bandwidth(3*N, tres, dtsize));
/* clAmdBlas */
#ifdef BENCH_CLAMDBLAS
BENCHMARK(clAmdBlasSaxpy(N, 1, x.data()(), 0, 1, y.data()(), 0, 1, 1, &atidlas::cl::get_queue(x.context(), 0)(), 0, NULL, NULL), bandwidth(3*N, tres, dtsize))
BENCHMARK(clAmdBlasSaxpy(N, 1, x.data()(), 0, 1, y.data()(), 0, 1, 1, &ad::cl_ext::get_queue(x.context(), 0)(), 0, NULL, NULL), bandwidth(3*N, tres, dtsize))
#endif
/* BLAS */
#ifdef BENCH_CBLAS
std::vector<float> cx(N), cy(N);
atidlas::copy(x, cx);
atidlas::copy(y, cy);
ad::copy(x, cx);
ad::copy(y, cy);
BENCHMARK(cblas_saxpy(N, 1, cx.data(), 1, cy.data(), 1), bandwidth(3*N, tres, dtsize));
#endif
/* CuBLAS */
@@ -84,19 +84,19 @@ void bench(ad::numeric_type dtype)
int_t N = *it;
std::cout << N;
/* ATIDLAS */
atidlas::array x(N, dtype), y(N, dtype);
atidlas::array scratch(N, dtype);
atidlas::scalar s(dtype);
ad::array x(N, dtype), y(N, dtype);
ad::array scratch(N, dtype);
ad::scalar s(dtype);
BENCHMARK(s = dot(x,y), bandwidth(2*N, tres, dtsize));
/* clAmdBlas */
#ifdef BENCH_CLAMDBLAS
BENCHMARK(clAmdBlasSdot(N, s.data()(), 0, x.data()(), 0, 1, y.data()(), 0, 1, scratch.data()(), 1, &atidlas::cl::get_queue(x.context(), 0)(), 0, NULL, NULL), bandwidth(2*N, tres, dtsize))
BENCHMARK(clAmdBlasSdot(N, s.data()(), 0, x.data()(), 0, 1, y.data()(), 0, 1, scratch.data()(), 1, &ad::cl_ext::get_queue(x.context(), 0)(), 0, NULL, NULL), bandwidth(2*N, tres, dtsize))
#endif
/* BLAS */
#ifdef BENCH_CBLAS
std::vector<float> cx(N), cy(N);
atidlas::copy(x, cx);
atidlas::copy(y, cy);
ad::copy(x, cx);
ad::copy(y, cy);
BENCHMARK(cblas_sdot(N, cx.data(), 1, cy.data(), 1), bandwidth(2*N, tres, dtsize));
#endif
std::cout << std::endl;
@@ -115,18 +115,18 @@ void bench(ad::numeric_type dtype)
int_t N = *Nit;
std::cout << M << "," << N;
/* ATIDLAS */
atidlas::array A(N, M, dtype), y(M, dtype), x(N, dtype);
ad::array A(N, M, dtype), y(M, dtype), x(N, dtype);
BENCHMARK(y = dot(trans(A),x), bandwidth(M*N + M + N, tres, dtsize));
/* clAmdBlas */
#ifdef BENCH_CLAMDBLAS
BENCHMARK(clAmdBlasSgemv(clAmdBlasColumnMajor, clAmdBlasTrans, N, M, 1, A.data()(), A.ld(), x.data()(), 0, 1, 0, y.data()(), 0, 1, 1, &atidlas::cl::get_queue(x.context(), 0)(),0, NULL, NULL), bandwidth(M*N + M + N, tres, dtsize))
BENCHMARK(clAmdBlasSgemv(clAmdBlasColumnMajor, clAmdBlasTrans, N, M, 1, A.data()(), A.ld(), x.data()(), 0, 1, 0, y.data()(), 0, 1, 1, &ad::cl_ext::get_queue(x.context(), 0)(),0, NULL, NULL), bandwidth(M*N + M + N, tres, dtsize))
#endif
/* BLAS */
#ifdef BENCH_CBLAS
std::vector<float> cA(N*M), cx(N), cy(M);
atidlas::copy(x, cx);
atidlas::copy(y, cy);
atidlas::copy(A, cA);
ad::copy(x, cx);
ad::copy(y, cy);
ad::copy(A, cA);
BENCHMARK(cblas_sgemv(CblasColMajor, CblasTrans, N, M, 1, cA.data(), N, cx.data(), 1, 0, cy.data(), 1), bandwidth(M*N + M + N, tres, dtsize));
#endif
std::cout << std::endl;
@@ -144,19 +144,19 @@ void bench(ad::numeric_type dtype)
int_t M = *Mit, N = *Nit, K = *Kit;
std::cout << M << "," << N << "," << K;
/* ATIDLAS */
atidlas::array C(M, N, dtype), A(M, K, dtype), B(N, K, dtype);
ad::array C(M, N, dtype), A(M, K, dtype), B(N, K, dtype);
BENCHMARK(C = dot(A,trans(B)), gflops((double)2*M*N*K, tres));
/* clAmdBlas */
#ifdef BENCH_CLAMDBLAS
BENCHMARK(clAmdBlasSgemm(clAmdBlasColumnMajor, clAmdBlasNoTrans, clAmdBlasTrans, M, N, K, 1, A.data()(), A.ld(), B.data()(), B.ld(),
0, C.data()(), C.ld(), 1, &atidlas::cl::get_queue(C.context(), 0)(),0, NULL, NULL), gflops((double)2*M*N*K, tres))
0, C.data()(), C.ld(), 1, &ad::cl_ext::get_queue(C.context(), 0)(),0, NULL, NULL), gflops((double)2*M*N*K, tres))
#endif
/* BLAS */
#ifdef BENCH_CBLAS
std::vector<float> cC(M*N), cA(M*K), cB(N*K);
atidlas::copy(C, cC);
atidlas::copy(A, cA);
atidlas::copy(B, cB);
ad::copy(C, cC);
ad::copy(A, cA);
ad::copy(B, cB);
BENCHMARK(cblas_sgemm(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, cA.data(), M, cB.data(), N, 1, cC.data(), M), gflops((double)2*M*N*K, tres));
#endif
std::cout << std::endl;
@@ -171,16 +171,16 @@ int main(int argc, char* argv[])
#endif
int device_idx = 0;
if(atidlas::cl::queues.size()>1){
atidlas::cl::queues_t & queues = atidlas::cl::queues;
if(ad::cl_ext::queues.size()>1){
ad::cl_ext::queues_t & queues = ad::cl_ext::queues;
if(argc!=2)
{
std::cerr << "usage : blas-bench [DEVICE_IDX]" << std::endl;
std::cout << "Devices available: " << std::endl;
unsigned int current=0;
for(atidlas::cl::queues_t::const_iterator it = queues.begin() ; it != queues.end() ; ++it){
atidlas::cl::Device device = it->first.getInfo<CL_CONTEXT_DEVICES>()[0];
std::cout << current++ << ": " << device.getInfo<CL_DEVICE_NAME>() << "(" << atidlas::cl::Platform(device.getInfo<CL_DEVICE_PLATFORM>()).getInfo<CL_PLATFORM_NAME>() << ")" << std::endl;
for(ad::cl_ext::queues_t::const_iterator it = queues.begin() ; it != queues.end() ; ++it){
cl::Device device = it->first.getInfo<CL_CONTEXT_DEVICES>()[0];
std::cout << current++ << ": " << device.getInfo<CL_DEVICE_NAME>() << "(" << cl::Platform(device.getInfo<CL_DEVICE_PLATFORM>()).getInfo<CL_PLATFORM_NAME>() << ")" << std::endl;
}
exit(EXIT_FAILURE);
}
@@ -188,7 +188,7 @@ int main(int argc, char* argv[])
device_idx = atoi(argv[1]);
}
atidlas::cl::default_context_idx = device_idx;
ad::cl_ext::default_context_idx = device_idx;
std::cout << "#Benchmark : BLAS" << std::endl;
std::cout << "#----------------" << std::endl;
bench<float>(ad::FLOAT_TYPE);

View File

@@ -7,19 +7,19 @@ namespace ad = atidlas;
int main()
{
for(ad::cl::queues_t::iterator it = ad::cl::queues.begin() ; it != ad::cl::queues.end() ; ++it)
for(ad::cl_ext::queues_t::iterator it = ad::cl_ext::queues.begin() ; it != ad::cl_ext::queues.end() ; ++it)
{
ad::array x(10, ad::FLOAT_TYPE, it->first);
ad::cl::Device device = it->second[0].getInfo<CL_QUEUE_DEVICE>();
cl::Device device = it->second[0].getInfo<CL_QUEUE_DEVICE>();
ad::tools::timer t;
std::cout << "Device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
std::cout << "-------------------------" << std::endl;
x = x + x;
ad::cl::synchronize(x.context());
ad::cl_ext::synchronize(x.context());
t.start();\
for(unsigned int i = 0 ; i < 100 ; ++i){
x = x + x;
ad::cl::synchronize(x.context());
ad::cl_ext::synchronize(x.context());
}
std::cout << "Kernel launch overhead: " << t.get()/100 << std::endl;
std::cout << "Expression tree creation:" << std::endl;

View File

@@ -3,7 +3,7 @@
#include <iostream>
#include "atidlas/types.h"
#include "atidlas/cl/cl.hpp"
#include <CL/cl.hpp>
#include "atidlas/cl/queues.h"
#include "atidlas/symbolic/expression.h"
@@ -18,19 +18,19 @@ class array: public obj_base
friend array reshape(array const &, int_t, int_t);
public:
//1D Constructors
array(int_t size1, numeric_type dtype, cl::Context context = cl::default_context());
array(int_t size1, numeric_type dtype, cl::Context context = cl_ext::default_context());
template<typename DT>
array(std::vector<DT> const & data, cl::Context context = cl::default_context());
array(std::vector<DT> const & data, cl::Context context = cl_ext::default_context());
array(array & v, slice const & s1);
//2D Constructors
array(int_t size1, int_t size2, numeric_type dtype, cl::Context context = cl::default_context());
array(int_t size1, int_t size2, numeric_type dtype, cl::Context context = cl_ext::default_context());
template<typename DT>
array(int_t size1, int_t size2, std::vector<DT> const & data, cl::Context context = cl::default_context());
array(int_t size1, int_t size2, std::vector<DT> const & data, cl::Context context = cl_ext::default_context());
array(array & M, slice const & s1, slice const & s2);
//General constructor
array(numeric_type dtype, cl::Buffer data, slice const & s1, slice const & s2, int_t ld, cl::Context context = cl::default_context());
array(numeric_type dtype, cl::Buffer data, slice const & s1, slice const & s2, int_t ld, cl::Context context = cl_ext::default_context());
array(array_expression const & proxy);
array(array const &);
@@ -91,9 +91,9 @@ class scalar : public array
private:
template<class T> T cast() const;
public:
explicit scalar(numeric_type dtype, cl::Buffer const & data, int_t offset, cl::Context context = cl::default_context());
explicit scalar(value_scalar value, cl::Context context = cl::default_context());
explicit scalar(numeric_type dtype, cl::Context context = cl::default_context());
explicit scalar(numeric_type dtype, cl::Buffer const & data, int_t offset, cl::Context context = cl_ext::default_context());
explicit scalar(value_scalar value, cl::Context context = cl_ext::default_context());
explicit scalar(numeric_type dtype, cl::Context context = cl_ext::default_context());
scalar(array_expression const & proxy);
scalar& operator=(value_scalar const &);
// scalar& operator=(scalar const & s);
@@ -209,8 +209,8 @@ ATIDLAS_DECLARE_REDUCTION(max)
ATIDLAS_DECLARE_REDUCTION(min)
ATIDLAS_DECLARE_REDUCTION(argmin)
atidlas::array_expression eye(std::size_t, std::size_t, atidlas::numeric_type, cl::Context ctx = cl::default_context());
array_expression zeros(std::size_t M, std::size_t N, numeric_type dtype, cl::Context ctx = cl::default_context());
atidlas::array_expression eye(std::size_t, std::size_t, atidlas::numeric_type, cl::Context ctx = cl_ext::default_context());
array_expression zeros(std::size_t M, std::size_t N, numeric_type dtype, cl::Context ctx = cl_ext::default_context());
array reshape(array const &, int_t, int_t);
//

View File

@@ -2,7 +2,7 @@
#define ATIDLAS_BACKEND_BINDER_H
#include <map>
#include "atidlas/cl/cl.hpp"
#include <CL/cl.hpp>
namespace atidlas
{

View File

@@ -8,7 +8,7 @@
#include "atidlas/types.h"
#include "atidlas/backend/parse.h"
#include "atidlas/backend/stream.h"
#include "atidlas/cl/cl.hpp"
#include <CL/cl.hpp>
#include "atidlas/cl/lazy_compiler.h"
#include "atidlas/symbolic/expression.h"
@@ -163,7 +163,7 @@ public:
std::vector<std::string> generate(unsigned int label, symbolic_expressions_container const & symbolic_expressions, cl::Device const & device);
virtual int check_invalid(symbolic_expressions_container const & symbolic_expressions, cl::Device const & device) const = 0;
virtual void enqueue(cl::CommandQueue & queue,
std::vector<cl::lazy_compiler> & programs,
std::vector<cl_ext::lazy_compiler> & programs,
unsigned int label, symbolic_expressions_container const & symbolic_expressions) = 0;
virtual tools::shared_ptr<base> clone() const = 0;
private:

View File

@@ -27,7 +27,7 @@ public:
maxpy(parameters_type const & parameters, binding_policy_t binding_policy = BIND_ALL_UNIQUE);
maxpy(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2, fetching_policy_type fetch, binding_policy_t bind = BIND_ALL_UNIQUE);
std::vector<int_t> input_sizes(symbolic_expressions_container const & symbolic_expressions);
void enqueue(cl::CommandQueue & queue, std::vector<cl::lazy_compiler> & programs, unsigned int label, symbolic_expressions_container const & symbolic_expressions);
void enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs, unsigned int label, symbolic_expressions_container const & symbolic_expressions);
};
}

View File

@@ -41,7 +41,7 @@ private:
void enqueue_block(cl::CommandQueue & queue, int_t M, int_t N, int_t K,
array_infos const & A, array_infos const & B, array_infos const & C,
value_scalar const & alpha, value_scalar const & beta,
std::vector<cl::lazy_compiler> & programs, unsigned int label, int id);
std::vector<cl_ext::lazy_compiler> & programs, unsigned int label, int id);
array_infos create_slice(array_infos & M, int_t s0_0, int_t s0_1, int_t s1_0, int_t s1_1, bool swap);
std::vector<int_t> infos(symbolic_expressions_container const & symbolic_expressions,
lhs_rhs_element & C, lhs_rhs_element & A, lhs_rhs_element & B);
@@ -49,7 +49,7 @@ public:
mproduct(mproduct::parameters_type const & parameters, char A_trans, char B_trans);
std::vector<int_t> input_sizes(symbolic_expressions_container const & symbolic_expressions);
void enqueue(cl::CommandQueue & queue,
std::vector<cl::lazy_compiler> & programs,
std::vector<cl_ext::lazy_compiler> & programs,
unsigned int label,
symbolic_expressions_container const & symbolic_expressions);

View File

@@ -35,7 +35,7 @@ private:
std::vector<std::string> generate_impl(unsigned int, symbolic_expressions_container const &, std::vector<mapping_type> const &) const;
public:
virtual std::vector<int_t> input_sizes(symbolic_expressions_container const & symbolic_expressions);
void enqueue(cl::CommandQueue & queue,std::vector<cl::lazy_compiler> & programs,unsigned int label, symbolic_expressions_container const & symbolic_expressions);
void enqueue(cl::CommandQueue & queue,std::vector<cl_ext::lazy_compiler> & programs,unsigned int label, symbolic_expressions_container const & symbolic_expressions);
private:
reduction_type reduction_type_;
};

View File

@@ -30,7 +30,7 @@ public:
reduction(unsigned int simd, unsigned int ls, unsigned int ng, fetching_policy_type fetch, binding_policy_t bind = BIND_ALL_UNIQUE);
std::vector<int_t> input_sizes(symbolic_expressions_container const & symbolic_expressions);
void enqueue(cl::CommandQueue & queue,
std::vector<cl::lazy_compiler> & programs,
std::vector<cl_ext::lazy_compiler> & programs,
unsigned int label,
symbolic_expressions_container const & symbolic_expressions);
private:

View File

@@ -23,7 +23,7 @@ public:
vaxpy(vaxpy::parameters_type const & parameters, binding_policy_t binding_policy = BIND_ALL_UNIQUE);
vaxpy(unsigned int _simd_width, unsigned int _group_size, unsigned int _num_groups, fetching_policy_type _fetching_policy, binding_policy_t binding_policy = BIND_ALL_UNIQUE);
std::vector<int_t> input_sizes(symbolic_expressions_container const & symbolic_expressions);
void enqueue(cl::CommandQueue & queue, std::vector<cl::lazy_compiler> & programs,
void enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs,
unsigned int label, symbolic_expressions_container const & symbolic_expressions);
};

File diff suppressed because it is too large Load Diff

View File

@@ -3,7 +3,8 @@
namespace atidlas
{
namespace cl
namespace cl_ext
{
struct compare{

View File

@@ -1,13 +1,13 @@
#ifndef ATIDLAS_CL_LAZY_COMPILER_H
#define ATIDLAS_CL_LAZY_COMPILER_H
#include "atidlas/cl/cl.hpp"
#include <CL/cl.hpp>
#include "atidlas/cl/program_map.h"
namespace atidlas
{
namespace cl
namespace cl_ext
{
class lazy_compiler

View File

@@ -2,12 +2,12 @@
#define ATIDLAS_CL_PROGRAM_MAP_H
#include <map>
#include "atidlas/cl/cl.hpp"
#include <CL/cl.hpp>
namespace atidlas
{
namespace cl
namespace cl_ext
{
class program_map

View File

@@ -2,16 +2,16 @@
#define ATIDLAS_CL_QUEUES_H
#include <map>
#include "atidlas/cl/cl.hpp"
#include <CL/cl.hpp>
#include "atidlas/cl/compare.hpp"
namespace atidlas
{
namespace cl
namespace cl_ext
{
typedef std::map<cl::Program, cl::Kernel, cl::compare> kernels_t;
typedef std::map<cl::Program, cl::Kernel, cl_ext::compare> kernels_t;
typedef std::vector<std::pair<cl::Context, std::vector<cl::CommandQueue> > > queues_t;
queues_t init_queues();

View File

@@ -21,7 +21,7 @@ namespace atidlas
private:
std::string define_extension(std::string const & extensions, std::string const & ext);
inline void fill_program_name(char* program_name, symbolic_expressions_container const & symbolic_expressions, binding_policy_t binding_policy);
std::vector<cl::lazy_compiler>& init(symbolic_expressions_container const & symbolic_expressions, cl::Context const & context, cl::Device const & device, bool force_recompilation);
std::vector<cl_ext::lazy_compiler>& init(symbolic_expressions_container const & symbolic_expressions, cl::Context const & context, cl::Device const & device, bool force_recompilation);
public:
model(predictors::random_forest const &, std::vector< tools::shared_ptr<base> > const &, cl::CommandQueue &);
@@ -36,7 +36,7 @@ namespace atidlas
templates_container templates_;
tools::shared_ptr<predictors::random_forest> predictor_;
std::map<std::vector<int_t>, int> hardcoded_;
std::map<cl_context, std::map<std::string, std::vector<cl::lazy_compiler> > > lazy_programs_;
std::map<cl_context, std::map<std::string, std::vector<cl_ext::lazy_compiler> > > lazy_programs_;
cl::CommandQueue & queue_;
};
@@ -46,7 +46,7 @@ namespace atidlas
model_map_t& get_model_map(cl::CommandQueue & queue);
model& get_model(cl::CommandQueue & queue, expression_type, numeric_type);
extern std::map<cl::CommandQueue, model_map_t, cl::compare> models;
extern std::map<cl::CommandQueue, model_map_t, cl_ext::compare> models;
}

View File

@@ -1,7 +1,7 @@
#ifndef _ATIDLAS_SCHEDULER_EXECUTE_H
#define _ATIDLAS_SCHEDULER_EXECUTE_H
#include "atidlas/cl/cl.hpp"
#include <CL/cl.hpp>
#include "atidlas/model/model.h"
#include "atidlas/symbolic/expression.h"

View File

@@ -5,7 +5,7 @@
#include <list>
#include "atidlas/types.h"
#include "atidlas/value_scalar.h"
#include "atidlas/cl/cl.hpp"
#include <CL/cl.hpp>
#include "atidlas/tools/shared_ptr.hpp"
namespace atidlas

View File

@@ -1,7 +1,7 @@
#ifndef ATIDLAS_TYPES_H
#define ATIDLAS_TYPES_H
#include "atidlas/cl/cl.hpp"
#include <CL/cl.hpp>
#include "atidlas/exception/unknown_datatype.h"
namespace atidlas

View File

@@ -2,7 +2,7 @@
#define ATIDLAS_VALUE_SCALAR_H
#include "atidlas/types.h"
#include "atidlas/cl/cl.hpp"
#include <CL/cl.hpp>
namespace atidlas
{

View File

@@ -1,7 +1,7 @@
#include <cassert>
#include "atidlas/array.h"
#include "atidlas/cl/cl.hpp"
#include <CL/cl.hpp>
#include "atidlas/exception/unknown_datatype.h"
#include "atidlas/model/model.h"
#include "atidlas/symbolic/execute.h"
@@ -131,7 +131,7 @@ int_t array::dsize() const
array & array::operator=(array const & rhs)
{
array_expression expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ASSIGN_TYPE), context_, dtype_, shape_);
cl::CommandQueue & queue = cl::get_queue(context_, 0);
cl::CommandQueue & queue = cl_ext::get_queue(context_, 0);
model_map_t & mmap = atidlas::get_model_map(queue);
execute(expression, mmap);
return *this;
@@ -140,7 +140,7 @@ array & array::operator=(array const & rhs)
array & array::operator=(array_expression const & rhs)
{
array_expression expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ASSIGN_TYPE), shape_);
cl::CommandQueue & queue = cl::get_queue(context_, 0);
cl::CommandQueue & queue = cl_ext::get_queue(context_, 0);
model_map_t & mmap = atidlas::get_model_map(queue);
execute(expression, mmap);
return *this;
@@ -243,7 +243,7 @@ namespace detail
template<class T>
void copy(cl::Context & ctx, cl::Buffer const & data, T value)
{
cl::get_queue(ctx, 0).enqueueWriteBuffer(data, CL_TRUE, 0, sizeof(T), (void*)&value);
cl_ext::get_queue(ctx, 0).enqueueWriteBuffer(data, CL_TRUE, 0, sizeof(T), (void*)&value);
}
}
@@ -282,7 +282,7 @@ T scalar::cast() const
int_t dtsize = size_of(dtype_);
#define HANDLE_CASE(DTYPE, VAL) \
case DTYPE:\
cl::get_queue(context_, 0).enqueueReadBuffer(data_, CL_TRUE, start_._1*dtsize, dtsize, (void*)&v.VAL);\
cl_ext::get_queue(context_, 0).enqueueReadBuffer(data_, CL_TRUE, start_._1*dtsize, dtsize, (void*)&v.VAL);\
return v.VAL
switch(dtype_)
@@ -305,7 +305,7 @@ case DTYPE:\
scalar& scalar::operator=(value_scalar const & s)
{
cl::CommandQueue& queue = cl::get_queue(context_, 0);
cl::CommandQueue& queue = cl_ext::get_queue(context_, 0);
int_t dtsize = size_of(dtype_);
#define HANDLE_CASE(TYPE, CLTYPE) case TYPE:\
@@ -727,7 +727,7 @@ void copy(void const * data, array& x, cl::CommandQueue & queue, bool blocking)
x = tmp;
}
if(blocking)
cl::synchronize(x.context());
cl_ext::synchronize(x.context());
}
void copy(array const & x, void* data, cl::CommandQueue & queue, bool blocking)
@@ -744,14 +744,14 @@ void copy(array const & x, void* data, cl::CommandQueue & queue, bool blocking)
queue.enqueueReadBuffer(tmp.data(), CL_FALSE, 0, tmp.dsize()*dtypesize, data);
}
if(blocking)
cl::synchronize(x.context());
cl_ext::synchronize(x.context());
}
void copy(void const *data, array &x, bool blocking)
{ copy(data, x, cl::get_queue(x.context(), 0), blocking); }
{ copy(data, x, cl_ext::get_queue(x.context(), 0), blocking); }
void copy(array const & x, void* data, bool blocking)
{ copy(x, data, cl::get_queue(x.context(), 0), blocking); }
{ copy(x, data, cl_ext::get_queue(x.context(), 0), blocking); }
//std::vector<>
template<class T>
@@ -776,11 +776,11 @@ void copy(array const & x, std::vector<T> & cx, cl::CommandQueue & queue, bool b
template<class T>
void copy(std::vector<T> const & cx, array & x, bool blocking)
{ copy(cx, x, cl::get_queue(x.context(), 0), blocking); }
{ copy(cx, x, cl_ext::get_queue(x.context(), 0), blocking); }
template<class T>
void copy(array const & x, std::vector<T> & cx, bool blocking)
{ copy(x, cx, cl::get_queue(x.context(), 0), blocking); }
{ copy(x, cx, cl_ext::get_queue(x.context(), 0), blocking); }
#define INSTANTIATE(T) \
template void copy<T>(std::vector<T> const &, array &, cl::CommandQueue&, bool);\

View File

@@ -105,7 +105,7 @@ std::vector<int_t> maxpy::input_sizes(symbolic_expressions_container const & sym
}
void maxpy::enqueue(cl::CommandQueue & queue,
std::vector<cl::lazy_compiler> & programs,
std::vector<cl_ext::lazy_compiler> & programs,
unsigned int label,
symbolic_expressions_container const & symbolic_expressions)
{

View File

@@ -568,7 +568,7 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
void mproduct::enqueue_block(cl::CommandQueue & queue, int_t M, int_t N, int_t K,
array_infos const & A, array_infos const & B, array_infos const & C,
value_scalar const & alpha, value_scalar const & beta,
std::vector<cl::lazy_compiler> & programs, unsigned int label, int id)
std::vector<cl_ext::lazy_compiler> & programs, unsigned int label, int id)
{
if (A.shape1==0 || A.shape2==0 || B.shape1==0 || B.shape2==0 || C.shape1==0 || C.shape2==0)
return;
@@ -646,7 +646,7 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
return infos(symbolic_expressions, d0, d1, d2);
}
void mproduct::enqueue(cl::CommandQueue & queue, std::vector<cl::lazy_compiler> & programs, unsigned int label, symbolic_expressions_container const & symbolic_expressions)
void mproduct::enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs, unsigned int label, symbolic_expressions_container const & symbolic_expressions)
{
using namespace tools;

View File

@@ -215,7 +215,7 @@ std::vector<int_t> mreduction::input_sizes(symbolic_expressions_container const
}
void mreduction::enqueue(cl::CommandQueue & queue,
std::vector<cl::lazy_compiler> & programs,
std::vector<cl_ext::lazy_compiler> & programs,
unsigned int label,
symbolic_expressions_container const & symbolic_expressions)
{

View File

@@ -1,6 +1,6 @@
#include <iostream>
#include "atidlas/backend/templates/reduction.h"
#include "atidlas/cl/cl.hpp"
#include <CL/cl.hpp>
#include "atidlas/tools/to_string.hpp"
#include "atidlas/tools/make_map.hpp"
#include "atidlas/tools/make_vector.hpp"
@@ -281,7 +281,7 @@ std::vector<int_t> reduction::input_sizes(symbolic_expressions_container const &
}
void reduction::enqueue(cl::CommandQueue & queue,
std::vector<cl::lazy_compiler> & programs,
std::vector<cl_ext::lazy_compiler> & programs,
unsigned int label,
symbolic_expressions_container const & symbolic_expressions)
{

View File

@@ -106,7 +106,7 @@ std::vector<int_t> vaxpy::input_sizes(symbolic_expressions_container const & sym
}
void vaxpy::enqueue(cl::CommandQueue & queue,
std::vector<cl::lazy_compiler> & programs,
std::vector<cl_ext::lazy_compiler> & programs,
unsigned int label,
symbolic_expressions_container const & symbolic_expressions)
{

View File

@@ -3,7 +3,7 @@
namespace atidlas
{
namespace cl
namespace cl_ext
{
lazy_compiler::lazy_compiler(cl::Context const & ctx, std::string const & name, std::string const & src, bool force_recompilation) :

View File

@@ -9,7 +9,7 @@
namespace atidlas
{
namespace cl
namespace cl_ext
{
program_map::program_map()

View File

@@ -5,7 +5,7 @@
namespace atidlas
{
namespace cl
namespace cl_ext
{
void synchronize(cl::Context const & context)

View File

@@ -42,20 +42,20 @@ void model::fill_program_name(char* program_name, symbolic_expressions_container
delete binder;
}
std::vector<cl::lazy_compiler>& model::init(symbolic_expressions_container const & symbolic_expressions, cl::Context const & context, cl::Device const & device, bool force_recompilation)
std::vector<cl_ext::lazy_compiler>& model::init(symbolic_expressions_container const & symbolic_expressions, cl::Context const & context, cl::Device const & device, bool force_recompilation)
{
char program_name[256];
fill_program_name(program_name, symbolic_expressions, BIND_TO_HANDLE);
std::string pname(program_name);
std::vector<cl::lazy_compiler> & to_init = lazy_programs_[context()][pname];
std::vector<cl_ext::lazy_compiler> & to_init = lazy_programs_[context()][pname];
if(to_init.empty())
{
std::string extensions = device.getInfo<CL_DEVICE_EXTENSIONS>();
to_init.push_back(cl::lazy_compiler(context, pname, force_recompilation));
to_init.push_back(cl_ext::lazy_compiler(context, pname, force_recompilation));
to_init.back().add(define_extension(extensions, "cl_khr_fp64"));
to_init.push_back(cl::lazy_compiler(context, pname + "_fb", force_recompilation));
to_init.push_back(cl_ext::lazy_compiler(context, pname + "_fb", force_recompilation));
to_init.back().add(define_extension(extensions, "cl_khr_fp64"));
for(size_t i = 0 ; i < templates_.size() ; ++i)
@@ -86,7 +86,7 @@ void model::execute(symbolic_expressions_container const & symbolic_expressions,
assert(context() == queue_.getInfo<CL_QUEUE_CONTEXT>()());
cl::Device const & device = queue_.getInfo<CL_QUEUE_DEVICE>();
std::vector<cl::lazy_compiler> & compilers = init(symbolic_expressions, context, device, force_recompilation);
std::vector<cl_ext::lazy_compiler> & compilers = init(symbolic_expressions, context, device, force_recompilation);
//Prediction
std::vector<int_t> x = templates_[0]->input_sizes(symbolic_expressions);
@@ -114,7 +114,7 @@ void model::tune(symbolic_expressions_container const & symbolic_expressions)
assert(context() == queue_.getInfo<CL_QUEUE_CONTEXT>()());
cl::Device device = queue_.getInfo<CL_QUEUE_DEVICE>();
std::vector<cl::lazy_compiler> & compilers = init(symbolic_expressions, context, device, false);
std::vector<cl_ext::lazy_compiler> & compilers = init(symbolic_expressions, context, device, false);
//Collect the timings
std::vector<float> timings(templates_.size());
@@ -265,7 +265,7 @@ model_map_t init_models(cl::CommandQueue & queue)
model_map_t& get_model_map(cl::CommandQueue & queue)
{
std::map<cl::CommandQueue, model_map_t, cl::compare>::iterator it = models.find(queue);
std::map<cl::CommandQueue, model_map_t, cl_ext::compare>::iterator it = models.find(queue);
if(it == models.end())
return models.insert(std::make_pair(queue, init_models(queue))).first->second;
return it->second;
@@ -277,6 +277,6 @@ model& get_model(cl::CommandQueue & queue, expression_type expression, numeric_t
return *get_model_map(queue).at(key);
}
std::map<cl::CommandQueue, model_map_t, cl::compare> models;
std::map<cl::CommandQueue, model_map_t, cl_ext::compare> models;
}

View File

@@ -3,7 +3,7 @@
#include <vector>
#include "atidlas/types.h"
#include "atidlas/array.h"
#include "atidlas/cl/cl.hpp"
#include <CL/cl.hpp>
#include "atidlas/model/model.h"
#include "atidlas/symbolic/expression.h"

View File

@@ -2,7 +2,7 @@
#include <vector>
#include "atidlas/array.h"
#include "atidlas/value_scalar.h"
#include "atidlas/cl/cl.hpp"
#include <CL/cl.hpp>
#include "atidlas/symbolic/expression.h"
namespace atidlas

View File

@@ -17,7 +17,7 @@ namespace boost {
namespace intrusive {
namespace detail {
template<class Derived, bool DoClear = true>
template<class Derived, bool DoClear = true>
class clear_on_destructor_base
{
protected:

View File

@@ -77,12 +77,12 @@ bp::tuple get_shape(atd::array const & x)
// x.reshape(size1, size2);
//}
//boost::python::dict create_queues(atd::cl::queues_t queues)
//boost::python::dict create_queues(atd::cl_ext::queues_t queues)
//{
// boost::python::dict dictionary;
// for (atd::cl::queues_t::iterator it = queues.begin(); it != queues.end(); ++it) {
// for (atd::cl_ext::queues_t::iterator it = queues.begin(); it != queues.end(); ++it) {
// bp::list list;
// for (atd::cl::queues_t::mapped_type::iterator itt = it->second.begin(); itt != it->second.end(); ++itt)
// for (atd::cl_ext::queues_t::mapped_type::iterator itt = it->second.begin(); itt != it->second.end(); ++itt)
// list.append(*itt);
// dictionary[it->first] = list;
// }
@@ -175,7 +175,7 @@ namespace detail
return res;
}
bp::list nv_compute_capability(atd::cl::Device const & device)
bp::list nv_compute_capability(cl::Device const & device)
{
bp::list res;
res.append(device.getInfo<CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV>());
@@ -185,20 +185,20 @@ namespace detail
bp::list get_platforms()
{
std::vector<atd::cl::Platform> platforms;
atd::cl::Platform::get(&platforms);
std::vector<cl::Platform> platforms;
cl::Platform::get(&platforms);
return to_list(platforms.begin(), platforms.end());
}
bp::list get_devices(atd::cl::Platform const & platform)
bp::list get_devices(cl::Platform const & platform)
{
std::vector<atd::cl::Device> devices;
std::vector<cl::Device> devices;
platform.getDevices(CL_DEVICE_TYPE_ALL, &devices);
return to_list(devices.begin(), devices.end());
}
std::vector<atd::cl::CommandQueue> & get_queue(atd::cl::Context const & ctx)
{ return atd::cl::get_queues(ctx); }
std::vector<cl::CommandQueue> & get_queue(cl::Context const & ctx)
{ return atd::cl_ext::get_queues(ctx); }
atd::numeric_type extract_dtype(bp::object const & odtype)
{
@@ -272,27 +272,27 @@ namespace detail
}
};
atd::cl::Platform get_platform(atd::cl::Device const & device)
{ return atd::cl::Platform(device.getInfo<CL_DEVICE_PLATFORM>()); }
cl::Platform get_platform(cl::Device const & device)
{ return cl::Platform(device.getInfo<CL_DEVICE_PLATFORM>()); }
template<cl_int INFO>
typename atd::cl::detail::param_traits<atd::cl::detail::cl_device_info, INFO>::param_type
wrap_device_info(atd::cl::Device const & x)
typename cl::detail::param_traits<cl::detail::cl_device_info, INFO>::param_type
wrap_device_info(cl::Device const & x)
{ return x.getInfo<INFO>(NULL); }
template<cl_int INFO>
typename atd::cl::detail::param_traits<atd::cl::detail::cl_context_info, INFO>::param_type
wrap_context_info(atd::cl::Context const & x)
typename cl::detail::param_traits<cl::detail::cl_context_info, INFO>::param_type
wrap_context_info(cl::Context const & x)
{ return x.getInfo<INFO>(NULL); }
template<cl_int INFO>
typename atd::cl::detail::param_traits<atd::cl::detail::cl_platform_info, INFO>::param_type
wrap_platform_info(atd::cl::Platform const & x)
typename cl::detail::param_traits<cl::detail::cl_platform_info, INFO>::param_type
wrap_platform_info(cl::Platform const & x)
{ return x.getInfo<INFO>(NULL); }
template<cl_int INFO>
typename atd::cl::detail::param_traits<atd::cl::detail::cl_command_queue_info, INFO>::param_type
wrap_command_queue_info(atd::cl::CommandQueue const & x)
typename cl::detail::param_traits<cl::detail::cl_command_queue_info, INFO>::param_type
wrap_command_queue_info(cl::CommandQueue const & x)
{ return x.getInfo<INFO>(NULL); }
@@ -309,7 +309,7 @@ namespace detail
void export_cl()
{
typedef std::vector<atd::cl::CommandQueue> queues_t;
typedef std::vector<cl::CommandQueue> queues_t;
bp::class_<queues_t>("queues")
.def("__len__", &queues_t::size)
.def("__getitem__", &bp::vector_indexing_suite<queues_t>::get_item, bp::return_internal_reference<>())
@@ -333,14 +333,14 @@ void export_cl()
bp::def("device_type_to_string", &detail::to_string);
bp::class_<atd::cl::Platform>("platform", bp::no_init)
bp::class_<cl::Platform>("platform", bp::no_init)
#define WRAP(PYNAME, NAME) .add_property(PYNAME, &detail::wrap_platform_info<NAME>)
WRAP("name", CL_PLATFORM_NAME)
#undef WRAP
.def("get_devices", &detail::get_devices)
;
bp::class_<atd::cl::Device>("device", bp::no_init)
bp::class_<cl::Device>("device", bp::no_init)
#define WRAP(PYNAME, NAME) .add_property(PYNAME, &detail::wrap_device_info<NAME>)
.add_property("nv_compute_capability", &detail::nv_compute_capability)
.add_property("platform", &detail::get_platform)
@@ -351,20 +351,20 @@ void export_cl()
#undef WRAP
;
bp::class_<atd::cl::Context>("context", bp::init<atd::cl::Device>())
bp::class_<cl::Context>("context", bp::init<cl::Device>())
#define WRAP(PYNAME, NAME) .add_property(PYNAME, &detail::wrap_context_info<NAME>)
#undef WRAP
.add_property("queues", bp::make_function(&detail::get_queue, bp::return_internal_reference<>()))
;
bp::class_<atd::cl::CommandQueue>("command_queue", bp::init<atd::cl::Context, atd::cl::Device>())
bp::class_<cl::CommandQueue>("command_queue", bp::init<cl::Context, cl::Device>())
#define WRAP(PYNAME, NAME) .add_property(PYNAME, &detail::wrap_command_queue_info<NAME>)
WRAP("device", CL_QUEUE_DEVICE)
#undef WRAP
.add_property("models", bp::make_function(&atd::get_model_map, bp::return_internal_reference<>()));
;
bp::def("synchronize", &atd::cl::synchronize);
bp::def("synchronize", &atd::cl_ext::synchronize);
bp::def("get_platforms", &detail::get_platforms);
}
@@ -372,7 +372,7 @@ void export_cl()
namespace detail
{
boost::shared_ptr<atd::array>
ndarray_to_atdarray(const np::ndarray& array, const atd::cl::Context& ctx)
ndarray_to_atdarray(const np::ndarray& array, const cl::Context& ctx)
{
int d = array.get_nd();
@@ -393,12 +393,12 @@ namespace detail
boost::shared_ptr<atd::array> create_array(bp::object const & obj, bp::object odtype, atd::cl::Context context)
boost::shared_ptr<atd::array> create_array(bp::object const & obj, bp::object odtype, cl::Context context)
{
return ndarray_to_atdarray(np::from_object(obj, to_np_dtype(extract_dtype(odtype))), context);
}
boost::shared_ptr<atd::array> create_empty_array(bp::object sizes, bp::object odtype, atd::cl::Context context)
boost::shared_ptr<atd::array> create_empty_array(bp::object sizes, bp::object odtype, cl::Context context)
{
typedef boost::shared_ptr<atd::array> result_type;
@@ -435,7 +435,7 @@ namespace detail
return bp::extract<std::string>(obj.attr("__class__").attr("__name__"))();
}
boost::shared_ptr<atd::scalar> construct_scalar(bp::object obj, atd::cl::Context const & context)
boost::shared_ptr<atd::scalar> construct_scalar(bp::object obj, cl::Context const & context)
{
typedef boost::shared_ptr<atd::scalar> result_type;
std::string name = type_name(obj);
@@ -504,7 +504,7 @@ void export_array()
bp::class_<atd::array,
boost::shared_ptr<atd::array> >
( "array", bp::no_init)
.def("__init__", bp::make_constructor(detail::create_array, bp::default_call_policies(), (bp::arg("obj"), bp::arg("dtype") = bp::scope().attr("float32"), bp::arg("context")=atd::cl::default_context())))
.def("__init__", bp::make_constructor(detail::create_array, bp::default_call_policies(), (bp::arg("obj"), bp::arg("dtype") = bp::scope().attr("float32"), bp::arg("context")=atd::cl_ext::default_context())))
.def(bp::init<atd::array_expression>())
.add_property("dtype", &atd::array::dtype)
.add_property("context", bp::make_function(&atd::array::context, bp::return_internal_reference<>()))
@@ -527,11 +527,11 @@ void export_array()
bp::class_<atd::scalar, bp::bases<atd::array> >
("scalar", bp::no_init)
.def("__init__", bp::make_constructor(detail::construct_scalar, bp::default_call_policies(), (bp::arg(""), bp::arg("context")=atd::cl::default_context())))
.def("__init__", bp::make_constructor(detail::construct_scalar, bp::default_call_policies(), (bp::arg(""), bp::arg("context")=atd::cl_ext::default_context())))
;
//Other numpy-like initializers
bp::def("empty", &detail::create_empty_array, (bp::arg("shape"), bp::arg("dtype") = bp::scope().attr("float32"), bp::arg("context")=atd::cl::default_context()));
bp::def("empty", &detail::create_empty_array, (bp::arg("shape"), bp::arg("dtype") = bp::scope().attr("float32"), bp::arg("context")=atd::cl_ext::default_context()));
//Binary
#define MAP_FUNCTION(name) \
@@ -593,7 +593,7 @@ void export_scalar()
void export_model()
{
bp::class_<atidlas::model>("model", bp::init<atd::base const &, atd::cl::CommandQueue&>())
bp::class_<atidlas::model>("model", bp::init<atd::base const &, cl::CommandQueue&>())
.def("execute", &atd::model::execute);
bp::enum_<atidlas::fetching_policy_type>

View File

@@ -13,7 +13,7 @@ void test(T epsilon, simple_matrix_base<T> & cA, simple_matrix_base<T>& cB, simp
using namespace std;
int failure_count = 0;
ad::cl::Context const & ctx = C.context();
cl::Context const & ctx = C.context();
int_t M = cC.size1();
int_t N = cC.size2();
@@ -94,7 +94,7 @@ void test(T epsilon, simple_matrix_base<T> & cA, simple_matrix_base<T>& cB, simp
}
template<typename T>
void test_impl(T epsilon, ad::cl::Context const & ctx)
void test_impl(T epsilon, cl::Context const & ctx)
{
using atidlas::_;
@@ -119,9 +119,9 @@ void test_impl(T epsilon, ad::cl::Context const & ctx)
int main()
{
for(ad::cl::queues_t::iterator it = ad::cl::queues.begin() ; it != ad::cl::queues.end() ; ++it)
for(ad::cl_ext::queues_t::iterator it = ad::cl_ext::queues.begin() ; it != ad::cl_ext::queues.end() ; ++it)
{
ad::cl::Device device = it->second[0].getInfo<CL_QUEUE_DEVICE>();
cl::Device device = it->second[0].getInfo<CL_QUEUE_DEVICE>();
std::cout << "Device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
std::cout << "---" << std::endl;
std::cout << ">> float" << std::endl;

View File

@@ -53,7 +53,7 @@ void test_impl(T epsilon, simple_matrix_base<T> & cC, simple_matrix_base<T> cons
}
template<typename T>
void test_impl(T epsilon, ad::cl::Context const & ctx)
void test_impl(T epsilon, cl::Context const & ctx)
{
int_t M = 412;
int_t N = 245;
@@ -75,9 +75,9 @@ void test_impl(T epsilon, ad::cl::Context const & ctx)
int main()
{
for(ad::cl::queues_t::iterator it = ad::cl::queues.begin() ; it != ad::cl::queues.end() ; ++it)
for(ad::cl_ext::queues_t::iterator it = ad::cl_ext::queues.begin() ; it != ad::cl_ext::queues.end() ; ++it)
{
ad::cl::Device device = it->second[0].getInfo<CL_QUEUE_DEVICE>();
cl::Device device = it->second[0].getInfo<CL_QUEUE_DEVICE>();
std::cout << "Device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
std::cout << "---" << std::endl;
std::cout << ">> float" << std::endl;

View File

@@ -46,7 +46,7 @@ void test_row_wise_reduction(T epsilon, simple_vector_base<T> & cy, simple_matri
}
template<typename T>
void test_impl(T epsilon, ad::cl::Context const & ctx)
void test_impl(T epsilon, cl::Context const & ctx)
{
int_t M = 1324;
int_t N = 1143;
@@ -65,9 +65,9 @@ void test_impl(T epsilon, ad::cl::Context const & ctx)
int main()
{
for(ad::cl::queues_t::iterator it = ad::cl::queues.begin() ; it != ad::cl::queues.end() ; ++it)
for(ad::cl_ext::queues_t::iterator it = ad::cl_ext::queues.begin() ; it != ad::cl_ext::queues.end() ; ++it)
{
ad::cl::Device device = it->second[0].getInfo<CL_QUEUE_DEVICE>();
cl::Device device = it->second[0].getInfo<CL_QUEUE_DEVICE>();
std::cout << "Device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
std::cout << "---" << std::endl;
std::cout << ">> float" << std::endl;

View File

@@ -12,7 +12,7 @@ void test_reduction(T epsilon, simple_vector_base<T> & cx, simple_vector_base<T
ad::array & x, ad::array & y)
{
using namespace std;
ad::cl::Context const & ctx = x.context();
cl::Context const & ctx = x.context();
int_t N = cx.size();
unsigned int failure_count = 0;
@@ -52,7 +52,7 @@ void test_reduction(T epsilon, simple_vector_base<T> & cx, simple_vector_base<T
}
template<typename T>
void test_impl(T epsilon, ad::cl::Context const & ctx)
void test_impl(T epsilon, cl::Context const & ctx)
{
using atidlas::_;
@@ -74,9 +74,9 @@ void test_impl(T epsilon, ad::cl::Context const & ctx)
int main()
{
for(ad::cl::queues_t::iterator it = ad::cl::queues.begin() ; it != ad::cl::queues.end() ; ++it)
for(ad::cl_ext::queues_t::iterator it = ad::cl_ext::queues.begin() ; it != ad::cl_ext::queues.end() ; ++it)
{
ad::cl::Device device = it->second[0].getInfo<CL_QUEUE_DEVICE>();
cl::Device device = it->second[0].getInfo<CL_QUEUE_DEVICE>();
std::cout << "Device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
std::cout << "---" << std::endl;
std::cout << ">> float" << std::endl;

View File

@@ -14,7 +14,7 @@ void test_element_wise_vector(T epsilon, simple_vector_base<T> & cx, simple_vect
int failure_count = 0;
ad::numeric_type dtype = x.dtype();
ad::cl::Context const & ctx = x.context();
cl::Context const & ctx = x.context();
int_t N = cz.size();
@@ -89,7 +89,7 @@ void test_element_wise_vector(T epsilon, simple_vector_base<T> & cx, simple_vect
}
template<typename T>
void test_impl(T epsilon, ad::cl::Context const & ctx)
void test_impl(T epsilon, cl::Context const & ctx)
{
using atidlas::_;
@@ -114,9 +114,9 @@ void test_impl(T epsilon, ad::cl::Context const & ctx)
int main()
{
for(ad::cl::queues_t::iterator it = ad::cl::queues.begin() ; it != ad::cl::queues.end() ; ++it)
for(ad::cl_ext::queues_t::iterator it = ad::cl_ext::queues.begin() ; it != ad::cl_ext::queues.end() ; ++it)
{
ad::cl::Device device = it->second[0].getInfo<CL_QUEUE_DEVICE>();
cl::Device device = it->second[0].getInfo<CL_QUEUE_DEVICE>();
std::cout << "Device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
std::cout << "---" << std::endl;
std::cout << ">> float" << std::endl;