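Now using the system OpenCL C++ header (<CL/cl.hpp>) instead of the project-local "atidlas/cl/cl.hpp"; the atidlas::cl helper namespace is renamed to atidlas::cl_ext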
This commit is contained in:
@@ -16,7 +16,7 @@
|
||||
|
||||
|
||||
namespace ad = atidlas;
|
||||
typedef atidlas::int_t int_t;
|
||||
typedef ad::int_t int_t;
|
||||
|
||||
template<class T>
|
||||
void bench(ad::numeric_type dtype)
|
||||
@@ -31,11 +31,11 @@ void bench(ad::numeric_type dtype)
|
||||
times.clear();\
|
||||
total_time = 0;\
|
||||
OP;\
|
||||
ad::cl::synchronize(ad::cl::default_context());\
|
||||
ad::cl_ext::synchronize(ad::cl_ext::default_context());\
|
||||
while(total_time < 1e-2){\
|
||||
timer.start(); \
|
||||
OP;\
|
||||
ad::cl::synchronize(ad::cl::default_context());\
|
||||
ad::cl_ext::synchronize(ad::cl_ext::default_context());\
|
||||
times.push_back(timer.get());\
|
||||
total_time += times.back();\
|
||||
}\
|
||||
@@ -52,17 +52,17 @@ void bench(ad::numeric_type dtype)
|
||||
int_t N = *it;
|
||||
std::cout << N;
|
||||
/* ATIDLAS */
|
||||
atidlas::array x(N, dtype), y(N, dtype);
|
||||
ad::array x(N, dtype), y(N, dtype);
|
||||
BENCHMARK(y = x + y, bandwidth(3*N, tres, dtsize));
|
||||
/* clAmdBlas */
|
||||
#ifdef BENCH_CLAMDBLAS
|
||||
BENCHMARK(clAmdBlasSaxpy(N, 1, x.data()(), 0, 1, y.data()(), 0, 1, 1, &atidlas::cl::get_queue(x.context(), 0)(), 0, NULL, NULL), bandwidth(3*N, tres, dtsize))
|
||||
BENCHMARK(clAmdBlasSaxpy(N, 1, x.data()(), 0, 1, y.data()(), 0, 1, 1, &ad::cl_ext::get_queue(x.context(), 0)(), 0, NULL, NULL), bandwidth(3*N, tres, dtsize))
|
||||
#endif
|
||||
/* BLAS */
|
||||
#ifdef BENCH_CBLAS
|
||||
std::vector<float> cx(N), cy(N);
|
||||
atidlas::copy(x, cx);
|
||||
atidlas::copy(y, cy);
|
||||
ad::copy(x, cx);
|
||||
ad::copy(y, cy);
|
||||
BENCHMARK(cblas_saxpy(N, 1, cx.data(), 1, cy.data(), 1), bandwidth(3*N, tres, dtsize));
|
||||
#endif
|
||||
/* CuBLAS */
|
||||
@@ -84,19 +84,19 @@ void bench(ad::numeric_type dtype)
|
||||
int_t N = *it;
|
||||
std::cout << N;
|
||||
/* ATIDLAS */
|
||||
atidlas::array x(N, dtype), y(N, dtype);
|
||||
atidlas::array scratch(N, dtype);
|
||||
atidlas::scalar s(dtype);
|
||||
ad::array x(N, dtype), y(N, dtype);
|
||||
ad::array scratch(N, dtype);
|
||||
ad::scalar s(dtype);
|
||||
BENCHMARK(s = dot(x,y), bandwidth(2*N, tres, dtsize));
|
||||
/* clAmdBlas */
|
||||
#ifdef BENCH_CLAMDBLAS
|
||||
BENCHMARK(clAmdBlasSdot(N, s.data()(), 0, x.data()(), 0, 1, y.data()(), 0, 1, scratch.data()(), 1, &atidlas::cl::get_queue(x.context(), 0)(), 0, NULL, NULL), bandwidth(2*N, tres, dtsize))
|
||||
BENCHMARK(clAmdBlasSdot(N, s.data()(), 0, x.data()(), 0, 1, y.data()(), 0, 1, scratch.data()(), 1, &ad::cl_ext::get_queue(x.context(), 0)(), 0, NULL, NULL), bandwidth(2*N, tres, dtsize))
|
||||
#endif
|
||||
/* BLAS */
|
||||
#ifdef BENCH_CBLAS
|
||||
std::vector<float> cx(N), cy(N);
|
||||
atidlas::copy(x, cx);
|
||||
atidlas::copy(y, cy);
|
||||
ad::copy(x, cx);
|
||||
ad::copy(y, cy);
|
||||
BENCHMARK(cblas_sdot(N, cx.data(), 1, cy.data(), 1), bandwidth(2*N, tres, dtsize));
|
||||
#endif
|
||||
std::cout << std::endl;
|
||||
@@ -115,18 +115,18 @@ void bench(ad::numeric_type dtype)
|
||||
int_t N = *Nit;
|
||||
std::cout << M << "," << N;
|
||||
/* ATIDLAS */
|
||||
atidlas::array A(N, M, dtype), y(M, dtype), x(N, dtype);
|
||||
ad::array A(N, M, dtype), y(M, dtype), x(N, dtype);
|
||||
BENCHMARK(y = dot(trans(A),x), bandwidth(M*N + M + N, tres, dtsize));
|
||||
/* clAmdBlas */
|
||||
#ifdef BENCH_CLAMDBLAS
|
||||
BENCHMARK(clAmdBlasSgemv(clAmdBlasColumnMajor, clAmdBlasTrans, N, M, 1, A.data()(), A.ld(), x.data()(), 0, 1, 0, y.data()(), 0, 1, 1, &atidlas::cl::get_queue(x.context(), 0)(),0, NULL, NULL), bandwidth(M*N + M + N, tres, dtsize))
|
||||
BENCHMARK(clAmdBlasSgemv(clAmdBlasColumnMajor, clAmdBlasTrans, N, M, 1, A.data()(), A.ld(), x.data()(), 0, 1, 0, y.data()(), 0, 1, 1, &ad::cl_ext::get_queue(x.context(), 0)(),0, NULL, NULL), bandwidth(M*N + M + N, tres, dtsize))
|
||||
#endif
|
||||
/* BLAS */
|
||||
#ifdef BENCH_CBLAS
|
||||
std::vector<float> cA(N*M), cx(N), cy(M);
|
||||
atidlas::copy(x, cx);
|
||||
atidlas::copy(y, cy);
|
||||
atidlas::copy(A, cA);
|
||||
ad::copy(x, cx);
|
||||
ad::copy(y, cy);
|
||||
ad::copy(A, cA);
|
||||
BENCHMARK(cblas_sgemv(CblasColMajor, CblasTrans, N, M, 1, cA.data(), N, cx.data(), 1, 0, cy.data(), 1), bandwidth(M*N + M + N, tres, dtsize));
|
||||
#endif
|
||||
std::cout << std::endl;
|
||||
@@ -144,19 +144,19 @@ void bench(ad::numeric_type dtype)
|
||||
int_t M = *Mit, N = *Nit, K = *Kit;
|
||||
std::cout << M << "," << N << "," << K;
|
||||
/* ATIDLAS */
|
||||
atidlas::array C(M, N, dtype), A(M, K, dtype), B(N, K, dtype);
|
||||
ad::array C(M, N, dtype), A(M, K, dtype), B(N, K, dtype);
|
||||
BENCHMARK(C = dot(A,trans(B)), gflops((double)2*M*N*K, tres));
|
||||
/* clAmdBlas */
|
||||
#ifdef BENCH_CLAMDBLAS
|
||||
BENCHMARK(clAmdBlasSgemm(clAmdBlasColumnMajor, clAmdBlasNoTrans, clAmdBlasTrans, M, N, K, 1, A.data()(), A.ld(), B.data()(), B.ld(),
|
||||
0, C.data()(), C.ld(), 1, &atidlas::cl::get_queue(C.context(), 0)(),0, NULL, NULL), gflops((double)2*M*N*K, tres))
|
||||
0, C.data()(), C.ld(), 1, &ad::cl_ext::get_queue(C.context(), 0)(),0, NULL, NULL), gflops((double)2*M*N*K, tres))
|
||||
#endif
|
||||
/* BLAS */
|
||||
#ifdef BENCH_CBLAS
|
||||
std::vector<float> cC(M*N), cA(M*K), cB(N*K);
|
||||
atidlas::copy(C, cC);
|
||||
atidlas::copy(A, cA);
|
||||
atidlas::copy(B, cB);
|
||||
ad::copy(C, cC);
|
||||
ad::copy(A, cA);
|
||||
ad::copy(B, cB);
|
||||
BENCHMARK(cblas_sgemm(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, cA.data(), M, cB.data(), N, 1, cC.data(), M), gflops((double)2*M*N*K, tres));
|
||||
#endif
|
||||
std::cout << std::endl;
|
||||
@@ -171,16 +171,16 @@ int main(int argc, char* argv[])
|
||||
#endif
|
||||
|
||||
int device_idx = 0;
|
||||
if(atidlas::cl::queues.size()>1){
|
||||
atidlas::cl::queues_t & queues = atidlas::cl::queues;
|
||||
if(ad::cl_ext::queues.size()>1){
|
||||
ad::cl_ext::queues_t & queues = ad::cl_ext::queues;
|
||||
if(argc!=2)
|
||||
{
|
||||
std::cerr << "usage : blas-bench [DEVICE_IDX]" << std::endl;
|
||||
std::cout << "Devices available: " << std::endl;
|
||||
unsigned int current=0;
|
||||
for(atidlas::cl::queues_t::const_iterator it = queues.begin() ; it != queues.end() ; ++it){
|
||||
atidlas::cl::Device device = it->first.getInfo<CL_CONTEXT_DEVICES>()[0];
|
||||
std::cout << current++ << ": " << device.getInfo<CL_DEVICE_NAME>() << "(" << atidlas::cl::Platform(device.getInfo<CL_DEVICE_PLATFORM>()).getInfo<CL_PLATFORM_NAME>() << ")" << std::endl;
|
||||
for(ad::cl_ext::queues_t::const_iterator it = queues.begin() ; it != queues.end() ; ++it){
|
||||
cl::Device device = it->first.getInfo<CL_CONTEXT_DEVICES>()[0];
|
||||
std::cout << current++ << ": " << device.getInfo<CL_DEVICE_NAME>() << "(" << cl::Platform(device.getInfo<CL_DEVICE_PLATFORM>()).getInfo<CL_PLATFORM_NAME>() << ")" << std::endl;
|
||||
}
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
@@ -188,7 +188,7 @@ int main(int argc, char* argv[])
|
||||
device_idx = atoi(argv[1]);
|
||||
}
|
||||
|
||||
atidlas::cl::default_context_idx = device_idx;
|
||||
ad::cl_ext::default_context_idx = device_idx;
|
||||
std::cout << "#Benchmark : BLAS" << std::endl;
|
||||
std::cout << "#----------------" << std::endl;
|
||||
bench<float>(ad::FLOAT_TYPE);
|
||||
|
@@ -7,19 +7,19 @@ namespace ad = atidlas;
|
||||
|
||||
int main()
|
||||
{
|
||||
for(ad::cl::queues_t::iterator it = ad::cl::queues.begin() ; it != ad::cl::queues.end() ; ++it)
|
||||
for(ad::cl_ext::queues_t::iterator it = ad::cl_ext::queues.begin() ; it != ad::cl_ext::queues.end() ; ++it)
|
||||
{
|
||||
ad::array x(10, ad::FLOAT_TYPE, it->first);
|
||||
ad::cl::Device device = it->second[0].getInfo<CL_QUEUE_DEVICE>();
|
||||
cl::Device device = it->second[0].getInfo<CL_QUEUE_DEVICE>();
|
||||
ad::tools::timer t;
|
||||
std::cout << "Device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
|
||||
std::cout << "-------------------------" << std::endl;
|
||||
x = x + x;
|
||||
ad::cl::synchronize(x.context());
|
||||
ad::cl_ext::synchronize(x.context());
|
||||
t.start();\
|
||||
for(unsigned int i = 0 ; i < 100 ; ++i){
|
||||
x = x + x;
|
||||
ad::cl::synchronize(x.context());
|
||||
ad::cl_ext::synchronize(x.context());
|
||||
}
|
||||
std::cout << "Kernel launch overhead: " << t.get()/100 << std::endl;
|
||||
std::cout << "Expression tree creation:" << std::endl;
|
||||
|
@@ -3,7 +3,7 @@
|
||||
|
||||
#include <iostream>
|
||||
#include "atidlas/types.h"
|
||||
#include "atidlas/cl/cl.hpp"
|
||||
#include <CL/cl.hpp>
|
||||
#include "atidlas/cl/queues.h"
|
||||
#include "atidlas/symbolic/expression.h"
|
||||
|
||||
@@ -18,19 +18,19 @@ class array: public obj_base
|
||||
friend array reshape(array const &, int_t, int_t);
|
||||
public:
|
||||
//1D Constructors
|
||||
array(int_t size1, numeric_type dtype, cl::Context context = cl::default_context());
|
||||
array(int_t size1, numeric_type dtype, cl::Context context = cl_ext::default_context());
|
||||
template<typename DT>
|
||||
array(std::vector<DT> const & data, cl::Context context = cl::default_context());
|
||||
array(std::vector<DT> const & data, cl::Context context = cl_ext::default_context());
|
||||
array(array & v, slice const & s1);
|
||||
|
||||
//2D Constructors
|
||||
array(int_t size1, int_t size2, numeric_type dtype, cl::Context context = cl::default_context());
|
||||
array(int_t size1, int_t size2, numeric_type dtype, cl::Context context = cl_ext::default_context());
|
||||
template<typename DT>
|
||||
array(int_t size1, int_t size2, std::vector<DT> const & data, cl::Context context = cl::default_context());
|
||||
array(int_t size1, int_t size2, std::vector<DT> const & data, cl::Context context = cl_ext::default_context());
|
||||
array(array & M, slice const & s1, slice const & s2);
|
||||
|
||||
//General constructor
|
||||
array(numeric_type dtype, cl::Buffer data, slice const & s1, slice const & s2, int_t ld, cl::Context context = cl::default_context());
|
||||
array(numeric_type dtype, cl::Buffer data, slice const & s1, slice const & s2, int_t ld, cl::Context context = cl_ext::default_context());
|
||||
array(array_expression const & proxy);
|
||||
array(array const &);
|
||||
|
||||
@@ -91,9 +91,9 @@ class scalar : public array
|
||||
private:
|
||||
template<class T> T cast() const;
|
||||
public:
|
||||
explicit scalar(numeric_type dtype, cl::Buffer const & data, int_t offset, cl::Context context = cl::default_context());
|
||||
explicit scalar(value_scalar value, cl::Context context = cl::default_context());
|
||||
explicit scalar(numeric_type dtype, cl::Context context = cl::default_context());
|
||||
explicit scalar(numeric_type dtype, cl::Buffer const & data, int_t offset, cl::Context context = cl_ext::default_context());
|
||||
explicit scalar(value_scalar value, cl::Context context = cl_ext::default_context());
|
||||
explicit scalar(numeric_type dtype, cl::Context context = cl_ext::default_context());
|
||||
scalar(array_expression const & proxy);
|
||||
scalar& operator=(value_scalar const &);
|
||||
// scalar& operator=(scalar const & s);
|
||||
@@ -209,8 +209,8 @@ ATIDLAS_DECLARE_REDUCTION(max)
|
||||
ATIDLAS_DECLARE_REDUCTION(min)
|
||||
ATIDLAS_DECLARE_REDUCTION(argmin)
|
||||
|
||||
atidlas::array_expression eye(std::size_t, std::size_t, atidlas::numeric_type, cl::Context ctx = cl::default_context());
|
||||
array_expression zeros(std::size_t M, std::size_t N, numeric_type dtype, cl::Context ctx = cl::default_context());
|
||||
atidlas::array_expression eye(std::size_t, std::size_t, atidlas::numeric_type, cl::Context ctx = cl_ext::default_context());
|
||||
array_expression zeros(std::size_t M, std::size_t N, numeric_type dtype, cl::Context ctx = cl_ext::default_context());
|
||||
array reshape(array const &, int_t, int_t);
|
||||
|
||||
//
|
||||
|
@@ -2,7 +2,7 @@
|
||||
#define ATIDLAS_BACKEND_BINDER_H
|
||||
|
||||
#include <map>
|
||||
#include "atidlas/cl/cl.hpp"
|
||||
#include <CL/cl.hpp>
|
||||
|
||||
namespace atidlas
|
||||
{
|
||||
|
@@ -8,7 +8,7 @@
|
||||
#include "atidlas/types.h"
|
||||
#include "atidlas/backend/parse.h"
|
||||
#include "atidlas/backend/stream.h"
|
||||
#include "atidlas/cl/cl.hpp"
|
||||
#include <CL/cl.hpp>
|
||||
#include "atidlas/cl/lazy_compiler.h"
|
||||
#include "atidlas/symbolic/expression.h"
|
||||
|
||||
@@ -163,7 +163,7 @@ public:
|
||||
std::vector<std::string> generate(unsigned int label, symbolic_expressions_container const & symbolic_expressions, cl::Device const & device);
|
||||
virtual int check_invalid(symbolic_expressions_container const & symbolic_expressions, cl::Device const & device) const = 0;
|
||||
virtual void enqueue(cl::CommandQueue & queue,
|
||||
std::vector<cl::lazy_compiler> & programs,
|
||||
std::vector<cl_ext::lazy_compiler> & programs,
|
||||
unsigned int label, symbolic_expressions_container const & symbolic_expressions) = 0;
|
||||
virtual tools::shared_ptr<base> clone() const = 0;
|
||||
private:
|
||||
|
@@ -27,7 +27,7 @@ public:
|
||||
maxpy(parameters_type const & parameters, binding_policy_t binding_policy = BIND_ALL_UNIQUE);
|
||||
maxpy(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2, fetching_policy_type fetch, binding_policy_t bind = BIND_ALL_UNIQUE);
|
||||
std::vector<int_t> input_sizes(symbolic_expressions_container const & symbolic_expressions);
|
||||
void enqueue(cl::CommandQueue & queue, std::vector<cl::lazy_compiler> & programs, unsigned int label, symbolic_expressions_container const & symbolic_expressions);
|
||||
void enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs, unsigned int label, symbolic_expressions_container const & symbolic_expressions);
|
||||
};
|
||||
|
||||
}
|
||||
|
@@ -41,7 +41,7 @@ private:
|
||||
void enqueue_block(cl::CommandQueue & queue, int_t M, int_t N, int_t K,
|
||||
array_infos const & A, array_infos const & B, array_infos const & C,
|
||||
value_scalar const & alpha, value_scalar const & beta,
|
||||
std::vector<cl::lazy_compiler> & programs, unsigned int label, int id);
|
||||
std::vector<cl_ext::lazy_compiler> & programs, unsigned int label, int id);
|
||||
array_infos create_slice(array_infos & M, int_t s0_0, int_t s0_1, int_t s1_0, int_t s1_1, bool swap);
|
||||
std::vector<int_t> infos(symbolic_expressions_container const & symbolic_expressions,
|
||||
lhs_rhs_element & C, lhs_rhs_element & A, lhs_rhs_element & B);
|
||||
@@ -49,7 +49,7 @@ public:
|
||||
mproduct(mproduct::parameters_type const & parameters, char A_trans, char B_trans);
|
||||
std::vector<int_t> input_sizes(symbolic_expressions_container const & symbolic_expressions);
|
||||
void enqueue(cl::CommandQueue & queue,
|
||||
std::vector<cl::lazy_compiler> & programs,
|
||||
std::vector<cl_ext::lazy_compiler> & programs,
|
||||
unsigned int label,
|
||||
symbolic_expressions_container const & symbolic_expressions);
|
||||
|
||||
|
@@ -35,7 +35,7 @@ private:
|
||||
std::vector<std::string> generate_impl(unsigned int, symbolic_expressions_container const &, std::vector<mapping_type> const &) const;
|
||||
public:
|
||||
virtual std::vector<int_t> input_sizes(symbolic_expressions_container const & symbolic_expressions);
|
||||
void enqueue(cl::CommandQueue & queue,std::vector<cl::lazy_compiler> & programs,unsigned int label, symbolic_expressions_container const & symbolic_expressions);
|
||||
void enqueue(cl::CommandQueue & queue,std::vector<cl_ext::lazy_compiler> & programs,unsigned int label, symbolic_expressions_container const & symbolic_expressions);
|
||||
private:
|
||||
reduction_type reduction_type_;
|
||||
};
|
||||
|
@@ -30,7 +30,7 @@ public:
|
||||
reduction(unsigned int simd, unsigned int ls, unsigned int ng, fetching_policy_type fetch, binding_policy_t bind = BIND_ALL_UNIQUE);
|
||||
std::vector<int_t> input_sizes(symbolic_expressions_container const & symbolic_expressions);
|
||||
void enqueue(cl::CommandQueue & queue,
|
||||
std::vector<cl::lazy_compiler> & programs,
|
||||
std::vector<cl_ext::lazy_compiler> & programs,
|
||||
unsigned int label,
|
||||
symbolic_expressions_container const & symbolic_expressions);
|
||||
private:
|
||||
|
@@ -23,7 +23,7 @@ public:
|
||||
vaxpy(vaxpy::parameters_type const & parameters, binding_policy_t binding_policy = BIND_ALL_UNIQUE);
|
||||
vaxpy(unsigned int _simd_width, unsigned int _group_size, unsigned int _num_groups, fetching_policy_type _fetching_policy, binding_policy_t binding_policy = BIND_ALL_UNIQUE);
|
||||
std::vector<int_t> input_sizes(symbolic_expressions_container const & symbolic_expressions);
|
||||
void enqueue(cl::CommandQueue & queue, std::vector<cl::lazy_compiler> & programs,
|
||||
void enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs,
|
||||
unsigned int label, symbolic_expressions_container const & symbolic_expressions);
|
||||
};
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -3,7 +3,8 @@
|
||||
|
||||
namespace atidlas
|
||||
{
|
||||
namespace cl
|
||||
|
||||
namespace cl_ext
|
||||
{
|
||||
|
||||
struct compare{
|
||||
|
@@ -1,13 +1,13 @@
|
||||
#ifndef ATIDLAS_CL_LAZY_COMPILER_H
|
||||
#define ATIDLAS_CL_LAZY_COMPILER_H
|
||||
|
||||
#include "atidlas/cl/cl.hpp"
|
||||
#include <CL/cl.hpp>
|
||||
#include "atidlas/cl/program_map.h"
|
||||
|
||||
namespace atidlas
|
||||
{
|
||||
|
||||
namespace cl
|
||||
namespace cl_ext
|
||||
{
|
||||
|
||||
class lazy_compiler
|
||||
|
@@ -2,12 +2,12 @@
|
||||
#define ATIDLAS_CL_PROGRAM_MAP_H
|
||||
|
||||
#include <map>
|
||||
#include "atidlas/cl/cl.hpp"
|
||||
#include <CL/cl.hpp>
|
||||
|
||||
namespace atidlas
|
||||
{
|
||||
|
||||
namespace cl
|
||||
namespace cl_ext
|
||||
{
|
||||
|
||||
class program_map
|
||||
|
@@ -2,16 +2,16 @@
|
||||
#define ATIDLAS_CL_QUEUES_H
|
||||
|
||||
#include <map>
|
||||
#include "atidlas/cl/cl.hpp"
|
||||
#include <CL/cl.hpp>
|
||||
#include "atidlas/cl/compare.hpp"
|
||||
|
||||
namespace atidlas
|
||||
{
|
||||
|
||||
namespace cl
|
||||
namespace cl_ext
|
||||
{
|
||||
|
||||
typedef std::map<cl::Program, cl::Kernel, cl::compare> kernels_t;
|
||||
typedef std::map<cl::Program, cl::Kernel, cl_ext::compare> kernels_t;
|
||||
typedef std::vector<std::pair<cl::Context, std::vector<cl::CommandQueue> > > queues_t;
|
||||
|
||||
queues_t init_queues();
|
||||
|
@@ -21,7 +21,7 @@ namespace atidlas
|
||||
private:
|
||||
std::string define_extension(std::string const & extensions, std::string const & ext);
|
||||
inline void fill_program_name(char* program_name, symbolic_expressions_container const & symbolic_expressions, binding_policy_t binding_policy);
|
||||
std::vector<cl::lazy_compiler>& init(symbolic_expressions_container const & symbolic_expressions, cl::Context const & context, cl::Device const & device, bool force_recompilation);
|
||||
std::vector<cl_ext::lazy_compiler>& init(symbolic_expressions_container const & symbolic_expressions, cl::Context const & context, cl::Device const & device, bool force_recompilation);
|
||||
|
||||
public:
|
||||
model(predictors::random_forest const &, std::vector< tools::shared_ptr<base> > const &, cl::CommandQueue &);
|
||||
@@ -36,7 +36,7 @@ namespace atidlas
|
||||
templates_container templates_;
|
||||
tools::shared_ptr<predictors::random_forest> predictor_;
|
||||
std::map<std::vector<int_t>, int> hardcoded_;
|
||||
std::map<cl_context, std::map<std::string, std::vector<cl::lazy_compiler> > > lazy_programs_;
|
||||
std::map<cl_context, std::map<std::string, std::vector<cl_ext::lazy_compiler> > > lazy_programs_;
|
||||
cl::CommandQueue & queue_;
|
||||
};
|
||||
|
||||
@@ -46,7 +46,7 @@ namespace atidlas
|
||||
model_map_t& get_model_map(cl::CommandQueue & queue);
|
||||
model& get_model(cl::CommandQueue & queue, expression_type, numeric_type);
|
||||
|
||||
extern std::map<cl::CommandQueue, model_map_t, cl::compare> models;
|
||||
extern std::map<cl::CommandQueue, model_map_t, cl_ext::compare> models;
|
||||
|
||||
}
|
||||
|
||||
|
@@ -1,7 +1,7 @@
|
||||
#ifndef _ATIDLAS_SCHEDULER_EXECUTE_H
|
||||
#define _ATIDLAS_SCHEDULER_EXECUTE_H
|
||||
|
||||
#include "atidlas/cl/cl.hpp"
|
||||
#include <CL/cl.hpp>
|
||||
#include "atidlas/model/model.h"
|
||||
#include "atidlas/symbolic/expression.h"
|
||||
|
||||
|
@@ -5,7 +5,7 @@
|
||||
#include <list>
|
||||
#include "atidlas/types.h"
|
||||
#include "atidlas/value_scalar.h"
|
||||
#include "atidlas/cl/cl.hpp"
|
||||
#include <CL/cl.hpp>
|
||||
#include "atidlas/tools/shared_ptr.hpp"
|
||||
|
||||
namespace atidlas
|
||||
|
@@ -1,7 +1,7 @@
|
||||
#ifndef ATIDLAS_TYPES_H
|
||||
#define ATIDLAS_TYPES_H
|
||||
|
||||
#include "atidlas/cl/cl.hpp"
|
||||
#include <CL/cl.hpp>
|
||||
#include "atidlas/exception/unknown_datatype.h"
|
||||
|
||||
namespace atidlas
|
||||
|
@@ -2,7 +2,7 @@
|
||||
#define ATIDLAS_VALUE_SCALAR_H
|
||||
|
||||
#include "atidlas/types.h"
|
||||
#include "atidlas/cl/cl.hpp"
|
||||
#include <CL/cl.hpp>
|
||||
|
||||
namespace atidlas
|
||||
{
|
||||
|
@@ -1,7 +1,7 @@
|
||||
#include <cassert>
|
||||
|
||||
#include "atidlas/array.h"
|
||||
#include "atidlas/cl/cl.hpp"
|
||||
#include <CL/cl.hpp>
|
||||
#include "atidlas/exception/unknown_datatype.h"
|
||||
#include "atidlas/model/model.h"
|
||||
#include "atidlas/symbolic/execute.h"
|
||||
@@ -131,7 +131,7 @@ int_t array::dsize() const
|
||||
array & array::operator=(array const & rhs)
|
||||
{
|
||||
array_expression expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ASSIGN_TYPE), context_, dtype_, shape_);
|
||||
cl::CommandQueue & queue = cl::get_queue(context_, 0);
|
||||
cl::CommandQueue & queue = cl_ext::get_queue(context_, 0);
|
||||
model_map_t & mmap = atidlas::get_model_map(queue);
|
||||
execute(expression, mmap);
|
||||
return *this;
|
||||
@@ -140,7 +140,7 @@ array & array::operator=(array const & rhs)
|
||||
array & array::operator=(array_expression const & rhs)
|
||||
{
|
||||
array_expression expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ASSIGN_TYPE), shape_);
|
||||
cl::CommandQueue & queue = cl::get_queue(context_, 0);
|
||||
cl::CommandQueue & queue = cl_ext::get_queue(context_, 0);
|
||||
model_map_t & mmap = atidlas::get_model_map(queue);
|
||||
execute(expression, mmap);
|
||||
return *this;
|
||||
@@ -243,7 +243,7 @@ namespace detail
|
||||
template<class T>
|
||||
void copy(cl::Context & ctx, cl::Buffer const & data, T value)
|
||||
{
|
||||
cl::get_queue(ctx, 0).enqueueWriteBuffer(data, CL_TRUE, 0, sizeof(T), (void*)&value);
|
||||
cl_ext::get_queue(ctx, 0).enqueueWriteBuffer(data, CL_TRUE, 0, sizeof(T), (void*)&value);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -282,7 +282,7 @@ T scalar::cast() const
|
||||
int_t dtsize = size_of(dtype_);
|
||||
#define HANDLE_CASE(DTYPE, VAL) \
|
||||
case DTYPE:\
|
||||
cl::get_queue(context_, 0).enqueueReadBuffer(data_, CL_TRUE, start_._1*dtsize, dtsize, (void*)&v.VAL);\
|
||||
cl_ext::get_queue(context_, 0).enqueueReadBuffer(data_, CL_TRUE, start_._1*dtsize, dtsize, (void*)&v.VAL);\
|
||||
return v.VAL
|
||||
|
||||
switch(dtype_)
|
||||
@@ -305,7 +305,7 @@ case DTYPE:\
|
||||
|
||||
scalar& scalar::operator=(value_scalar const & s)
|
||||
{
|
||||
cl::CommandQueue& queue = cl::get_queue(context_, 0);
|
||||
cl::CommandQueue& queue = cl_ext::get_queue(context_, 0);
|
||||
int_t dtsize = size_of(dtype_);
|
||||
|
||||
#define HANDLE_CASE(TYPE, CLTYPE) case TYPE:\
|
||||
@@ -727,7 +727,7 @@ void copy(void const * data, array& x, cl::CommandQueue & queue, bool blocking)
|
||||
x = tmp;
|
||||
}
|
||||
if(blocking)
|
||||
cl::synchronize(x.context());
|
||||
cl_ext::synchronize(x.context());
|
||||
}
|
||||
|
||||
void copy(array const & x, void* data, cl::CommandQueue & queue, bool blocking)
|
||||
@@ -744,14 +744,14 @@ void copy(array const & x, void* data, cl::CommandQueue & queue, bool blocking)
|
||||
queue.enqueueReadBuffer(tmp.data(), CL_FALSE, 0, tmp.dsize()*dtypesize, data);
|
||||
}
|
||||
if(blocking)
|
||||
cl::synchronize(x.context());
|
||||
cl_ext::synchronize(x.context());
|
||||
}
|
||||
|
||||
void copy(void const *data, array &x, bool blocking)
|
||||
{ copy(data, x, cl::get_queue(x.context(), 0), blocking); }
|
||||
{ copy(data, x, cl_ext::get_queue(x.context(), 0), blocking); }
|
||||
|
||||
void copy(array const & x, void* data, bool blocking)
|
||||
{ copy(x, data, cl::get_queue(x.context(), 0), blocking); }
|
||||
{ copy(x, data, cl_ext::get_queue(x.context(), 0), blocking); }
|
||||
|
||||
//std::vector<>
|
||||
template<class T>
|
||||
@@ -776,11 +776,11 @@ void copy(array const & x, std::vector<T> & cx, cl::CommandQueue & queue, bool b
|
||||
|
||||
template<class T>
|
||||
void copy(std::vector<T> const & cx, array & x, bool blocking)
|
||||
{ copy(cx, x, cl::get_queue(x.context(), 0), blocking); }
|
||||
{ copy(cx, x, cl_ext::get_queue(x.context(), 0), blocking); }
|
||||
|
||||
template<class T>
|
||||
void copy(array const & x, std::vector<T> & cx, bool blocking)
|
||||
{ copy(x, cx, cl::get_queue(x.context(), 0), blocking); }
|
||||
{ copy(x, cx, cl_ext::get_queue(x.context(), 0), blocking); }
|
||||
|
||||
#define INSTANTIATE(T) \
|
||||
template void copy<T>(std::vector<T> const &, array &, cl::CommandQueue&, bool);\
|
||||
|
@@ -105,7 +105,7 @@ std::vector<int_t> maxpy::input_sizes(symbolic_expressions_container const & sym
|
||||
}
|
||||
|
||||
void maxpy::enqueue(cl::CommandQueue & queue,
|
||||
std::vector<cl::lazy_compiler> & programs,
|
||||
std::vector<cl_ext::lazy_compiler> & programs,
|
||||
unsigned int label,
|
||||
symbolic_expressions_container const & symbolic_expressions)
|
||||
{
|
||||
|
@@ -568,7 +568,7 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
|
||||
void mproduct::enqueue_block(cl::CommandQueue & queue, int_t M, int_t N, int_t K,
|
||||
array_infos const & A, array_infos const & B, array_infos const & C,
|
||||
value_scalar const & alpha, value_scalar const & beta,
|
||||
std::vector<cl::lazy_compiler> & programs, unsigned int label, int id)
|
||||
std::vector<cl_ext::lazy_compiler> & programs, unsigned int label, int id)
|
||||
{
|
||||
if (A.shape1==0 || A.shape2==0 || B.shape1==0 || B.shape2==0 || C.shape1==0 || C.shape2==0)
|
||||
return;
|
||||
@@ -646,7 +646,7 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
|
||||
return infos(symbolic_expressions, d0, d1, d2);
|
||||
}
|
||||
|
||||
void mproduct::enqueue(cl::CommandQueue & queue, std::vector<cl::lazy_compiler> & programs, unsigned int label, symbolic_expressions_container const & symbolic_expressions)
|
||||
void mproduct::enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs, unsigned int label, symbolic_expressions_container const & symbolic_expressions)
|
||||
{
|
||||
using namespace tools;
|
||||
|
||||
|
@@ -215,7 +215,7 @@ std::vector<int_t> mreduction::input_sizes(symbolic_expressions_container const
|
||||
}
|
||||
|
||||
void mreduction::enqueue(cl::CommandQueue & queue,
|
||||
std::vector<cl::lazy_compiler> & programs,
|
||||
std::vector<cl_ext::lazy_compiler> & programs,
|
||||
unsigned int label,
|
||||
symbolic_expressions_container const & symbolic_expressions)
|
||||
{
|
||||
|
@@ -1,6 +1,6 @@
|
||||
#include <iostream>
|
||||
#include "atidlas/backend/templates/reduction.h"
|
||||
#include "atidlas/cl/cl.hpp"
|
||||
#include <CL/cl.hpp>
|
||||
#include "atidlas/tools/to_string.hpp"
|
||||
#include "atidlas/tools/make_map.hpp"
|
||||
#include "atidlas/tools/make_vector.hpp"
|
||||
@@ -281,7 +281,7 @@ std::vector<int_t> reduction::input_sizes(symbolic_expressions_container const &
|
||||
}
|
||||
|
||||
void reduction::enqueue(cl::CommandQueue & queue,
|
||||
std::vector<cl::lazy_compiler> & programs,
|
||||
std::vector<cl_ext::lazy_compiler> & programs,
|
||||
unsigned int label,
|
||||
symbolic_expressions_container const & symbolic_expressions)
|
||||
{
|
||||
|
@@ -106,7 +106,7 @@ std::vector<int_t> vaxpy::input_sizes(symbolic_expressions_container const & sym
|
||||
}
|
||||
|
||||
void vaxpy::enqueue(cl::CommandQueue & queue,
|
||||
std::vector<cl::lazy_compiler> & programs,
|
||||
std::vector<cl_ext::lazy_compiler> & programs,
|
||||
unsigned int label,
|
||||
symbolic_expressions_container const & symbolic_expressions)
|
||||
{
|
||||
|
@@ -3,7 +3,7 @@
|
||||
namespace atidlas
|
||||
{
|
||||
|
||||
namespace cl
|
||||
namespace cl_ext
|
||||
{
|
||||
|
||||
lazy_compiler::lazy_compiler(cl::Context const & ctx, std::string const & name, std::string const & src, bool force_recompilation) :
|
||||
|
@@ -9,7 +9,7 @@
|
||||
namespace atidlas
|
||||
{
|
||||
|
||||
namespace cl
|
||||
namespace cl_ext
|
||||
{
|
||||
|
||||
program_map::program_map()
|
||||
|
@@ -5,7 +5,7 @@
|
||||
namespace atidlas
|
||||
{
|
||||
|
||||
namespace cl
|
||||
namespace cl_ext
|
||||
{
|
||||
|
||||
void synchronize(cl::Context const & context)
|
||||
|
@@ -42,20 +42,20 @@ void model::fill_program_name(char* program_name, symbolic_expressions_container
|
||||
delete binder;
|
||||
}
|
||||
|
||||
std::vector<cl::lazy_compiler>& model::init(symbolic_expressions_container const & symbolic_expressions, cl::Context const & context, cl::Device const & device, bool force_recompilation)
|
||||
std::vector<cl_ext::lazy_compiler>& model::init(symbolic_expressions_container const & symbolic_expressions, cl::Context const & context, cl::Device const & device, bool force_recompilation)
|
||||
{
|
||||
char program_name[256];
|
||||
fill_program_name(program_name, symbolic_expressions, BIND_TO_HANDLE);
|
||||
std::string pname(program_name);
|
||||
std::vector<cl::lazy_compiler> & to_init = lazy_programs_[context()][pname];
|
||||
std::vector<cl_ext::lazy_compiler> & to_init = lazy_programs_[context()][pname];
|
||||
if(to_init.empty())
|
||||
{
|
||||
std::string extensions = device.getInfo<CL_DEVICE_EXTENSIONS>();
|
||||
|
||||
to_init.push_back(cl::lazy_compiler(context, pname, force_recompilation));
|
||||
to_init.push_back(cl_ext::lazy_compiler(context, pname, force_recompilation));
|
||||
to_init.back().add(define_extension(extensions, "cl_khr_fp64"));
|
||||
|
||||
to_init.push_back(cl::lazy_compiler(context, pname + "_fb", force_recompilation));
|
||||
to_init.push_back(cl_ext::lazy_compiler(context, pname + "_fb", force_recompilation));
|
||||
to_init.back().add(define_extension(extensions, "cl_khr_fp64"));
|
||||
|
||||
for(size_t i = 0 ; i < templates_.size() ; ++i)
|
||||
@@ -86,7 +86,7 @@ void model::execute(symbolic_expressions_container const & symbolic_expressions,
|
||||
assert(context() == queue_.getInfo<CL_QUEUE_CONTEXT>()());
|
||||
cl::Device const & device = queue_.getInfo<CL_QUEUE_DEVICE>();
|
||||
|
||||
std::vector<cl::lazy_compiler> & compilers = init(symbolic_expressions, context, device, force_recompilation);
|
||||
std::vector<cl_ext::lazy_compiler> & compilers = init(symbolic_expressions, context, device, force_recompilation);
|
||||
|
||||
//Prediction
|
||||
std::vector<int_t> x = templates_[0]->input_sizes(symbolic_expressions);
|
||||
@@ -114,7 +114,7 @@ void model::tune(symbolic_expressions_container const & symbolic_expressions)
|
||||
assert(context() == queue_.getInfo<CL_QUEUE_CONTEXT>()());
|
||||
cl::Device device = queue_.getInfo<CL_QUEUE_DEVICE>();
|
||||
|
||||
std::vector<cl::lazy_compiler> & compilers = init(symbolic_expressions, context, device, false);
|
||||
std::vector<cl_ext::lazy_compiler> & compilers = init(symbolic_expressions, context, device, false);
|
||||
|
||||
//Collect the timings
|
||||
std::vector<float> timings(templates_.size());
|
||||
@@ -265,7 +265,7 @@ model_map_t init_models(cl::CommandQueue & queue)
|
||||
|
||||
model_map_t& get_model_map(cl::CommandQueue & queue)
|
||||
{
|
||||
std::map<cl::CommandQueue, model_map_t, cl::compare>::iterator it = models.find(queue);
|
||||
std::map<cl::CommandQueue, model_map_t, cl_ext::compare>::iterator it = models.find(queue);
|
||||
if(it == models.end())
|
||||
return models.insert(std::make_pair(queue, init_models(queue))).first->second;
|
||||
return it->second;
|
||||
@@ -277,6 +277,6 @@ model& get_model(cl::CommandQueue & queue, expression_type expression, numeric_t
|
||||
return *get_model_map(queue).at(key);
|
||||
}
|
||||
|
||||
std::map<cl::CommandQueue, model_map_t, cl::compare> models;
|
||||
std::map<cl::CommandQueue, model_map_t, cl_ext::compare> models;
|
||||
|
||||
}
|
||||
|
@@ -3,7 +3,7 @@
|
||||
#include <vector>
|
||||
#include "atidlas/types.h"
|
||||
#include "atidlas/array.h"
|
||||
#include "atidlas/cl/cl.hpp"
|
||||
#include <CL/cl.hpp>
|
||||
#include "atidlas/model/model.h"
|
||||
#include "atidlas/symbolic/expression.h"
|
||||
|
||||
|
@@ -2,7 +2,7 @@
|
||||
#include <vector>
|
||||
#include "atidlas/array.h"
|
||||
#include "atidlas/value_scalar.h"
|
||||
#include "atidlas/cl/cl.hpp"
|
||||
#include <CL/cl.hpp>
|
||||
#include "atidlas/symbolic/expression.h"
|
||||
|
||||
namespace atidlas
|
||||
|
@@ -17,7 +17,7 @@ namespace boost {
|
||||
namespace intrusive {
|
||||
namespace detail {
|
||||
|
||||
template<class Derived, bool DoClear = true>
|
||||
template<class Derived, bool Dcl_extear = true>
|
||||
class clear_on_destructor_base
|
||||
{
|
||||
protected:
|
||||
|
@@ -77,12 +77,12 @@ bp::tuple get_shape(atd::array const & x)
|
||||
// x.reshape(size1, size2);
|
||||
//}
|
||||
|
||||
//boost::python::dict create_queues(atd::cl::queues_t queues)
|
||||
//boost::python::dict create_queues(atd::cl_ext::queues_t queues)
|
||||
//{
|
||||
// boost::python::dict dictionary;
|
||||
// for (atd::cl::queues_t::iterator it = queues.begin(); it != queues.end(); ++it) {
|
||||
// for (atd::cl_ext::queues_t::iterator it = queues.begin(); it != queues.end(); ++it) {
|
||||
// bp::list list;
|
||||
// for (atd::cl::queues_t::mapped_type::iterator itt = it->second.begin(); itt != it->second.end(); ++itt)
|
||||
// for (atd::cl_ext::queues_t::mapped_type::iterator itt = it->second.begin(); itt != it->second.end(); ++itt)
|
||||
// list.append(*itt);
|
||||
// dictionary[it->first] = list;
|
||||
// }
|
||||
@@ -175,7 +175,7 @@ namespace detail
|
||||
return res;
|
||||
}
|
||||
|
||||
bp::list nv_compute_capability(atd::cl::Device const & device)
|
||||
bp::list nv_compute_capability(cl::Device const & device)
|
||||
{
|
||||
bp::list res;
|
||||
res.append(device.getInfo<CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV>());
|
||||
@@ -185,20 +185,20 @@ namespace detail
|
||||
|
||||
bp::list get_platforms()
|
||||
{
|
||||
std::vector<atd::cl::Platform> platforms;
|
||||
atd::cl::Platform::get(&platforms);
|
||||
std::vector<cl::Platform> platforms;
|
||||
cl::Platform::get(&platforms);
|
||||
return to_list(platforms.begin(), platforms.end());
|
||||
}
|
||||
|
||||
bp::list get_devices(atd::cl::Platform const & platform)
|
||||
bp::list get_devices(cl::Platform const & platform)
|
||||
{
|
||||
std::vector<atd::cl::Device> devices;
|
||||
std::vector<cl::Device> devices;
|
||||
platform.getDevices(CL_DEVICE_TYPE_ALL, &devices);
|
||||
return to_list(devices.begin(), devices.end());
|
||||
}
|
||||
|
||||
std::vector<atd::cl::CommandQueue> & get_queue(atd::cl::Context const & ctx)
|
||||
{ return atd::cl::get_queues(ctx); }
|
||||
std::vector<cl::CommandQueue> & get_queue(cl::Context const & ctx)
|
||||
{ return atd::cl_ext::get_queues(ctx); }
|
||||
|
||||
atd::numeric_type extract_dtype(bp::object const & odtype)
|
||||
{
|
||||
@@ -272,27 +272,27 @@ namespace detail
|
||||
}
|
||||
};
|
||||
|
||||
atd::cl::Platform get_platform(atd::cl::Device const & device)
|
||||
{ return atd::cl::Platform(device.getInfo<CL_DEVICE_PLATFORM>()); }
|
||||
cl::Platform get_platform(cl::Device const & device)
|
||||
{ return cl::Platform(device.getInfo<CL_DEVICE_PLATFORM>()); }
|
||||
|
||||
template<cl_int INFO>
|
||||
typename atd::cl::detail::param_traits<atd::cl::detail::cl_device_info, INFO>::param_type
|
||||
wrap_device_info(atd::cl::Device const & x)
|
||||
typename cl::detail::param_traits<cl::detail::cl_device_info, INFO>::param_type
|
||||
wrap_device_info(cl::Device const & x)
|
||||
{ return x.getInfo<INFO>(NULL); }
|
||||
|
||||
template<cl_int INFO>
|
||||
typename atd::cl::detail::param_traits<atd::cl::detail::cl_context_info, INFO>::param_type
|
||||
wrap_context_info(atd::cl::Context const & x)
|
||||
typename cl::detail::param_traits<cl::detail::cl_context_info, INFO>::param_type
|
||||
wrap_context_info(cl::Context const & x)
|
||||
{ return x.getInfo<INFO>(NULL); }
|
||||
|
||||
template<cl_int INFO>
|
||||
typename atd::cl::detail::param_traits<atd::cl::detail::cl_platform_info, INFO>::param_type
|
||||
wrap_platform_info(atd::cl::Platform const & x)
|
||||
typename cl::detail::param_traits<cl::detail::cl_platform_info, INFO>::param_type
|
||||
wrap_platform_info(cl::Platform const & x)
|
||||
{ return x.getInfo<INFO>(NULL); }
|
||||
|
||||
template<cl_int INFO>
|
||||
typename atd::cl::detail::param_traits<atd::cl::detail::cl_command_queue_info, INFO>::param_type
|
||||
wrap_command_queue_info(atd::cl::CommandQueue const & x)
|
||||
typename cl::detail::param_traits<cl::detail::cl_command_queue_info, INFO>::param_type
|
||||
wrap_command_queue_info(cl::CommandQueue const & x)
|
||||
{ return x.getInfo<INFO>(NULL); }
|
||||
|
||||
|
||||
@@ -309,7 +309,7 @@ namespace detail
|
||||
|
||||
void export_cl()
|
||||
{
|
||||
typedef std::vector<atd::cl::CommandQueue> queues_t;
|
||||
typedef std::vector<cl::CommandQueue> queues_t;
|
||||
bp::class_<queues_t>("queues")
|
||||
.def("__len__", &queues_t::size)
|
||||
.def("__getitem__", &bp::vector_indexing_suite<queues_t>::get_item, bp::return_internal_reference<>())
|
||||
@@ -333,14 +333,14 @@ void export_cl()
|
||||
bp::def("device_type_to_string", &detail::to_string);
|
||||
|
||||
|
||||
bp::class_<atd::cl::Platform>("platform", bp::no_init)
|
||||
bp::class_<cl::Platform>("platform", bp::no_init)
|
||||
#define WRAP(PYNAME, NAME) .add_property(PYNAME, &detail::wrap_platform_info<NAME>)
|
||||
WRAP("name", CL_PLATFORM_NAME)
|
||||
#undef WRAP
|
||||
.def("get_devices", &detail::get_devices)
|
||||
;
|
||||
|
||||
bp::class_<atd::cl::Device>("device", bp::no_init)
|
||||
bp::class_<cl::Device>("device", bp::no_init)
|
||||
#define WRAP(PYNAME, NAME) .add_property(PYNAME, &detail::wrap_device_info<NAME>)
|
||||
.add_property("nv_compute_capability", &detail::nv_compute_capability)
|
||||
.add_property("platform", &detail::get_platform)
|
||||
@@ -351,20 +351,20 @@ void export_cl()
|
||||
#undef WRAP
|
||||
;
|
||||
|
||||
bp::class_<atd::cl::Context>("context", bp::init<atd::cl::Device>())
|
||||
bp::class_<cl::Context>("context", bp::init<cl::Device>())
|
||||
#define WRAP(PYNAME, NAME) .add_property(PYNAME, &detail::wrap_context_info<NAME>)
|
||||
#undef WRAP
|
||||
.add_property("queues", bp::make_function(&detail::get_queue, bp::return_internal_reference<>()))
|
||||
;
|
||||
|
||||
bp::class_<atd::cl::CommandQueue>("command_queue", bp::init<atd::cl::Context, atd::cl::Device>())
|
||||
bp::class_<cl::CommandQueue>("command_queue", bp::init<cl::Context, cl::Device>())
|
||||
#define WRAP(PYNAME, NAME) .add_property(PYNAME, &detail::wrap_command_queue_info<NAME>)
|
||||
WRAP("device", CL_QUEUE_DEVICE)
|
||||
#undef WRAP
|
||||
.add_property("models", bp::make_function(&atd::get_model_map, bp::return_internal_reference<>()));
|
||||
;
|
||||
|
||||
bp::def("synchronize", &atd::cl::synchronize);
|
||||
bp::def("synchronize", &atd::cl_ext::synchronize);
|
||||
bp::def("get_platforms", &detail::get_platforms);
|
||||
|
||||
}
|
||||
@@ -372,7 +372,7 @@ void export_cl()
|
||||
namespace detail
|
||||
{
|
||||
boost::shared_ptr<atd::array>
|
||||
ndarray_to_atdarray(const np::ndarray& array, const atd::cl::Context& ctx)
|
||||
ndarray_to_atdarray(const np::ndarray& array, const cl::Context& ctx)
|
||||
{
|
||||
|
||||
int d = array.get_nd();
|
||||
@@ -393,12 +393,12 @@ namespace detail
|
||||
|
||||
|
||||
|
||||
boost::shared_ptr<atd::array> create_array(bp::object const & obj, bp::object odtype, atd::cl::Context context)
|
||||
boost::shared_ptr<atd::array> create_array(bp::object const & obj, bp::object odtype, cl::Context context)
|
||||
{
|
||||
return ndarray_to_atdarray(np::from_object(obj, to_np_dtype(extract_dtype(odtype))), context);
|
||||
}
|
||||
|
||||
boost::shared_ptr<atd::array> create_empty_array(bp::object sizes, bp::object odtype, atd::cl::Context context)
|
||||
boost::shared_ptr<atd::array> create_empty_array(bp::object sizes, bp::object odtype, cl::Context context)
|
||||
{
|
||||
typedef boost::shared_ptr<atd::array> result_type;
|
||||
|
||||
@@ -435,7 +435,7 @@ namespace detail
|
||||
return bp::extract<std::string>(obj.attr("__class__").attr("__name__"))();
|
||||
}
|
||||
|
||||
boost::shared_ptr<atd::scalar> construct_scalar(bp::object obj, atd::cl::Context const & context)
|
||||
boost::shared_ptr<atd::scalar> construct_scalar(bp::object obj, cl::Context const & context)
|
||||
{
|
||||
typedef boost::shared_ptr<atd::scalar> result_type;
|
||||
std::string name = type_name(obj);
|
||||
@@ -504,7 +504,7 @@ void export_array()
|
||||
bp::class_<atd::array,
|
||||
boost::shared_ptr<atd::array> >
|
||||
( "array", bp::no_init)
|
||||
.def("__init__", bp::make_constructor(detail::create_array, bp::default_call_policies(), (bp::arg("obj"), bp::arg("dtype") = bp::scope().attr("float32"), bp::arg("context")=atd::cl::default_context())))
|
||||
.def("__init__", bp::make_constructor(detail::create_array, bp::default_call_policies(), (bp::arg("obj"), bp::arg("dtype") = bp::scope().attr("float32"), bp::arg("context")=atd::cl_ext::default_context())))
|
||||
.def(bp::init<atd::array_expression>())
|
||||
.add_property("dtype", &atd::array::dtype)
|
||||
.add_property("context", bp::make_function(&atd::array::context, bp::return_internal_reference<>()))
|
||||
@@ -527,11 +527,11 @@ void export_array()
|
||||
|
||||
bp::class_<atd::scalar, bp::bases<atd::array> >
|
||||
("scalar", bp::no_init)
|
||||
.def("__init__", bp::make_constructor(detail::construct_scalar, bp::default_call_policies(), (bp::arg(""), bp::arg("context")=atd::cl::default_context())))
|
||||
.def("__init__", bp::make_constructor(detail::construct_scalar, bp::default_call_policies(), (bp::arg(""), bp::arg("context")=atd::cl_ext::default_context())))
|
||||
;
|
||||
|
||||
//Other numpy-like initializers
|
||||
bp::def("empty", &detail::create_empty_array, (bp::arg("shape"), bp::arg("dtype") = bp::scope().attr("float32"), bp::arg("context")=atd::cl::default_context()));
|
||||
bp::def("empty", &detail::create_empty_array, (bp::arg("shape"), bp::arg("dtype") = bp::scope().attr("float32"), bp::arg("context")=atd::cl_ext::default_context()));
|
||||
|
||||
//Binary
|
||||
#define MAP_FUNCTION(name) \
|
||||
@@ -593,7 +593,7 @@ void export_scalar()
|
||||
void export_model()
|
||||
{
|
||||
|
||||
bp::class_<atidlas::model>("model", bp::init<atd::base const &, atd::cl::CommandQueue&>())
|
||||
bp::class_<atidlas::model>("model", bp::init<atd::base const &, cl::CommandQueue&>())
|
||||
.def("execute", &atd::model::execute);
|
||||
|
||||
bp::enum_<atidlas::fetching_policy_type>
|
||||
|
@@ -13,7 +13,7 @@ void test(T epsilon, simple_matrix_base<T> & cA, simple_matrix_base<T>& cB, simp
|
||||
using namespace std;
|
||||
|
||||
int failure_count = 0;
|
||||
ad::cl::Context const & ctx = C.context();
|
||||
cl::Context const & ctx = C.context();
|
||||
|
||||
int_t M = cC.size1();
|
||||
int_t N = cC.size2();
|
||||
@@ -94,7 +94,7 @@ void test(T epsilon, simple_matrix_base<T> & cA, simple_matrix_base<T>& cB, simp
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void test_impl(T epsilon, ad::cl::Context const & ctx)
|
||||
void test_impl(T epsilon, cl::Context const & ctx)
|
||||
{
|
||||
using atidlas::_;
|
||||
|
||||
@@ -119,9 +119,9 @@ void test_impl(T epsilon, ad::cl::Context const & ctx)
|
||||
|
||||
int main()
|
||||
{
|
||||
for(ad::cl::queues_t::iterator it = ad::cl::queues.begin() ; it != ad::cl::queues.end() ; ++it)
|
||||
for(ad::cl_ext::queues_t::iterator it = ad::cl_ext::queues.begin() ; it != ad::cl_ext::queues.end() ; ++it)
|
||||
{
|
||||
ad::cl::Device device = it->second[0].getInfo<CL_QUEUE_DEVICE>();
|
||||
cl::Device device = it->second[0].getInfo<CL_QUEUE_DEVICE>();
|
||||
std::cout << "Device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
|
||||
std::cout << "---" << std::endl;
|
||||
std::cout << ">> float" << std::endl;
|
||||
|
@@ -53,7 +53,7 @@ void test_impl(T epsilon, simple_matrix_base<T> & cC, simple_matrix_base<T> cons
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void test_impl(T epsilon, ad::cl::Context const & ctx)
|
||||
void test_impl(T epsilon, cl::Context const & ctx)
|
||||
{
|
||||
int_t M = 412;
|
||||
int_t N = 245;
|
||||
@@ -75,9 +75,9 @@ void test_impl(T epsilon, ad::cl::Context const & ctx)
|
||||
|
||||
int main()
|
||||
{
|
||||
for(ad::cl::queues_t::iterator it = ad::cl::queues.begin() ; it != ad::cl::queues.end() ; ++it)
|
||||
for(ad::cl_ext::queues_t::iterator it = ad::cl_ext::queues.begin() ; it != ad::cl_ext::queues.end() ; ++it)
|
||||
{
|
||||
ad::cl::Device device = it->second[0].getInfo<CL_QUEUE_DEVICE>();
|
||||
cl::Device device = it->second[0].getInfo<CL_QUEUE_DEVICE>();
|
||||
std::cout << "Device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
|
||||
std::cout << "---" << std::endl;
|
||||
std::cout << ">> float" << std::endl;
|
||||
|
@@ -46,7 +46,7 @@ void test_row_wise_reduction(T epsilon, simple_vector_base<T> & cy, simple_matri
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void test_impl(T epsilon, ad::cl::Context const & ctx)
|
||||
void test_impl(T epsilon, cl::Context const & ctx)
|
||||
{
|
||||
int_t M = 1324;
|
||||
int_t N = 1143;
|
||||
@@ -65,9 +65,9 @@ void test_impl(T epsilon, ad::cl::Context const & ctx)
|
||||
|
||||
int main()
|
||||
{
|
||||
for(ad::cl::queues_t::iterator it = ad::cl::queues.begin() ; it != ad::cl::queues.end() ; ++it)
|
||||
for(ad::cl_ext::queues_t::iterator it = ad::cl_ext::queues.begin() ; it != ad::cl_ext::queues.end() ; ++it)
|
||||
{
|
||||
ad::cl::Device device = it->second[0].getInfo<CL_QUEUE_DEVICE>();
|
||||
cl::Device device = it->second[0].getInfo<CL_QUEUE_DEVICE>();
|
||||
std::cout << "Device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
|
||||
std::cout << "---" << std::endl;
|
||||
std::cout << ">> float" << std::endl;
|
||||
|
@@ -12,7 +12,7 @@ void test_reduction(T epsilon, simple_vector_base<T> & cx, simple_vector_base<T
|
||||
ad::array & x, ad::array & y)
|
||||
{
|
||||
using namespace std;
|
||||
ad::cl::Context const & ctx = x.context();
|
||||
cl::Context const & ctx = x.context();
|
||||
int_t N = cx.size();
|
||||
unsigned int failure_count = 0;
|
||||
|
||||
@@ -52,7 +52,7 @@ void test_reduction(T epsilon, simple_vector_base<T> & cx, simple_vector_base<T
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void test_impl(T epsilon, ad::cl::Context const & ctx)
|
||||
void test_impl(T epsilon, cl::Context const & ctx)
|
||||
{
|
||||
using atidlas::_;
|
||||
|
||||
@@ -74,9 +74,9 @@ void test_impl(T epsilon, ad::cl::Context const & ctx)
|
||||
|
||||
int main()
|
||||
{
|
||||
for(ad::cl::queues_t::iterator it = ad::cl::queues.begin() ; it != ad::cl::queues.end() ; ++it)
|
||||
for(ad::cl_ext::queues_t::iterator it = ad::cl_ext::queues.begin() ; it != ad::cl_ext::queues.end() ; ++it)
|
||||
{
|
||||
ad::cl::Device device = it->second[0].getInfo<CL_QUEUE_DEVICE>();
|
||||
cl::Device device = it->second[0].getInfo<CL_QUEUE_DEVICE>();
|
||||
std::cout << "Device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
|
||||
std::cout << "---" << std::endl;
|
||||
std::cout << ">> float" << std::endl;
|
||||
|
@@ -14,7 +14,7 @@ void test_element_wise_vector(T epsilon, simple_vector_base<T> & cx, simple_vect
|
||||
|
||||
int failure_count = 0;
|
||||
ad::numeric_type dtype = x.dtype();
|
||||
ad::cl::Context const & ctx = x.context();
|
||||
cl::Context const & ctx = x.context();
|
||||
|
||||
int_t N = cz.size();
|
||||
|
||||
@@ -89,7 +89,7 @@ void test_element_wise_vector(T epsilon, simple_vector_base<T> & cx, simple_vect
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void test_impl(T epsilon, ad::cl::Context const & ctx)
|
||||
void test_impl(T epsilon, cl::Context const & ctx)
|
||||
{
|
||||
using atidlas::_;
|
||||
|
||||
@@ -114,9 +114,9 @@ void test_impl(T epsilon, ad::cl::Context const & ctx)
|
||||
|
||||
int main()
|
||||
{
|
||||
for(ad::cl::queues_t::iterator it = ad::cl::queues.begin() ; it != ad::cl::queues.end() ; ++it)
|
||||
for(ad::cl_ext::queues_t::iterator it = ad::cl_ext::queues.begin() ; it != ad::cl_ext::queues.end() ; ++it)
|
||||
{
|
||||
ad::cl::Device device = it->second[0].getInfo<CL_QUEUE_DEVICE>();
|
||||
cl::Device device = it->second[0].getInfo<CL_QUEUE_DEVICE>();
|
||||
std::cout << "Device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
|
||||
std::cout << "---" << std::endl;
|
||||
std::cout << ">> float" << std::endl;
|
||||
|
Reference in New Issue
Block a user