Ported to C++11

This commit is contained in:
Philippe Tillet
2015-02-04 22:06:15 -05:00
parent 3a296ae3b7
commit bbf2f0188e
40 changed files with 413 additions and 563 deletions

View File

@@ -46,13 +46,13 @@ void bench(ad::numeric_type dtype)
/*--BLAS1--*/
/*---------*/
std::cout << "#AXPY" << std::endl;
for(std::vector<int_t>::const_iterator it = BLAS1_N.begin() ; it != BLAS1_N.end() ; ++it)
for(auto N : BLAS1_N)
{
int_t N = *it;
std::cout << N;
/* ATIDLAS */
ad::array x(N, dtype), y(N, dtype);
cl::CommandQueue & queue = ad::cl_ext::get_queue(x.context(), 0);
cl::CommandQueue & queue = ad::cl_ext::queues[x.context()][0];
ad::model & model = ad::get_model(queue, ad::VECTOR_AXPY_TYPE, dtype);
ad::array_expression E = ad::detail::assign(y, x + y);
model.tune(E);
@@ -178,15 +178,16 @@ int main(int argc, char* argv[])
#endif
int device_idx = 0;
if(ad::cl_ext::queues.size()>1){
ad::cl_ext::queues_t & queues = ad::cl_ext::queues;
ad::cl_ext::queues_type::data_type const & queues = ad::cl_ext::queues.data();
if(queues.size()>1){
if(argc!=2)
{
std::cerr << "usage : blas-bench [DEVICE_IDX]" << std::endl;
std::cout << "Devices available: " << std::endl;
unsigned int current=0;
for(ad::cl_ext::queues_t::const_iterator it = queues.begin() ; it != queues.end() ; ++it){
cl::Device device = it->first.getInfo<CL_CONTEXT_DEVICES>()[0];
for(const auto & queue : queues){
cl::Device device = queue.first.getInfo<CL_CONTEXT_DEVICES>()[0];
std::cout << current++ << ": " << device.getInfo<CL_DEVICE_NAME>() << "(" << cl::Platform(device.getInfo<CL_DEVICE_PLATFORM>()).getInfo<CL_PLATFORM_NAME>() << ")" << std::endl;
}
exit(EXIT_FAILURE);

View File

@@ -3,6 +3,7 @@
#include "vector"
#include <cmath>
#include <algorithm>
int ceil(int N, int pad)
{

View File

@@ -12,9 +12,9 @@ __global__ void dummy(){}
int main()
{
for(ad::cl_ext::queues_t::iterator it = ad::cl_ext::queues.begin() ; it != ad::cl_ext::queues.end() ; ++it)
for(const auto & elem : ad::cl_ext::queues.data())
{
cl::CommandQueue queue = it->second[0];
cl::CommandQueue queue = elem.second[0];
cl::Device device = queue.getInfo<CL_QUEUE_DEVICE>();
cl::Program program("__kernel void dummy(){}");
program.build();

View File

@@ -4,7 +4,7 @@
#include <iostream>
#include <CL/cl.hpp>
#include "atidlas/types.h"
#include "atidlas/cl/queues.h"
#include "atidlas/cl_ext/backend.h"
#include "atidlas/symbolic/expression.h"
@@ -12,11 +12,10 @@ namespace atidlas
{
class scalar;
class array: public obj_base
class array: public array_base
{
friend array reshape(array const &, int_t, int_t);
friend array reshape(array_expression const &, int_t, int_t);
static array_infos init_infos(numeric_type dtype, cl_mem data, int_t shape1, int_t shape2, int_t start1, int_t start2, int_t stride1, int_t stride2, int_t ld);
public:
//1D Constructors
array(int_t size1, numeric_type dtype, cl::Context context = cl_ext::default_context());
@@ -32,15 +31,15 @@ public:
//General constructor
array(numeric_type dtype, cl::Buffer data, slice const & s1, slice const & s2, int_t ld, cl::Context context = cl_ext::default_context());
array(control const & proxy);
array(array_expression const & proxy);
array(array const &);
//Getters
numeric_type dtype() const;
size4 shape() const;
int_t nshape() const;
size4 start() const;
size4 stride() const;
int_t nshape() const;
int_t ld() const;
cl::Context const & context() const;
cl::Buffer const & data() const;
@@ -51,7 +50,8 @@ public:
//Numeric operators
array& operator=(array const &);
array& operator=(control const &);
array& operator=(array_expression const &);
template<class T> array & operator=(std::vector<T> const & rhs);
array_expression operator-();
@@ -78,9 +78,15 @@ public:
array_expression T() const;
protected:
numeric_type dtype_;
size4 shape_;
size4 start_;
size4 stride_;
int_t ld_;
cl::Context context_;
cl::Buffer data_;
array_infos infos_;
};
class scalar : public array
@@ -91,7 +97,7 @@ public:
explicit scalar(numeric_type dtype, cl::Buffer const & data, int_t offset, cl::Context context = cl_ext::default_context());
explicit scalar(value_scalar value, cl::Context context = cl_ext::default_context());
explicit scalar(numeric_type dtype, cl::Context context = cl_ext::default_context());
scalar(control const & proxy);
scalar(array_expression const & proxy);
scalar& operator=(value_scalar const &);
// scalar& operator=(scalar const & s);
using array::operator =;

View File

@@ -20,7 +20,7 @@ enum leaf_t
class mapped_object;
typedef std::pair<int_t, leaf_t> mapping_key;
typedef std::map<mapping_key, tools::shared_ptr<mapped_object> > mapping_type;
typedef std::map<mapping_key, std::shared_ptr<mapped_object> > mapping_type;
/** @brief Mapped Object
*

View File

@@ -9,7 +9,7 @@
#include "atidlas/backend/parse.h"
#include "atidlas/backend/stream.h"
#include <CL/cl.hpp>
#include "atidlas/cl/lazy_compiler.h"
#include "atidlas/cl_ext/lazy_compiler.h"
#include "atidlas/symbolic/expression.h"
namespace atidlas
@@ -75,15 +75,15 @@ protected:
/** @brief Accessor for the numeric type */
numeric_type get_numeric_type(atidlas::array_expression const * array_expression, int_t root_idx) const;
/** @brief Creates a binary leaf */
template<class T> tools::shared_ptr<mapped_object> binary_leaf(atidlas::array_expression const * array_expression, int_t root_idx, mapping_type const * mapping) const;
template<class T> std::shared_ptr<mapped_object> binary_leaf(atidlas::array_expression const * array_expression, int_t root_idx, mapping_type const * mapping) const;
/** @brief Creates a value scalar mapping */
tools::shared_ptr<mapped_object> create(numeric_type dtype, values_holder) const;
std::shared_ptr<mapped_object> create(numeric_type dtype, values_holder) const;
/** @brief Creates a vector mapping */
tools::shared_ptr<mapped_object> create(array_infos const &) const;
std::shared_ptr<mapped_object> create(array_infos const &) const;
/** @brief Creates a tuple mapping */
tools::shared_ptr<mapped_object> create(repeat_infos const &) const;
std::shared_ptr<mapped_object> create(repeat_infos const &) const;
/** @brief Creates a mapping */
tools::shared_ptr<mapped_object> create(lhs_rhs_element const &) const;
std::shared_ptr<mapped_object> create(lhs_rhs_element const &) const;
public:
map_functor(symbolic_binder & binder, mapping_type & mapping);
/** @brief Functor for traversing the tree */
@@ -143,7 +143,7 @@ protected:
static bool is_reduction(array_expression::node const & node);
static bool is_index_reduction(op_element const & op);
tools::shared_ptr<symbolic_binder> make_binder();
std::shared_ptr<symbolic_binder> make_binder();
static std::string vstore(unsigned int simd_width, std::string const & value, std::string const & offset, std::string const & ptr);
static std::string vload(unsigned int simd_width, std::string const & offset, std::string const & ptr);
static std::string append_width(std::string const & str, unsigned int width);
@@ -164,7 +164,7 @@ public:
virtual int check_invalid(expressions_tuple const & expressions, cl::Device const & device) const = 0;
virtual void enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs,
unsigned int label, expressions_tuple const & expressions, operation_cache* cache = NULL) = 0;
virtual tools::shared_ptr<base> clone() const = 0;
virtual std::shared_ptr<base> clone() const = 0;
private:
binding_policy_t binding_policy_;
};
@@ -180,7 +180,7 @@ public:
base_impl(parameters_type const & parameters, binding_policy_t binding_policy);
int_t local_size_0() const;
int_t local_size_1() const;
tools::shared_ptr<base> clone() const;
std::shared_ptr<base> clone() const;
/** @brief returns whether or not the profile has undefined behavior on particular device */
int check_invalid(expressions_tuple const & expressions, cl::Device const & device) const;
protected:

View File

@@ -1,31 +0,0 @@
#ifndef ATIDLAS_CL_QUEUES_H
#define ATIDLAS_CL_QUEUES_H
#include <map>
#include <CL/cl.hpp>
#include "atidlas/cl/compare.hpp"
namespace atidlas
{
namespace cl_ext
{
typedef std::map<std::pair<cl_program, unsigned int>, cl::Kernel> kernels_t;
typedef std::vector<std::pair<cl::Context, std::vector<cl::CommandQueue> > > queues_t;
extern kernels_t kernels;
extern queues_t queues;
extern unsigned int default_context_idx;
extern cl_command_queue_properties queue_properties;
void synchronize(cl::Context const & context);
cl::Context default_context();
cl::CommandQueue & get_queue(cl::Context const &, std::size_t);
cl::Device get_device(cl::CommandQueue &);
std::vector<cl::CommandQueue> & get_queues(cl::Context const & ctx);
}
}
#endif

View File

@@ -0,0 +1,40 @@
#ifndef ATIDLAS_CL_QUEUES_H
#define ATIDLAS_CL_QUEUES_H
#include <map>
#include <list>
#include <CL/cl.hpp>
namespace atidlas
{
namespace cl_ext
{
class queues_type
{
private:
void append(cl::Context const &);
void init();
public:
typedef std::list<std::pair<cl::Context, std::vector<cl::CommandQueue> > > data_type;
std::vector<cl::CommandQueue> & operator[](cl::Context const &);
cl::Context default_context();
data_type const & data();
private:
data_type data_;
};
typedef std::map<std::pair<cl_program, unsigned int>, cl::Kernel> kernels_type;
extern kernels_type kernels;
extern queues_type queues;
extern unsigned int default_context_idx;
extern cl_command_queue_properties queue_properties;
void synchronize(cl::Context const & context);
cl::Context default_context();
}
}
#endif

View File

@@ -2,7 +2,7 @@
#define ATIDLAS_CL_LAZY_COMPILER_H
#include <CL/cl.hpp>
#include "atidlas/cl/program_map.h"
#include "atidlas/cl_ext/program_map.h"
namespace atidlas
{

View File

@@ -6,8 +6,8 @@
#include <map>
#include "atidlas/backend/templates/base.h"
#include "atidlas/cl/compare.hpp"
#include "atidlas/cl/lazy_compiler.h"
#include "atidlas/cl_ext/compare.hpp"
#include "atidlas/cl_ext/lazy_compiler.h"
#include "atidlas/model/predictors/random_forest.h"
#include "atidlas/symbolic/expression.h"
@@ -16,7 +16,7 @@ namespace atidlas
class model
{
typedef std::vector< tools::shared_ptr<base> > templates_container;
typedef std::vector< std::shared_ptr<base> > templates_container;
public:
struct runtime_options
{
@@ -34,8 +34,8 @@ namespace atidlas
std::vector<cl_ext::lazy_compiler>& init(expressions_tuple const & expressions, runtime_options const & opt = runtime_options());
public:
model(predictors::random_forest const &, std::vector< tools::shared_ptr<base> > const &, cl::CommandQueue &);
model(std::vector< tools::shared_ptr<base> > const &, cl::CommandQueue &);
model(predictors::random_forest const &, std::vector< std::shared_ptr<base> > const &, cl::CommandQueue &);
model(std::vector< std::shared_ptr<base> > const &, cl::CommandQueue &);
model(base const &, cl::CommandQueue &);
void execute(expressions_tuple const &, operation_cache * cache = NULL, runtime_options const & opt = runtime_options());
@@ -44,13 +44,13 @@ namespace atidlas
templates_container const & templates() const;
private:
templates_container templates_;
tools::shared_ptr<predictors::random_forest> predictor_;
std::shared_ptr<predictors::random_forest> predictor_;
std::map<std::vector<int_t>, int> hardcoded_;
std::map<cl_context, std::map<std::string, std::vector<cl_ext::lazy_compiler> > > lazy_programs_;
cl::CommandQueue & queue_;
};
typedef std::map<std::pair<expression_type, numeric_type>, tools::shared_ptr<model> > model_map_t;
typedef std::map<std::pair<expression_type, numeric_type>, std::shared_ptr<model> > model_map_t;
model_map_t init_models(cl::CommandQueue const & queue);
model_map_t& get_model_map(cl::CommandQueue & queue);

View File

@@ -6,7 +6,7 @@
#include <CL/cl.hpp>
#include "atidlas/types.h"
#include "atidlas/value_scalar.h"
#include "atidlas/tools/shared_ptr.hpp"
#include <memory>
namespace atidlas
{
@@ -168,7 +168,7 @@ void fill(lhs_rhs_element & x, array const & a);
void fill(lhs_rhs_element & x, value_scalar const & v);
void fill(lhs_rhs_element & x, repeat_infos const & r);
class array_expression
class array_expression : public array_base
{
public:
struct node
@@ -208,21 +208,21 @@ private:
size4 shape_;
};
class control
template<class TYPE>
class controller
{
public:
control(array_expression const & x, cl::Event* event = NULL, std::vector<cl::Event>* dependencies = NULL,
controller(TYPE const & x, cl::Event* event = NULL, std::vector<cl::Event>* dependencies = NULL,
cl::CommandQueue* queue = NULL, operation_cache* cache = NULL) : x_(x), event_(event), dependencies_(dependencies), queue_(queue), cache_(cache){}
array_expression const & expression() const { return x_; }
TYPE const & x() const { return x_; }
cl::Event* event() const { return event_; }
std::vector<cl::Event>* dependencies() const { return dependencies_; }
cl::CommandQueue* queue() const { return queue_; }
operation_cache* cache() const { return cache_; }
private:
array_expression const & x_;
TYPE const & x_;
cl::Event* event_;
std::vector<cl::Event>* dependencies_;
cl::CommandQueue* queue_;
@@ -232,9 +232,9 @@ private:
class expressions_tuple
{
private:
tools::shared_ptr<array_expression> create(array_expression const & s);
std::shared_ptr<array_expression> create(array_expression const & s);
public:
typedef std::list<tools::shared_ptr<array_expression> > data_type;
typedef std::list<std::shared_ptr<array_expression> > data_type;
enum order_type { SEQUENTIAL, INDEPENDENT };
expressions_tuple(array_expression const & s0);

View File

@@ -1,162 +0,0 @@
#ifndef ATIDLAS_TOOLS_SHARED_PTR_HPP
#define ATIDLAS_TOOLS_SHARED_PTR_HPP
/* =========================================================================
Copyright (c) 2010-2012, Institute for Microelectronics,
Institute for Analysis and Scientific Computing,
TU Wien.
Portions of this software are copyright by UChicago Argonne, LLC.
-----------------
ViennaCL - The Vienna Computing Library
-----------------
Project Head: Karl Rupp rupp@iue.tuwien.ac.at
(A list of authors and contributors can be found in the PDF manual)
License: MIT (X11), see file LICENSE in the base directory
============================================================================= */
/** @file tools/shared_ptr.hpp
@brief Implementation of a shared pointer class (cf. std::shared_ptr, boost::shared_ptr). Will be used until C++11 is widely available.
Contributed by Philippe Tillet.
*/
#include <cstdlib>
#include <algorithm>
namespace atidlas
{
namespace tools
{
namespace detail
{
/** @brief Reference counting class for the shared_ptr implementation */
class count
{
public:
count(unsigned int val) : val_(val){ }
void dec(){ --val_; }
void inc(){ ++val_; }
bool is_null(){ return val_ == 0; }
unsigned int val(){ return val_; }
private:
unsigned int val_;
};
/** @brief Interface for the reference counter inside the shared_ptr */
struct aux
{
detail::count count;
aux() :count(1) {}
virtual void destroy()=0;
virtual ~aux() {}
};
/** @brief Implementation helper for the reference counting mechanism inside shared_ptr. */
template<class U, class Deleter>
struct auximpl: public detail::aux
{
U* p;
Deleter d;
auximpl(U* pu, Deleter x) :p(pu), d(x) {}
virtual void destroy() { d(p); }
};
/** @brief Default deleter class for a pointer. The default is to just call 'delete' on the pointer. Provide your own implementations for 'delete[]' and 'free'. */
template<class U>
struct default_deleter
{
void operator()(U* p) const { delete p; }
};
}
class shared_ptr_base
{
protected:
detail::aux* pa;
public:
unsigned int count() { return pa->count.val(); }
};
/** @brief A shared pointer class similar to boost::shared_ptr. Reimplemented in order to avoid a Boost-dependency. Will be replaced by std::shared_ptr as soon as C++11 is widely available. */
template<class T>
class shared_ptr : public shared_ptr_base
{
template<class U>
friend class shared_ptr;
detail::aux* pa;
T* pt;
public:
shared_ptr() :pa(NULL), pt(NULL) {}
template<class U, class Deleter>
shared_ptr(U* pu, Deleter d) : pa(new detail::auximpl<U, Deleter>(pu, d)), pt(pu) {}
template<class U>
explicit shared_ptr(U* pu) : pa(new detail::auximpl<U, detail::default_deleter<U> >(pu, detail::default_deleter<U>())), pt(pu) {}
template<class U>
shared_ptr(const shared_ptr<U>& s) :pa(s.pa), pt(s.pt) { inc(); }
shared_ptr(const shared_ptr& s) :pa(s.pa), pt(s.pt) { inc(); }
~shared_ptr() { dec(); }
T* get() const { return pt; }
T* operator->() const { return pt; }
T& operator*() const { return *pt; }
void reset() { shared_ptr<T>().swap(*this); }
void reset(T * ptr) { shared_ptr<T>(ptr).swap(*this); }
void swap(shared_ptr<T> & other)
{
std::swap(pt,other.pt);
std::swap(pa, other.pa);
}
shared_ptr& operator=(const shared_ptr& s)
{
if (this!=&s)
{
dec();
pa = s.pa;
pt = s.pt;
inc();
}
return *this;
}
void inc()
{
if (pa) pa->count.inc();
}
void dec()
{
if (pa)
{
pa->count.dec();
if (pa->count.is_null())
{
pa->destroy();
delete pa;
pa = NULL;
}
}
}
};
}
}
#endif

View File

@@ -83,8 +83,8 @@ public:
void enqueue()
{
for(std::list<infos>::iterator it = l_.begin() ; it != l_.end() ; ++it)
it->queue.enqueueNDRangeKernel(it->kernel, it->offset, it->grange, it->lrange);
for(infos & elem : l_)
elem.queue.enqueueNDRangeKernel(elem.kernel, elem.offset, elem.grange, elem.lrange);
}
private:
@@ -173,7 +173,8 @@ struct slice
};
typedef slice _;
class obj_base{};
class array_base{ };
}
#endif

View File

@@ -11,40 +11,27 @@
namespace atidlas
{
array_infos array::init_infos(numeric_type dtype, cl_mem data, int_t shape1, int_t shape2, int_t start1, int_t start2, int_t stride1, int_t stride2, int_t ld)
{
array_infos res;
res.dtype = dtype;
res.data = data;
res.shape1 = shape1;
res.shape2 = shape2;
res.start1 = start1;
res.start2 = start2;
res.stride1 = stride1;
res.stride2 = stride2;
res.ld = ld;
return res;
}
/*--- Constructors ---*/
//1D Constructors
array::array(int_t size1, numeric_type dtype, cl::Context context) :
context_(context), data_(context_, CL_MEM_READ_WRITE, size_of(dtype)*size1),
infos_(init_infos(dtype, data_(), size1, 1, 0, 0, 1, 1, size1))
{}
dtype_(dtype), shape_(size1, 1), start_(0, 0), stride_(1, 1), ld_(shape_._1),
context_(context), data_(context_, CL_MEM_READ_WRITE, size_of(dtype)*dsize())
{ }
template<class DT>
array::array(std::vector<DT> const & x, cl::Context context):
context_(context), data_(context, CL_MEM_READ_WRITE, size_of(to_numeric_type<DT>::value)*x.size()),
infos_(init_infos(to_numeric_type<DT>::value, data_(), x.size(), 1, 0, 0, 1, 1, x.size()))
dtype_(to_numeric_type<DT>::value), shape_(x.size(), 1), start_(0, 0), stride_(1, 1), ld_(shape_._1),
context_(context), data_(context, CL_MEM_READ_WRITE, size_of(dtype_)*dsize())
{ *this = x; }
array::array(array & v, slice const & s1) : context_(v.data_.getInfo<CL_MEM_CONTEXT>()), data_(v.data_),
infos_(init_infos(v.infos_.dtype, data_(), s1.size, 1, v.infos_.start1 + v.infos_.stride1*s1.start, 0, v.infos_.stride1*s1.stride, 1, v.infos_.ld))
array::array(array & v, slice const & s1) : dtype_(v.dtype_), shape_(s1.size, 1), start_(v.start_._1 + v.stride_._1*s1.start, 0), stride_(v.stride_._1*s1.stride, 1),
ld_(v.ld_), context_(v.data_.getInfo<CL_MEM_CONTEXT>()), data_(v.data_)
{}
#define INSTANTIATE(T) template array::array<T>(std::vector<T> const &, cl::Context)
#define INSTANTIATE(T) template array::array(std::vector<T> const &, cl::Context)
INSTANTIATE(cl_char);
INSTANTIATE(cl_uchar);
INSTANTIATE(cl_short);
@@ -58,26 +45,26 @@ INSTANTIATE(cl_double);
#undef INSTANTIATE
// 2D
array::array(int_t size1, int_t size2, numeric_type dtype, cl::Context context) :
context_(context), data_(context_, CL_MEM_READ_WRITE, size_of(dtype)*size1*size2),
infos_(init_infos(dtype, data_(), size1, size2, 0, 0, 1, 1, size1))
array::array(int_t size1, int_t size2, numeric_type dtype, cl::Context context) : dtype_(dtype), shape_(size1, size2), start_(0, 0), stride_(1, 1), ld_(size1),
context_(context), data_(context_, CL_MEM_READ_WRITE, size_of(dtype_)*dsize())
{}
array::array(array & M, slice const & s1, slice const & s2) :
context_(M.data_.getInfo<CL_MEM_CONTEXT>()), data_(M.data_),
infos_(init_infos(M.dtype(), data_(), s1.size, s2.size, M.start()._1 + M.stride()._1*s1.start, M.start()._2 + M.stride()._2*s2.start,
M.stride()._1*s1.stride, M.stride()._2*s2.stride, M.ld()))
array::array(array & M, slice const & s1, slice const & s2) : dtype_(M.dtype_), shape_(s1.size, s2.size),
start_(M.start_._1 + M.stride_._1*s1.start, M.start_._2 + M.stride_._2*s2.start),
stride_(M.stride_._1*s1.stride, M.stride_._2*s2.stride), ld_(M.ld_),
context_(M.data_.getInfo<CL_MEM_CONTEXT>()), data_(M.data_)
{ }
template<typename DT>
array::array(int_t size1, int_t size2, std::vector<DT> const & data, cl::Context context):
context_(context), data_(context_, CL_MEM_READ_WRITE, size_of(to_numeric_type<DT>::value)*size1*size2),
infos_(init_infos(to_numeric_type<DT>::value, data_(), size1, size2, 0, 0, 1, 1, size1))
array::array(int_t size1, int_t size2, std::vector<DT> const & data, cl::Context context)
: dtype_(to_numeric_type<DT>::value),
shape_(size1, size2), start_(0, 0), stride_(1, 1), ld_(size1),
context_(context), data_(context_, CL_MEM_READ_WRITE, size_of(dtype_)*dsize())
{
atidlas::copy(data, *this);
}
#define INSTANTIATE(T) template array::array<T>(int_t, int_t, std::vector<T> const &, cl::Context)
#define INSTANTIATE(T) template array::array(int_t, int_t, std::vector<T> const &, cl::Context)
INSTANTIATE(cl_char);
INSTANTIATE(cl_uchar);
INSTANTIATE(cl_short);
@@ -92,43 +79,45 @@ INSTANTIATE(cl_double);
// General
array::array(numeric_type dtype, cl::Buffer data, slice const & s1, slice const & s2, int_t ld, cl::Context context):
context_(context), data_(data),
infos_(init_infos(dtype, data_(), s1.size, s2.size, s1.start, s2.start, s1.stride, s2.stride, ld))
dtype_(dtype), shape_(s1.size, s2.size), start_(s1.start, s2.start), stride_(s1.stride, s2.stride),
ld_(ld), context_(context), data_(data)
{ }
array::array(control const & x):
context_(x.expression().context()), data_(context_, CL_MEM_READ_WRITE, size_of(x.expression().dtype())*prod(x.expression().shape())),
infos_(init_infos(x.expression().dtype(), data_(), x.expression().shape()._1, x.expression().shape()._2, 0, 0, 1, 1, x.expression().shape()._1))
array::array(array_expression const & proxy) :
dtype_(proxy.dtype()),
shape_(proxy.shape()), start_(0,0), stride_(1, 1), ld_(shape_._1),
context_(proxy.context()), data_(context_, CL_MEM_READ_WRITE, size_of(dtype_)*dsize())
{
*this = x;
*this = proxy;
}
array::array(array const & x) :
context_(x.context()), data_(context_, CL_MEM_READ_WRITE, size_of(x.dtype())*x.shape()._1*x.shape()._2),
infos_(init_infos(x.dtype(), data_(), x.shape()._1, x.shape()._2, 0, 0, 1, 1, x.shape()._1))
array::array(array const & other) :
dtype_(other.dtype()),
shape_(other.shape()), start_(0,0), stride_(1, 1), ld_(shape_._1),
context_(other.context()), data_(context_, CL_MEM_READ_WRITE, size_of(dtype_)*dsize())
{
*this = x;
*this = other;
}
/*--- Getters ---*/
numeric_type array::dtype() const
{ return infos_.dtype; }
{ return dtype_; }
size4 array::shape() const
{ return size4(infos_.shape1, infos_.shape2); }
{ return shape_; }
int_t array::nshape() const
{ return int_t((infos_.shape1 > 1) + (infos_.shape2 > 1)); }
{ return int_t((shape_._1 > 1) + (shape_._2 > 1)); }
size4 array::start() const
{ return size4(infos_.start1, infos_.start2); }
{ return start_; }
size4 array::stride() const
{ return size4(infos_.stride1, infos_.stride2); }
{ return stride_; }
int_t array::ld() const
{ return infos_.ld; }
{ return ld_; }
cl::Context const & array::context() const
{ return context_; }
@@ -137,27 +126,25 @@ cl::Buffer const & array::data() const
{ return data_; }
int_t array::dsize() const
{ return infos_.ld*infos_.shape2; }
{ return ld_*shape_._2; }
/*--- Assignment Operators ----*/
//---------------------------------------
array & array::operator=(array const & rhs)
{
assert(dtype() == rhs.dtype());
array_expression expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ASSIGN_TYPE), context_, dtype(), shape());
cl::CommandQueue & queue = cl_ext::get_queue(context_, 0);
assert(dtype_ == rhs.dtype());
array_expression expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ASSIGN_TYPE), context_, dtype_, shape_);
cl::CommandQueue & queue = cl_ext::queues[context_][0];
model_map_t & mmap = atidlas::get_model_map(queue);
execute(expression, mmap);
return *this;
}
array & array::operator=(control const & x)
array & array::operator=(array_expression const & rhs)
{
array_expression const & rhs = x.expression();
assert(dtype() == rhs.dtype());
array_expression expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ASSIGN_TYPE), dtype(), shape());
cl::CommandQueue & queue = cl_ext::get_queue(context_, 0);
assert(dtype_ == rhs.dtype());
array_expression expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ASSIGN_TYPE), dtype_, shape_);
cl::CommandQueue & queue = cl_ext::queues[context_][0];
model_map_t & mmap = atidlas::get_model_map(queue);
execute(expression, mmap);
return *this;
@@ -186,47 +173,47 @@ INSTANTIATE(cl_double);
#undef INSTANTIATE
array_expression array::operator-()
{ return array_expression(*this, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), context_, dtype(), shape()); }
{ return array_expression(*this, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), context_, dtype_, shape_); }
array_expression array::operator!()
{ return array_expression(*this, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_NEGATE_TYPE), context_, INT_TYPE, shape()); }
{ return array_expression(*this, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_NEGATE_TYPE), context_, INT_TYPE, shape_); }
//
array & array::operator+=(value_scalar const & rhs)
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ADD_TYPE), context_, dtype(), shape()); }
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ADD_TYPE), context_, dtype_, shape_); }
array & array::operator+=(array const & rhs)
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ADD_TYPE), context_, dtype(), shape()); }
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ADD_TYPE), context_, dtype_, shape_); }
array & array::operator+=(array_expression const & rhs)
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ADD_TYPE), dtype(), shape()); }
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ADD_TYPE), dtype_, shape_); }
//----
array & array::operator-=(value_scalar const & rhs)
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), context_, dtype(), shape()); }
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), context_, dtype_, shape_); }
array & array::operator-=(array const & rhs)
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), context_, dtype(), shape()); }
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), context_, dtype_, shape_); }
array & array::operator-=(array_expression const & rhs)
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), dtype(), shape()); }
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), dtype_, shape_); }
//----
array & array::operator*=(value_scalar const & rhs)
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_MULT_TYPE), context_, dtype(), shape()); }
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_MULT_TYPE), context_, dtype_, shape_); }
array & array::operator*=(array const & rhs)
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_MULT_TYPE), context_, dtype(), shape()); }
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_MULT_TYPE), context_, dtype_, shape_); }
array & array::operator*=(array_expression const & rhs)
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_MULT_TYPE), dtype(), shape()); }
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_MULT_TYPE), dtype_, shape_); }
//----
array & array::operator/=(value_scalar const & rhs)
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_DIV_TYPE), context_, dtype(), shape()); }
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_DIV_TYPE), context_, dtype_, shape_); }
array & array::operator/=(array const & rhs)
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_DIV_TYPE), context_, dtype(), shape()); }
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_DIV_TYPE), context_, dtype_, shape_); }
array & array::operator/=(array_expression const & rhs)
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_DIV_TYPE), dtype(), shape()); }
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_DIV_TYPE), dtype_, shape_); }
array_expression array::T() const
{ return atidlas::trans(*this) ;}
@@ -236,13 +223,13 @@ array_expression array::T() const
scalar array::operator [](int_t idx)
{
assert(nshape()==1);
return scalar(dtype(), data_, idx, context_);
return scalar(dtype_, data_, idx, context_);
}
const scalar array::operator [](int_t idx) const
{
assert(nshape()==1);
return scalar(dtype(), data_, idx, context_);
return scalar(dtype_, data_, idx, context_);
}
@@ -263,7 +250,7 @@ namespace detail
template<class T>
void copy(cl::Context & ctx, cl::Buffer const & data, T value)
{
cl_ext::get_queue(ctx, 0).enqueueWriteBuffer(data, CL_TRUE, 0, sizeof(T), (void*)&value);
cl_ext::queues[ctx][0].enqueueWriteBuffer(data, CL_TRUE, 0, sizeof(T), (void*)&value);
}
}
@@ -273,7 +260,7 @@ scalar::scalar(numeric_type dtype, const cl::Buffer &data, int_t offset, cl::Con
scalar::scalar(value_scalar value, cl::Context context) : array(1, value.dtype(), context)
{
switch(dtype())
switch(dtype_)
{
// case BOOL_TYPE: detail::copy(context_, data_, (cl_bool)value); break;
case CHAR_TYPE: detail::copy(context_, data_, (cl_char)value); break;
@@ -287,7 +274,7 @@ scalar::scalar(value_scalar value, cl::Context context) : array(1, value.dtype()
// case HALF_TYPE: detail::copy(context_, data_, (cl_float)value); break;
case FLOAT_TYPE: detail::copy(context_, data_, (cl_float)value); break;
case DOUBLE_TYPE: detail::copy(context_, data_, (cl_double)value); break;
default: throw unknown_datatype(dtype());
default: throw unknown_datatype(dtype_);
}
}
@@ -295,19 +282,19 @@ scalar::scalar(value_scalar value, cl::Context context) : array(1, value.dtype()
scalar::scalar(numeric_type dtype, cl::Context context) : array(1, dtype, context)
{ }
scalar::scalar(control const &proxy) : array(proxy){ }
scalar::scalar(array_expression const & proxy) : array(proxy){ }
template<class T>
T scalar::cast() const
{
values_holder v;
int_t dtsize = size_of(dtype());
int_t dtsize = size_of(dtype_);
#define HANDLE_CASE(DTYPE, VAL) \
case DTYPE:\
cl_ext::get_queue(context_, 0).enqueueReadBuffer(data_, CL_TRUE, infos_.start1*dtsize, dtsize, (void*)&v.VAL);\
cl_ext::queues[context_][0].enqueueReadBuffer(data_, CL_TRUE, start_._1*dtsize, dtsize, (void*)&v.VAL);\
return v.VAL
switch(dtype())
switch(dtype_)
{
// HANDLE_CASE(BOOL_TYPE, bool8);
HANDLE_CASE(CHAR_TYPE, int8);
@@ -321,7 +308,7 @@ case DTYPE:\
// HANDLE_CASE(HALF_TYPE, float16);
HANDLE_CASE(FLOAT_TYPE, float32);
HANDLE_CASE(DOUBLE_TYPE, float64);
default: throw unknown_datatype(dtype());
default: throw unknown_datatype(dtype_);
}
#undef HANDLE_CASE
@@ -329,16 +316,16 @@ case DTYPE:\
scalar& scalar::operator=(value_scalar const & s)
{
cl::CommandQueue& queue = cl_ext::get_queue(context_, 0);
int_t dtsize = size_of(dtype());
cl::CommandQueue& queue = cl_ext::queues[context_][0];
int_t dtsize = size_of(dtype_);
#define HANDLE_CASE(TYPE, CLTYPE) case TYPE:\
{\
CLTYPE v = s;\
queue.enqueueWriteBuffer(data_, CL_TRUE, infos_.start1*dtsize, dtsize, (void*)&v);\
queue.enqueueWriteBuffer(data_, CL_TRUE, start_._1*dtsize, dtsize, (void*)&v);\
return *this;\
}
switch(dtype())
switch(dtype_)
{
// HANDLE_CASE(BOOL_TYPE, cl_bool)
HANDLE_CASE(CHAR_TYPE, cl_char)
@@ -352,7 +339,7 @@ scalar& scalar::operator=(value_scalar const & s)
// HANDLE_CASE(HALF_TYPE, cl_half)
HANDLE_CASE(FLOAT_TYPE, cl_float)
HANDLE_CASE(DOUBLE_TYPE, cl_double)
default: throw unknown_datatype(dtype());
default: throw unknown_datatype(dtype_);
}
}
@@ -707,18 +694,11 @@ namespace detail
array reshape(array const & a, int_t size1, int_t size2)
{
array tmp(a);
tmp.infos_.shape1 = size1;
tmp.infos_.shape2 = size2;
tmp.shape_._1 = size1;
tmp.shape_._2 = size2;
return tmp;
}
array reshape(array_expression const & a, int_t size1, int_t size2)
{
array tmp(a);
tmp.infos_.shape1 = size1;
tmp.infos_.shape2 = size2;
return tmp;
}
#define DEFINE_DOT(LTYPE, RTYPE) \
array_expression dot(LTYPE const & x, RTYPE const & y)\
@@ -804,10 +784,10 @@ void copy(array const & x, void* data, cl::CommandQueue & queue, bool blocking)
}
void copy(void const *data, array &x, bool blocking)
{ copy(data, x, cl_ext::get_queue(x.context(), 0), blocking); }
{ copy(data, x, cl_ext::queues[x.context()][0], blocking); }
void copy(array const & x, void* data, bool blocking)
{ copy(x, data, cl_ext::get_queue(x.context(), 0), blocking); }
{ copy(x, data, cl_ext::queues[x.context()][0], blocking); }
//std::vector<>
template<class T>
@@ -832,11 +812,11 @@ void copy(array const & x, std::vector<T> & cx, cl::CommandQueue & queue, bool b
template<class T>
void copy(std::vector<T> const & cx, array & x, bool blocking)
{ copy(cx, x, cl_ext::get_queue(x.context(), 0), blocking); }
{ copy(cx, x, cl_ext::queues[x.context()][0], blocking); }
template<class T>
void copy(array const & x, std::vector<T> & cx, bool blocking)
{ copy(x, cx, cl_ext::get_queue(x.context(), 0), blocking); }
{ copy(x, cx, cl_ext::queues[x.context()][0], blocking); }
#define INSTANTIATE(T) \
template void copy<T>(std::vector<T> const &, array &, cl::CommandQueue&, bool);\

View File

@@ -74,8 +74,8 @@ std::string mapped_object::process(std::string const & in) const
{
std::string res(in);
preprocess(res);
for (std::map<std::string,std::string>::const_iterator it = keywords_.begin(); it != keywords_.end(); ++it)
tools::find_and_replace(res, it->first, it->second);
for (const auto & elem : keywords_)
tools::find_and_replace(res, elem.first, elem.second);
postprocess(res);
return res;
}

View File

@@ -30,44 +30,44 @@ numeric_type base::map_functor::get_numeric_type(atidlas::array_expression const
/** @brief Binary leaf */
template<class T>
tools::shared_ptr<mapped_object> base::map_functor::binary_leaf(atidlas::array_expression const * array_expression, int_t root_idx, mapping_type const * mapping) const
std::shared_ptr<mapped_object> base::map_functor::binary_leaf(atidlas::array_expression const * array_expression, int_t root_idx, mapping_type const * mapping) const
{
return tools::shared_ptr<mapped_object>(new T(numeric_type_to_string(array_expression->dtype()), binder_.get(NULL), mapped_object::node_info(mapping, array_expression, root_idx)));
return std::shared_ptr<mapped_object>(new T(numeric_type_to_string(array_expression->dtype()), binder_.get(NULL), mapped_object::node_info(mapping, array_expression, root_idx)));
}
/** @brief Scalar mapping */
tools::shared_ptr<mapped_object> base::map_functor::create(numeric_type dtype, values_holder) const
std::shared_ptr<mapped_object> base::map_functor::create(numeric_type dtype, values_holder) const
{
std::string strdtype = numeric_type_to_string(dtype);
return tools::shared_ptr<mapped_object>(new mapped_host_scalar(strdtype, binder_.get(NULL)));
return std::shared_ptr<mapped_object>(new mapped_host_scalar(strdtype, binder_.get(NULL)));
}
/** @brief Vector mapping */
tools::shared_ptr<mapped_object> base::map_functor::create(array_infos const & a) const
std::shared_ptr<mapped_object> base::map_functor::create(array_infos const & a) const
{
std::string dtype = numeric_type_to_string(a.dtype);
unsigned int id = binder_.get(a.data);
//Scalar
if(a.shape1==1 && a.shape2==1)
return tools::shared_ptr<mapped_object>(new mapped_array(dtype, id, 's'));
return std::shared_ptr<mapped_object>(new mapped_array(dtype, id, 's'));
//Column vector
else if(a.shape1>1 && a.shape2==1)
return tools::shared_ptr<mapped_object>(new mapped_array(dtype, id, 'c'));
return std::shared_ptr<mapped_object>(new mapped_array(dtype, id, 'c'));
//Row vector
else if(a.shape1==1 && a.shape2>1)
return tools::shared_ptr<mapped_object>(new mapped_array(dtype, id, 'r'));
return std::shared_ptr<mapped_object>(new mapped_array(dtype, id, 'r'));
//Matrix
else
return tools::shared_ptr<mapped_object>(new mapped_array(dtype, id, 'm'));
return std::shared_ptr<mapped_object>(new mapped_array(dtype, id, 'm'));
}
tools::shared_ptr<mapped_object> base::map_functor::create(repeat_infos const &) const
std::shared_ptr<mapped_object> base::map_functor::create(repeat_infos const &) const
{
//TODO: Make it less specific!
return tools::shared_ptr<mapped_object>(new mapped_tuple("int",binder_.get(NULL),4));
return std::shared_ptr<mapped_object>(new mapped_tuple("int",binder_.get(NULL),4));
}
tools::shared_ptr<mapped_object> base::map_functor::create(lhs_rhs_element const & lhs_rhs) const
std::shared_ptr<mapped_object> base::map_functor::create(lhs_rhs_element const & lhs_rhs) const
{
switch(lhs_rhs.type_family)
{
@@ -111,7 +111,7 @@ void base::map_functor::operator()(atidlas::array_expression const & array_expre
else if (root_node.op.type == OPERATOR_OUTER_PROD_TYPE)
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_outer>(&array_expression, root_idx, &mapping_)));
else if (detail::is_cast(root_node.op))
mapping_.insert(mapping_type::value_type(key, tools::shared_ptr<mapped_object>(new mapped_cast(root_node.op.type, binder_.get(NULL)))));
mapping_.insert(mapping_type::value_type(key, std::shared_ptr<mapped_object>(new mapped_cast(root_node.op.type, binder_.get(NULL)))));
}
}
@@ -219,8 +219,8 @@ void base::compute_index_reduction(kernel_generation_stream & os, std::string ac
void base::process_all(std::string const & type_key, std::string const & str,
kernel_generation_stream & stream, std::vector<mapping_type> const & mappings)
{
for (std::vector<mapping_type>::const_iterator mit = mappings.begin(); mit != mappings.end(); ++mit)
for (mapping_type::const_iterator mmit = mit->begin(); mmit != mit->end(); ++mmit)
for (const auto & mapping : mappings)
for (mapping_type::const_iterator mmit = mapping.begin(); mmit != mapping.end(); ++mmit)
if (mmit->second->type_key()==type_key)
stream << mmit->second->process(str) << std::endl;
}
@@ -230,9 +230,9 @@ void base::base::process_all_at(std::string const & type_key, std::string const
kernel_generation_stream & stream, std::vector<mapping_type> const & mappings,
size_t root_idx, leaf_t leaf)
{
for (std::vector<mapping_type>::const_iterator mit = mappings.begin(); mit != mappings.end(); ++mit)
for (const auto & mapping : mappings)
{
mapped_object * obj = mit->at(mapping_key(root_idx, leaf)).get();
mapped_object * obj = mapping.at(mapping_key(root_idx, leaf)).get();
if (obj->type_key()==type_key)
stream << obj->process(str) << std::endl;
}
@@ -280,9 +280,9 @@ std::string base::generate_arguments(std::string const & data_type, std::vector<
void base::set_arguments(expressions_tuple const & expressions, cl::Kernel & kernel, unsigned int & current_arg)
{
tools::shared_ptr<symbolic_binder> binder = make_binder();
for (expressions_tuple::data_type::const_iterator itt = expressions.data().begin(); itt != expressions.data().end(); ++itt)
traverse(**itt, (*itt)->root(), set_arguments_functor(*binder, current_arg, kernel), true);
std::shared_ptr<symbolic_binder> binder = make_binder();
for (const auto & elem : expressions.data())
traverse(*elem, (elem)->root(), set_arguments_functor(*binder, current_arg, kernel), true);
}
void base::fill_kernel_name(char * ptr, unsigned int label, const char * suffix)
@@ -372,8 +372,8 @@ bool base::is_strided(array_expression::node const & node)
bool base::requires_fallback(expressions_tuple const & expressions)
{
for (expressions_tuple::data_type::const_iterator it = expressions.data().begin(); it != expressions.data().end(); ++it)
for(array_expression::container_type::const_iterator itt = (*it)->tree().begin(); itt != (*it)->tree().end() ; ++itt)
for (const auto & elem : expressions.data())
for(array_expression::container_type::const_iterator itt = (elem)->tree().begin(); itt != (elem)->tree().end() ; ++itt)
if( (itt->lhs.subtype==DENSE_ARRAY_TYPE && (std::max(itt->lhs.array.stride1, itt->lhs.array.stride2)>1 || std::max(itt->lhs.array.start1,itt->lhs.array.start2)>0))
|| (itt->rhs.subtype==DENSE_ARRAY_TYPE && (std::max(itt->rhs.array.stride1, itt->rhs.array.stride2)>1 || std::max(itt->rhs.array.start1,itt->rhs.array.start2)>0)))
return true;
@@ -478,12 +478,12 @@ unsigned int base::align(unsigned int to_round, unsigned int base)
return (to_round + base - 1)/base * base;
}
tools::shared_ptr<symbolic_binder> base::make_binder()
std::shared_ptr<symbolic_binder> base::make_binder()
{
if (binding_policy_==BIND_TO_HANDLE)
return tools::shared_ptr<symbolic_binder>(new bind_to_handle());
return std::shared_ptr<symbolic_binder>(new bind_to_handle());
else
return tools::shared_ptr<symbolic_binder>(new bind_all_unique());
return std::shared_ptr<symbolic_binder>(new bind_all_unique());
}
@@ -509,7 +509,7 @@ std::vector<std::string> base::generate(unsigned int label, expressions_tuple co
//Create mapping
std::vector<mapping_type> mappings(expressions.data().size());
tools::shared_ptr<symbolic_binder> binder = make_binder();
std::shared_ptr<symbolic_binder> binder = make_binder();
for (mit = mappings.begin(), sit = expressions.data().begin(); sit != expressions.data().end(); ++sit, ++mit)
traverse(**sit, (*sit)->root(), map_functor(*binder,*mit), true);
@@ -533,8 +533,8 @@ int_t base_impl<TType, PType>::local_size_1() const
{ return p_.local_size_1; }
template<class TType, class PType>
tools::shared_ptr<base> base_impl<TType, PType>::clone() const
{ return tools::shared_ptr<base>(new TType(*dynamic_cast<TType const *>(this))); }
std::shared_ptr<base> base_impl<TType, PType>::clone() const
{ return std::shared_ptr<base>(new TType(*dynamic_cast<TType const *>(this))); }
template<class TType, class PType>
int base_impl<TType, PType>::check_invalid(expressions_tuple const & expressions, cl::Device const & device) const

View File

@@ -1,6 +1,6 @@
#include "atidlas/array.h"
#include "atidlas/backend/templates/mproduct.h"
#include "atidlas/cl/lazy_compiler.h"
#include "atidlas/cl_ext/lazy_compiler.h"
#include "atidlas/tools/make_vector.hpp"
#include "atidlas/tools/to_string.hpp"
@@ -587,7 +587,7 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
kernel.setArg(current_arg++, cl_uint(N));
kernel.setArg(current_arg++, cl_uint(K));
tools::shared_ptr<symbolic_binder> binder = make_binder();
std::shared_ptr<symbolic_binder> binder = make_binder();
set_arguments_functor fun(*binder, current_arg, kernel);
fun.set_arguments(C);
fun.set_arguments(alpha.dtype(), alpha.values());

View File

@@ -50,8 +50,8 @@ std::string mreduction::generate_impl(unsigned int label, expressions_tuple cons
("array2", "#pointer += #start1 + #start2*#ld; "
"#ld *= #nldstride; "), expressions, mappings);
for (std::vector<mapped_mreduction*>::const_iterator it = exprs.begin(); it != exprs.end(); ++it)
stream << (*it)->process("__local #scalartype #name_buf[" + to_string(lsize0*lsize1) + "];") << std::endl;
for (const auto & expr : exprs)
stream << (expr)->process("__local #scalartype #name_buf[" + to_string(lsize0*lsize1) + "];") << std::endl;
stream << "unsigned int lid0 = get_local_id(0);" << std::endl;
stream << "unsigned int lid1 = get_local_id(1);" << std::endl;
@@ -59,8 +59,8 @@ std::string mreduction::generate_impl(unsigned int label, expressions_tuple cons
stream << "for(unsigned int r = get_global_id(0); r < upper_bound_0; r += get_global_size(0)){" << std::endl;
stream.inc_tab();
for (std::vector<mapped_mreduction*>::const_iterator it = exprs.begin(); it != exprs.end(); ++it)
stream << (*it)->process("#scalartype #name_acc = " + neutral_element((*it)->root_op()) + ";") << std::endl;
for (const auto & expr : exprs)
stream << (expr)->process("#scalartype #name_acc = " + neutral_element((expr)->root_op()) + ";") << std::endl;
stream << "if (r < M)" << std::endl;
stream << "{" << std::endl;
@@ -75,7 +75,7 @@ std::string mreduction::generate_impl(unsigned int label, expressions_tuple cons
{
std::string data_type = append_width("#scalartype",simd_width);
for (std::vector<mapped_mreduction*>::const_iterator it = exprs.begin(); it != exprs.end(); ++it)
for (const auto & elem : exprs)
{
std::map<std::string, std::string> accessors;
if(reduction==REDUCE_COLUMNS)
@@ -88,7 +88,7 @@ std::string mreduction::generate_impl(unsigned int label, expressions_tuple cons
accessors["array2"] = "#scalartype #namereg = #pointer[r*#stride1 + c*#ld];";
accessors["repeat"] = "#scalartype #namereg = $VALUE{(r%#tuplearg0)*#stride, (c%#tuplearg1)*#stride};";
}
(*it)->process_recursive(stream, PARENT_NODE_TYPE, accessors);
(elem)->process_recursive(stream, PARENT_NODE_TYPE, accessors);
}
@@ -101,7 +101,7 @@ std::string mreduction::generate_impl(unsigned int label, expressions_tuple cons
str[a] = append_simd_suffix("#namereg.s",a);
for (unsigned int k = 0; k < exprs.size(); ++k)
for (auto & elem : exprs)
{
for (unsigned int a = 0; a < simd_width; ++a)
{
@@ -109,11 +109,11 @@ std::string mreduction::generate_impl(unsigned int label, expressions_tuple cons
accessors["array2"] = str[a];
accessors["repeat"] = "#namereg";
accessors["array0"] = "#namereg";
std::string value = exprs[k]->evaluate_recursive(LHS_NODE_TYPE, accessors);
if (exprs[k]->is_index_reduction())
compute_index_reduction(stream, exprs[k]->process("#name_acc"), "c*"+to_string(simd_width) + to_string(a), exprs[k]->process("#name_acc_value"), value,exprs[k]->root_op());
std::string value = elem->evaluate_recursive(LHS_NODE_TYPE, accessors);
if (elem->is_index_reduction())
compute_index_reduction(stream, elem->process("#name_acc"), "c*"+to_string(simd_width) + to_string(a), elem->process("#name_acc_value"), value,elem->root_op());
else
compute_reduction(stream, exprs[k]->process("#name_acc"), value,exprs[k]->root_op());
compute_reduction(stream, elem->process("#name_acc"), value,elem->root_op());
}
}
}
@@ -126,8 +126,8 @@ std::string mreduction::generate_impl(unsigned int label, expressions_tuple cons
stream.dec_tab();
stream << "}" << std::endl;
for (unsigned int k = 0; k < exprs.size(); ++k)
stream << exprs[k]->process("#name_buf[lid0*" + lsize1str + "+ lid1] = #name_acc;") << std::endl;
for (auto & expr : exprs)
stream << expr->process("#name_buf[lid0*" + lsize1str + "+ lid1] = #name_acc;") << std::endl;
stream << "#pragma unroll" << std::endl;
stream << "for(unsigned int stride = " << p_.local_size_1/2 << "; stride >0; stride /=2)" << std::endl;
@@ -139,13 +139,13 @@ std::string mreduction::generate_impl(unsigned int label, expressions_tuple cons
stream << "{" << std::endl;
stream.inc_tab();
for (unsigned int k = 0; k < exprs.size(); k++)
if (exprs[k]->is_index_reduction())
compute_index_reduction(stream, exprs[k]->process("#name_buf[lid0*" + lsize1str + " + lid1]"), exprs[k]->process("#name_buf[lid0*" + lsize1str + " + lid1 + stride]")
, exprs[k]->process("#name_buf_value[lid0*" + lsize1str + " + lid1]"), exprs[k]->process("#name_buf_value[lid0*" + lsize1str + " + lid1 + stride]"),
exprs[k]->root_op());
for (auto & expr : exprs)
if (expr->is_index_reduction())
compute_index_reduction(stream, expr->process("#name_buf[lid0*" + lsize1str + " + lid1]"), expr->process("#name_buf[lid0*" + lsize1str + " + lid1 + stride]")
, expr->process("#name_buf_value[lid0*" + lsize1str + " + lid1]"), expr->process("#name_buf_value[lid0*" + lsize1str + " + lid1 + stride]"),
expr->root_op());
else
compute_reduction(stream,exprs[k]->process("#name_buf[lid0*" + lsize1str + " + lid1]"), exprs[k]->process("#name_buf[lid0*" + lsize1str + " + lid1 + stride]"), exprs[k]->root_op());
compute_reduction(stream,expr->process("#name_buf[lid0*" + lsize1str + " + lid1]"), expr->process("#name_buf[lid0*" + lsize1str + " + lid1 + stride]"), expr->root_op());
stream.dec_tab();
stream << "}" << std::endl;
@@ -183,8 +183,8 @@ std::vector<std::string> mreduction::generate_impl(unsigned int label, expressio
{
array_expression const & first_expression = *expressions.data().front();
std::vector<size_t> idx = filter_nodes(&is_reduction, first_expression, false);
for (unsigned int j = 0; j < idx.size(); ++j)
exprs.push_back((mapped_mreduction*)(mit->at(mapping_key(idx[j], PARENT_NODE_TYPE)).get()));
for (auto & elem : idx)
exprs.push_back((mapped_mreduction*)(mit->at(mapping_key(elem, PARENT_NODE_TYPE)).get()));
}
std::vector<std::string> res;

View File

@@ -16,9 +16,9 @@ reduction_parameters::reduction_parameters(unsigned int _simd_width,
unsigned int reduction::lmem_usage(expressions_tuple const & expressions) const
{
unsigned int res = 0;
for(expressions_tuple::data_type::const_iterator it = expressions.data().begin() ; it != expressions.data().end() ; ++it)
for(const auto & elem : expressions.data())
{
numeric_type numeric_t= lhs_most((*it)->tree(), (*it)->root()).lhs.dtype;
numeric_type numeric_t= lhs_most((elem)->tree(), (elem)->root()).lhs.dtype;
res += p_.local_size_0*size_of(numeric_t);
}
return res;
@@ -43,13 +43,13 @@ inline void reduction::reduce_1d_local_memory(kernel_generation_stream & stream,
stream << "{" << std::endl;
stream.inc_tab();
for (unsigned int k = 0; k < exprs.size(); k++)
if (exprs[k]->is_index_reduction())
compute_index_reduction(stream, exprs[k]->process(buf_str+"[lid]"), exprs[k]->process(buf_str+"[lid+stride]")
, exprs[k]->process(buf_value_str+"[lid]"), exprs[k]->process(buf_value_str+"[lid+stride]"),
exprs[k]->root_op());
for (auto & expr : exprs)
if (expr->is_index_reduction())
compute_index_reduction(stream, expr->process(buf_str+"[lid]"), expr->process(buf_str+"[lid+stride]")
, expr->process(buf_value_str+"[lid]"), expr->process(buf_value_str+"[lid+stride]"),
expr->root_op());
else
compute_reduction(stream, exprs[k]->process(buf_str+"[lid]"), exprs[k]->process(buf_str+"[lid+stride]"), exprs[k]->root_op());
compute_reduction(stream, expr->process(buf_str+"[lid]"), expr->process(buf_str+"[lid+stride]"), expr->root_op());
stream.dec_tab();
stream << "}" << std::endl;
stream.dec_tab();
@@ -61,8 +61,8 @@ std::string reduction::generate_impl(unsigned int label, const char * type, expr
kernel_generation_stream stream;
std::vector<mapped_scalar_reduction*> exprs;
for (std::vector<mapping_type>::const_iterator it = mappings.begin(); it != mappings.end(); ++it)
for (mapping_type::const_iterator iit = it->begin(); iit != it->end(); ++iit)
for (const auto & mapping : mappings)
for (mapping_type::const_iterator iit = mapping.begin(); iit != mapping.end(); ++iit)
if (mapped_scalar_reduction * p = dynamic_cast<mapped_scalar_reduction*>(iit->second.get()))
exprs.push_back(p);
std::size_t N = exprs.size();
@@ -122,8 +122,8 @@ std::string reduction::generate_impl(unsigned int label, const char * type, expr
{
std::string i = (simd_width==1)?"i*#stride":"i";
//Fetch vector entry
for (std::vector<mapped_scalar_reduction*>::const_iterator it = exprs.begin(); it != exprs.end(); ++it)
(*it)->process_recursive(stream, PARENT_NODE_TYPE, tools::make_map<std::map<std::string, std::string> >("array1", append_width("#scalartype",simd_width) + " #namereg = " + vload(simd_width,i,"#pointer")+";")
for (const auto & elem : exprs)
(elem)->process_recursive(stream, PARENT_NODE_TYPE, tools::make_map<std::map<std::string, std::string> >("array1", append_width("#scalartype",simd_width) + " #namereg = " + vload(simd_width,i,"#pointer")+";")
("matrix_row", "#scalartype #namereg = #pointer[$OFFSET{#row*#stride, i*#stride2}];")
("matrix_column", "#scalartype #namereg = #pointer[$OFFSET{i*#stride,#column*#stride2}];")
("matrix_diag", "#scalartype #namereg = #pointer[#diag_offset<0?$OFFSET{(i - #diag_offset)*#stride, i*#stride2}:$OFFSET{i*#stride, (i + #diag_offset)*#stride2}];"));
@@ -137,7 +137,7 @@ std::string reduction::generate_impl(unsigned int label, const char * type, expr
for (unsigned int a = 0; a < simd_width; ++a)
str[a] = append_simd_suffix("#namereg.s", a);
for (unsigned int k = 0; k < exprs.size(); ++k)
for (auto & elem : exprs)
{
for (unsigned int a = 0; a < simd_width; ++a)
{
@@ -147,12 +147,12 @@ std::string reduction::generate_impl(unsigned int label, const char * type, expr
accessors["matrix_column"] = str[a];
accessors["matrix_diag"] = str[a];
accessors["array0"] = "#namereg";
std::string value = exprs[k]->evaluate_recursive(LHS_NODE_TYPE, accessors);
if (exprs[k]->is_index_reduction())
compute_index_reduction(stream, exprs[k]->process("#name_acc"), "i*" + tools::to_string(simd_width) + "+"
+ tools::to_string(a), exprs[k]->process("#name_acc_value"), value,exprs[k]->root_op());
std::string value = elem->evaluate_recursive(LHS_NODE_TYPE, accessors);
if (elem->is_index_reduction())
compute_index_reduction(stream, elem->process("#name_acc"), "i*" + tools::to_string(simd_width) + "+"
+ tools::to_string(a), elem->process("#name_acc_value"), value,elem->root_op());
else
compute_reduction(stream, exprs[k]->process("#name_acc"), value,exprs[k]->root_op());
compute_reduction(stream, elem->process("#name_acc"), value,elem->root_op());
}
}
}
@@ -286,11 +286,11 @@ void reduction::enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compi
//Preprocessing
int_t size = input_sizes(expressions)[0];
std::vector<array_expression::node const *> reductions;
for (expressions_tuple::data_type::const_iterator it = expressions.data().begin(); it != expressions.data().end(); ++it)
for (const auto & elem : expressions.data())
{
std::vector<size_t> reductions_idx = filter_nodes(&is_reduction, **it, false);
for (std::vector<size_t>::iterator itt = reductions_idx.begin(); itt != reductions_idx.end(); ++itt)
reductions.push_back(&(*it)->tree()[*itt]);
std::vector<size_t> reductions_idx = filter_nodes(&is_reduction, *elem, false);
for (auto & reductions_idx_itt : reductions_idx)
reductions.push_back(&(elem)->tree()[reductions_idx_itt]);
}
//Kernel
@@ -314,10 +314,10 @@ void reduction::enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compi
cl::Context context = expressions.context();
array_expression const & s = *(expressions.data().front());
unsigned int dtype_size = size_of(lhs_most(s.tree(), s.root()).lhs.dtype);
for (unsigned int k = 0; k < 2; k++)
for (auto & kernel : kernels)
{
unsigned int n_arg = 0;
kernels[k].setArg(n_arg++, cl_uint(size));
kernel.setArg(n_arg++, cl_uint(size));
//Temporary buffers
unsigned int i = 0;
@@ -328,15 +328,15 @@ void reduction::enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compi
{
if (tmpidx_.size() <= j)
tmpidx_.push_back(cl::Buffer(context, CL_MEM_READ_WRITE, p_.num_groups*4));
kernels[k].setArg(n_arg++, tmpidx_[j]);
kernel.setArg(n_arg++, tmpidx_[j]);
j++;
}
if (tmp_.size() <= i)
tmp_.push_back(cl::Buffer(context, CL_MEM_READ_WRITE, p_.num_groups*dtype_size));
kernels[k].setArg(n_arg++, tmp_[i]);
kernel.setArg(n_arg++, tmp_[i]);
i++;
}
set_arguments(expressions, kernels[k], n_arg);
set_arguments(expressions, kernel, n_arg);
}
for (unsigned int k = 0; k < 2; k++)

View File

@@ -1,5 +1,5 @@
#include "atidlas/backend/templates/vaxpy.h"
#include "atidlas/cl/queues.h"
#include "atidlas/cl_ext/backend.h"
#include "atidlas/tools/make_map.hpp"
#include "atidlas/tools/make_vector.hpp"
#include "atidlas/tools/to_string.hpp"
@@ -121,8 +121,8 @@ void vaxpy::enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler>
bool fallback = p_.simd_width > 1 && (requires_fallback(expressions) || (size%p_.simd_width>0));
cl::Program const & program = programs[fallback?0:1].program();
cl_ext::kernels_t::key_type key(program(), label);
cl_ext::kernels_t::iterator it = cl_ext::kernels.find(key);
cl_ext::kernels_type::key_type key(program(), label);
cl_ext::kernels_type::iterator it = cl_ext::kernels.find(key);
if(it==cl_ext::kernels.end())
it = cl_ext::kernels.insert(std::make_pair(key, cl::Kernel(program, fallback?kfb:kopt))).first;
cl::Kernel & kernel = it->second;

View File

@@ -1,63 +0,0 @@
#include "atidlas/cl/queues.h"
#include <assert.h>
#include <stdexcept>
namespace atidlas
{
namespace cl_ext
{
cl_command_queue_properties queue_properties = 0;
unsigned int default_context_idx = 0;
queues_t queues;
kernels_t kernels;
void synchronize(cl::Context const & context)
{
std::vector<cl::CommandQueue> & q = get_queues(context);
for(std::vector<cl::CommandQueue>::iterator it = q.begin() ; it != q.end() ; ++it)
it->finish();
}
void init_queues()
{
std::vector<cl::Platform> platforms;
cl::Platform::get(&platforms);
for(std::vector<cl::Platform>::iterator it = platforms.begin() ; it != platforms.end() ; ++it)
{
std::vector<cl::Device> devices;
it->getDevices(CL_DEVICE_TYPE_ALL, &devices);
for(std::vector<cl::Device>::iterator itt = devices.begin() ; itt != devices.end() ; ++itt)
queues.push_back(std::make_pair(cl::Context(std::vector<cl::Device>(1, *itt)), std::vector<cl::CommandQueue>()));
}
for(queues_t::iterator it = queues.begin() ; it != queues.end() ; ++it)
it->second.push_back(cl::CommandQueue(it->first, it->first.getInfo<CL_CONTEXT_DEVICES>()[0], queue_properties));
}
cl::Context default_context()
{
if(queues.empty())
init_queues();
return queues.begin()->first;
}
std::vector<cl::CommandQueue> & get_queues(cl::Context const & ctx)
{
if(queues.empty())
init_queues();
for(queues_t::iterator it = queues.begin() ; it != queues.end() ; ++it)
if(it->first()==ctx()) return it->second;
throw std::out_of_range("No such context registered in the backend. Please run atidlas::cl_ext:;register(context, queues)");
}
cl::CommandQueue & get_queue(cl::Context const & ctx, std::size_t idx)
{
return get_queues(ctx)[idx];
}
}
}

78
lib/cl_ext/backend.cpp Normal file
View File

@@ -0,0 +1,78 @@
#include "atidlas/cl_ext/backend.h"
#include <assert.h>
#include <stdexcept>
namespace atidlas
{
namespace cl_ext
{
void synchronize(cl::Context const & context)
{
for(std::vector<cl::CommandQueue>::const_iterator it = queues[context].begin() ; it != queues[context].end() ; ++it)
it->finish();
}
void queues_type::append(const cl::Context & context)
{
data_.push_back(std::make_pair(context, std::vector<cl::CommandQueue>()));
std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
for(auto & device : devices)
data_.back().second.push_back(cl::CommandQueue(context, device, queue_properties));
}
void queues_type::init()
{
std::vector<cl::Platform> platforms;
cl::Platform::get(&platforms);
for(auto & platform : platforms)
{
std::vector<cl::Device> devices;
platform.getDevices(CL_DEVICE_TYPE_ALL, &devices);
for(auto & device : devices)
data_.push_back(std::make_pair(cl::Context(std::vector<cl::Device>(1, device)), std::vector<cl::CommandQueue>()));
}
for(auto & elem : data_)
elem.second.push_back(cl::CommandQueue(elem.first, elem.first.getInfo<CL_CONTEXT_DEVICES>()[0], queue_properties));
}
std::vector<cl::CommandQueue> & queues_type::operator [](cl::Context const & ctx)
{
if(data_.empty())
init();
for(auto & elem : data_)
if(elem.first()==ctx()) return elem.second;
append(ctx);
return data_.back().second;
}
cl::Context queues_type::default_context()
{
if(data_.empty())
init();
data_type::iterator it = data_.begin();
std::advance(it, default_context_idx);
return it->first;
}
cl::Context default_context()
{
return queues.default_context();
}
queues_type::data_type const & queues_type::data()
{
if(data_.empty())
init();
return data_;
}
cl_command_queue_properties queue_properties = 0;
unsigned int default_context_idx = 0;
queues_type queues;
kernels_type kernels;
}
}

View File

@@ -1,4 +1,4 @@
#include "atidlas/cl/lazy_compiler.h"
#include "atidlas/cl_ext/lazy_compiler.h"
namespace atidlas
{

View File

@@ -3,7 +3,7 @@
#include <iostream>
#include <fstream>
#include "atidlas/cl/program_map.h"
#include "atidlas/cl_ext/program_map.h"
#include "sha1.hpp"
namespace atidlas

View File

@@ -213,8 +213,8 @@ inline std::string sha1(std::string const & src)
sha1.getDigest(hash);
std::ostringstream oss;
for (int i = 0; i < 5; ++i)
oss << std::hex << std::setfill('0') << std::setw(8) << hash[i];
for (auto & elem : hash)
oss << std::hex << std::setfill('0') << std::setw(8) << elem;
return oss.str();
}

View File

@@ -1,6 +1,7 @@
#include <set>
#include <fstream>
#include <stdexcept>
#include <algorithm>
#include "rapidjson/document.h"
#include "atidlas/backend/parse.h"
@@ -39,8 +40,8 @@ void model::fill_program_name(char* program_name, expressions_tuple const & expr
binder = new bind_to_handle();
else
binder = new bind_all_unique();
for (expressions_tuple::data_type::const_iterator it = expressions.data().begin(); it != expressions.data().end(); ++it)
traverse(**it, (*it)->root(), array_expression_representation_functor(*binder, program_name),true);
for (const auto & elem : expressions.data())
traverse(*elem, elem->root(), array_expression_representation_functor(*binder, program_name),true);
*program_name='\0';
delete binder;
}
@@ -80,11 +81,11 @@ std::vector<cl_ext::lazy_compiler>& model::init(expressions_tuple const & expres
return to_init;
}
model::model(predictors::random_forest const & predictor, std::vector< tools::shared_ptr<base> > const & templates, cl::CommandQueue & queue) :
model::model(predictors::random_forest const & predictor, std::vector< std::shared_ptr<base> > const & templates, cl::CommandQueue & queue) :
templates_(templates), predictor_(new predictors::random_forest(predictor)), queue_(queue)
{}
model::model(std::vector< tools::shared_ptr<base> > const & templates, cl::CommandQueue & queue) : templates_(templates), queue_(queue)
model::model(std::vector< std::shared_ptr<base> > const & templates, cl::CommandQueue & queue) : templates_(templates), queue_(queue)
{}
model::model(base const & tp, cl::CommandQueue & queue) : templates_(1,tp.clone()), queue_(queue)
@@ -166,27 +167,27 @@ namespace detail
throw std::invalid_argument("Invalid datatype: " + name);
}
static tools::shared_ptr<base> create(std::string const & template_name, std::vector<int> const & a)
static std::shared_ptr<base> create(std::string const & template_name, std::vector<int> const & a)
{
fetching_policy_type fetch[] = {FETCH_FROM_LOCAL, FETCH_FROM_GLOBAL_STRIDED, FETCH_FROM_GLOBAL_CONTIGUOUS};
if(template_name=="vaxpy")
return tools::shared_ptr<base>(new vaxpy(a[0], a[1], a[2], fetch[a[3]]));
return std::shared_ptr<base>(new vaxpy(a[0], a[1], a[2], fetch[a[3]]));
else if(template_name=="dot")
return tools::shared_ptr<base>(new reduction(a[0], a[1], a[2], fetch[a[3]]));
return std::shared_ptr<base>(new reduction(a[0], a[1], a[2], fetch[a[3]]));
else if(template_name=="maxpy")
return tools::shared_ptr<base>(new maxpy(a[0], a[1], a[2], a[3], a[4], fetch[a[5]]));
return std::shared_ptr<base>(new maxpy(a[0], a[1], a[2], a[3], a[4], fetch[a[5]]));
else if(template_name.find("gemvN")!=std::string::npos)
return tools::shared_ptr<base>(new mreduction_rows(a[0], a[1], a[2], a[3], fetch[a[4]]));
return std::shared_ptr<base>(new mreduction_rows(a[0], a[1], a[2], a[3], fetch[a[4]]));
else if(template_name.find("gemvT")!=std::string::npos)
return tools::shared_ptr<base>(new mreduction_cols(a[0], a[1], a[2], a[3], fetch[a[4]]));
return std::shared_ptr<base>(new mreduction_cols(a[0], a[1], a[2], a[3], fetch[a[4]]));
else if(template_name.find("gemmNN")!=std::string::npos)
return tools::shared_ptr<base>(new mproduct_nn(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
return std::shared_ptr<base>(new mproduct_nn(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
else if(template_name.find("gemmTN")!=std::string::npos)
return tools::shared_ptr<base>(new mproduct_tn(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
return std::shared_ptr<base>(new mproduct_tn(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
else if(template_name.find("gemmNT")!=std::string::npos)
return tools::shared_ptr<base>(new mproduct_nt(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
return std::shared_ptr<base>(new mproduct_nt(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
else if(template_name.find("gemmTT")!=std::string::npos)
return tools::shared_ptr<base>(new mproduct_tt(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
return std::shared_ptr<base>(new mproduct_tt(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
else
throw std::invalid_argument("Invalid expression: " + template_name);
}
@@ -208,32 +209,32 @@ void import(std::string const & fname, cl::CommandQueue & queue, model_map_t& re
//Deserialize
std::vector<std::string> operations = tools::make_vector<std::string>() << "vaxpy" << "dot" << "maxpy" << "gemvN" << "gemvT" << "gemmNN" << "gemmTN" << "gemmTT";
std::vector<std::string> dtype = tools::make_vector<std::string>() << "float32" << "float64";
for(std::vector<std::string>::iterator op = operations.begin() ; op != operations.end() ; ++op)
for(auto & operation : operations)
{
const char * opcstr = op->c_str();
const char * opcstr = operation.c_str();
if(document.HasMember(opcstr))
{
expression_type etype = detail::get_expression_type(*op);
for(std::vector<std::string>::iterator dt = dtype.begin() ; dt != dtype.end() ; ++dt)
expression_type etype = detail::get_expression_type(operation);
for(auto & elem : dtype)
{
const char * dtcstr = dt->c_str();
const char * dtcstr = elem.c_str();
if(document[opcstr].HasMember(dtcstr))
{
numeric_type dtype = detail::get_dtype(*dt);
numeric_type dtype = detail::get_dtype(elem);
// Get profiles
std::vector<tools::shared_ptr<base> > templates;
std::vector<std::shared_ptr<base> > templates;
js::Value const & profiles = document[opcstr][dtcstr]["profiles"];
for (js::SizeType id = 0 ; id < profiles.Size() ; ++id)
templates.push_back(detail::create(*op, tools::to_int_array<int>(profiles[id])));
templates.push_back(detail::create(operation, tools::to_int_array<int>(profiles[id])));
if(templates.size()>1)
{
// Get predictor
predictors::random_forest predictor(document[opcstr][dtcstr]["predictor"]);
result[std::make_pair(etype, dtype)] = tools::shared_ptr<model>(new model(predictor, templates, queue));
result[std::make_pair(etype, dtype)] = std::shared_ptr<model>(new model(predictor, templates, queue));
}
else
result[std::make_pair(etype, dtype)] = tools::shared_ptr<model>(new model(templates, queue));
result[std::make_pair(etype, dtype)] = std::shared_ptr<model>(new model(templates, queue));
}
}
}
@@ -243,11 +244,11 @@ void import(std::string const & fname, cl::CommandQueue & queue, model_map_t& re
model_map_t init_models(cl::CommandQueue & queue)
{
model_map_t res;
typedef tools::shared_ptr<model> ptr_t;
typedef std::shared_ptr<model> ptr_t;
numeric_type types[] = {CHAR_TYPE, UCHAR_TYPE, SHORT_TYPE, USHORT_TYPE, INT_TYPE, UINT_TYPE, LONG_TYPE, ULONG_TYPE, FLOAT_TYPE, DOUBLE_TYPE};
for(size_t i = 0 ; i < 10 ; ++i){
numeric_type DTYPE = types[i];
for(auto DTYPE : types){
res[std::make_pair(SCALAR_AXPY_TYPE, DTYPE)] = ptr_t(new model(vaxpy(1,64,128,FETCH_FROM_GLOBAL_STRIDED), queue));
res[std::make_pair(VECTOR_AXPY_TYPE, DTYPE)] = ptr_t (new model(vaxpy(1,64,128,FETCH_FROM_GLOBAL_STRIDED), queue));
res[std::make_pair(REDUCTION_TYPE, DTYPE)] = ptr_t(new model(reduction(1,64,128,FETCH_FROM_GLOBAL_STRIDED), queue));

View File

@@ -38,9 +38,9 @@ random_forest::random_forest(rapidjson::Value const & estimators)
std::vector<float> random_forest::predict(std::vector<int_t> const & x) const
{
std::vector<float> res(D_, 0);
for(std::vector<tree>::const_iterator it = estimators_.begin() ; it != estimators_.end() ; ++it)
for(const auto & elem : estimators_)
{
std::vector<float> const & subres = it->predict(x);
std::vector<float> const & subres = elem.predict(x);
for(int_t i = 0 ; i < D_ ; ++i)
res[i] += subres[i];
}

View File

@@ -172,30 +172,30 @@ namespace atidlas
/*----Parse required temporaries-----*/
detail::parse(tree, rootidx, current_type, breakpoints, final_type);
std::vector<tools::shared_ptr<obj_base> > temporaries_;
std::vector<std::shared_ptr<array> > temporaries_;
/*----Compute required temporaries----*/
for(detail::breakpoints_t::reverse_iterator rit = breakpoints.rbegin() ; rit != breakpoints.rend() ; ++rit)
{
tools::shared_ptr<model> const & pmodel = models[std::make_pair(rit->first, dtype)];
std::shared_ptr<model> const & pmodel = models[std::make_pair(rit->first, dtype)];
array_expression::node const & node = tree[rit->second->node_index];
array_expression::node const & lmost = lhs_most(tree, node);
//Creates temporary
tools::shared_ptr<obj_base> tmp;
std::shared_ptr<array> tmp;
switch(rit->first){
case SCALAR_AXPY_TYPE:
case REDUCTION_TYPE: tmp = tools::shared_ptr<obj_base>(new array(1, dtype, context)); break;
case REDUCTION_TYPE: tmp = std::shared_ptr<array>(new array(1, dtype, context)); break;
case VECTOR_AXPY_TYPE: tmp = tools::shared_ptr<obj_base>(new array(lmost.lhs.array.shape1, dtype, context)); break;
case ROW_WISE_REDUCTION_TYPE: tmp = tools::shared_ptr<obj_base>(new array(lmost.lhs.array.shape1, dtype, context)); break;
case COL_WISE_REDUCTION_TYPE: tmp = tools::shared_ptr<obj_base>(new array(lmost.lhs.array.shape2, dtype, context)); break;
case VECTOR_AXPY_TYPE: tmp = std::shared_ptr<array>(new array(lmost.lhs.array.shape1, dtype, context)); break;
case ROW_WISE_REDUCTION_TYPE: tmp = std::shared_ptr<array>(new array(lmost.lhs.array.shape1, dtype, context)); break;
case COL_WISE_REDUCTION_TYPE: tmp = std::shared_ptr<array>(new array(lmost.lhs.array.shape2, dtype, context)); break;
case MATRIX_AXPY_TYPE: tmp = tools::shared_ptr<obj_base>(new array(lmost.lhs.array.shape1, lmost.lhs.array.shape2, dtype, context)); break;
case MATRIX_PRODUCT_NN_TYPE: tmp = tools::shared_ptr<obj_base>(new array(node.lhs.array.shape1, node.rhs.array.shape2, dtype, context)); break;
case MATRIX_PRODUCT_NT_TYPE: tmp = tools::shared_ptr<obj_base>(new array(node.lhs.array.shape1, node.rhs.array.shape1, dtype, context)); break;
case MATRIX_PRODUCT_TN_TYPE: tmp = tools::shared_ptr<obj_base>(new array(node.lhs.array.shape2, node.rhs.array.shape2, dtype, context)); break;
case MATRIX_PRODUCT_TT_TYPE: tmp = tools::shared_ptr<obj_base>(new array(node.lhs.array.shape2, node.rhs.array.shape1, dtype, context)); break;
case MATRIX_AXPY_TYPE: tmp = std::shared_ptr<array>(new array(lmost.lhs.array.shape1, lmost.lhs.array.shape2, dtype, context)); break;
case MATRIX_PRODUCT_NN_TYPE: tmp = std::shared_ptr<array>(new array(node.lhs.array.shape1, node.rhs.array.shape2, dtype, context)); break;
case MATRIX_PRODUCT_NT_TYPE: tmp = std::shared_ptr<array>(new array(node.lhs.array.shape1, node.rhs.array.shape1, dtype, context)); break;
case MATRIX_PRODUCT_TN_TYPE: tmp = std::shared_ptr<array>(new array(node.lhs.array.shape2, node.rhs.array.shape2, dtype, context)); break;
case MATRIX_PRODUCT_TT_TYPE: tmp = std::shared_ptr<array>(new array(node.lhs.array.shape2, node.rhs.array.shape1, dtype, context)); break;
default: throw std::invalid_argument("Unrecognized operation");
}

View File

@@ -176,9 +176,9 @@ array_expression array_expression::operator!()
//
tools::shared_ptr<array_expression> expressions_tuple::create(array_expression const & s)
std::shared_ptr<array_expression> expressions_tuple::create(array_expression const & s)
{
return tools::shared_ptr<array_expression>(new array_expression(static_cast<array_expression const &>(s)));
return std::shared_ptr<array_expression>(new array_expression(static_cast<array_expression const &>(s)));
}
expressions_tuple::expressions_tuple(data_type const & data, order_type order) : data_(data), order_(order)

View File

@@ -45,7 +45,7 @@ def main():
return optlist
cvars = sysconfig.get_config_vars()
cvars['OPT'] = "-DNDEBUG -O3 " + str.join(' ', remove_prefixes(cvars['OPT'].split(), ['-g', '-O', '-Wstrict-prototypes', '-DNDEBUG']))
cvars['OPT'] = "-DNDEBUG -O3 -std=c++11 " + str.join(' ', remove_prefixes(cvars['OPT'].split(), ['-g', '-O', '-Wstrict-prototypes', '-DNDEBUG']))
cvars["CFLAGS"] = cvars["BASECFLAGS"] + " " + cvars["OPT"]
cvars["LDFLAGS"] = '-Wl,--no-as-needed ' + cvars["LDFLAGS"]

View File

@@ -1,4 +1,5 @@
#include <list>
#include <functional>
#include <boost/python.hpp>
#include <boost/python/suite/indexing/vector_indexing_suite.hpp>
@@ -190,9 +191,6 @@ namespace detail
return to_list(devices.begin(), devices.end());
}
std::vector<cl::CommandQueue> & get_queue(cl::Context const & ctx)
{ return atd::cl_ext::get_queues(ctx); }
atd::numeric_type extract_dtype(bp::object const & odtype)
{
std::string name = bp::extract<std::string>(odtype.attr("__class__").attr("__name__"))();
@@ -352,7 +350,7 @@ void export_cl()
.def("__init__", bp::make_constructor(&detail::make_context))
#define WRAP(PYNAME, NAME) .add_property(PYNAME, &detail::wrap_context_info<NAME>)
#undef WRAP
.add_property("queues", bp::make_function(&detail::get_queue, bp::return_internal_reference<>()))
.add_property("queues", bp::make_function(static_cast<std::vector<cl::CommandQueue> & (*)(const cl::Context&)>( [](const cl::Context & ctx) -> std::vector<cl::CommandQueue> & { return atd::cl_ext::queues[ctx]; }) , bp::return_internal_reference<>()))
;
bp::class_<cl::CommandQueue>("command_queue", bp::init<cl::Context, cl::Device>())

View File

@@ -3,5 +3,5 @@ foreach(PROG maxpy vaxpy reduction mreduction mproduct)
add_executable(${PROG}-test ${PROG}.cpp)
add_test(${PROG} ${PROG}-test)
target_link_libraries(${PROG}-test atidlas ${OPENCL_LIBRARIES})
set_target_properties(${PROG}-test PROPERTIES COMPILE_FLAGS "-Wall -Wextra")
set_target_properties(${PROG}-test PROPERTIES COMPILE_FLAGS "-Wall -Wextra -std=c++11")
endforeach(PROG)

View File

@@ -125,15 +125,15 @@ void test_impl(T epsilon, cl::Context const & ctx)
int main()
{
for(ad::cl_ext::queues_t::iterator it = ad::cl_ext::queues.begin() ; it != ad::cl_ext::queues.end() ; ++it)
for(const auto & elem : ad::cl_ext::queues.data())
{
cl::Device device = it->second[0].getInfo<CL_QUEUE_DEVICE>();
cl::Device device = elem.second[0].getInfo<CL_QUEUE_DEVICE>();
std::cout << "Device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
std::cout << "---" << std::endl;
std::cout << ">> float" << std::endl;
test_impl<float>(1e-4, it->first);
test_impl<float>(1e-4, elem.first);
std::cout << ">> double" << std::endl;
test_impl<double>(1e-9, it->first);
test_impl<double>(1e-9, elem.first);
std::cout << "---" << std::endl;
}
return EXIT_SUCCESS;

View File

@@ -2,7 +2,7 @@
#include "viennacl/vector.hpp"
#include "atidlas/tools/shared_ptr.hpp"
#include <memory>
#include "atidlas/model/import.hpp"
namespace ad = atidlas;
@@ -10,7 +10,7 @@ namespace ad = atidlas;
int main()
{
viennacl::vector<float> x(10000), y(10000), z(10000);
std::map<std::string, ad::tools::shared_ptr<ad::model> > models = ad::import("geforce_gt_540m.json");
std::map<std::string, ad::std::shared_ptr<ad::model> > models = ad::import("geforce_gt_540m.json");
models["vector-axpy-float32"]->tune(viennacl::symbolic_expression(z, viennacl::op_assign(), x));
models["vector-axpy-float32"]->execute(viennacl::symbolic_expression(z, viennacl::op_assign(), x));
return EXIT_SUCCESS;

View File

@@ -75,15 +75,15 @@ void test_impl(T epsilon, cl::Context const & ctx)
int main()
{
for(ad::cl_ext::queues_t::iterator it = ad::cl_ext::queues.begin() ; it != ad::cl_ext::queues.end() ; ++it)
for(const auto & elem : ad::cl_ext::queues.data())
{
cl::Device device = it->second[0].getInfo<CL_QUEUE_DEVICE>();
cl::Device device = elem.second[0].getInfo<CL_QUEUE_DEVICE>();
std::cout << "Device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
std::cout << "---" << std::endl;
std::cout << ">> float" << std::endl;
test_impl<float>(1e-4, it->first);
test_impl<float>(1e-4, elem.first);
std::cout << ">> double" << std::endl;
test_impl<double>(1e-9, it->first);
test_impl<double>(1e-9, elem.first);
std::cout << "---" << std::endl;
}
return EXIT_SUCCESS;

View File

@@ -65,15 +65,15 @@ void test_impl(T epsilon, cl::Context const & ctx)
int main()
{
for(ad::cl_ext::queues_t::iterator it = ad::cl_ext::queues.begin() ; it != ad::cl_ext::queues.end() ; ++it)
for(const auto & elem : ad::cl_ext::queues.data())
{
cl::Device device = it->second[0].getInfo<CL_QUEUE_DEVICE>();
cl::Device device = elem.second[0].getInfo<CL_QUEUE_DEVICE>();
std::cout << "Device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
std::cout << "---" << std::endl;
std::cout << ">> float" << std::endl;
test_impl<float>(1e-4, it->first);
test_impl<float>(1e-4, elem.first);
std::cout << ">> double" << std::endl;
test_impl<double>(1e-9, it->first);
test_impl<double>(1e-9, elem.first);
std::cout << "---" << std::endl;
}
return EXIT_SUCCESS;

View File

@@ -74,15 +74,15 @@ void test_impl(T epsilon, cl::Context const & ctx)
int main()
{
for(ad::cl_ext::queues_t::iterator it = ad::cl_ext::queues.begin() ; it != ad::cl_ext::queues.end() ; ++it)
for(const auto & elem : ad::cl_ext::queues.data())
{
cl::Device device = it->second[0].getInfo<CL_QUEUE_DEVICE>();
cl::Device device = elem.second[0].getInfo<CL_QUEUE_DEVICE>();
std::cout << "Device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
std::cout << "---" << std::endl;
std::cout << ">> float" << std::endl;
test_impl<float>(1e-4, it->first);
test_impl<float>(1e-4, elem.first);
std::cout << ">> double" << std::endl;
test_impl<double>(1e-9, it->first);
test_impl<double>(1e-9, elem.first);
std::cout << "---" << std::endl;
}
return EXIT_SUCCESS;

View File

@@ -120,15 +120,15 @@ void test_impl(T epsilon, cl::Context const & ctx)
int main()
{
for(ad::cl_ext::queues_t::iterator it = ad::cl_ext::queues.begin() ; it != ad::cl_ext::queues.end() ; ++it)
for(const auto & elem : ad::cl_ext::queues.data())
{
cl::Device device = it->second[0].getInfo<CL_QUEUE_DEVICE>();
cl::Device device = elem.second[0].getInfo<CL_QUEUE_DEVICE>();
std::cout << "Device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
std::cout << "---" << std::endl;
std::cout << ">> float" << std::endl;
test_impl<float>(1e-4, it->first);
test_impl<float>(1e-4, elem.first);
std::cout << ">> double" << std::endl;
test_impl<double>(1e-9, it->first);
test_impl<double>(1e-9, elem.first);
std::cout << "---" << std::endl;
}
return EXIT_SUCCESS;