Ported to C++11
This commit is contained in:
@@ -46,13 +46,13 @@ void bench(ad::numeric_type dtype)
|
||||
/*--BLAS1--*/
|
||||
/*---------*/
|
||||
std::cout << "#AXPY" << std::endl;
|
||||
for(std::vector<int_t>::const_iterator it = BLAS1_N.begin() ; it != BLAS1_N.end() ; ++it)
|
||||
for(auto N : BLAS1_N)
|
||||
{
|
||||
int_t N = *it;
|
||||
|
||||
std::cout << N;
|
||||
/* ATIDLAS */
|
||||
ad::array x(N, dtype), y(N, dtype);
|
||||
cl::CommandQueue & queue = ad::cl_ext::get_queue(x.context(), 0);
|
||||
cl::CommandQueue & queue = ad::cl_ext::queues[x.context()][0];
|
||||
ad::model & model = ad::get_model(queue, ad::VECTOR_AXPY_TYPE, dtype);
|
||||
ad::array_expression E = ad::detail::assign(y, x + y);
|
||||
model.tune(E);
|
||||
@@ -178,15 +178,16 @@ int main(int argc, char* argv[])
|
||||
#endif
|
||||
|
||||
int device_idx = 0;
|
||||
if(ad::cl_ext::queues.size()>1){
|
||||
ad::cl_ext::queues_t & queues = ad::cl_ext::queues;
|
||||
ad::cl_ext::queues_type::data_type const & queues = ad::cl_ext::queues.data();
|
||||
|
||||
if(queues.size()>1){
|
||||
if(argc!=2)
|
||||
{
|
||||
std::cerr << "usage : blas-bench [DEVICE_IDX]" << std::endl;
|
||||
std::cout << "Devices available: " << std::endl;
|
||||
unsigned int current=0;
|
||||
for(ad::cl_ext::queues_t::const_iterator it = queues.begin() ; it != queues.end() ; ++it){
|
||||
cl::Device device = it->first.getInfo<CL_CONTEXT_DEVICES>()[0];
|
||||
for(const auto & queue : queues){
|
||||
cl::Device device = queue.first.getInfo<CL_CONTEXT_DEVICES>()[0];
|
||||
std::cout << current++ << ": " << device.getInfo<CL_DEVICE_NAME>() << "(" << cl::Platform(device.getInfo<CL_DEVICE_PLATFORM>()).getInfo<CL_PLATFORM_NAME>() << ")" << std::endl;
|
||||
}
|
||||
exit(EXIT_FAILURE);
|
||||
|
@@ -3,6 +3,7 @@
|
||||
|
||||
#include "vector"
|
||||
#include <cmath>
|
||||
#include <algorithm>
|
||||
|
||||
int ceil(int N, int pad)
|
||||
{
|
||||
|
@@ -12,9 +12,9 @@ __global__ void dummy(){}
|
||||
|
||||
int main()
|
||||
{
|
||||
for(ad::cl_ext::queues_t::iterator it = ad::cl_ext::queues.begin() ; it != ad::cl_ext::queues.end() ; ++it)
|
||||
for(const auto & elem : ad::cl_ext::queues.data())
|
||||
{
|
||||
cl::CommandQueue queue = it->second[0];
|
||||
cl::CommandQueue queue = elem.second[0];
|
||||
cl::Device device = queue.getInfo<CL_QUEUE_DEVICE>();
|
||||
cl::Program program("__kernel void dummy(){}");
|
||||
program.build();
|
||||
|
@@ -4,7 +4,7 @@
|
||||
#include <iostream>
|
||||
#include <CL/cl.hpp>
|
||||
#include "atidlas/types.h"
|
||||
#include "atidlas/cl/queues.h"
|
||||
#include "atidlas/cl_ext/backend.h"
|
||||
#include "atidlas/symbolic/expression.h"
|
||||
|
||||
|
||||
@@ -12,11 +12,10 @@ namespace atidlas
|
||||
{
|
||||
|
||||
class scalar;
|
||||
class array: public obj_base
|
||||
|
||||
class array: public array_base
|
||||
{
|
||||
friend array reshape(array const &, int_t, int_t);
|
||||
friend array reshape(array_expression const &, int_t, int_t);
|
||||
static array_infos init_infos(numeric_type dtype, cl_mem data, int_t shape1, int_t shape2, int_t start1, int_t start2, int_t stride1, int_t stride2, int_t ld);
|
||||
public:
|
||||
//1D Constructors
|
||||
array(int_t size1, numeric_type dtype, cl::Context context = cl_ext::default_context());
|
||||
@@ -32,15 +31,15 @@ public:
|
||||
|
||||
//General constructor
|
||||
array(numeric_type dtype, cl::Buffer data, slice const & s1, slice const & s2, int_t ld, cl::Context context = cl_ext::default_context());
|
||||
array(control const & proxy);
|
||||
array(array_expression const & proxy);
|
||||
array(array const &);
|
||||
|
||||
//Getters
|
||||
numeric_type dtype() const;
|
||||
size4 shape() const;
|
||||
int_t nshape() const;
|
||||
size4 start() const;
|
||||
size4 stride() const;
|
||||
int_t nshape() const;
|
||||
int_t ld() const;
|
||||
cl::Context const & context() const;
|
||||
cl::Buffer const & data() const;
|
||||
@@ -51,7 +50,8 @@ public:
|
||||
|
||||
//Numeric operators
|
||||
array& operator=(array const &);
|
||||
array& operator=(control const &);
|
||||
array& operator=(array_expression const &);
|
||||
|
||||
template<class T> array & operator=(std::vector<T> const & rhs);
|
||||
|
||||
array_expression operator-();
|
||||
@@ -78,9 +78,15 @@ public:
|
||||
|
||||
array_expression T() const;
|
||||
protected:
|
||||
numeric_type dtype_;
|
||||
|
||||
size4 shape_;
|
||||
size4 start_;
|
||||
size4 stride_;
|
||||
int_t ld_;
|
||||
|
||||
cl::Context context_;
|
||||
cl::Buffer data_;
|
||||
array_infos infos_;
|
||||
};
|
||||
|
||||
class scalar : public array
|
||||
@@ -91,7 +97,7 @@ public:
|
||||
explicit scalar(numeric_type dtype, cl::Buffer const & data, int_t offset, cl::Context context = cl_ext::default_context());
|
||||
explicit scalar(value_scalar value, cl::Context context = cl_ext::default_context());
|
||||
explicit scalar(numeric_type dtype, cl::Context context = cl_ext::default_context());
|
||||
scalar(control const & proxy);
|
||||
scalar(array_expression const & proxy);
|
||||
scalar& operator=(value_scalar const &);
|
||||
// scalar& operator=(scalar const & s);
|
||||
using array::operator =;
|
||||
|
@@ -20,7 +20,7 @@ enum leaf_t
|
||||
class mapped_object;
|
||||
|
||||
typedef std::pair<int_t, leaf_t> mapping_key;
|
||||
typedef std::map<mapping_key, tools::shared_ptr<mapped_object> > mapping_type;
|
||||
typedef std::map<mapping_key, std::shared_ptr<mapped_object> > mapping_type;
|
||||
|
||||
/** @brief Mapped Object
|
||||
*
|
||||
|
@@ -9,7 +9,7 @@
|
||||
#include "atidlas/backend/parse.h"
|
||||
#include "atidlas/backend/stream.h"
|
||||
#include <CL/cl.hpp>
|
||||
#include "atidlas/cl/lazy_compiler.h"
|
||||
#include "atidlas/cl_ext/lazy_compiler.h"
|
||||
#include "atidlas/symbolic/expression.h"
|
||||
|
||||
namespace atidlas
|
||||
@@ -75,15 +75,15 @@ protected:
|
||||
/** @brief Accessor for the numeric type */
|
||||
numeric_type get_numeric_type(atidlas::array_expression const * array_expression, int_t root_idx) const;
|
||||
/** @brief Creates a binary leaf */
|
||||
template<class T> tools::shared_ptr<mapped_object> binary_leaf(atidlas::array_expression const * array_expression, int_t root_idx, mapping_type const * mapping) const;
|
||||
template<class T> std::shared_ptr<mapped_object> binary_leaf(atidlas::array_expression const * array_expression, int_t root_idx, mapping_type const * mapping) const;
|
||||
/** @brief Creates a value scalar mapping */
|
||||
tools::shared_ptr<mapped_object> create(numeric_type dtype, values_holder) const;
|
||||
std::shared_ptr<mapped_object> create(numeric_type dtype, values_holder) const;
|
||||
/** @brief Creates a vector mapping */
|
||||
tools::shared_ptr<mapped_object> create(array_infos const &) const;
|
||||
std::shared_ptr<mapped_object> create(array_infos const &) const;
|
||||
/** @brief Creates a tuple mapping */
|
||||
tools::shared_ptr<mapped_object> create(repeat_infos const &) const;
|
||||
std::shared_ptr<mapped_object> create(repeat_infos const &) const;
|
||||
/** @brief Creates a mapping */
|
||||
tools::shared_ptr<mapped_object> create(lhs_rhs_element const &) const;
|
||||
std::shared_ptr<mapped_object> create(lhs_rhs_element const &) const;
|
||||
public:
|
||||
map_functor(symbolic_binder & binder, mapping_type & mapping);
|
||||
/** @brief Functor for traversing the tree */
|
||||
@@ -143,7 +143,7 @@ protected:
|
||||
static bool is_reduction(array_expression::node const & node);
|
||||
static bool is_index_reduction(op_element const & op);
|
||||
|
||||
tools::shared_ptr<symbolic_binder> make_binder();
|
||||
std::shared_ptr<symbolic_binder> make_binder();
|
||||
static std::string vstore(unsigned int simd_width, std::string const & value, std::string const & offset, std::string const & ptr);
|
||||
static std::string vload(unsigned int simd_width, std::string const & offset, std::string const & ptr);
|
||||
static std::string append_width(std::string const & str, unsigned int width);
|
||||
@@ -164,7 +164,7 @@ public:
|
||||
virtual int check_invalid(expressions_tuple const & expressions, cl::Device const & device) const = 0;
|
||||
virtual void enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler> & programs,
|
||||
unsigned int label, expressions_tuple const & expressions, operation_cache* cache = NULL) = 0;
|
||||
virtual tools::shared_ptr<base> clone() const = 0;
|
||||
virtual std::shared_ptr<base> clone() const = 0;
|
||||
private:
|
||||
binding_policy_t binding_policy_;
|
||||
};
|
||||
@@ -180,7 +180,7 @@ public:
|
||||
base_impl(parameters_type const & parameters, binding_policy_t binding_policy);
|
||||
int_t local_size_0() const;
|
||||
int_t local_size_1() const;
|
||||
tools::shared_ptr<base> clone() const;
|
||||
std::shared_ptr<base> clone() const;
|
||||
/** @brief returns whether or not the profile has undefined behavior on particular device */
|
||||
int check_invalid(expressions_tuple const & expressions, cl::Device const & device) const;
|
||||
protected:
|
||||
|
@@ -1,31 +0,0 @@
|
||||
#ifndef ATIDLAS_CL_QUEUES_H
|
||||
#define ATIDLAS_CL_QUEUES_H
|
||||
|
||||
#include <map>
|
||||
#include <CL/cl.hpp>
|
||||
#include "atidlas/cl/compare.hpp"
|
||||
|
||||
namespace atidlas
|
||||
{
|
||||
namespace cl_ext
|
||||
{
|
||||
|
||||
typedef std::map<std::pair<cl_program, unsigned int>, cl::Kernel> kernels_t;
|
||||
typedef std::vector<std::pair<cl::Context, std::vector<cl::CommandQueue> > > queues_t;
|
||||
|
||||
extern kernels_t kernels;
|
||||
extern queues_t queues;
|
||||
extern unsigned int default_context_idx;
|
||||
extern cl_command_queue_properties queue_properties;
|
||||
|
||||
|
||||
void synchronize(cl::Context const & context);
|
||||
cl::Context default_context();
|
||||
cl::CommandQueue & get_queue(cl::Context const &, std::size_t);
|
||||
cl::Device get_device(cl::CommandQueue &);
|
||||
std::vector<cl::CommandQueue> & get_queues(cl::Context const & ctx);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
40
include/atidlas/cl_ext/backend.h
Normal file
40
include/atidlas/cl_ext/backend.h
Normal file
@@ -0,0 +1,40 @@
|
||||
#ifndef ATIDLAS_CL_QUEUES_H
|
||||
#define ATIDLAS_CL_QUEUES_H
|
||||
|
||||
#include <map>
|
||||
#include <list>
|
||||
#include <CL/cl.hpp>
|
||||
|
||||
namespace atidlas
|
||||
{
|
||||
namespace cl_ext
|
||||
{
|
||||
|
||||
class queues_type
|
||||
{
|
||||
private:
|
||||
void append(cl::Context const &);
|
||||
void init();
|
||||
public:
|
||||
typedef std::list<std::pair<cl::Context, std::vector<cl::CommandQueue> > > data_type;
|
||||
std::vector<cl::CommandQueue> & operator[](cl::Context const &);
|
||||
cl::Context default_context();
|
||||
data_type const & data();
|
||||
private:
|
||||
data_type data_;
|
||||
};
|
||||
|
||||
typedef std::map<std::pair<cl_program, unsigned int>, cl::Kernel> kernels_type;
|
||||
|
||||
extern kernels_type kernels;
|
||||
extern queues_type queues;
|
||||
extern unsigned int default_context_idx;
|
||||
extern cl_command_queue_properties queue_properties;
|
||||
|
||||
void synchronize(cl::Context const & context);
|
||||
cl::Context default_context();
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
@@ -2,7 +2,7 @@
|
||||
#define ATIDLAS_CL_LAZY_COMPILER_H
|
||||
|
||||
#include <CL/cl.hpp>
|
||||
#include "atidlas/cl/program_map.h"
|
||||
#include "atidlas/cl_ext/program_map.h"
|
||||
|
||||
namespace atidlas
|
||||
{
|
@@ -6,8 +6,8 @@
|
||||
#include <map>
|
||||
|
||||
#include "atidlas/backend/templates/base.h"
|
||||
#include "atidlas/cl/compare.hpp"
|
||||
#include "atidlas/cl/lazy_compiler.h"
|
||||
#include "atidlas/cl_ext/compare.hpp"
|
||||
#include "atidlas/cl_ext/lazy_compiler.h"
|
||||
#include "atidlas/model/predictors/random_forest.h"
|
||||
#include "atidlas/symbolic/expression.h"
|
||||
|
||||
@@ -16,7 +16,7 @@ namespace atidlas
|
||||
|
||||
class model
|
||||
{
|
||||
typedef std::vector< tools::shared_ptr<base> > templates_container;
|
||||
typedef std::vector< std::shared_ptr<base> > templates_container;
|
||||
public:
|
||||
struct runtime_options
|
||||
{
|
||||
@@ -34,8 +34,8 @@ namespace atidlas
|
||||
std::vector<cl_ext::lazy_compiler>& init(expressions_tuple const & expressions, runtime_options const & opt = runtime_options());
|
||||
|
||||
public:
|
||||
model(predictors::random_forest const &, std::vector< tools::shared_ptr<base> > const &, cl::CommandQueue &);
|
||||
model(std::vector< tools::shared_ptr<base> > const &, cl::CommandQueue &);
|
||||
model(predictors::random_forest const &, std::vector< std::shared_ptr<base> > const &, cl::CommandQueue &);
|
||||
model(std::vector< std::shared_ptr<base> > const &, cl::CommandQueue &);
|
||||
model(base const &, cl::CommandQueue &);
|
||||
|
||||
void execute(expressions_tuple const &, operation_cache * cache = NULL, runtime_options const & opt = runtime_options());
|
||||
@@ -44,13 +44,13 @@ namespace atidlas
|
||||
templates_container const & templates() const;
|
||||
private:
|
||||
templates_container templates_;
|
||||
tools::shared_ptr<predictors::random_forest> predictor_;
|
||||
std::shared_ptr<predictors::random_forest> predictor_;
|
||||
std::map<std::vector<int_t>, int> hardcoded_;
|
||||
std::map<cl_context, std::map<std::string, std::vector<cl_ext::lazy_compiler> > > lazy_programs_;
|
||||
cl::CommandQueue & queue_;
|
||||
};
|
||||
|
||||
typedef std::map<std::pair<expression_type, numeric_type>, tools::shared_ptr<model> > model_map_t;
|
||||
typedef std::map<std::pair<expression_type, numeric_type>, std::shared_ptr<model> > model_map_t;
|
||||
|
||||
model_map_t init_models(cl::CommandQueue const & queue);
|
||||
model_map_t& get_model_map(cl::CommandQueue & queue);
|
||||
|
@@ -6,7 +6,7 @@
|
||||
#include <CL/cl.hpp>
|
||||
#include "atidlas/types.h"
|
||||
#include "atidlas/value_scalar.h"
|
||||
#include "atidlas/tools/shared_ptr.hpp"
|
||||
#include <memory>
|
||||
|
||||
namespace atidlas
|
||||
{
|
||||
@@ -168,7 +168,7 @@ void fill(lhs_rhs_element & x, array const & a);
|
||||
void fill(lhs_rhs_element & x, value_scalar const & v);
|
||||
void fill(lhs_rhs_element & x, repeat_infos const & r);
|
||||
|
||||
class array_expression
|
||||
class array_expression : public array_base
|
||||
{
|
||||
public:
|
||||
struct node
|
||||
@@ -208,21 +208,21 @@ private:
|
||||
size4 shape_;
|
||||
};
|
||||
|
||||
class control
|
||||
template<class TYPE>
|
||||
class controller
|
||||
{
|
||||
public:
|
||||
control(array_expression const & x, cl::Event* event = NULL, std::vector<cl::Event>* dependencies = NULL,
|
||||
controller(TYPE const & x, cl::Event* event = NULL, std::vector<cl::Event>* dependencies = NULL,
|
||||
cl::CommandQueue* queue = NULL, operation_cache* cache = NULL) : x_(x), event_(event), dependencies_(dependencies), queue_(queue), cache_(cache){}
|
||||
|
||||
array_expression const & expression() const { return x_; }
|
||||
TYPE const & x() const { return x_; }
|
||||
cl::Event* event() const { return event_; }
|
||||
std::vector<cl::Event>* dependencies() const { return dependencies_; }
|
||||
cl::CommandQueue* queue() const { return queue_; }
|
||||
operation_cache* cache() const { return cache_; }
|
||||
|
||||
private:
|
||||
array_expression const & x_;
|
||||
|
||||
TYPE const & x_;
|
||||
cl::Event* event_;
|
||||
std::vector<cl::Event>* dependencies_;
|
||||
cl::CommandQueue* queue_;
|
||||
@@ -232,9 +232,9 @@ private:
|
||||
class expressions_tuple
|
||||
{
|
||||
private:
|
||||
tools::shared_ptr<array_expression> create(array_expression const & s);
|
||||
std::shared_ptr<array_expression> create(array_expression const & s);
|
||||
public:
|
||||
typedef std::list<tools::shared_ptr<array_expression> > data_type;
|
||||
typedef std::list<std::shared_ptr<array_expression> > data_type;
|
||||
enum order_type { SEQUENTIAL, INDEPENDENT };
|
||||
|
||||
expressions_tuple(array_expression const & s0);
|
||||
|
@@ -1,162 +0,0 @@
|
||||
#ifndef ATIDLAS_TOOLS_SHARED_PTR_HPP
|
||||
#define ATIDLAS_TOOLS_SHARED_PTR_HPP
|
||||
|
||||
/* =========================================================================
|
||||
Copyright (c) 2010-2012, Institute for Microelectronics,
|
||||
Institute for Analysis and Scientific Computing,
|
||||
TU Wien.
|
||||
Portions of this software are copyright by UChicago Argonne, LLC.
|
||||
|
||||
-----------------
|
||||
ViennaCL - The Vienna Computing Library
|
||||
-----------------
|
||||
|
||||
Project Head: Karl Rupp rupp@iue.tuwien.ac.at
|
||||
|
||||
(A list of authors and contributors can be found in the PDF manual)
|
||||
|
||||
License: MIT (X11), see file LICENSE in the base directory
|
||||
============================================================================= */
|
||||
|
||||
/** @file tools/shared_ptr.hpp
|
||||
@brief Implementation of a shared pointer class (cf. std::shared_ptr, boost::shared_ptr). Will be used until C++11 is widely available.
|
||||
|
||||
Contributed by Philippe Tillet.
|
||||
*/
|
||||
|
||||
#include <cstdlib>
|
||||
#include <algorithm>
|
||||
|
||||
namespace atidlas
|
||||
{
|
||||
namespace tools
|
||||
{
|
||||
namespace detail
|
||||
{
|
||||
|
||||
/** @brief Reference counting class for the shared_ptr implementation */
|
||||
class count
|
||||
{
|
||||
public:
|
||||
count(unsigned int val) : val_(val){ }
|
||||
void dec(){ --val_; }
|
||||
void inc(){ ++val_; }
|
||||
bool is_null(){ return val_ == 0; }
|
||||
unsigned int val(){ return val_; }
|
||||
private:
|
||||
unsigned int val_;
|
||||
};
|
||||
|
||||
/** @brief Interface for the reference counter inside the shared_ptr */
|
||||
struct aux
|
||||
{
|
||||
detail::count count;
|
||||
|
||||
aux() :count(1) {}
|
||||
virtual void destroy()=0;
|
||||
virtual ~aux() {}
|
||||
};
|
||||
|
||||
/** @brief Implementation helper for the reference counting mechanism inside shared_ptr. */
|
||||
template<class U, class Deleter>
|
||||
struct auximpl: public detail::aux
|
||||
{
|
||||
U* p;
|
||||
Deleter d;
|
||||
|
||||
auximpl(U* pu, Deleter x) :p(pu), d(x) {}
|
||||
virtual void destroy() { d(p); }
|
||||
};
|
||||
|
||||
/** @brief Default deleter class for a pointer. The default is to just call 'delete' on the pointer. Provide your own implementations for 'delete[]' and 'free'. */
|
||||
template<class U>
|
||||
struct default_deleter
|
||||
{
|
||||
void operator()(U* p) const { delete p; }
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
class shared_ptr_base
|
||||
{
|
||||
protected:
|
||||
detail::aux* pa;
|
||||
public:
|
||||
unsigned int count() { return pa->count.val(); }
|
||||
};
|
||||
|
||||
/** @brief A shared pointer class similar to boost::shared_ptr. Reimplemented in order to avoid a Boost-dependency. Will be replaced by std::shared_ptr as soon as C++11 is widely available. */
|
||||
template<class T>
|
||||
class shared_ptr : public shared_ptr_base
|
||||
{
|
||||
template<class U>
|
||||
friend class shared_ptr;
|
||||
|
||||
detail::aux* pa;
|
||||
T* pt;
|
||||
|
||||
public:
|
||||
|
||||
shared_ptr() :pa(NULL), pt(NULL) {}
|
||||
|
||||
template<class U, class Deleter>
|
||||
shared_ptr(U* pu, Deleter d) : pa(new detail::auximpl<U, Deleter>(pu, d)), pt(pu) {}
|
||||
|
||||
template<class U>
|
||||
explicit shared_ptr(U* pu) : pa(new detail::auximpl<U, detail::default_deleter<U> >(pu, detail::default_deleter<U>())), pt(pu) {}
|
||||
|
||||
template<class U>
|
||||
shared_ptr(const shared_ptr<U>& s) :pa(s.pa), pt(s.pt) { inc(); }
|
||||
|
||||
shared_ptr(const shared_ptr& s) :pa(s.pa), pt(s.pt) { inc(); }
|
||||
~shared_ptr() { dec(); }
|
||||
|
||||
T* get() const { return pt; }
|
||||
T* operator->() const { return pt; }
|
||||
T& operator*() const { return *pt; }
|
||||
|
||||
void reset() { shared_ptr<T>().swap(*this); }
|
||||
void reset(T * ptr) { shared_ptr<T>(ptr).swap(*this); }
|
||||
|
||||
void swap(shared_ptr<T> & other)
|
||||
{
|
||||
std::swap(pt,other.pt);
|
||||
std::swap(pa, other.pa);
|
||||
}
|
||||
|
||||
shared_ptr& operator=(const shared_ptr& s)
|
||||
{
|
||||
if (this!=&s)
|
||||
{
|
||||
dec();
|
||||
pa = s.pa;
|
||||
pt = s.pt;
|
||||
inc();
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
void inc()
|
||||
{
|
||||
if (pa) pa->count.inc();
|
||||
}
|
||||
|
||||
void dec()
|
||||
{
|
||||
if (pa)
|
||||
{
|
||||
pa->count.dec();
|
||||
if (pa->count.is_null())
|
||||
{
|
||||
pa->destroy();
|
||||
delete pa;
|
||||
pa = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
@@ -83,8 +83,8 @@ public:
|
||||
|
||||
void enqueue()
|
||||
{
|
||||
for(std::list<infos>::iterator it = l_.begin() ; it != l_.end() ; ++it)
|
||||
it->queue.enqueueNDRangeKernel(it->kernel, it->offset, it->grange, it->lrange);
|
||||
for(infos & elem : l_)
|
||||
elem.queue.enqueueNDRangeKernel(elem.kernel, elem.offset, elem.grange, elem.lrange);
|
||||
}
|
||||
|
||||
private:
|
||||
@@ -173,7 +173,8 @@ struct slice
|
||||
};
|
||||
typedef slice _;
|
||||
|
||||
class obj_base{};
|
||||
class array_base{ };
|
||||
|
||||
|
||||
}
|
||||
#endif
|
||||
|
182
lib/array.cpp
182
lib/array.cpp
@@ -11,40 +11,27 @@
|
||||
namespace atidlas
|
||||
{
|
||||
|
||||
array_infos array::init_infos(numeric_type dtype, cl_mem data, int_t shape1, int_t shape2, int_t start1, int_t start2, int_t stride1, int_t stride2, int_t ld)
|
||||
{
|
||||
array_infos res;
|
||||
res.dtype = dtype;
|
||||
res.data = data;
|
||||
res.shape1 = shape1;
|
||||
res.shape2 = shape2;
|
||||
res.start1 = start1;
|
||||
res.start2 = start2;
|
||||
res.stride1 = stride1;
|
||||
res.stride2 = stride2;
|
||||
res.ld = ld;
|
||||
return res;
|
||||
}
|
||||
|
||||
/*--- Constructors ---*/
|
||||
|
||||
//1D Constructors
|
||||
|
||||
array::array(int_t size1, numeric_type dtype, cl::Context context) :
|
||||
context_(context), data_(context_, CL_MEM_READ_WRITE, size_of(dtype)*size1),
|
||||
infos_(init_infos(dtype, data_(), size1, 1, 0, 0, 1, 1, size1))
|
||||
{}
|
||||
dtype_(dtype), shape_(size1, 1), start_(0, 0), stride_(1, 1), ld_(shape_._1),
|
||||
context_(context), data_(context_, CL_MEM_READ_WRITE, size_of(dtype)*dsize())
|
||||
{ }
|
||||
|
||||
template<class DT>
|
||||
array::array(std::vector<DT> const & x, cl::Context context):
|
||||
context_(context), data_(context, CL_MEM_READ_WRITE, size_of(to_numeric_type<DT>::value)*x.size()),
|
||||
infos_(init_infos(to_numeric_type<DT>::value, data_(), x.size(), 1, 0, 0, 1, 1, x.size()))
|
||||
dtype_(to_numeric_type<DT>::value), shape_(x.size(), 1), start_(0, 0), stride_(1, 1), ld_(shape_._1),
|
||||
context_(context), data_(context, CL_MEM_READ_WRITE, size_of(dtype_)*dsize())
|
||||
{ *this = x; }
|
||||
|
||||
array::array(array & v, slice const & s1) : context_(v.data_.getInfo<CL_MEM_CONTEXT>()), data_(v.data_),
|
||||
infos_(init_infos(v.infos_.dtype, data_(), s1.size, 1, v.infos_.start1 + v.infos_.stride1*s1.start, 0, v.infos_.stride1*s1.stride, 1, v.infos_.ld))
|
||||
array::array(array & v, slice const & s1) : dtype_(v.dtype_), shape_(s1.size, 1), start_(v.start_._1 + v.stride_._1*s1.start, 0), stride_(v.stride_._1*s1.stride, 1),
|
||||
ld_(v.ld_), context_(v.data_.getInfo<CL_MEM_CONTEXT>()), data_(v.data_)
|
||||
{}
|
||||
|
||||
#define INSTANTIATE(T) template array::array<T>(std::vector<T> const &, cl::Context)
|
||||
#define INSTANTIATE(T) template array::array(std::vector<T> const &, cl::Context)
|
||||
INSTANTIATE(cl_char);
|
||||
INSTANTIATE(cl_uchar);
|
||||
INSTANTIATE(cl_short);
|
||||
@@ -58,26 +45,26 @@ INSTANTIATE(cl_double);
|
||||
#undef INSTANTIATE
|
||||
|
||||
// 2D
|
||||
array::array(int_t size1, int_t size2, numeric_type dtype, cl::Context context) :
|
||||
context_(context), data_(context_, CL_MEM_READ_WRITE, size_of(dtype)*size1*size2),
|
||||
infos_(init_infos(dtype, data_(), size1, size2, 0, 0, 1, 1, size1))
|
||||
array::array(int_t size1, int_t size2, numeric_type dtype, cl::Context context) : dtype_(dtype), shape_(size1, size2), start_(0, 0), stride_(1, 1), ld_(size1),
|
||||
context_(context), data_(context_, CL_MEM_READ_WRITE, size_of(dtype_)*dsize())
|
||||
{}
|
||||
|
||||
array::array(array & M, slice const & s1, slice const & s2) :
|
||||
context_(M.data_.getInfo<CL_MEM_CONTEXT>()), data_(M.data_),
|
||||
infos_(init_infos(M.dtype(), data_(), s1.size, s2.size, M.start()._1 + M.stride()._1*s1.start, M.start()._2 + M.stride()._2*s2.start,
|
||||
M.stride()._1*s1.stride, M.stride()._2*s2.stride, M.ld()))
|
||||
array::array(array & M, slice const & s1, slice const & s2) : dtype_(M.dtype_), shape_(s1.size, s2.size),
|
||||
start_(M.start_._1 + M.stride_._1*s1.start, M.start_._2 + M.stride_._2*s2.start),
|
||||
stride_(M.stride_._1*s1.stride, M.stride_._2*s2.stride), ld_(M.ld_),
|
||||
context_(M.data_.getInfo<CL_MEM_CONTEXT>()), data_(M.data_)
|
||||
{ }
|
||||
|
||||
template<typename DT>
|
||||
array::array(int_t size1, int_t size2, std::vector<DT> const & data, cl::Context context):
|
||||
context_(context), data_(context_, CL_MEM_READ_WRITE, size_of(to_numeric_type<DT>::value)*size1*size2),
|
||||
infos_(init_infos(to_numeric_type<DT>::value, data_(), size1, size2, 0, 0, 1, 1, size1))
|
||||
array::array(int_t size1, int_t size2, std::vector<DT> const & data, cl::Context context)
|
||||
: dtype_(to_numeric_type<DT>::value),
|
||||
shape_(size1, size2), start_(0, 0), stride_(1, 1), ld_(size1),
|
||||
context_(context), data_(context_, CL_MEM_READ_WRITE, size_of(dtype_)*dsize())
|
||||
{
|
||||
atidlas::copy(data, *this);
|
||||
}
|
||||
|
||||
#define INSTANTIATE(T) template array::array<T>(int_t, int_t, std::vector<T> const &, cl::Context)
|
||||
#define INSTANTIATE(T) template array::array(int_t, int_t, std::vector<T> const &, cl::Context)
|
||||
INSTANTIATE(cl_char);
|
||||
INSTANTIATE(cl_uchar);
|
||||
INSTANTIATE(cl_short);
|
||||
@@ -92,43 +79,45 @@ INSTANTIATE(cl_double);
|
||||
|
||||
// General
|
||||
array::array(numeric_type dtype, cl::Buffer data, slice const & s1, slice const & s2, int_t ld, cl::Context context):
|
||||
context_(context), data_(data),
|
||||
infos_(init_infos(dtype, data_(), s1.size, s2.size, s1.start, s2.start, s1.stride, s2.stride, ld))
|
||||
dtype_(dtype), shape_(s1.size, s2.size), start_(s1.start, s2.start), stride_(s1.stride, s2.stride),
|
||||
ld_(ld), context_(context), data_(data)
|
||||
{ }
|
||||
|
||||
array::array(control const & x):
|
||||
context_(x.expression().context()), data_(context_, CL_MEM_READ_WRITE, size_of(x.expression().dtype())*prod(x.expression().shape())),
|
||||
infos_(init_infos(x.expression().dtype(), data_(), x.expression().shape()._1, x.expression().shape()._2, 0, 0, 1, 1, x.expression().shape()._1))
|
||||
array::array(array_expression const & proxy) :
|
||||
dtype_(proxy.dtype()),
|
||||
shape_(proxy.shape()), start_(0,0), stride_(1, 1), ld_(shape_._1),
|
||||
context_(proxy.context()), data_(context_, CL_MEM_READ_WRITE, size_of(dtype_)*dsize())
|
||||
{
|
||||
*this = x;
|
||||
*this = proxy;
|
||||
}
|
||||
|
||||
array::array(array const & x) :
|
||||
context_(x.context()), data_(context_, CL_MEM_READ_WRITE, size_of(x.dtype())*x.shape()._1*x.shape()._2),
|
||||
infos_(init_infos(x.dtype(), data_(), x.shape()._1, x.shape()._2, 0, 0, 1, 1, x.shape()._1))
|
||||
array::array(array const & other) :
|
||||
dtype_(other.dtype()),
|
||||
shape_(other.shape()), start_(0,0), stride_(1, 1), ld_(shape_._1),
|
||||
context_(other.context()), data_(context_, CL_MEM_READ_WRITE, size_of(dtype_)*dsize())
|
||||
{
|
||||
*this = x;
|
||||
*this = other;
|
||||
}
|
||||
|
||||
|
||||
/*--- Getters ---*/
|
||||
numeric_type array::dtype() const
|
||||
{ return infos_.dtype; }
|
||||
{ return dtype_; }
|
||||
|
||||
size4 array::shape() const
|
||||
{ return size4(infos_.shape1, infos_.shape2); }
|
||||
{ return shape_; }
|
||||
|
||||
int_t array::nshape() const
|
||||
{ return int_t((infos_.shape1 > 1) + (infos_.shape2 > 1)); }
|
||||
{ return int_t((shape_._1 > 1) + (shape_._2 > 1)); }
|
||||
|
||||
size4 array::start() const
|
||||
{ return size4(infos_.start1, infos_.start2); }
|
||||
{ return start_; }
|
||||
|
||||
size4 array::stride() const
|
||||
{ return size4(infos_.stride1, infos_.stride2); }
|
||||
{ return stride_; }
|
||||
|
||||
int_t array::ld() const
|
||||
{ return infos_.ld; }
|
||||
{ return ld_; }
|
||||
|
||||
cl::Context const & array::context() const
|
||||
{ return context_; }
|
||||
@@ -137,27 +126,25 @@ cl::Buffer const & array::data() const
|
||||
{ return data_; }
|
||||
|
||||
int_t array::dsize() const
|
||||
{ return infos_.ld*infos_.shape2; }
|
||||
{ return ld_*shape_._2; }
|
||||
|
||||
/*--- Assignment Operators ----*/
|
||||
//---------------------------------------
|
||||
array & array::operator=(array const & rhs)
|
||||
{
|
||||
assert(dtype() == rhs.dtype());
|
||||
array_expression expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ASSIGN_TYPE), context_, dtype(), shape());
|
||||
cl::CommandQueue & queue = cl_ext::get_queue(context_, 0);
|
||||
assert(dtype_ == rhs.dtype());
|
||||
array_expression expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ASSIGN_TYPE), context_, dtype_, shape_);
|
||||
cl::CommandQueue & queue = cl_ext::queues[context_][0];
|
||||
model_map_t & mmap = atidlas::get_model_map(queue);
|
||||
execute(expression, mmap);
|
||||
return *this;
|
||||
}
|
||||
|
||||
array & array::operator=(control const & x)
|
||||
array & array::operator=(array_expression const & rhs)
|
||||
{
|
||||
array_expression const & rhs = x.expression();
|
||||
|
||||
assert(dtype() == rhs.dtype());
|
||||
array_expression expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ASSIGN_TYPE), dtype(), shape());
|
||||
cl::CommandQueue & queue = cl_ext::get_queue(context_, 0);
|
||||
assert(dtype_ == rhs.dtype());
|
||||
array_expression expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ASSIGN_TYPE), dtype_, shape_);
|
||||
cl::CommandQueue & queue = cl_ext::queues[context_][0];
|
||||
model_map_t & mmap = atidlas::get_model_map(queue);
|
||||
execute(expression, mmap);
|
||||
return *this;
|
||||
@@ -186,47 +173,47 @@ INSTANTIATE(cl_double);
|
||||
#undef INSTANTIATE
|
||||
|
||||
array_expression array::operator-()
|
||||
{ return array_expression(*this, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), context_, dtype(), shape()); }
|
||||
{ return array_expression(*this, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), context_, dtype_, shape_); }
|
||||
|
||||
array_expression array::operator!()
|
||||
{ return array_expression(*this, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_NEGATE_TYPE), context_, INT_TYPE, shape()); }
|
||||
{ return array_expression(*this, invalid_node(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_NEGATE_TYPE), context_, INT_TYPE, shape_); }
|
||||
|
||||
//
|
||||
array & array::operator+=(value_scalar const & rhs)
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ADD_TYPE), context_, dtype(), shape()); }
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ADD_TYPE), context_, dtype_, shape_); }
|
||||
|
||||
array & array::operator+=(array const & rhs)
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ADD_TYPE), context_, dtype(), shape()); }
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ADD_TYPE), context_, dtype_, shape_); }
|
||||
|
||||
array & array::operator+=(array_expression const & rhs)
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ADD_TYPE), dtype(), shape()); }
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ADD_TYPE), dtype_, shape_); }
|
||||
//----
|
||||
array & array::operator-=(value_scalar const & rhs)
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), context_, dtype(), shape()); }
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), context_, dtype_, shape_); }
|
||||
|
||||
array & array::operator-=(array const & rhs)
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), context_, dtype(), shape()); }
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), context_, dtype_, shape_); }
|
||||
|
||||
array & array::operator-=(array_expression const & rhs)
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), dtype(), shape()); }
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_SUB_TYPE), dtype_, shape_); }
|
||||
//----
|
||||
array & array::operator*=(value_scalar const & rhs)
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_MULT_TYPE), context_, dtype(), shape()); }
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_MULT_TYPE), context_, dtype_, shape_); }
|
||||
|
||||
array & array::operator*=(array const & rhs)
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_MULT_TYPE), context_, dtype(), shape()); }
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_MULT_TYPE), context_, dtype_, shape_); }
|
||||
|
||||
array & array::operator*=(array_expression const & rhs)
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_MULT_TYPE), dtype(), shape()); }
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_MULT_TYPE), dtype_, shape_); }
|
||||
//----
|
||||
array & array::operator/=(value_scalar const & rhs)
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_DIV_TYPE), context_, dtype(), shape()); }
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_DIV_TYPE), context_, dtype_, shape_); }
|
||||
|
||||
array & array::operator/=(array const & rhs)
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_DIV_TYPE), context_, dtype(), shape()); }
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_DIV_TYPE), context_, dtype_, shape_); }
|
||||
|
||||
array & array::operator/=(array_expression const & rhs)
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_DIV_TYPE), dtype(), shape()); }
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_DIV_TYPE), dtype_, shape_); }
|
||||
|
||||
array_expression array::T() const
|
||||
{ return atidlas::trans(*this) ;}
|
||||
@@ -236,13 +223,13 @@ array_expression array::T() const
|
||||
scalar array::operator [](int_t idx)
|
||||
{
|
||||
assert(nshape()==1);
|
||||
return scalar(dtype(), data_, idx, context_);
|
||||
return scalar(dtype_, data_, idx, context_);
|
||||
}
|
||||
|
||||
const scalar array::operator [](int_t idx) const
|
||||
{
|
||||
assert(nshape()==1);
|
||||
return scalar(dtype(), data_, idx, context_);
|
||||
return scalar(dtype_, data_, idx, context_);
|
||||
}
|
||||
|
||||
|
||||
@@ -263,7 +250,7 @@ namespace detail
|
||||
template<class T>
|
||||
void copy(cl::Context & ctx, cl::Buffer const & data, T value)
|
||||
{
|
||||
cl_ext::get_queue(ctx, 0).enqueueWriteBuffer(data, CL_TRUE, 0, sizeof(T), (void*)&value);
|
||||
cl_ext::queues[ctx][0].enqueueWriteBuffer(data, CL_TRUE, 0, sizeof(T), (void*)&value);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -273,7 +260,7 @@ scalar::scalar(numeric_type dtype, const cl::Buffer &data, int_t offset, cl::Con
|
||||
|
||||
scalar::scalar(value_scalar value, cl::Context context) : array(1, value.dtype(), context)
|
||||
{
|
||||
switch(dtype())
|
||||
switch(dtype_)
|
||||
{
|
||||
// case BOOL_TYPE: detail::copy(context_, data_, (cl_bool)value); break;
|
||||
case CHAR_TYPE: detail::copy(context_, data_, (cl_char)value); break;
|
||||
@@ -287,7 +274,7 @@ scalar::scalar(value_scalar value, cl::Context context) : array(1, value.dtype()
|
||||
// case HALF_TYPE: detail::copy(context_, data_, (cl_float)value); break;
|
||||
case FLOAT_TYPE: detail::copy(context_, data_, (cl_float)value); break;
|
||||
case DOUBLE_TYPE: detail::copy(context_, data_, (cl_double)value); break;
|
||||
default: throw unknown_datatype(dtype());
|
||||
default: throw unknown_datatype(dtype_);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -295,19 +282,19 @@ scalar::scalar(value_scalar value, cl::Context context) : array(1, value.dtype()
|
||||
scalar::scalar(numeric_type dtype, cl::Context context) : array(1, dtype, context)
|
||||
{ }
|
||||
|
||||
scalar::scalar(control const &proxy) : array(proxy){ }
|
||||
scalar::scalar(array_expression const & proxy) : array(proxy){ }
|
||||
|
||||
template<class T>
|
||||
T scalar::cast() const
|
||||
{
|
||||
values_holder v;
|
||||
int_t dtsize = size_of(dtype());
|
||||
int_t dtsize = size_of(dtype_);
|
||||
#define HANDLE_CASE(DTYPE, VAL) \
|
||||
case DTYPE:\
|
||||
cl_ext::get_queue(context_, 0).enqueueReadBuffer(data_, CL_TRUE, infos_.start1*dtsize, dtsize, (void*)&v.VAL);\
|
||||
cl_ext::queues[context_][0].enqueueReadBuffer(data_, CL_TRUE, start_._1*dtsize, dtsize, (void*)&v.VAL);\
|
||||
return v.VAL
|
||||
|
||||
switch(dtype())
|
||||
switch(dtype_)
|
||||
{
|
||||
// HANDLE_CASE(BOOL_TYPE, bool8);
|
||||
HANDLE_CASE(CHAR_TYPE, int8);
|
||||
@@ -321,7 +308,7 @@ case DTYPE:\
|
||||
// HANDLE_CASE(HALF_TYPE, float16);
|
||||
HANDLE_CASE(FLOAT_TYPE, float32);
|
||||
HANDLE_CASE(DOUBLE_TYPE, float64);
|
||||
default: throw unknown_datatype(dtype());
|
||||
default: throw unknown_datatype(dtype_);
|
||||
}
|
||||
#undef HANDLE_CASE
|
||||
|
||||
@@ -329,16 +316,16 @@ case DTYPE:\
|
||||
|
||||
scalar& scalar::operator=(value_scalar const & s)
|
||||
{
|
||||
cl::CommandQueue& queue = cl_ext::get_queue(context_, 0);
|
||||
int_t dtsize = size_of(dtype());
|
||||
cl::CommandQueue& queue = cl_ext::queues[context_][0];
|
||||
int_t dtsize = size_of(dtype_);
|
||||
|
||||
#define HANDLE_CASE(TYPE, CLTYPE) case TYPE:\
|
||||
{\
|
||||
CLTYPE v = s;\
|
||||
queue.enqueueWriteBuffer(data_, CL_TRUE, infos_.start1*dtsize, dtsize, (void*)&v);\
|
||||
queue.enqueueWriteBuffer(data_, CL_TRUE, start_._1*dtsize, dtsize, (void*)&v);\
|
||||
return *this;\
|
||||
}
|
||||
switch(dtype())
|
||||
switch(dtype_)
|
||||
{
|
||||
// HANDLE_CASE(BOOL_TYPE, cl_bool)
|
||||
HANDLE_CASE(CHAR_TYPE, cl_char)
|
||||
@@ -352,7 +339,7 @@ scalar& scalar::operator=(value_scalar const & s)
|
||||
// HANDLE_CASE(HALF_TYPE, cl_half)
|
||||
HANDLE_CASE(FLOAT_TYPE, cl_float)
|
||||
HANDLE_CASE(DOUBLE_TYPE, cl_double)
|
||||
default: throw unknown_datatype(dtype());
|
||||
default: throw unknown_datatype(dtype_);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -707,18 +694,11 @@ namespace detail
|
||||
array reshape(array const & a, int_t size1, int_t size2)
|
||||
{
|
||||
array tmp(a);
|
||||
tmp.infos_.shape1 = size1;
|
||||
tmp.infos_.shape2 = size2;
|
||||
tmp.shape_._1 = size1;
|
||||
tmp.shape_._2 = size2;
|
||||
return tmp;
|
||||
}
|
||||
|
||||
array reshape(array_expression const & a, int_t size1, int_t size2)
|
||||
{
|
||||
array tmp(a);
|
||||
tmp.infos_.shape1 = size1;
|
||||
tmp.infos_.shape2 = size2;
|
||||
return tmp;
|
||||
}
|
||||
|
||||
#define DEFINE_DOT(LTYPE, RTYPE) \
|
||||
array_expression dot(LTYPE const & x, RTYPE const & y)\
|
||||
@@ -804,10 +784,10 @@ void copy(array const & x, void* data, cl::CommandQueue & queue, bool blocking)
|
||||
}
|
||||
|
||||
void copy(void const *data, array &x, bool blocking)
|
||||
{ copy(data, x, cl_ext::get_queue(x.context(), 0), blocking); }
|
||||
{ copy(data, x, cl_ext::queues[x.context()][0], blocking); }
|
||||
|
||||
void copy(array const & x, void* data, bool blocking)
|
||||
{ copy(x, data, cl_ext::get_queue(x.context(), 0), blocking); }
|
||||
{ copy(x, data, cl_ext::queues[x.context()][0], blocking); }
|
||||
|
||||
//std::vector<>
|
||||
template<class T>
|
||||
@@ -832,11 +812,11 @@ void copy(array const & x, std::vector<T> & cx, cl::CommandQueue & queue, bool b
|
||||
|
||||
template<class T>
|
||||
void copy(std::vector<T> const & cx, array & x, bool blocking)
|
||||
{ copy(cx, x, cl_ext::get_queue(x.context(), 0), blocking); }
|
||||
{ copy(cx, x, cl_ext::queues[x.context()][0], blocking); }
|
||||
|
||||
template<class T>
|
||||
void copy(array const & x, std::vector<T> & cx, bool blocking)
|
||||
{ copy(x, cx, cl_ext::get_queue(x.context(), 0), blocking); }
|
||||
{ copy(x, cx, cl_ext::queues[x.context()][0], blocking); }
|
||||
|
||||
#define INSTANTIATE(T) \
|
||||
template void copy<T>(std::vector<T> const &, array &, cl::CommandQueue&, bool);\
|
||||
|
@@ -74,8 +74,8 @@ std::string mapped_object::process(std::string const & in) const
|
||||
{
|
||||
std::string res(in);
|
||||
preprocess(res);
|
||||
for (std::map<std::string,std::string>::const_iterator it = keywords_.begin(); it != keywords_.end(); ++it)
|
||||
tools::find_and_replace(res, it->first, it->second);
|
||||
for (const auto & elem : keywords_)
|
||||
tools::find_and_replace(res, elem.first, elem.second);
|
||||
postprocess(res);
|
||||
return res;
|
||||
}
|
||||
|
@@ -30,44 +30,44 @@ numeric_type base::map_functor::get_numeric_type(atidlas::array_expression const
|
||||
|
||||
/** @brief Binary leaf */
|
||||
template<class T>
|
||||
tools::shared_ptr<mapped_object> base::map_functor::binary_leaf(atidlas::array_expression const * array_expression, int_t root_idx, mapping_type const * mapping) const
|
||||
std::shared_ptr<mapped_object> base::map_functor::binary_leaf(atidlas::array_expression const * array_expression, int_t root_idx, mapping_type const * mapping) const
|
||||
{
|
||||
return tools::shared_ptr<mapped_object>(new T(numeric_type_to_string(array_expression->dtype()), binder_.get(NULL), mapped_object::node_info(mapping, array_expression, root_idx)));
|
||||
return std::shared_ptr<mapped_object>(new T(numeric_type_to_string(array_expression->dtype()), binder_.get(NULL), mapped_object::node_info(mapping, array_expression, root_idx)));
|
||||
}
|
||||
|
||||
/** @brief Scalar mapping */
|
||||
tools::shared_ptr<mapped_object> base::map_functor::create(numeric_type dtype, values_holder) const
|
||||
std::shared_ptr<mapped_object> base::map_functor::create(numeric_type dtype, values_holder) const
|
||||
{
|
||||
std::string strdtype = numeric_type_to_string(dtype);
|
||||
return tools::shared_ptr<mapped_object>(new mapped_host_scalar(strdtype, binder_.get(NULL)));
|
||||
return std::shared_ptr<mapped_object>(new mapped_host_scalar(strdtype, binder_.get(NULL)));
|
||||
}
|
||||
|
||||
/** @brief Vector mapping */
|
||||
tools::shared_ptr<mapped_object> base::map_functor::create(array_infos const & a) const
|
||||
std::shared_ptr<mapped_object> base::map_functor::create(array_infos const & a) const
|
||||
{
|
||||
std::string dtype = numeric_type_to_string(a.dtype);
|
||||
unsigned int id = binder_.get(a.data);
|
||||
//Scalar
|
||||
if(a.shape1==1 && a.shape2==1)
|
||||
return tools::shared_ptr<mapped_object>(new mapped_array(dtype, id, 's'));
|
||||
return std::shared_ptr<mapped_object>(new mapped_array(dtype, id, 's'));
|
||||
//Column vector
|
||||
else if(a.shape1>1 && a.shape2==1)
|
||||
return tools::shared_ptr<mapped_object>(new mapped_array(dtype, id, 'c'));
|
||||
return std::shared_ptr<mapped_object>(new mapped_array(dtype, id, 'c'));
|
||||
//Row vector
|
||||
else if(a.shape1==1 && a.shape2>1)
|
||||
return tools::shared_ptr<mapped_object>(new mapped_array(dtype, id, 'r'));
|
||||
return std::shared_ptr<mapped_object>(new mapped_array(dtype, id, 'r'));
|
||||
//Matrix
|
||||
else
|
||||
return tools::shared_ptr<mapped_object>(new mapped_array(dtype, id, 'm'));
|
||||
return std::shared_ptr<mapped_object>(new mapped_array(dtype, id, 'm'));
|
||||
}
|
||||
|
||||
tools::shared_ptr<mapped_object> base::map_functor::create(repeat_infos const &) const
|
||||
std::shared_ptr<mapped_object> base::map_functor::create(repeat_infos const &) const
|
||||
{
|
||||
//TODO: Make it less specific!
|
||||
return tools::shared_ptr<mapped_object>(new mapped_tuple("int",binder_.get(NULL),4));
|
||||
return std::shared_ptr<mapped_object>(new mapped_tuple("int",binder_.get(NULL),4));
|
||||
}
|
||||
|
||||
tools::shared_ptr<mapped_object> base::map_functor::create(lhs_rhs_element const & lhs_rhs) const
|
||||
std::shared_ptr<mapped_object> base::map_functor::create(lhs_rhs_element const & lhs_rhs) const
|
||||
{
|
||||
switch(lhs_rhs.type_family)
|
||||
{
|
||||
@@ -111,7 +111,7 @@ void base::map_functor::operator()(atidlas::array_expression const & array_expre
|
||||
else if (root_node.op.type == OPERATOR_OUTER_PROD_TYPE)
|
||||
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_outer>(&array_expression, root_idx, &mapping_)));
|
||||
else if (detail::is_cast(root_node.op))
|
||||
mapping_.insert(mapping_type::value_type(key, tools::shared_ptr<mapped_object>(new mapped_cast(root_node.op.type, binder_.get(NULL)))));
|
||||
mapping_.insert(mapping_type::value_type(key, std::shared_ptr<mapped_object>(new mapped_cast(root_node.op.type, binder_.get(NULL)))));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -219,8 +219,8 @@ void base::compute_index_reduction(kernel_generation_stream & os, std::string ac
|
||||
void base::process_all(std::string const & type_key, std::string const & str,
|
||||
kernel_generation_stream & stream, std::vector<mapping_type> const & mappings)
|
||||
{
|
||||
for (std::vector<mapping_type>::const_iterator mit = mappings.begin(); mit != mappings.end(); ++mit)
|
||||
for (mapping_type::const_iterator mmit = mit->begin(); mmit != mit->end(); ++mmit)
|
||||
for (const auto & mapping : mappings)
|
||||
for (mapping_type::const_iterator mmit = mapping.begin(); mmit != mapping.end(); ++mmit)
|
||||
if (mmit->second->type_key()==type_key)
|
||||
stream << mmit->second->process(str) << std::endl;
|
||||
}
|
||||
@@ -230,9 +230,9 @@ void base::base::process_all_at(std::string const & type_key, std::string const
|
||||
kernel_generation_stream & stream, std::vector<mapping_type> const & mappings,
|
||||
size_t root_idx, leaf_t leaf)
|
||||
{
|
||||
for (std::vector<mapping_type>::const_iterator mit = mappings.begin(); mit != mappings.end(); ++mit)
|
||||
for (const auto & mapping : mappings)
|
||||
{
|
||||
mapped_object * obj = mit->at(mapping_key(root_idx, leaf)).get();
|
||||
mapped_object * obj = mapping.at(mapping_key(root_idx, leaf)).get();
|
||||
if (obj->type_key()==type_key)
|
||||
stream << obj->process(str) << std::endl;
|
||||
}
|
||||
@@ -280,9 +280,9 @@ std::string base::generate_arguments(std::string const & data_type, std::vector<
|
||||
|
||||
void base::set_arguments(expressions_tuple const & expressions, cl::Kernel & kernel, unsigned int & current_arg)
|
||||
{
|
||||
tools::shared_ptr<symbolic_binder> binder = make_binder();
|
||||
for (expressions_tuple::data_type::const_iterator itt = expressions.data().begin(); itt != expressions.data().end(); ++itt)
|
||||
traverse(**itt, (*itt)->root(), set_arguments_functor(*binder, current_arg, kernel), true);
|
||||
std::shared_ptr<symbolic_binder> binder = make_binder();
|
||||
for (const auto & elem : expressions.data())
|
||||
traverse(*elem, (elem)->root(), set_arguments_functor(*binder, current_arg, kernel), true);
|
||||
}
|
||||
|
||||
void base::fill_kernel_name(char * ptr, unsigned int label, const char * suffix)
|
||||
@@ -372,8 +372,8 @@ bool base::is_strided(array_expression::node const & node)
|
||||
|
||||
bool base::requires_fallback(expressions_tuple const & expressions)
|
||||
{
|
||||
for (expressions_tuple::data_type::const_iterator it = expressions.data().begin(); it != expressions.data().end(); ++it)
|
||||
for(array_expression::container_type::const_iterator itt = (*it)->tree().begin(); itt != (*it)->tree().end() ; ++itt)
|
||||
for (const auto & elem : expressions.data())
|
||||
for(array_expression::container_type::const_iterator itt = (elem)->tree().begin(); itt != (elem)->tree().end() ; ++itt)
|
||||
if( (itt->lhs.subtype==DENSE_ARRAY_TYPE && (std::max(itt->lhs.array.stride1, itt->lhs.array.stride2)>1 || std::max(itt->lhs.array.start1,itt->lhs.array.start2)>0))
|
||||
|| (itt->rhs.subtype==DENSE_ARRAY_TYPE && (std::max(itt->rhs.array.stride1, itt->rhs.array.stride2)>1 || std::max(itt->rhs.array.start1,itt->rhs.array.start2)>0)))
|
||||
return true;
|
||||
@@ -478,12 +478,12 @@ unsigned int base::align(unsigned int to_round, unsigned int base)
|
||||
return (to_round + base - 1)/base * base;
|
||||
}
|
||||
|
||||
tools::shared_ptr<symbolic_binder> base::make_binder()
|
||||
std::shared_ptr<symbolic_binder> base::make_binder()
|
||||
{
|
||||
if (binding_policy_==BIND_TO_HANDLE)
|
||||
return tools::shared_ptr<symbolic_binder>(new bind_to_handle());
|
||||
return std::shared_ptr<symbolic_binder>(new bind_to_handle());
|
||||
else
|
||||
return tools::shared_ptr<symbolic_binder>(new bind_all_unique());
|
||||
return std::shared_ptr<symbolic_binder>(new bind_all_unique());
|
||||
}
|
||||
|
||||
|
||||
@@ -509,7 +509,7 @@ std::vector<std::string> base::generate(unsigned int label, expressions_tuple co
|
||||
|
||||
//Create mapping
|
||||
std::vector<mapping_type> mappings(expressions.data().size());
|
||||
tools::shared_ptr<symbolic_binder> binder = make_binder();
|
||||
std::shared_ptr<symbolic_binder> binder = make_binder();
|
||||
for (mit = mappings.begin(), sit = expressions.data().begin(); sit != expressions.data().end(); ++sit, ++mit)
|
||||
traverse(**sit, (*sit)->root(), map_functor(*binder,*mit), true);
|
||||
|
||||
@@ -533,8 +533,8 @@ int_t base_impl<TType, PType>::local_size_1() const
|
||||
{ return p_.local_size_1; }
|
||||
|
||||
template<class TType, class PType>
|
||||
tools::shared_ptr<base> base_impl<TType, PType>::clone() const
|
||||
{ return tools::shared_ptr<base>(new TType(*dynamic_cast<TType const *>(this))); }
|
||||
std::shared_ptr<base> base_impl<TType, PType>::clone() const
|
||||
{ return std::shared_ptr<base>(new TType(*dynamic_cast<TType const *>(this))); }
|
||||
|
||||
template<class TType, class PType>
|
||||
int base_impl<TType, PType>::check_invalid(expressions_tuple const & expressions, cl::Device const & device) const
|
||||
|
@@ -1,6 +1,6 @@
|
||||
#include "atidlas/array.h"
|
||||
#include "atidlas/backend/templates/mproduct.h"
|
||||
#include "atidlas/cl/lazy_compiler.h"
|
||||
#include "atidlas/cl_ext/lazy_compiler.h"
|
||||
#include "atidlas/tools/make_vector.hpp"
|
||||
#include "atidlas/tools/to_string.hpp"
|
||||
|
||||
@@ -587,7 +587,7 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width
|
||||
kernel.setArg(current_arg++, cl_uint(N));
|
||||
kernel.setArg(current_arg++, cl_uint(K));
|
||||
|
||||
tools::shared_ptr<symbolic_binder> binder = make_binder();
|
||||
std::shared_ptr<symbolic_binder> binder = make_binder();
|
||||
set_arguments_functor fun(*binder, current_arg, kernel);
|
||||
fun.set_arguments(C);
|
||||
fun.set_arguments(alpha.dtype(), alpha.values());
|
||||
|
@@ -50,8 +50,8 @@ std::string mreduction::generate_impl(unsigned int label, expressions_tuple cons
|
||||
("array2", "#pointer += #start1 + #start2*#ld; "
|
||||
"#ld *= #nldstride; "), expressions, mappings);
|
||||
|
||||
for (std::vector<mapped_mreduction*>::const_iterator it = exprs.begin(); it != exprs.end(); ++it)
|
||||
stream << (*it)->process("__local #scalartype #name_buf[" + to_string(lsize0*lsize1) + "];") << std::endl;
|
||||
for (const auto & expr : exprs)
|
||||
stream << (expr)->process("__local #scalartype #name_buf[" + to_string(lsize0*lsize1) + "];") << std::endl;
|
||||
|
||||
stream << "unsigned int lid0 = get_local_id(0);" << std::endl;
|
||||
stream << "unsigned int lid1 = get_local_id(1);" << std::endl;
|
||||
@@ -59,8 +59,8 @@ std::string mreduction::generate_impl(unsigned int label, expressions_tuple cons
|
||||
stream << "for(unsigned int r = get_global_id(0); r < upper_bound_0; r += get_global_size(0)){" << std::endl;
|
||||
stream.inc_tab();
|
||||
|
||||
for (std::vector<mapped_mreduction*>::const_iterator it = exprs.begin(); it != exprs.end(); ++it)
|
||||
stream << (*it)->process("#scalartype #name_acc = " + neutral_element((*it)->root_op()) + ";") << std::endl;
|
||||
for (const auto & expr : exprs)
|
||||
stream << (expr)->process("#scalartype #name_acc = " + neutral_element((expr)->root_op()) + ";") << std::endl;
|
||||
|
||||
stream << "if (r < M)" << std::endl;
|
||||
stream << "{" << std::endl;
|
||||
@@ -75,7 +75,7 @@ std::string mreduction::generate_impl(unsigned int label, expressions_tuple cons
|
||||
{
|
||||
std::string data_type = append_width("#scalartype",simd_width);
|
||||
|
||||
for (std::vector<mapped_mreduction*>::const_iterator it = exprs.begin(); it != exprs.end(); ++it)
|
||||
for (const auto & elem : exprs)
|
||||
{
|
||||
std::map<std::string, std::string> accessors;
|
||||
if(reduction==REDUCE_COLUMNS)
|
||||
@@ -88,7 +88,7 @@ std::string mreduction::generate_impl(unsigned int label, expressions_tuple cons
|
||||
accessors["array2"] = "#scalartype #namereg = #pointer[r*#stride1 + c*#ld];";
|
||||
accessors["repeat"] = "#scalartype #namereg = $VALUE{(r%#tuplearg0)*#stride, (c%#tuplearg1)*#stride};";
|
||||
}
|
||||
(*it)->process_recursive(stream, PARENT_NODE_TYPE, accessors);
|
||||
(elem)->process_recursive(stream, PARENT_NODE_TYPE, accessors);
|
||||
}
|
||||
|
||||
|
||||
@@ -101,7 +101,7 @@ std::string mreduction::generate_impl(unsigned int label, expressions_tuple cons
|
||||
str[a] = append_simd_suffix("#namereg.s",a);
|
||||
|
||||
|
||||
for (unsigned int k = 0; k < exprs.size(); ++k)
|
||||
for (auto & elem : exprs)
|
||||
{
|
||||
for (unsigned int a = 0; a < simd_width; ++a)
|
||||
{
|
||||
@@ -109,11 +109,11 @@ std::string mreduction::generate_impl(unsigned int label, expressions_tuple cons
|
||||
accessors["array2"] = str[a];
|
||||
accessors["repeat"] = "#namereg";
|
||||
accessors["array0"] = "#namereg";
|
||||
std::string value = exprs[k]->evaluate_recursive(LHS_NODE_TYPE, accessors);
|
||||
if (exprs[k]->is_index_reduction())
|
||||
compute_index_reduction(stream, exprs[k]->process("#name_acc"), "c*"+to_string(simd_width) + to_string(a), exprs[k]->process("#name_acc_value"), value,exprs[k]->root_op());
|
||||
std::string value = elem->evaluate_recursive(LHS_NODE_TYPE, accessors);
|
||||
if (elem->is_index_reduction())
|
||||
compute_index_reduction(stream, elem->process("#name_acc"), "c*"+to_string(simd_width) + to_string(a), elem->process("#name_acc_value"), value,elem->root_op());
|
||||
else
|
||||
compute_reduction(stream, exprs[k]->process("#name_acc"), value,exprs[k]->root_op());
|
||||
compute_reduction(stream, elem->process("#name_acc"), value,elem->root_op());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -126,8 +126,8 @@ std::string mreduction::generate_impl(unsigned int label, expressions_tuple cons
|
||||
stream.dec_tab();
|
||||
stream << "}" << std::endl;
|
||||
|
||||
for (unsigned int k = 0; k < exprs.size(); ++k)
|
||||
stream << exprs[k]->process("#name_buf[lid0*" + lsize1str + "+ lid1] = #name_acc;") << std::endl;
|
||||
for (auto & expr : exprs)
|
||||
stream << expr->process("#name_buf[lid0*" + lsize1str + "+ lid1] = #name_acc;") << std::endl;
|
||||
|
||||
stream << "#pragma unroll" << std::endl;
|
||||
stream << "for(unsigned int stride = " << p_.local_size_1/2 << "; stride >0; stride /=2)" << std::endl;
|
||||
@@ -139,13 +139,13 @@ std::string mreduction::generate_impl(unsigned int label, expressions_tuple cons
|
||||
stream << "{" << std::endl;
|
||||
stream.inc_tab();
|
||||
|
||||
for (unsigned int k = 0; k < exprs.size(); k++)
|
||||
if (exprs[k]->is_index_reduction())
|
||||
compute_index_reduction(stream, exprs[k]->process("#name_buf[lid0*" + lsize1str + " + lid1]"), exprs[k]->process("#name_buf[lid0*" + lsize1str + " + lid1 + stride]")
|
||||
, exprs[k]->process("#name_buf_value[lid0*" + lsize1str + " + lid1]"), exprs[k]->process("#name_buf_value[lid0*" + lsize1str + " + lid1 + stride]"),
|
||||
exprs[k]->root_op());
|
||||
for (auto & expr : exprs)
|
||||
if (expr->is_index_reduction())
|
||||
compute_index_reduction(stream, expr->process("#name_buf[lid0*" + lsize1str + " + lid1]"), expr->process("#name_buf[lid0*" + lsize1str + " + lid1 + stride]")
|
||||
, expr->process("#name_buf_value[lid0*" + lsize1str + " + lid1]"), expr->process("#name_buf_value[lid0*" + lsize1str + " + lid1 + stride]"),
|
||||
expr->root_op());
|
||||
else
|
||||
compute_reduction(stream,exprs[k]->process("#name_buf[lid0*" + lsize1str + " + lid1]"), exprs[k]->process("#name_buf[lid0*" + lsize1str + " + lid1 + stride]"), exprs[k]->root_op());
|
||||
compute_reduction(stream,expr->process("#name_buf[lid0*" + lsize1str + " + lid1]"), expr->process("#name_buf[lid0*" + lsize1str + " + lid1 + stride]"), expr->root_op());
|
||||
|
||||
stream.dec_tab();
|
||||
stream << "}" << std::endl;
|
||||
@@ -183,8 +183,8 @@ std::vector<std::string> mreduction::generate_impl(unsigned int label, expressio
|
||||
{
|
||||
array_expression const & first_expression = *expressions.data().front();
|
||||
std::vector<size_t> idx = filter_nodes(&is_reduction, first_expression, false);
|
||||
for (unsigned int j = 0; j < idx.size(); ++j)
|
||||
exprs.push_back((mapped_mreduction*)(mit->at(mapping_key(idx[j], PARENT_NODE_TYPE)).get()));
|
||||
for (auto & elem : idx)
|
||||
exprs.push_back((mapped_mreduction*)(mit->at(mapping_key(elem, PARENT_NODE_TYPE)).get()));
|
||||
}
|
||||
|
||||
std::vector<std::string> res;
|
||||
|
@@ -16,9 +16,9 @@ reduction_parameters::reduction_parameters(unsigned int _simd_width,
|
||||
unsigned int reduction::lmem_usage(expressions_tuple const & expressions) const
|
||||
{
|
||||
unsigned int res = 0;
|
||||
for(expressions_tuple::data_type::const_iterator it = expressions.data().begin() ; it != expressions.data().end() ; ++it)
|
||||
for(const auto & elem : expressions.data())
|
||||
{
|
||||
numeric_type numeric_t= lhs_most((*it)->tree(), (*it)->root()).lhs.dtype;
|
||||
numeric_type numeric_t= lhs_most((elem)->tree(), (elem)->root()).lhs.dtype;
|
||||
res += p_.local_size_0*size_of(numeric_t);
|
||||
}
|
||||
return res;
|
||||
@@ -43,13 +43,13 @@ inline void reduction::reduce_1d_local_memory(kernel_generation_stream & stream,
|
||||
stream << "{" << std::endl;
|
||||
stream.inc_tab();
|
||||
|
||||
for (unsigned int k = 0; k < exprs.size(); k++)
|
||||
if (exprs[k]->is_index_reduction())
|
||||
compute_index_reduction(stream, exprs[k]->process(buf_str+"[lid]"), exprs[k]->process(buf_str+"[lid+stride]")
|
||||
, exprs[k]->process(buf_value_str+"[lid]"), exprs[k]->process(buf_value_str+"[lid+stride]"),
|
||||
exprs[k]->root_op());
|
||||
for (auto & expr : exprs)
|
||||
if (expr->is_index_reduction())
|
||||
compute_index_reduction(stream, expr->process(buf_str+"[lid]"), expr->process(buf_str+"[lid+stride]")
|
||||
, expr->process(buf_value_str+"[lid]"), expr->process(buf_value_str+"[lid+stride]"),
|
||||
expr->root_op());
|
||||
else
|
||||
compute_reduction(stream, exprs[k]->process(buf_str+"[lid]"), exprs[k]->process(buf_str+"[lid+stride]"), exprs[k]->root_op());
|
||||
compute_reduction(stream, expr->process(buf_str+"[lid]"), expr->process(buf_str+"[lid+stride]"), expr->root_op());
|
||||
stream.dec_tab();
|
||||
stream << "}" << std::endl;
|
||||
stream.dec_tab();
|
||||
@@ -61,8 +61,8 @@ std::string reduction::generate_impl(unsigned int label, const char * type, expr
|
||||
kernel_generation_stream stream;
|
||||
|
||||
std::vector<mapped_scalar_reduction*> exprs;
|
||||
for (std::vector<mapping_type>::const_iterator it = mappings.begin(); it != mappings.end(); ++it)
|
||||
for (mapping_type::const_iterator iit = it->begin(); iit != it->end(); ++iit)
|
||||
for (const auto & mapping : mappings)
|
||||
for (mapping_type::const_iterator iit = mapping.begin(); iit != mapping.end(); ++iit)
|
||||
if (mapped_scalar_reduction * p = dynamic_cast<mapped_scalar_reduction*>(iit->second.get()))
|
||||
exprs.push_back(p);
|
||||
std::size_t N = exprs.size();
|
||||
@@ -122,8 +122,8 @@ std::string reduction::generate_impl(unsigned int label, const char * type, expr
|
||||
{
|
||||
std::string i = (simd_width==1)?"i*#stride":"i";
|
||||
//Fetch vector entry
|
||||
for (std::vector<mapped_scalar_reduction*>::const_iterator it = exprs.begin(); it != exprs.end(); ++it)
|
||||
(*it)->process_recursive(stream, PARENT_NODE_TYPE, tools::make_map<std::map<std::string, std::string> >("array1", append_width("#scalartype",simd_width) + " #namereg = " + vload(simd_width,i,"#pointer")+";")
|
||||
for (const auto & elem : exprs)
|
||||
(elem)->process_recursive(stream, PARENT_NODE_TYPE, tools::make_map<std::map<std::string, std::string> >("array1", append_width("#scalartype",simd_width) + " #namereg = " + vload(simd_width,i,"#pointer")+";")
|
||||
("matrix_row", "#scalartype #namereg = #pointer[$OFFSET{#row*#stride, i*#stride2}];")
|
||||
("matrix_column", "#scalartype #namereg = #pointer[$OFFSET{i*#stride,#column*#stride2}];")
|
||||
("matrix_diag", "#scalartype #namereg = #pointer[#diag_offset<0?$OFFSET{(i - #diag_offset)*#stride, i*#stride2}:$OFFSET{i*#stride, (i + #diag_offset)*#stride2}];"));
|
||||
@@ -137,7 +137,7 @@ std::string reduction::generate_impl(unsigned int label, const char * type, expr
|
||||
for (unsigned int a = 0; a < simd_width; ++a)
|
||||
str[a] = append_simd_suffix("#namereg.s", a);
|
||||
|
||||
for (unsigned int k = 0; k < exprs.size(); ++k)
|
||||
for (auto & elem : exprs)
|
||||
{
|
||||
for (unsigned int a = 0; a < simd_width; ++a)
|
||||
{
|
||||
@@ -147,12 +147,12 @@ std::string reduction::generate_impl(unsigned int label, const char * type, expr
|
||||
accessors["matrix_column"] = str[a];
|
||||
accessors["matrix_diag"] = str[a];
|
||||
accessors["array0"] = "#namereg";
|
||||
std::string value = exprs[k]->evaluate_recursive(LHS_NODE_TYPE, accessors);
|
||||
if (exprs[k]->is_index_reduction())
|
||||
compute_index_reduction(stream, exprs[k]->process("#name_acc"), "i*" + tools::to_string(simd_width) + "+"
|
||||
+ tools::to_string(a), exprs[k]->process("#name_acc_value"), value,exprs[k]->root_op());
|
||||
std::string value = elem->evaluate_recursive(LHS_NODE_TYPE, accessors);
|
||||
if (elem->is_index_reduction())
|
||||
compute_index_reduction(stream, elem->process("#name_acc"), "i*" + tools::to_string(simd_width) + "+"
|
||||
+ tools::to_string(a), elem->process("#name_acc_value"), value,elem->root_op());
|
||||
else
|
||||
compute_reduction(stream, exprs[k]->process("#name_acc"), value,exprs[k]->root_op());
|
||||
compute_reduction(stream, elem->process("#name_acc"), value,elem->root_op());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -286,11 +286,11 @@ void reduction::enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compi
|
||||
//Preprocessing
|
||||
int_t size = input_sizes(expressions)[0];
|
||||
std::vector<array_expression::node const *> reductions;
|
||||
for (expressions_tuple::data_type::const_iterator it = expressions.data().begin(); it != expressions.data().end(); ++it)
|
||||
for (const auto & elem : expressions.data())
|
||||
{
|
||||
std::vector<size_t> reductions_idx = filter_nodes(&is_reduction, **it, false);
|
||||
for (std::vector<size_t>::iterator itt = reductions_idx.begin(); itt != reductions_idx.end(); ++itt)
|
||||
reductions.push_back(&(*it)->tree()[*itt]);
|
||||
std::vector<size_t> reductions_idx = filter_nodes(&is_reduction, *elem, false);
|
||||
for (auto & reductions_idx_itt : reductions_idx)
|
||||
reductions.push_back(&(elem)->tree()[reductions_idx_itt]);
|
||||
}
|
||||
|
||||
//Kernel
|
||||
@@ -314,10 +314,10 @@ void reduction::enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compi
|
||||
cl::Context context = expressions.context();
|
||||
array_expression const & s = *(expressions.data().front());
|
||||
unsigned int dtype_size = size_of(lhs_most(s.tree(), s.root()).lhs.dtype);
|
||||
for (unsigned int k = 0; k < 2; k++)
|
||||
for (auto & kernel : kernels)
|
||||
{
|
||||
unsigned int n_arg = 0;
|
||||
kernels[k].setArg(n_arg++, cl_uint(size));
|
||||
kernel.setArg(n_arg++, cl_uint(size));
|
||||
|
||||
//Temporary buffers
|
||||
unsigned int i = 0;
|
||||
@@ -328,15 +328,15 @@ void reduction::enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compi
|
||||
{
|
||||
if (tmpidx_.size() <= j)
|
||||
tmpidx_.push_back(cl::Buffer(context, CL_MEM_READ_WRITE, p_.num_groups*4));
|
||||
kernels[k].setArg(n_arg++, tmpidx_[j]);
|
||||
kernel.setArg(n_arg++, tmpidx_[j]);
|
||||
j++;
|
||||
}
|
||||
if (tmp_.size() <= i)
|
||||
tmp_.push_back(cl::Buffer(context, CL_MEM_READ_WRITE, p_.num_groups*dtype_size));
|
||||
kernels[k].setArg(n_arg++, tmp_[i]);
|
||||
kernel.setArg(n_arg++, tmp_[i]);
|
||||
i++;
|
||||
}
|
||||
set_arguments(expressions, kernels[k], n_arg);
|
||||
set_arguments(expressions, kernel, n_arg);
|
||||
}
|
||||
|
||||
for (unsigned int k = 0; k < 2; k++)
|
||||
|
@@ -1,5 +1,5 @@
|
||||
#include "atidlas/backend/templates/vaxpy.h"
|
||||
#include "atidlas/cl/queues.h"
|
||||
#include "atidlas/cl_ext/backend.h"
|
||||
#include "atidlas/tools/make_map.hpp"
|
||||
#include "atidlas/tools/make_vector.hpp"
|
||||
#include "atidlas/tools/to_string.hpp"
|
||||
@@ -121,8 +121,8 @@ void vaxpy::enqueue(cl::CommandQueue & queue, std::vector<cl_ext::lazy_compiler>
|
||||
bool fallback = p_.simd_width > 1 && (requires_fallback(expressions) || (size%p_.simd_width>0));
|
||||
|
||||
cl::Program const & program = programs[fallback?0:1].program();
|
||||
cl_ext::kernels_t::key_type key(program(), label);
|
||||
cl_ext::kernels_t::iterator it = cl_ext::kernels.find(key);
|
||||
cl_ext::kernels_type::key_type key(program(), label);
|
||||
cl_ext::kernels_type::iterator it = cl_ext::kernels.find(key);
|
||||
if(it==cl_ext::kernels.end())
|
||||
it = cl_ext::kernels.insert(std::make_pair(key, cl::Kernel(program, fallback?kfb:kopt))).first;
|
||||
cl::Kernel & kernel = it->second;
|
||||
|
@@ -1,63 +0,0 @@
|
||||
#include "atidlas/cl/queues.h"
|
||||
#include <assert.h>
|
||||
#include <stdexcept>
|
||||
|
||||
namespace atidlas
|
||||
{
|
||||
|
||||
namespace cl_ext
|
||||
{
|
||||
|
||||
cl_command_queue_properties queue_properties = 0;
|
||||
unsigned int default_context_idx = 0;
|
||||
queues_t queues;
|
||||
kernels_t kernels;
|
||||
|
||||
void synchronize(cl::Context const & context)
|
||||
{
|
||||
std::vector<cl::CommandQueue> & q = get_queues(context);
|
||||
for(std::vector<cl::CommandQueue>::iterator it = q.begin() ; it != q.end() ; ++it)
|
||||
it->finish();
|
||||
}
|
||||
|
||||
void init_queues()
|
||||
{
|
||||
std::vector<cl::Platform> platforms;
|
||||
cl::Platform::get(&platforms);
|
||||
for(std::vector<cl::Platform>::iterator it = platforms.begin() ; it != platforms.end() ; ++it)
|
||||
{
|
||||
std::vector<cl::Device> devices;
|
||||
it->getDevices(CL_DEVICE_TYPE_ALL, &devices);
|
||||
for(std::vector<cl::Device>::iterator itt = devices.begin() ; itt != devices.end() ; ++itt)
|
||||
queues.push_back(std::make_pair(cl::Context(std::vector<cl::Device>(1, *itt)), std::vector<cl::CommandQueue>()));
|
||||
}
|
||||
for(queues_t::iterator it = queues.begin() ; it != queues.end() ; ++it)
|
||||
it->second.push_back(cl::CommandQueue(it->first, it->first.getInfo<CL_CONTEXT_DEVICES>()[0], queue_properties));
|
||||
}
|
||||
|
||||
cl::Context default_context()
|
||||
{
|
||||
if(queues.empty())
|
||||
init_queues();
|
||||
return queues.begin()->first;
|
||||
}
|
||||
|
||||
std::vector<cl::CommandQueue> & get_queues(cl::Context const & ctx)
|
||||
{
|
||||
if(queues.empty())
|
||||
init_queues();
|
||||
for(queues_t::iterator it = queues.begin() ; it != queues.end() ; ++it)
|
||||
if(it->first()==ctx()) return it->second;
|
||||
throw std::out_of_range("No such context registered in the backend. Please run atidlas::cl_ext:;register(context, queues)");
|
||||
}
|
||||
|
||||
cl::CommandQueue & get_queue(cl::Context const & ctx, std::size_t idx)
|
||||
{
|
||||
return get_queues(ctx)[idx];
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
78
lib/cl_ext/backend.cpp
Normal file
78
lib/cl_ext/backend.cpp
Normal file
@@ -0,0 +1,78 @@
|
||||
#include "atidlas/cl_ext/backend.h"
|
||||
#include <assert.h>
|
||||
#include <stdexcept>
|
||||
|
||||
namespace atidlas
|
||||
{
|
||||
|
||||
namespace cl_ext
|
||||
{
|
||||
|
||||
void synchronize(cl::Context const & context)
|
||||
{
|
||||
for(std::vector<cl::CommandQueue>::const_iterator it = queues[context].begin() ; it != queues[context].end() ; ++it)
|
||||
it->finish();
|
||||
}
|
||||
|
||||
void queues_type::append(const cl::Context & context)
|
||||
{
|
||||
data_.push_back(std::make_pair(context, std::vector<cl::CommandQueue>()));
|
||||
std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
|
||||
for(auto & device : devices)
|
||||
data_.back().second.push_back(cl::CommandQueue(context, device, queue_properties));
|
||||
}
|
||||
|
||||
void queues_type::init()
|
||||
{
|
||||
std::vector<cl::Platform> platforms;
|
||||
cl::Platform::get(&platforms);
|
||||
for(auto & platform : platforms)
|
||||
{
|
||||
std::vector<cl::Device> devices;
|
||||
platform.getDevices(CL_DEVICE_TYPE_ALL, &devices);
|
||||
for(auto & device : devices)
|
||||
data_.push_back(std::make_pair(cl::Context(std::vector<cl::Device>(1, device)), std::vector<cl::CommandQueue>()));
|
||||
}
|
||||
for(auto & elem : data_)
|
||||
elem.second.push_back(cl::CommandQueue(elem.first, elem.first.getInfo<CL_CONTEXT_DEVICES>()[0], queue_properties));
|
||||
}
|
||||
|
||||
std::vector<cl::CommandQueue> & queues_type::operator [](cl::Context const & ctx)
|
||||
{
|
||||
if(data_.empty())
|
||||
init();
|
||||
for(auto & elem : data_)
|
||||
if(elem.first()==ctx()) return elem.second;
|
||||
append(ctx);
|
||||
return data_.back().second;
|
||||
}
|
||||
|
||||
cl::Context queues_type::default_context()
|
||||
{
|
||||
if(data_.empty())
|
||||
init();
|
||||
data_type::iterator it = data_.begin();
|
||||
std::advance(it, default_context_idx);
|
||||
return it->first;
|
||||
}
|
||||
|
||||
cl::Context default_context()
|
||||
{
|
||||
return queues.default_context();
|
||||
}
|
||||
|
||||
queues_type::data_type const & queues_type::data()
|
||||
{
|
||||
if(data_.empty())
|
||||
init();
|
||||
return data_;
|
||||
}
|
||||
|
||||
cl_command_queue_properties queue_properties = 0;
|
||||
unsigned int default_context_idx = 0;
|
||||
queues_type queues;
|
||||
kernels_type kernels;
|
||||
|
||||
}
|
||||
|
||||
}
|
@@ -1,4 +1,4 @@
|
||||
#include "atidlas/cl/lazy_compiler.h"
|
||||
#include "atidlas/cl_ext/lazy_compiler.h"
|
||||
|
||||
namespace atidlas
|
||||
{
|
@@ -3,7 +3,7 @@
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
|
||||
#include "atidlas/cl/program_map.h"
|
||||
#include "atidlas/cl_ext/program_map.h"
|
||||
#include "sha1.hpp"
|
||||
|
||||
namespace atidlas
|
@@ -213,8 +213,8 @@ inline std::string sha1(std::string const & src)
|
||||
sha1.getDigest(hash);
|
||||
|
||||
std::ostringstream oss;
|
||||
for (int i = 0; i < 5; ++i)
|
||||
oss << std::hex << std::setfill('0') << std::setw(8) << hash[i];
|
||||
for (auto & elem : hash)
|
||||
oss << std::hex << std::setfill('0') << std::setw(8) << elem;
|
||||
|
||||
return oss.str();
|
||||
}
|
@@ -1,6 +1,7 @@
|
||||
#include <set>
|
||||
#include <fstream>
|
||||
#include <stdexcept>
|
||||
#include <algorithm>
|
||||
|
||||
#include "rapidjson/document.h"
|
||||
#include "atidlas/backend/parse.h"
|
||||
@@ -39,8 +40,8 @@ void model::fill_program_name(char* program_name, expressions_tuple const & expr
|
||||
binder = new bind_to_handle();
|
||||
else
|
||||
binder = new bind_all_unique();
|
||||
for (expressions_tuple::data_type::const_iterator it = expressions.data().begin(); it != expressions.data().end(); ++it)
|
||||
traverse(**it, (*it)->root(), array_expression_representation_functor(*binder, program_name),true);
|
||||
for (const auto & elem : expressions.data())
|
||||
traverse(*elem, elem->root(), array_expression_representation_functor(*binder, program_name),true);
|
||||
*program_name='\0';
|
||||
delete binder;
|
||||
}
|
||||
@@ -80,11 +81,11 @@ std::vector<cl_ext::lazy_compiler>& model::init(expressions_tuple const & expres
|
||||
return to_init;
|
||||
}
|
||||
|
||||
model::model(predictors::random_forest const & predictor, std::vector< tools::shared_ptr<base> > const & templates, cl::CommandQueue & queue) :
|
||||
model::model(predictors::random_forest const & predictor, std::vector< std::shared_ptr<base> > const & templates, cl::CommandQueue & queue) :
|
||||
templates_(templates), predictor_(new predictors::random_forest(predictor)), queue_(queue)
|
||||
{}
|
||||
|
||||
model::model(std::vector< tools::shared_ptr<base> > const & templates, cl::CommandQueue & queue) : templates_(templates), queue_(queue)
|
||||
model::model(std::vector< std::shared_ptr<base> > const & templates, cl::CommandQueue & queue) : templates_(templates), queue_(queue)
|
||||
{}
|
||||
|
||||
model::model(base const & tp, cl::CommandQueue & queue) : templates_(1,tp.clone()), queue_(queue)
|
||||
@@ -166,27 +167,27 @@ namespace detail
|
||||
throw std::invalid_argument("Invalid datatype: " + name);
|
||||
}
|
||||
|
||||
static tools::shared_ptr<base> create(std::string const & template_name, std::vector<int> const & a)
|
||||
static std::shared_ptr<base> create(std::string const & template_name, std::vector<int> const & a)
|
||||
{
|
||||
fetching_policy_type fetch[] = {FETCH_FROM_LOCAL, FETCH_FROM_GLOBAL_STRIDED, FETCH_FROM_GLOBAL_CONTIGUOUS};
|
||||
if(template_name=="vaxpy")
|
||||
return tools::shared_ptr<base>(new vaxpy(a[0], a[1], a[2], fetch[a[3]]));
|
||||
return std::shared_ptr<base>(new vaxpy(a[0], a[1], a[2], fetch[a[3]]));
|
||||
else if(template_name=="dot")
|
||||
return tools::shared_ptr<base>(new reduction(a[0], a[1], a[2], fetch[a[3]]));
|
||||
return std::shared_ptr<base>(new reduction(a[0], a[1], a[2], fetch[a[3]]));
|
||||
else if(template_name=="maxpy")
|
||||
return tools::shared_ptr<base>(new maxpy(a[0], a[1], a[2], a[3], a[4], fetch[a[5]]));
|
||||
return std::shared_ptr<base>(new maxpy(a[0], a[1], a[2], a[3], a[4], fetch[a[5]]));
|
||||
else if(template_name.find("gemvN")!=std::string::npos)
|
||||
return tools::shared_ptr<base>(new mreduction_rows(a[0], a[1], a[2], a[3], fetch[a[4]]));
|
||||
return std::shared_ptr<base>(new mreduction_rows(a[0], a[1], a[2], a[3], fetch[a[4]]));
|
||||
else if(template_name.find("gemvT")!=std::string::npos)
|
||||
return tools::shared_ptr<base>(new mreduction_cols(a[0], a[1], a[2], a[3], fetch[a[4]]));
|
||||
return std::shared_ptr<base>(new mreduction_cols(a[0], a[1], a[2], a[3], fetch[a[4]]));
|
||||
else if(template_name.find("gemmNN")!=std::string::npos)
|
||||
return tools::shared_ptr<base>(new mproduct_nn(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
|
||||
return std::shared_ptr<base>(new mproduct_nn(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
|
||||
else if(template_name.find("gemmTN")!=std::string::npos)
|
||||
return tools::shared_ptr<base>(new mproduct_tn(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
|
||||
return std::shared_ptr<base>(new mproduct_tn(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
|
||||
else if(template_name.find("gemmNT")!=std::string::npos)
|
||||
return tools::shared_ptr<base>(new mproduct_nt(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
|
||||
return std::shared_ptr<base>(new mproduct_nt(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
|
||||
else if(template_name.find("gemmTT")!=std::string::npos)
|
||||
return tools::shared_ptr<base>(new mproduct_tt(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
|
||||
return std::shared_ptr<base>(new mproduct_tt(a[0], a[1], a[2], a[3], a[4], a[5], a[6], fetch[a[7]], fetch[a[8]], a[9], a[10]));
|
||||
else
|
||||
throw std::invalid_argument("Invalid expression: " + template_name);
|
||||
}
|
||||
@@ -208,32 +209,32 @@ void import(std::string const & fname, cl::CommandQueue & queue, model_map_t& re
|
||||
//Deserialize
|
||||
std::vector<std::string> operations = tools::make_vector<std::string>() << "vaxpy" << "dot" << "maxpy" << "gemvN" << "gemvT" << "gemmNN" << "gemmTN" << "gemmTT";
|
||||
std::vector<std::string> dtype = tools::make_vector<std::string>() << "float32" << "float64";
|
||||
for(std::vector<std::string>::iterator op = operations.begin() ; op != operations.end() ; ++op)
|
||||
for(auto & operation : operations)
|
||||
{
|
||||
const char * opcstr = op->c_str();
|
||||
const char * opcstr = operation.c_str();
|
||||
if(document.HasMember(opcstr))
|
||||
{
|
||||
expression_type etype = detail::get_expression_type(*op);
|
||||
for(std::vector<std::string>::iterator dt = dtype.begin() ; dt != dtype.end() ; ++dt)
|
||||
expression_type etype = detail::get_expression_type(operation);
|
||||
for(auto & elem : dtype)
|
||||
{
|
||||
const char * dtcstr = dt->c_str();
|
||||
const char * dtcstr = elem.c_str();
|
||||
if(document[opcstr].HasMember(dtcstr))
|
||||
{
|
||||
numeric_type dtype = detail::get_dtype(*dt);
|
||||
numeric_type dtype = detail::get_dtype(elem);
|
||||
|
||||
// Get profiles
|
||||
std::vector<tools::shared_ptr<base> > templates;
|
||||
std::vector<std::shared_ptr<base> > templates;
|
||||
js::Value const & profiles = document[opcstr][dtcstr]["profiles"];
|
||||
for (js::SizeType id = 0 ; id < profiles.Size() ; ++id)
|
||||
templates.push_back(detail::create(*op, tools::to_int_array<int>(profiles[id])));
|
||||
templates.push_back(detail::create(operation, tools::to_int_array<int>(profiles[id])));
|
||||
if(templates.size()>1)
|
||||
{
|
||||
// Get predictor
|
||||
predictors::random_forest predictor(document[opcstr][dtcstr]["predictor"]);
|
||||
result[std::make_pair(etype, dtype)] = tools::shared_ptr<model>(new model(predictor, templates, queue));
|
||||
result[std::make_pair(etype, dtype)] = std::shared_ptr<model>(new model(predictor, templates, queue));
|
||||
}
|
||||
else
|
||||
result[std::make_pair(etype, dtype)] = tools::shared_ptr<model>(new model(templates, queue));
|
||||
result[std::make_pair(etype, dtype)] = std::shared_ptr<model>(new model(templates, queue));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -243,11 +244,11 @@ void import(std::string const & fname, cl::CommandQueue & queue, model_map_t& re
|
||||
model_map_t init_models(cl::CommandQueue & queue)
|
||||
{
|
||||
model_map_t res;
|
||||
typedef tools::shared_ptr<model> ptr_t;
|
||||
typedef std::shared_ptr<model> ptr_t;
|
||||
numeric_type types[] = {CHAR_TYPE, UCHAR_TYPE, SHORT_TYPE, USHORT_TYPE, INT_TYPE, UINT_TYPE, LONG_TYPE, ULONG_TYPE, FLOAT_TYPE, DOUBLE_TYPE};
|
||||
|
||||
for(size_t i = 0 ; i < 10 ; ++i){
|
||||
numeric_type DTYPE = types[i];
|
||||
for(auto DTYPE : types){
|
||||
|
||||
res[std::make_pair(SCALAR_AXPY_TYPE, DTYPE)] = ptr_t(new model(vaxpy(1,64,128,FETCH_FROM_GLOBAL_STRIDED), queue));
|
||||
res[std::make_pair(VECTOR_AXPY_TYPE, DTYPE)] = ptr_t (new model(vaxpy(1,64,128,FETCH_FROM_GLOBAL_STRIDED), queue));
|
||||
res[std::make_pair(REDUCTION_TYPE, DTYPE)] = ptr_t(new model(reduction(1,64,128,FETCH_FROM_GLOBAL_STRIDED), queue));
|
||||
|
@@ -38,9 +38,9 @@ random_forest::random_forest(rapidjson::Value const & estimators)
|
||||
std::vector<float> random_forest::predict(std::vector<int_t> const & x) const
|
||||
{
|
||||
std::vector<float> res(D_, 0);
|
||||
for(std::vector<tree>::const_iterator it = estimators_.begin() ; it != estimators_.end() ; ++it)
|
||||
for(const auto & elem : estimators_)
|
||||
{
|
||||
std::vector<float> const & subres = it->predict(x);
|
||||
std::vector<float> const & subres = elem.predict(x);
|
||||
for(int_t i = 0 ; i < D_ ; ++i)
|
||||
res[i] += subres[i];
|
||||
}
|
||||
|
@@ -172,30 +172,30 @@ namespace atidlas
|
||||
|
||||
/*----Parse required temporaries-----*/
|
||||
detail::parse(tree, rootidx, current_type, breakpoints, final_type);
|
||||
std::vector<tools::shared_ptr<obj_base> > temporaries_;
|
||||
std::vector<std::shared_ptr<array> > temporaries_;
|
||||
|
||||
/*----Compute required temporaries----*/
|
||||
for(detail::breakpoints_t::reverse_iterator rit = breakpoints.rbegin() ; rit != breakpoints.rend() ; ++rit)
|
||||
{
|
||||
tools::shared_ptr<model> const & pmodel = models[std::make_pair(rit->first, dtype)];
|
||||
std::shared_ptr<model> const & pmodel = models[std::make_pair(rit->first, dtype)];
|
||||
array_expression::node const & node = tree[rit->second->node_index];
|
||||
array_expression::node const & lmost = lhs_most(tree, node);
|
||||
|
||||
//Creates temporary
|
||||
tools::shared_ptr<obj_base> tmp;
|
||||
std::shared_ptr<array> tmp;
|
||||
switch(rit->first){
|
||||
case SCALAR_AXPY_TYPE:
|
||||
case REDUCTION_TYPE: tmp = tools::shared_ptr<obj_base>(new array(1, dtype, context)); break;
|
||||
case REDUCTION_TYPE: tmp = std::shared_ptr<array>(new array(1, dtype, context)); break;
|
||||
|
||||
case VECTOR_AXPY_TYPE: tmp = tools::shared_ptr<obj_base>(new array(lmost.lhs.array.shape1, dtype, context)); break;
|
||||
case ROW_WISE_REDUCTION_TYPE: tmp = tools::shared_ptr<obj_base>(new array(lmost.lhs.array.shape1, dtype, context)); break;
|
||||
case COL_WISE_REDUCTION_TYPE: tmp = tools::shared_ptr<obj_base>(new array(lmost.lhs.array.shape2, dtype, context)); break;
|
||||
case VECTOR_AXPY_TYPE: tmp = std::shared_ptr<array>(new array(lmost.lhs.array.shape1, dtype, context)); break;
|
||||
case ROW_WISE_REDUCTION_TYPE: tmp = std::shared_ptr<array>(new array(lmost.lhs.array.shape1, dtype, context)); break;
|
||||
case COL_WISE_REDUCTION_TYPE: tmp = std::shared_ptr<array>(new array(lmost.lhs.array.shape2, dtype, context)); break;
|
||||
|
||||
case MATRIX_AXPY_TYPE: tmp = tools::shared_ptr<obj_base>(new array(lmost.lhs.array.shape1, lmost.lhs.array.shape2, dtype, context)); break;
|
||||
case MATRIX_PRODUCT_NN_TYPE: tmp = tools::shared_ptr<obj_base>(new array(node.lhs.array.shape1, node.rhs.array.shape2, dtype, context)); break;
|
||||
case MATRIX_PRODUCT_NT_TYPE: tmp = tools::shared_ptr<obj_base>(new array(node.lhs.array.shape1, node.rhs.array.shape1, dtype, context)); break;
|
||||
case MATRIX_PRODUCT_TN_TYPE: tmp = tools::shared_ptr<obj_base>(new array(node.lhs.array.shape2, node.rhs.array.shape2, dtype, context)); break;
|
||||
case MATRIX_PRODUCT_TT_TYPE: tmp = tools::shared_ptr<obj_base>(new array(node.lhs.array.shape2, node.rhs.array.shape1, dtype, context)); break;
|
||||
case MATRIX_AXPY_TYPE: tmp = std::shared_ptr<array>(new array(lmost.lhs.array.shape1, lmost.lhs.array.shape2, dtype, context)); break;
|
||||
case MATRIX_PRODUCT_NN_TYPE: tmp = std::shared_ptr<array>(new array(node.lhs.array.shape1, node.rhs.array.shape2, dtype, context)); break;
|
||||
case MATRIX_PRODUCT_NT_TYPE: tmp = std::shared_ptr<array>(new array(node.lhs.array.shape1, node.rhs.array.shape1, dtype, context)); break;
|
||||
case MATRIX_PRODUCT_TN_TYPE: tmp = std::shared_ptr<array>(new array(node.lhs.array.shape2, node.rhs.array.shape2, dtype, context)); break;
|
||||
case MATRIX_PRODUCT_TT_TYPE: tmp = std::shared_ptr<array>(new array(node.lhs.array.shape2, node.rhs.array.shape1, dtype, context)); break;
|
||||
|
||||
default: throw std::invalid_argument("Unrecognized operation");
|
||||
}
|
||||
|
@@ -176,9 +176,9 @@ array_expression array_expression::operator!()
|
||||
|
||||
|
||||
//
|
||||
tools::shared_ptr<array_expression> expressions_tuple::create(array_expression const & s)
|
||||
std::shared_ptr<array_expression> expressions_tuple::create(array_expression const & s)
|
||||
{
|
||||
return tools::shared_ptr<array_expression>(new array_expression(static_cast<array_expression const &>(s)));
|
||||
return std::shared_ptr<array_expression>(new array_expression(static_cast<array_expression const &>(s)));
|
||||
}
|
||||
|
||||
expressions_tuple::expressions_tuple(data_type const & data, order_type order) : data_(data), order_(order)
|
||||
|
@@ -45,7 +45,7 @@ def main():
|
||||
return optlist
|
||||
|
||||
cvars = sysconfig.get_config_vars()
|
||||
cvars['OPT'] = "-DNDEBUG -O3 " + str.join(' ', remove_prefixes(cvars['OPT'].split(), ['-g', '-O', '-Wstrict-prototypes', '-DNDEBUG']))
|
||||
cvars['OPT'] = "-DNDEBUG -O3 -std=c++11 " + str.join(' ', remove_prefixes(cvars['OPT'].split(), ['-g', '-O', '-Wstrict-prototypes', '-DNDEBUG']))
|
||||
cvars["CFLAGS"] = cvars["BASECFLAGS"] + " " + cvars["OPT"]
|
||||
cvars["LDFLAGS"] = '-Wl,--no-as-needed ' + cvars["LDFLAGS"]
|
||||
|
||||
|
@@ -1,4 +1,5 @@
|
||||
#include <list>
|
||||
#include <functional>
|
||||
|
||||
#include <boost/python.hpp>
|
||||
#include <boost/python/suite/indexing/vector_indexing_suite.hpp>
|
||||
@@ -190,9 +191,6 @@ namespace detail
|
||||
return to_list(devices.begin(), devices.end());
|
||||
}
|
||||
|
||||
std::vector<cl::CommandQueue> & get_queue(cl::Context const & ctx)
|
||||
{ return atd::cl_ext::get_queues(ctx); }
|
||||
|
||||
atd::numeric_type extract_dtype(bp::object const & odtype)
|
||||
{
|
||||
std::string name = bp::extract<std::string>(odtype.attr("__class__").attr("__name__"))();
|
||||
@@ -352,7 +350,7 @@ void export_cl()
|
||||
.def("__init__", bp::make_constructor(&detail::make_context))
|
||||
#define WRAP(PYNAME, NAME) .add_property(PYNAME, &detail::wrap_context_info<NAME>)
|
||||
#undef WRAP
|
||||
.add_property("queues", bp::make_function(&detail::get_queue, bp::return_internal_reference<>()))
|
||||
.add_property("queues", bp::make_function(static_cast<std::vector<cl::CommandQueue> & (*)(const cl::Context&)>( [](const cl::Context & ctx) -> std::vector<cl::CommandQueue> & { return atd::cl_ext::queues[ctx]; }) , bp::return_internal_reference<>()))
|
||||
;
|
||||
|
||||
bp::class_<cl::CommandQueue>("command_queue", bp::init<cl::Context, cl::Device>())
|
||||
|
@@ -3,5 +3,5 @@ foreach(PROG maxpy vaxpy reduction mreduction mproduct)
|
||||
add_executable(${PROG}-test ${PROG}.cpp)
|
||||
add_test(${PROG} ${PROG}-test)
|
||||
target_link_libraries(${PROG}-test atidlas ${OPENCL_LIBRARIES})
|
||||
set_target_properties(${PROG}-test PROPERTIES COMPILE_FLAGS "-Wall -Wextra")
|
||||
set_target_properties(${PROG}-test PROPERTIES COMPILE_FLAGS "-Wall -Wextra -std=c++11")
|
||||
endforeach(PROG)
|
||||
|
@@ -125,15 +125,15 @@ void test_impl(T epsilon, cl::Context const & ctx)
|
||||
|
||||
int main()
|
||||
{
|
||||
for(ad::cl_ext::queues_t::iterator it = ad::cl_ext::queues.begin() ; it != ad::cl_ext::queues.end() ; ++it)
|
||||
for(const auto & elem : ad::cl_ext::queues.data())
|
||||
{
|
||||
cl::Device device = it->second[0].getInfo<CL_QUEUE_DEVICE>();
|
||||
cl::Device device = elem.second[0].getInfo<CL_QUEUE_DEVICE>();
|
||||
std::cout << "Device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
|
||||
std::cout << "---" << std::endl;
|
||||
std::cout << ">> float" << std::endl;
|
||||
test_impl<float>(1e-4, it->first);
|
||||
test_impl<float>(1e-4, elem.first);
|
||||
std::cout << ">> double" << std::endl;
|
||||
test_impl<double>(1e-9, it->first);
|
||||
test_impl<double>(1e-9, elem.first);
|
||||
std::cout << "---" << std::endl;
|
||||
}
|
||||
return EXIT_SUCCESS;
|
||||
|
@@ -2,7 +2,7 @@
|
||||
|
||||
#include "viennacl/vector.hpp"
|
||||
|
||||
#include "atidlas/tools/shared_ptr.hpp"
|
||||
#include <memory>
|
||||
#include "atidlas/model/import.hpp"
|
||||
|
||||
namespace ad = atidlas;
|
||||
@@ -10,7 +10,7 @@ namespace ad = atidlas;
|
||||
int main()
|
||||
{
|
||||
viennacl::vector<float> x(10000), y(10000), z(10000);
|
||||
std::map<std::string, ad::tools::shared_ptr<ad::model> > models = ad::import("geforce_gt_540m.json");
|
||||
std::map<std::string, ad::std::shared_ptr<ad::model> > models = ad::import("geforce_gt_540m.json");
|
||||
models["vector-axpy-float32"]->tune(viennacl::symbolic_expression(z, viennacl::op_assign(), x));
|
||||
models["vector-axpy-float32"]->execute(viennacl::symbolic_expression(z, viennacl::op_assign(), x));
|
||||
return EXIT_SUCCESS;
|
||||
|
@@ -75,15 +75,15 @@ void test_impl(T epsilon, cl::Context const & ctx)
|
||||
|
||||
int main()
|
||||
{
|
||||
for(ad::cl_ext::queues_t::iterator it = ad::cl_ext::queues.begin() ; it != ad::cl_ext::queues.end() ; ++it)
|
||||
for(const auto & elem : ad::cl_ext::queues.data())
|
||||
{
|
||||
cl::Device device = it->second[0].getInfo<CL_QUEUE_DEVICE>();
|
||||
cl::Device device = elem.second[0].getInfo<CL_QUEUE_DEVICE>();
|
||||
std::cout << "Device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
|
||||
std::cout << "---" << std::endl;
|
||||
std::cout << ">> float" << std::endl;
|
||||
test_impl<float>(1e-4, it->first);
|
||||
test_impl<float>(1e-4, elem.first);
|
||||
std::cout << ">> double" << std::endl;
|
||||
test_impl<double>(1e-9, it->first);
|
||||
test_impl<double>(1e-9, elem.first);
|
||||
std::cout << "---" << std::endl;
|
||||
}
|
||||
return EXIT_SUCCESS;
|
||||
|
@@ -65,15 +65,15 @@ void test_impl(T epsilon, cl::Context const & ctx)
|
||||
|
||||
int main()
|
||||
{
|
||||
for(ad::cl_ext::queues_t::iterator it = ad::cl_ext::queues.begin() ; it != ad::cl_ext::queues.end() ; ++it)
|
||||
for(const auto & elem : ad::cl_ext::queues.data())
|
||||
{
|
||||
cl::Device device = it->second[0].getInfo<CL_QUEUE_DEVICE>();
|
||||
cl::Device device = elem.second[0].getInfo<CL_QUEUE_DEVICE>();
|
||||
std::cout << "Device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
|
||||
std::cout << "---" << std::endl;
|
||||
std::cout << ">> float" << std::endl;
|
||||
test_impl<float>(1e-4, it->first);
|
||||
test_impl<float>(1e-4, elem.first);
|
||||
std::cout << ">> double" << std::endl;
|
||||
test_impl<double>(1e-9, it->first);
|
||||
test_impl<double>(1e-9, elem.first);
|
||||
std::cout << "---" << std::endl;
|
||||
}
|
||||
return EXIT_SUCCESS;
|
||||
|
@@ -74,15 +74,15 @@ void test_impl(T epsilon, cl::Context const & ctx)
|
||||
|
||||
int main()
|
||||
{
|
||||
for(ad::cl_ext::queues_t::iterator it = ad::cl_ext::queues.begin() ; it != ad::cl_ext::queues.end() ; ++it)
|
||||
for(const auto & elem : ad::cl_ext::queues.data())
|
||||
{
|
||||
cl::Device device = it->second[0].getInfo<CL_QUEUE_DEVICE>();
|
||||
cl::Device device = elem.second[0].getInfo<CL_QUEUE_DEVICE>();
|
||||
std::cout << "Device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
|
||||
std::cout << "---" << std::endl;
|
||||
std::cout << ">> float" << std::endl;
|
||||
test_impl<float>(1e-4, it->first);
|
||||
test_impl<float>(1e-4, elem.first);
|
||||
std::cout << ">> double" << std::endl;
|
||||
test_impl<double>(1e-9, it->first);
|
||||
test_impl<double>(1e-9, elem.first);
|
||||
std::cout << "---" << std::endl;
|
||||
}
|
||||
return EXIT_SUCCESS;
|
||||
|
@@ -120,15 +120,15 @@ void test_impl(T epsilon, cl::Context const & ctx)
|
||||
|
||||
int main()
|
||||
{
|
||||
for(ad::cl_ext::queues_t::iterator it = ad::cl_ext::queues.begin() ; it != ad::cl_ext::queues.end() ; ++it)
|
||||
for(const auto & elem : ad::cl_ext::queues.data())
|
||||
{
|
||||
cl::Device device = it->second[0].getInfo<CL_QUEUE_DEVICE>();
|
||||
cl::Device device = elem.second[0].getInfo<CL_QUEUE_DEVICE>();
|
||||
std::cout << "Device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
|
||||
std::cout << "---" << std::endl;
|
||||
std::cout << ">> float" << std::endl;
|
||||
test_impl<float>(1e-4, it->first);
|
||||
test_impl<float>(1e-4, elem.first);
|
||||
std::cout << ">> double" << std::endl;
|
||||
test_impl<double>(1e-9, it->first);
|
||||
test_impl<double>(1e-9, elem.first);
|
||||
std::cout << "---" << std::endl;
|
||||
}
|
||||
return EXIT_SUCCESS;
|
||||
|
Reference in New Issue
Block a user