Bugfix in autotuner
This commit is contained in:
@@ -19,14 +19,14 @@ class array: public obj_base
|
||||
public:
|
||||
//1D Constructors
|
||||
array(int_t size1, numeric_type dtype, cl::Context context = cl::default_context());
|
||||
template<typename T>
|
||||
array(std::vector<T> const & data, cl::Context context = cl::default_context());
|
||||
template<typename DT>
|
||||
array(std::vector<DT> const & data, cl::Context context = cl::default_context());
|
||||
array(array & v, slice const & s1);
|
||||
|
||||
//2D Constructors
|
||||
array(int_t size1, int_t size2, numeric_type dtype, cl::Context context = cl::default_context());
|
||||
template<typename T>
|
||||
array(int_t size1, int_t size2, std::vector<T> const & data, cl::Context context = cl::default_context());
|
||||
template<typename DT>
|
||||
array(int_t size1, int_t size2, std::vector<DT> const & data, cl::Context context = cl::default_context());
|
||||
array(array & M, slice const & s1, slice const & s2);
|
||||
|
||||
//General constructor
|
||||
@@ -72,6 +72,8 @@ public:
|
||||
scalar operator[](int_t);
|
||||
array operator[](slice const &);
|
||||
array operator()(slice const &, slice const &);
|
||||
|
||||
array_expression T() const;
|
||||
protected:
|
||||
numeric_type dtype_;
|
||||
|
||||
@@ -113,9 +115,7 @@ public:
|
||||
};
|
||||
|
||||
|
||||
atidlas::array_expression eye(std::size_t, std::size_t, atidlas::numeric_type, cl::Context ctx = cl::default_context());
|
||||
array_expression zeros(std::size_t M, std::size_t N, numeric_type dtype, cl::Context ctx = cl::default_context());
|
||||
array reshape(array const &, int_t, int_t);
|
||||
|
||||
|
||||
//copy
|
||||
|
||||
@@ -209,6 +209,10 @@ ATIDLAS_DECLARE_REDUCTION(max)
|
||||
ATIDLAS_DECLARE_REDUCTION(min)
|
||||
ATIDLAS_DECLARE_REDUCTION(argmin)
|
||||
|
||||
atidlas::array_expression eye(std::size_t, std::size_t, atidlas::numeric_type, cl::Context ctx = cl::default_context());
|
||||
array_expression zeros(std::size_t M, std::size_t N, numeric_type dtype, cl::Context ctx = cl::default_context());
|
||||
array reshape(array const &, int_t, int_t);
|
||||
|
||||
//
|
||||
std::ostream& operator<<(std::ostream &, array const &);
|
||||
std::ostream& operator<<(std::ostream & os, scalar const & s);
|
||||
|
@@ -104,6 +104,7 @@ enum operation_node_type
|
||||
OPERATOR_MATRIX_ROW_TYPE,
|
||||
OPERATOR_MATRIX_COLUMN_TYPE,
|
||||
OPERATOR_REPEAT_TYPE,
|
||||
OPERATOR_SHIFT_TYPE,
|
||||
OPERATOR_VDIAG_TYPE,
|
||||
|
||||
OPERATOR_MATRIX_PRODUCT_NN_TYPE,
|
||||
|
@@ -19,9 +19,9 @@ array::array(int_t size1, numeric_type dtype, cl::Context context) :
|
||||
context_(context), data_(context_, CL_MEM_READ_WRITE, size_of(dtype)*dsize())
|
||||
{ }
|
||||
|
||||
template<class T>
|
||||
array::array(std::vector<T> const & x, cl::Context context):
|
||||
dtype_(to_numeric_type<T>::value), shape_(x.size(), 1), start_(0, 0), stride_(1, 1), ld_(shape_._1),
|
||||
template<class DT>
|
||||
array::array(std::vector<DT> const & x, cl::Context context):
|
||||
dtype_(to_numeric_type<DT>::value), shape_(x.size(), 1), start_(0, 0), stride_(1, 1), ld_(shape_._1),
|
||||
context_(context), data_(context, CL_MEM_READ_WRITE, size_of(dtype_)*dsize())
|
||||
{ *this = x; }
|
||||
|
||||
@@ -53,9 +53,9 @@ array::array(array & M, slice const & s1, slice const & s2) : dtype_(M.dtype_),
|
||||
context_(M.data_.getInfo<CL_MEM_CONTEXT>()), data_(M.data_)
|
||||
{ }
|
||||
|
||||
template<typename T>
|
||||
array::array(int_t size1, int_t size2, std::vector<T> const & data, cl::Context context)
|
||||
: dtype_(to_numeric_type<T>::value),
|
||||
template<typename DT>
|
||||
array::array(int_t size1, int_t size2, std::vector<DT> const & data, cl::Context context)
|
||||
: dtype_(to_numeric_type<DT>::value),
|
||||
shape_(size1, size2), start_(0, 0), stride_(1, 1), ld_(size1),
|
||||
context_(context), data_(context_, CL_MEM_READ_WRITE, size_of(dtype_)*dsize())
|
||||
{
|
||||
@@ -146,8 +146,8 @@ array & array::operator=(array_expression const & rhs)
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<class T>
|
||||
array & array::operator=(std::vector<T> const & rhs)
|
||||
template<class DT>
|
||||
array & array::operator=(std::vector<DT> const & rhs)
|
||||
{
|
||||
assert(nshape()==1);
|
||||
atidlas::copy(rhs, *this);
|
||||
@@ -208,6 +208,9 @@ array & array::operator/=(array const & rhs)
|
||||
array & array::operator/=(array_expression const & rhs)
|
||||
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_DIV_TYPE), shape_); }
|
||||
|
||||
// Transpose accessor: returns the expression produced by applying
// atidlas::trans to this array.
array_expression array::T() const
{
  return atidlas::trans(*this);
}
|
||||
|
||||
/*--- Indexing operators -----*/
|
||||
//---------------------------------------
|
||||
scalar array::operator [](int_t idx)
|
||||
@@ -481,17 +484,17 @@ atidlas::array_expression zeros(std::size_t M, std::size_t N, atidlas::numeric_t
|
||||
return array_expression(value_scalar(0), lhs_rhs_element(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_ADD_TYPE), ctx, dtype, size4(M, N));
|
||||
}
|
||||
|
||||
inline size4 trans(size4 const & shape)
|
||||
inline size4 flip(size4 const & shape)
|
||||
{ return size4(shape._2, shape._1);}
|
||||
|
||||
// Component-wise product of two shapes: the first extents are multiplied
// together, and likewise the second extents.
inline size4 prod(size4 const & shape1, size4 const & shape2)
{
  return size4(shape1._1 * shape2._1,
               shape1._2 * shape2._2);
}
|
||||
|
||||
array_expression trans(array const & x) \
|
||||
{ return array_expression(x, lhs_rhs_element(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_TRANS_TYPE), x.context(), x.dtype(), trans(x.shape())); }\
|
||||
{ return array_expression(x, lhs_rhs_element(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_TRANS_TYPE), x.context(), x.dtype(), flip(x.shape())); }\
|
||||
\
|
||||
array_expression trans(array_expression const & x) \
|
||||
{ return array_expression(x, lhs_rhs_element(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_TRANS_TYPE), trans(x.shape())); }
|
||||
{ return array_expression(x, lhs_rhs_element(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_TRANS_TYPE), flip(x.shape())); }
|
||||
|
||||
array_expression repmat(array const & A, int_t const & rep1, int_t const & rep2)
|
||||
{
|
||||
|
@@ -3,7 +3,6 @@ from __future__ import division
|
||||
import argparse, itertools, os, sys, json
|
||||
import misc_tools, optimize, dataset
|
||||
import pyatidlas as atd
|
||||
import pyopencl as cl
|
||||
import numpy as np
|
||||
|
||||
from numpy import random
|
||||
@@ -34,7 +33,8 @@ TYPES = { 'vaxpy': {'template':atd.vaxpy,
|
||||
|
||||
def do_tuning(args):
|
||||
device = args.device
|
||||
|
||||
context = atd.context(device)
|
||||
context.queues.append(atd.command_queue(context, device))
|
||||
if os.path.isfile(args.json_file):
|
||||
json_out = json.load(open(args.json_file, 'r'))
|
||||
else:
|
||||
@@ -98,7 +98,7 @@ def do_tuning(args):
|
||||
D = json_out[full_operation][dtypestr]
|
||||
|
||||
if args.method == 'simple':
|
||||
print default_tuning_sizes[operation]
|
||||
print 'Size : ', ','.join(map(str, default_tuning_sizes[operation]))
|
||||
profiles = [execution_handler(map(int,default_tuning_sizes[operation]))]
|
||||
else:
|
||||
def compute_perf(x, t):
|
||||
@@ -125,48 +125,48 @@ def do_tuning(args):
|
||||
#Vector AXPY
|
||||
if operation=='vaxpy':
|
||||
def execution_handler(sizes, fname=os.devnull, parameters=None):
|
||||
x = atd.empty(sizes[0], datatype)
|
||||
y = atd.empty(sizes[0], datatype)
|
||||
x = atd.empty(sizes[0], datatype, context=context)
|
||||
y = atd.empty(sizes[0], datatype, context=context)
|
||||
return execute(x + y, sizes, Template, parameters, fname)
|
||||
tune(execution_handler, 1e3, 2e7, 1, (),'log', 'log')
|
||||
#dot
|
||||
if operation=='dot':
|
||||
def execution_handler(sizes, fname=os.devnull, parameters=None):
|
||||
x = atd.empty(sizes[0], datatype)
|
||||
y = atd.empty(sizes[0], datatype)
|
||||
x = atd.empty(sizes[0], datatype, context=context)
|
||||
y = atd.empty(sizes[0], datatype, context=context)
|
||||
s = atd.scalar(datatype)
|
||||
return execute(atd.dot(x, y), sizes, Template, parameters, fname)
|
||||
tune(execution_handler, 1e3, 2e7, 1, (),'log', 'log')
|
||||
#Matrix AXPY
|
||||
if operation=='maxpy':
|
||||
def execution_handler(sizes, fname=os.devnull, parameters=None):
|
||||
A = atd.empty(sizes, datatype)
|
||||
C = atd.empty(sizes, datatype)
|
||||
A = atd.empty(sizes, datatype, context=context)
|
||||
C = atd.empty(sizes, datatype, context=context)
|
||||
return execute(A + C, sizes, Template, parameters, fname)
|
||||
tune(execution_handler, 100, 5000, 2, (),'log', 'log')
|
||||
#Row-wise dot
|
||||
if operation=='gemv':
|
||||
for A_trans in args.gemv_layouts:
|
||||
def execution_handler(sizes, fname=os.devnull, parameters=None):
|
||||
Template = Template[A_trans]
|
||||
A = atd.empty(sizes if A_trans=='N' else sizes[::-1], datatype)
|
||||
x = atd.empty(sizes[1], datatype)
|
||||
def execution_handler(sizes, fname=os.devnull, parameters=None):
|
||||
A = atd.empty(sizes if A_trans=='N' else sizes[::-1], datatype, context=context)
|
||||
x = atd.empty(sizes[1], datatype, context=context)
|
||||
LHS = A if A_trans=='N' else A.T
|
||||
return execute(device, atd.dot(LHS, x), sizes, Template, parameters, fname)
|
||||
return execute(atd.dot(LHS, x), sizes, Template, parameters, fname)
|
||||
tune(execution_handler, 100, 5000, 2, (A_trans,),'log', 'log')
|
||||
#Matrix Product
|
||||
if operation=='gemm':
|
||||
for L in args.gemm_layouts:
|
||||
A_trans = L[0]
|
||||
B_trans = L[1]
|
||||
Template = Template[(A_trans, B_trans)]
|
||||
def execution_handler(sizes, fname=os.devnull, parameters=None):
|
||||
Template = Template[A_trans, B_trans]
|
||||
A = atd.empty((sizes[0], sizes[2]) if A_trans=='N' else (sizes[2], sizes[0]), datatype)
|
||||
B = atd.empty((sizes[2], sizes[1]) if B_trans=='N' else (sizes[1], sizes[2]), datatype)
|
||||
A = atd.empty((sizes[0], sizes[2]) if A_trans=='N' else (sizes[2], sizes[0]), datatype, context=context)
|
||||
B = atd.empty((sizes[2], sizes[1]) if B_trans=='N' else (sizes[1], sizes[2]), datatype, context=context)
|
||||
LHS = A if A_trans=='N' else A.T
|
||||
RHS = B if B_trans=='N' else B.T
|
||||
return execute(device, atd.dot(LHS, RHS),(A_trans,B_trans), sizes, fname, parameters)
|
||||
tune(execution_handler, 100, 2000, 3,(A_trans,B_trans), 'linear')
|
||||
return execute(atd.dot(LHS, RHS), sizes, Template, parameters, fname)
|
||||
tune(execution_handler, 100, 2000, 3,(A_trans,B_trans), 'linear', 'linear')
|
||||
|
||||
json.dump(json_out, open(args.json_file,'w'))
|
||||
|
||||
@@ -177,25 +177,6 @@ class ArgumentsHandler:
|
||||
|
||||
def __init__(self):
|
||||
|
||||
#No action argument -> interactive tuning
|
||||
if len(sys.argv)==1:
|
||||
def add_input(help, default):
|
||||
return raw_input(help + "[" + default + "] : ") or default
|
||||
|
||||
self.device = add_input('Device to tune for','0')
|
||||
self.operations = add_input('Operations to tune for','vaxpy,maxpy,dot,gemv,gemm-float32')
|
||||
self.gemm_layouts = add_input('GEMV Layouts', 'NN,NT,TN,TT')
|
||||
self.gemv_layouts = add_input('GEMV Layouts', 'N,T')
|
||||
self.json_file = add_input('JSON File', misc_tools.sanitize_string(devices[int(self.device)].name) + '.json')
|
||||
self.method = add_input('Tuning type', 'simple')
|
||||
if self.method == 'simple':
|
||||
self.blas1_size = add_input('BLAS1 size', '10e6')
|
||||
self.blas2_size = add_input('BLAS2 sizes (M,N)', '2560,2560').split(',')
|
||||
self.blas3_size = add_input('BLAS3 sizes (M,N,K)', '1024,1024,1024').split(',')
|
||||
else:
|
||||
self.build_model = True
|
||||
self.sample_size = 30
|
||||
else:
|
||||
#Command line arguments
|
||||
parser = argparse.ArgumentParser()
|
||||
subparsers = parser.add_subparsers(dest='action')
|
||||
@@ -236,12 +217,13 @@ class ArgumentsHandler:
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
devices = [d for platform in cl.get_platforms() for d in platform.get_devices()]
|
||||
platforms = atd.get_platforms()
|
||||
devices = [d for platform in platforms for d in platform.get_devices()]
|
||||
print("----------------")
|
||||
print("Devices available:")
|
||||
print("----------------")
|
||||
for (i, d) in enumerate(devices):
|
||||
print 'Device', i, '|', cl.device_type.to_string(d.type), '|', d.name, 'on', d.platform.name
|
||||
print 'Device', i, '|', atd.device_type_to_string(d.type), '|', d.name, 'on', d.platform.name
|
||||
print("----------------")
|
||||
|
||||
args = ArgumentsHandler()
|
||||
|
@@ -1,6 +1,5 @@
|
||||
from __future__ import division
|
||||
|
||||
import pyopencl
|
||||
import time
|
||||
import os
|
||||
import sys
|
||||
@@ -186,10 +185,13 @@ class OccupancyRecord:
|
||||
|
||||
|
||||
def __init__(self, dev, threads, shared_mem=0, registers=0):
|
||||
if 'advanced micro devices' in dev.vendor.lower():
|
||||
vendor = dev.vendor.lower()
|
||||
if any(X in vendor for X in ['advanced micro devices', 'amd']):
|
||||
self.init_amd(dev, threads, shared_mem, registers)
|
||||
elif 'nvidia' in dev.vendor.lower():
|
||||
elif 'nvidia' in vendor:
|
||||
self.init_nvidia(dev, threads, shared_mem, registers)
|
||||
elif 'intel' in vendor:
|
||||
self.occupancy = 100
|
||||
|
||||
|
||||
|
||||
|
Binary file not shown.
@@ -85,13 +85,13 @@ bp::tuple get_shape(atd::array const & x)
|
||||
return bp::make_tuple(x.shape()._1, x.shape()._2);
|
||||
}
|
||||
|
||||
void set_shape(atd::array & x, bp::tuple const & t)
|
||||
{
|
||||
unsigned int len = bp::len(t);
|
||||
atd::int_t size1 = bp::extract<atd::int_t>(t[0]);
|
||||
atd::int_t size2 = len<2?1:bp::extract<atd::int_t>(t[1]);
|
||||
x.reshape(size1, size2);
|
||||
}
|
||||
//void set_shape(atd::array & x, bp::tuple const & t)
|
||||
//{
|
||||
// unsigned int len = bp::len(t);
|
||||
// atd::int_t size1 = bp::extract<atd::int_t>(t[0]);
|
||||
// atd::int_t size2 = len<2?1:bp::extract<atd::int_t>(t[1]);
|
||||
// x.reshape(size1, size2);
|
||||
//}
|
||||
|
||||
boost::python::dict create_queues(atd::cl::queues_t queues)
|
||||
{
|
||||
@@ -182,6 +182,15 @@ void export_symbolic()
|
||||
|
||||
namespace detail
|
||||
{
|
||||
template<class IT>
|
||||
bp::list to_list(IT const & begin, IT const & end)
|
||||
{
|
||||
bp::list res;
|
||||
for (IT it = begin; it != end; ++it)
|
||||
res.append(*it);
|
||||
return res;
|
||||
}
|
||||
|
||||
bp::list nv_compute_capability(atd::cl::Device const & device)
|
||||
{
|
||||
bp::list res;
|
||||
@@ -190,16 +199,23 @@ namespace detail
|
||||
return res;
|
||||
}
|
||||
|
||||
std::string vendor(atd::cl::Device const & device){
|
||||
return device.getInfo<CL_DEVICE_VENDOR>();
|
||||
// Queries the available OpenCL platforms through atd::cl::Platform::get
// and returns them as a Python list.
bp::list get_platforms()
{
  std::vector<atd::cl::Platform> found;
  atd::cl::Platform::get(&found);

  return to_list(found.begin(), found.end());
}
|
||||
|
||||
// Returns every device (CL_DEVICE_TYPE_ALL) exposed by `platform` as a
// Python list.
bp::list get_devices(atd::cl::Platform const & platform)
{
  std::vector<atd::cl::Device> found;
  platform.getDevices(CL_DEVICE_TYPE_ALL, &found);

  return to_list(found.begin(), found.end());
}
|
||||
|
||||
// Looks up the command-queue vector stored for `ctx` in atd::cl::queues
// and returns it by reference.
std::vector<atd::cl::CommandQueue> & get_queue(atd::cl::Context const & ctx)
{
  std::vector<atd::cl::CommandQueue> & ctx_queues = atd::cl::queues[ctx];
  return ctx_queues;
}
|
||||
|
||||
atd::cl::Device get_device(atd::cl::CommandQueue & queue)
|
||||
{ return queue.getInfo<CL_QUEUE_DEVICE>(); }
|
||||
|
||||
atd::numeric_type extract_dtype(bp::object const & odtype)
|
||||
{
|
||||
std::string name = bp::extract<std::string>(odtype.attr("__class__").attr("__name__"))();
|
||||
@@ -272,20 +288,50 @@ namespace detail
|
||||
}
|
||||
};
|
||||
|
||||
// Returns the platform a device belongs to, wrapping the handle reported
// by the device's CL_DEVICE_PLATFORM info query.
atd::cl::Platform get_platform(atd::cl::Device const & device)
{
  return atd::cl::Platform(
      device.getInfo<CL_DEVICE_PLATFORM>());
}
|
||||
|
||||
template<cl_int INFO>
|
||||
typename atd::cl::detail::param_traits<atd::cl::detail::cl_device_info, INFO>::param_type
|
||||
wrap_device_info(atd::cl::Device const & x)
|
||||
{ return x.getInfo<INFO>(NULL); }
|
||||
|
||||
template<cl_int INFO>
|
||||
typename atd::cl::detail::param_traits<atd::cl::detail::cl_context_info, INFO>::param_type
|
||||
wrap_context_info(atd::cl::Context const & x)
|
||||
{ return x.getInfo<INFO>(NULL); }
|
||||
|
||||
template<cl_int INFO>
|
||||
typename atd::cl::detail::param_traits<atd::cl::detail::cl_platform_info, INFO>::param_type
|
||||
wrap_platform_info(atd::cl::Platform const & x)
|
||||
{ return x.getInfo<INFO>(NULL); }
|
||||
|
||||
template<cl_int INFO>
|
||||
typename atd::cl::detail::param_traits<atd::cl::detail::cl_command_queue_info, INFO>::param_type
|
||||
wrap_command_queue_info(atd::cl::CommandQueue const & x)
|
||||
{ return x.getInfo<INFO>(NULL); }
|
||||
|
||||
|
||||
std::string to_string(cl_device_type type)
|
||||
{
|
||||
if(type==CL_DEVICE_TYPE_ALL) return "ALL";
|
||||
if(type==CL_DEVICE_TYPE_CPU) return "CPU";
|
||||
if(type==CL_DEVICE_TYPE_GPU) return "GPU";
|
||||
if(type==CL_DEVICE_TYPE_ACCELERATOR) return "ACCELERATOR";
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void export_cl()
|
||||
{
|
||||
typedef std::vector<atd::cl::CommandQueue> queues_t;
|
||||
|
||||
bp::class_<queues_t>("queues")
|
||||
.def("__len__", &queues_t::size)
|
||||
.def("__getitem__", &bp::vector_indexing_suite<queues_t>::get_item, bp::return_internal_reference<>())
|
||||
.def("__setitem__", &bp::vector_indexing_suite<queues_t>::set_item, bp::with_custodian_and_ward<1,2>())
|
||||
;
|
||||
.def("append", &bp::vector_indexing_suite<queues_t>::append)
|
||||
|
||||
bp::class_<atd::cl::Device>("device", bp::no_init)
|
||||
.add_property("nv_compute_capability", &detail::nv_compute_capability)
|
||||
.add_property("vendor", &detail::vendor)
|
||||
;
|
||||
|
||||
bp::class_<atd::model_map_t>("models")
|
||||
@@ -293,18 +339,50 @@ void export_cl()
|
||||
.def("__setitem__", &detail::model_map_indexing::set_item, bp::with_custodian_and_ward<1,2>())
|
||||
;
|
||||
|
||||
bp::class_<atd::cl::Context>("context", bp::no_init)
|
||||
bp::enum_<cl_device_type>("device_type")
|
||||
.value("CL_DEVICE_TYPE_ALL", CL_DEVICE_TYPE_ALL)
|
||||
.value("CL_DEVICE_TYPE_CPU", CL_DEVICE_TYPE_CPU)
|
||||
.value("CL_DEVICE_TYPE_GPU", CL_DEVICE_TYPE_GPU)
|
||||
.value("CL_DEVICE_TYPE_ACCELERATOR", CL_DEVICE_TYPE_ACCELERATOR)
|
||||
;
|
||||
|
||||
bp::def("device_type_to_string", &detail::to_string);
|
||||
|
||||
|
||||
bp::class_<atd::cl::Platform>("platform", bp::no_init)
|
||||
#define WRAP(PYNAME, NAME) .add_property(PYNAME, &detail::wrap_platform_info<NAME>)
|
||||
WRAP("name", CL_PLATFORM_NAME)
|
||||
#undef WRAP
|
||||
.def("get_devices", &detail::get_devices)
|
||||
;
|
||||
|
||||
bp::class_<atd::cl::Device>("device", bp::no_init)
|
||||
#define WRAP(PYNAME, NAME) .add_property(PYNAME, &detail::wrap_device_info<NAME>)
|
||||
.add_property("nv_compute_capability", &detail::nv_compute_capability)
|
||||
.add_property("platform", &detail::get_platform)
|
||||
WRAP("double_fp_config", CL_DEVICE_DOUBLE_FP_CONFIG)
|
||||
WRAP("name", CL_DEVICE_NAME)
|
||||
WRAP("type", CL_DEVICE_TYPE)
|
||||
WRAP("vendor", CL_DEVICE_VENDOR)
|
||||
#undef WRAP
|
||||
;
|
||||
|
||||
bp::class_<atd::cl::Context>("context", bp::init<atd::cl::Device>())
|
||||
#define WRAP(PYNAME, NAME) .add_property(PYNAME, &detail::wrap_context_info<NAME>)
|
||||
#undef WRAP
|
||||
.add_property("queues", bp::make_function(&detail::get_queue, bp::return_internal_reference<>()))
|
||||
;
|
||||
|
||||
|
||||
|
||||
bp::class_<atd::cl::CommandQueue>("command_queue", bp::no_init)
|
||||
.add_property("device", &detail::get_device)
|
||||
bp::class_<atd::cl::CommandQueue>("command_queue", bp::init<atd::cl::Context, atd::cl::Device>())
|
||||
#define WRAP(PYNAME, NAME) .add_property(PYNAME, &detail::wrap_command_queue_info<NAME>)
|
||||
WRAP("device", CL_QUEUE_DEVICE)
|
||||
#undef WRAP
|
||||
.add_property("models", bp::make_function(&atd::get_model_map, bp::return_internal_reference<>()));
|
||||
;
|
||||
|
||||
bp::def("synchronize", &atd::cl::synchronize);
|
||||
bp::def("get_platforms", &detail::get_platforms);
|
||||
|
||||
}
|
||||
|
||||
namespace detail
|
||||
@@ -446,6 +524,7 @@ void export_array()
|
||||
.def(bp::init<atd::array_expression>())
|
||||
.add_property("dtype", &atd::array::dtype)
|
||||
.add_property("context", bp::make_function(&atd::array::context, bp::return_internal_reference<>()))
|
||||
.add_property("T", &atd::array::T)
|
||||
// .add_property("shape", &detail::get_shape, &detail::set_shape)
|
||||
ADD_ARRAY_OPERATOR(+)
|
||||
ADD_ARRAY_OPERATOR(-)
|
||||
@@ -477,8 +556,8 @@ void export_array()
|
||||
bp::def(#name, static_cast<atd::array_expression (*)(atd::array const &, atd::array_expression const &)>(&atd::name));\
|
||||
bp::def(#name, static_cast<atd::array_expression (*)(atd::array_expression const &, atd::array_expression const &)>(&atd::name));
|
||||
|
||||
MAP_FUNCTION(max)
|
||||
MAP_FUNCTION(min)
|
||||
MAP_FUNCTION(maximum)
|
||||
MAP_FUNCTION(minimum)
|
||||
MAP_FUNCTION(pow)
|
||||
MAP_FUNCTION(dot)
|
||||
#undef MAP_FUNCTION
|
||||
@@ -551,21 +630,24 @@ void export_model()
|
||||
#undef __PROP
|
||||
}
|
||||
|
||||
#define WRAP_TEMPLATE(name, ...) bp::class_<atidlas::base_impl<atidlas::name, atidlas::name::parameters_type>, bp::bases<atidlas::base>, boost::noncopyable>(#name "_base_impl", bp::no_init);\
|
||||
bp::class_<atidlas::name, bp::bases<atidlas::base_impl<atidlas::name, atidlas::name::parameters_type> > >(#name, bp::init<__VA_ARGS__>())\
|
||||
#define WRAP_BASE(name) bp::class_<atidlas::base_impl<atidlas::name, atidlas::name::parameters_type>, bp::bases<atidlas::base>, boost::noncopyable>(#name "_base_impl", bp::no_init);
|
||||
#define WRAP_TEMPLATE(name, basename, ...) bp::class_<atidlas::name, bp::bases<atidlas::base_impl<atidlas::basename, atidlas::basename::parameters_type> > >(#name, bp::init<__VA_ARGS__>())\
|
||||
.add_property("local_size_0", &atd::name::local_size_0)\
|
||||
.add_property("local_size_1", &atd::name::local_size_1);
|
||||
#define WRAP_SINGLE_TEMPLATE(name, ...) WRAP_BASE(name) WRAP_TEMPLATE(name, name, __VA_ARGS__)
|
||||
|
||||
//Vector AXPY
|
||||
WRAP_TEMPLATE(vaxpy, uint, uint, uint, atidlas::fetching_policy_type)
|
||||
WRAP_TEMPLATE(maxpy, uint, uint, uint, uint, uint, atidlas::fetching_policy_type)
|
||||
WRAP_TEMPLATE(reduction, uint, uint, uint, atidlas::fetching_policy_type)
|
||||
WRAP_TEMPLATE(mreduction_rows, uint, uint, uint, uint, atidlas::fetching_policy_type)
|
||||
WRAP_TEMPLATE(mreduction_cols, uint, uint, uint, uint, atidlas::fetching_policy_type)
|
||||
WRAP_TEMPLATE(mproduct_nn, uint, uint, uint, uint, uint, uint, uint, atidlas::fetching_policy_type, atidlas::fetching_policy_type, uint, uint)
|
||||
WRAP_TEMPLATE(mproduct_tn, uint, uint, uint, uint, uint, uint, uint, atidlas::fetching_policy_type, atidlas::fetching_policy_type, uint, uint)
|
||||
WRAP_TEMPLATE(mproduct_nt, uint, uint, uint, uint, uint, uint, uint, atidlas::fetching_policy_type, atidlas::fetching_policy_type, uint, uint)
|
||||
WRAP_TEMPLATE(mproduct_tt, uint, uint, uint, uint, uint, uint, uint, atidlas::fetching_policy_type, atidlas::fetching_policy_type, uint, uint)
|
||||
WRAP_SINGLE_TEMPLATE(vaxpy, uint, uint, uint, atidlas::fetching_policy_type)
|
||||
WRAP_SINGLE_TEMPLATE(maxpy, uint, uint, uint, uint, uint, atidlas::fetching_policy_type)
|
||||
WRAP_SINGLE_TEMPLATE(reduction, uint, uint, uint, atidlas::fetching_policy_type)
|
||||
WRAP_BASE(mreduction)
|
||||
WRAP_TEMPLATE(mreduction_rows, mreduction, uint, uint, uint, uint, atidlas::fetching_policy_type)
|
||||
WRAP_TEMPLATE(mreduction_cols, mreduction, uint, uint, uint, uint, atidlas::fetching_policy_type)
|
||||
WRAP_BASE(mproduct)
|
||||
WRAP_TEMPLATE(mproduct_nn, mproduct, uint, uint, uint, uint, uint, uint, uint, atidlas::fetching_policy_type, atidlas::fetching_policy_type, uint, uint)
|
||||
WRAP_TEMPLATE(mproduct_tn, mproduct, uint, uint, uint, uint, uint, uint, uint, atidlas::fetching_policy_type, atidlas::fetching_policy_type, uint, uint)
|
||||
WRAP_TEMPLATE(mproduct_nt, mproduct, uint, uint, uint, uint, uint, uint, uint, atidlas::fetching_policy_type, atidlas::fetching_policy_type, uint, uint)
|
||||
WRAP_TEMPLATE(mproduct_tt, mproduct, uint, uint, uint, uint, uint, uint, uint, atidlas::fetching_policy_type, atidlas::fetching_policy_type, uint, uint)
|
||||
|
||||
|
||||
}
|
||||
|
Reference in New Issue
Block a user