Files
triton/python/pyatidlas/src/_atidlas.cpp
2015-02-11 02:06:16 -05:00

703 lines
27 KiB
C++

#include <list>
#include <functional>
#include <boost/python.hpp>
#include <boost/python/suite/indexing/vector_indexing_suite.hpp>
#include <boost/python/suite/indexing/map_indexing_suite.hpp>
#include <boost/numpy.hpp>
#include <boost/numpy/dtype.hpp>
#include "atidlas/array.h"
#include "atidlas/backend/templates/vaxpy.h"
#include "atidlas/backend/templates/maxpy.h"
#include "atidlas/backend/templates/reduction.h"
#include "atidlas/backend/templates/mreduction.h"
#include "atidlas/backend/templates/mproduct.h"
#include "atidlas/model/model.h"
#define MAP_ENUM(v, ns) .value(#v, ns::v)
namespace bp = boost::python;
namespace atd = atidlas;
namespace np = boost::numpy;
namespace detail
{
atd::numeric_type to_atd_dtype(np::dtype const & T)
{
if(T==np::detail::get_int_dtype<8, false>()) return atd::CHAR_TYPE;
else if(T==np::detail::get_int_dtype<8, true>()) return atd::UCHAR_TYPE;
else if(T==np::detail::get_int_dtype<16, false>()) return atd::SHORT_TYPE;
else if(T==np::detail::get_int_dtype<16, true>()) return atd::USHORT_TYPE;
else if(T==np::detail::get_int_dtype<32, false>()) return atd::INT_TYPE;
else if(T==np::detail::get_int_dtype<32, true>()) return atd::UINT_TYPE;
else if(T==np::detail::get_int_dtype<64, false>()) return atd::LONG_TYPE;
else if(T==np::detail::get_int_dtype<64, true>()) return atd::ULONG_TYPE;
// else if(T==np::detail::get_float_dtype<16>()) return atd::HALF_TYPE;
else if(T==np::detail::get_float_dtype<32>()) return atd::FLOAT_TYPE;
else if(T==np::detail::get_float_dtype<64>()) return atd::DOUBLE_TYPE;
else{
PyErr_SetString(PyExc_TypeError, "Unrecognized datatype");
bp::throw_error_already_set();
throw; // suppress warning; throw_error_already_set() never returns but isn't marked noreturn: https://svn.boost.org/trac/boost/ticket/1482
}
}
np::dtype to_np_dtype(atd::numeric_type const & T) throw()
{
if(T==atd::CHAR_TYPE) return np::detail::get_int_dtype<8, false>();
else if(T==atd::UCHAR_TYPE) return np::detail::get_int_dtype<8, true>();
else if(T==atd::SHORT_TYPE) return np::detail::get_int_dtype<16, false>();
else if(T==atd::USHORT_TYPE) return np::detail::get_int_dtype<16, true>();
else if(T==atd::INT_TYPE) return np::detail::get_int_dtype<32, false>();
else if(T==atd::UINT_TYPE) return np::detail::get_int_dtype<32, true>();
else if(T==atd::LONG_TYPE) return np::detail::get_int_dtype<64, false>();
else if(T==atd::ULONG_TYPE) return np::detail::get_int_dtype<64, true>();
// else if(T==atd::HALF_TYPE) return np::detail::get_float_dtype<16>();
else if(T==atd::FLOAT_TYPE) return np::detail::get_float_dtype<32>();
else if(T==atd::DOUBLE_TYPE) return np::detail::get_float_dtype<64>();
else{
PyErr_SetString(PyExc_TypeError, "Unrecognized datatype");
bp::throw_error_already_set();
throw; // suppress warning; throw_error_already_set() never returns but isn't marked noreturn: https://svn.boost.org/trac/boost/ticket/1482
}
}
bp::tuple get_shape(atd::array const & x)
{
return bp::make_tuple(x.shape()._1, x.shape()._2);
}
//void set_shape(atd::array & x, bp::tuple const & t)
//{
// unsigned int len = bp::len(t);
// atd::int_t size1 = bp::extract<atd::int_t>(t[0]);
// atd::int_t size2 = len<2?1:bp::extract<atd::int_t>(t[1]);
// x.reshape(size1, size2);
//}
//boost::python::dict create_queues(atd::cl_ext::queues_t queues)
//{
// boost::python::dict dictionary;
// for (atd::cl_ext::queues_t::iterator it = queues.begin(); it != queues.end(); ++it) {
// bp::list list;
// for (atd::cl_ext::queues_t::mapped_type::iterator itt = it->second.begin(); itt != it->second.end(); ++itt)
// list.append(*itt);
// dictionary[it->first] = list;
// }
// return dictionary;
//}
template<class T>
struct datatype : public atd::value_scalar
{
datatype(T t) : atd::value_scalar(t){ }
};
template<class T>
unsigned int size(datatype<T> const & dt)
{ return atd::size_of(dt.dtype()) ; }
#define INSTANTIATE(name, clname) \
struct name : public detail::datatype<clname> { name(clname value) : detail::datatype<clname>(value){} };
INSTANTIATE(int8, cl_char)
INSTANTIATE(uint8, cl_uchar)
INSTANTIATE(int16, cl_short)
INSTANTIATE(uint16, cl_ushort)
INSTANTIATE(int32, cl_int)
INSTANTIATE(uint32, cl_uint)
INSTANTIATE(int64, cl_long)
INSTANTIATE(uint64, cl_ulong)
INSTANTIATE(float32, cl_float)
INSTANTIATE(float64, cl_double)
#undef INSTANTIATE
}
void export_core()
{
#define INSTANTIATE(name, clname) \
bp::class_<detail::datatype<clname>, bp::bases<atd::value_scalar> >(#name, bp::init<clname>());\
bp::class_<detail::name, bp::bases<detail::datatype<clname> > >(#name, bp::init<clname>())\
.add_property("size", &detail::size<clname>)\
;
INSTANTIATE(int8, cl_char)
INSTANTIATE(uint8, cl_uchar)
INSTANTIATE(int16, cl_short)
INSTANTIATE(uint16, cl_ushort)
INSTANTIATE(int32, cl_int)
INSTANTIATE(uint32, cl_uint)
INSTANTIATE(int64, cl_long)
INSTANTIATE(uint64, cl_ulong)
INSTANTIATE(float32, cl_float)
INSTANTIATE(float64, cl_double)
#undef INSTANTIATE
bp::enum_<atd::expression_type>("operations")
MAP_ENUM(VECTOR_AXPY_TYPE, atd)
MAP_ENUM(MATRIX_AXPY_TYPE, atd)
MAP_ENUM(REDUCTION_TYPE, atd)
MAP_ENUM(ROW_WISE_REDUCTION_TYPE, atd)
MAP_ENUM(COL_WISE_REDUCTION_TYPE, atd)
MAP_ENUM(VECTOR_AXPY_TYPE, atd)
MAP_ENUM(VECTOR_AXPY_TYPE, atd)
MAP_ENUM(VECTOR_AXPY_TYPE, atd)
MAP_ENUM(VECTOR_AXPY_TYPE, atd)
;
}
namespace detail
{
template<class IT>
bp::list to_list(IT const & begin, IT const & end)
{
bp::list res;
for (IT it = begin; it != end; ++it)
res.append(*it);
return res;
}
template<class T>
std::vector<T> to_vector(bp::list const & list)
{
std::size_t len = bp::len(list);
std::vector<T> res; res.reserve(len);
for(int i = 0 ; i < len ; ++i)
res.push_back(boost::python::extract<T>(list[i]));
return res;
}
bp::list nv_compute_capability(cl::Device const & device)
{
bp::list res;
cl_uint cmin, cmaj;
clGetDeviceInfo(device(), CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, sizeof(cl_uint), (cl_uint*)&cmaj, NULL);
clGetDeviceInfo(device(), CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, sizeof(cl_uint), (cl_uint*)&cmin, NULL);
res.append(cmaj);
res.append(cmin);
return res;
}
bp::list get_platforms()
{
std::vector<cl::Platform> platforms;
cl::Platform::get(&platforms);
return to_list(platforms.begin(), platforms.end());
}
bp::list get_devices(cl::Platform const & platform)
{
std::vector<cl::Device> devices;
platform.getDevices(CL_DEVICE_TYPE_ALL, &devices);
return to_list(devices.begin(), devices.end());
}
atd::numeric_type extract_dtype(bp::object const & odtype)
{
std::string name = bp::extract<std::string>(odtype.attr("__class__").attr("__name__"))();
if(name=="class")
name = bp::extract<std::string>(odtype.attr("__name__"))();
else
name = bp::extract<std::string>(odtype.attr("__class__").attr("__name__"))();
if(name=="int8") return atd::CHAR_TYPE;
else if(name=="uint8") return atd::UCHAR_TYPE;
else if(name=="int16") return atd::SHORT_TYPE;
else if(name=="uint16") return atd::USHORT_TYPE;
else if(name=="int32") return atd::INT_TYPE;
else if(name=="uint32") return atd::UINT_TYPE;
else if(name=="int64") return atd::LONG_TYPE;
else if(name=="uint64") return atd::ULONG_TYPE;
else if(name=="float32") return atd::FLOAT_TYPE;
else if(name=="float64") return atd::DOUBLE_TYPE;
else
{
PyErr_SetString(PyExc_TypeError, "Data type not understood");
bp::throw_error_already_set();
throw;
}
}
atd::expression_type extract_template_type(bp::object const & odtype)
{
std::string name = bp::extract<std::string>(odtype.attr("__class__").attr("__name__"))();
if(name=="class")
name = bp::extract<std::string>(odtype.attr("__name__"))();
else
name = bp::extract<std::string>(odtype.attr("__class__").attr("__name__"))();
if(name=="vaxpy") return atd::VECTOR_AXPY_TYPE;
else if(name=="maxpy") return atd::MATRIX_AXPY_TYPE;
else if(name=="reduction") return atd::REDUCTION_TYPE;
else if(name=="mreduction_rows") return atd::ROW_WISE_REDUCTION_TYPE;
else if(name=="mreduction_cols") return atd::COL_WISE_REDUCTION_TYPE;
else if(name=="mproduct_nn") return atd::MATRIX_PRODUCT_NN_TYPE;
else if(name=="mproduct_tn") return atd::MATRIX_PRODUCT_TN_TYPE;
else if(name=="mproduct_nt") return atd::MATRIX_PRODUCT_NT_TYPE;
else if(name=="mproduct_tt") return atd::MATRIX_PRODUCT_TT_TYPE;
else
{
PyErr_SetString(PyExc_TypeError, "Template type not understood");
bp::throw_error_already_set();
throw;
}
}
struct model_map_indexing
{
static atd::model& get_item(atd::model_map_t& container, bp::tuple i_)
{
atd::expression_type expression = extract_template_type(i_[0]);
atd::numeric_type dtype = extract_dtype(i_[1]);
atd::model_map_t::iterator i = container.find(std::make_pair(expression, dtype));
if (i == container.end())
{
PyErr_SetString(PyExc_KeyError, "Invalid key");
bp::throw_error_already_set();
}
return *i->second;
}
static void set_item(atd::model_map_t& container, bp::tuple i_, atd::model const & v)
{
atd::expression_type expression = extract_template_type(i_[0]);
atd::numeric_type dtype = extract_dtype(i_[1]);
container[std::make_pair(expression, dtype)].reset(new atd::model(v));
}
};
cl::Platform get_platform(cl::Device const & device)
{ return cl::Platform(device.getInfo<CL_DEVICE_PLATFORM>()); }
template<cl_int INFO>
typename cl::detail::param_traits<cl::detail::cl_device_info, INFO>::param_type
wrap_device_info(cl::Device const & x)
{ return x.getInfo<INFO>(NULL); }
template<cl_int INFO>
typename cl::detail::param_traits<cl::detail::cl_context_info, INFO>::param_type
wrap_context_info(cl::Context const & x)
{ return x.getInfo<INFO>(NULL); }
template<cl_int INFO>
typename cl::detail::param_traits<cl::detail::cl_platform_info, INFO>::param_type
wrap_platform_info(cl::Platform const & x)
{ return x.getInfo<INFO>(NULL); }
template<cl_int INFO>
typename cl::detail::param_traits<cl::detail::cl_command_queue_info, INFO>::param_type
wrap_command_queue_info(cl::CommandQueue const & x)
{ return x.getInfo<INFO>(NULL); }
template<cl_int INFO>
typename cl::detail::param_traits<cl::detail::cl_profiling_info, INFO>::param_type
wrap_profiling_info(cl::Event const & x)
{ return x.getProfilingInfo<INFO>(NULL); }
std::string to_string(cl_device_type type)
{
if(type==CL_DEVICE_TYPE_ALL) return "ALL";
if(type==CL_DEVICE_TYPE_CPU) return "CPU";
if(type==CL_DEVICE_TYPE_GPU) return "GPU";
if(type==CL_DEVICE_TYPE_ACCELERATOR) return "ACCELERATOR";
throw;
}
boost::shared_ptr<cl::Context> make_context(cl::Device const & dev)
{ return boost::shared_ptr<cl::Context>(new cl::Context(std::vector<cl::Device>(1, dev))); }
bp::tuple flush(atd::array_expression const & expression, unsigned int queue_id, bp::list dependencies, bool tune, int label, std::string const & program_name, bool force_recompile)
{
std::list<cl::Event> events;
atd::operation_cache cache;
std::vector<cl::Event> cdependencies = to_vector<cl::Event>(dependencies);
boost::shared_ptr<atd::array> parray(new atd::array(atd::control(expression, atd::execution_options_type(queue_id, &events, &cache, &cdependencies),
atd::dispatcher_options_type(tune, label), atd::compilation_options_type(program_name, force_recompile))));
return bp::make_tuple(parray, to_list(events.begin(), events.end()), cache);
}
}
struct state_type{ };
state_type state;
void export_cl()
{
typedef std::vector<cl::CommandQueue> queues_t;
bp::class_<queues_t>("queues")
.def("__len__", &queues_t::size)
.def("__getitem__", &bp::vector_indexing_suite<queues_t>::get_item, bp::return_internal_reference<>())
.def("__setitem__", &bp::vector_indexing_suite<queues_t>::set_item, bp::with_custodian_and_ward<1,2>())
.def("append", &bp::vector_indexing_suite<queues_t>::append)
;
bp::class_<atd::model_map_t>("models")
.def("__getitem__", &detail::model_map_indexing::get_item, bp::return_internal_reference<>())
.def("__setitem__", &detail::model_map_indexing::set_item, bp::with_custodian_and_ward<1,2>())
;
bp::enum_<cl_device_type>("device_type")
.value("CL_DEVICE_TYPE_ALL", CL_DEVICE_TYPE_ALL)
.value("CL_DEVICE_TYPE_CPU", CL_DEVICE_TYPE_CPU)
.value("CL_DEVICE_TYPE_GPU", CL_DEVICE_TYPE_GPU)
.value("CL_DEVICE_TYPE_ACCELERATOR", CL_DEVICE_TYPE_ACCELERATOR)
;
bp::def("device_type_to_string", &detail::to_string);
bp::class_<cl::Platform>("platform", bp::no_init)
#define WRAP(PYNAME, NAME) .add_property(PYNAME, &detail::wrap_platform_info<NAME>)
WRAP("name", CL_PLATFORM_NAME)
#undef WRAP
.def("get_devices", &detail::get_devices)
;
bp::class_<cl::Device>("device", bp::no_init)
#define WRAP(PYNAME, NAME) .add_property(PYNAME, &detail::wrap_device_info<NAME>)
.add_property("nv_compute_capability", &detail::nv_compute_capability)
.add_property("platform", &detail::get_platform)
WRAP("double_fp_config", CL_DEVICE_DOUBLE_FP_CONFIG)
WRAP("name", CL_DEVICE_NAME)
WRAP("type", CL_DEVICE_TYPE)
WRAP("vendor", CL_DEVICE_VENDOR)
#undef WRAP
;
bp::class_<cl::Context>("context", bp::no_init)
.def("__init__", bp::make_constructor(&detail::make_context))
#define WRAP(PYNAME, NAME) .add_property(PYNAME, &detail::wrap_context_info<NAME>)
#undef WRAP
.add_property("queues", bp::make_function(static_cast<std::vector<cl::CommandQueue> & (*)(const cl::Context&)>( [](const cl::Context & ctx) -> std::vector<cl::CommandQueue> & { return atd::cl_ext::queues[ctx]; }) , bp::return_internal_reference<>()))
;
bp::class_<cl::CommandQueue>("command_queue", bp::init<cl::Context, cl::Device>())
#define WRAP(PYNAME, NAME) .add_property(PYNAME, &detail::wrap_command_queue_info<NAME>)
WRAP("device", CL_QUEUE_DEVICE)
#undef WRAP
.add_property("models", bp::make_function(&atd::get_model_map, bp::return_internal_reference<>()));
;
bp::class_<cl::Event>("event")
#define WRAP(PYNAME, NAME) .add_property(PYNAME, &detail::wrap_profiling_info<NAME>)
WRAP("start", CL_PROFILING_COMMAND_START)
WRAP("submit", CL_PROFILING_COMMAND_SUBMIT)
WRAP("end", CL_PROFILING_COMMAND_END)
;
bp::class_<atd::operation_cache>("operation_cache", bp::no_init)
.def("enqueue", &atd::operation_cache::enqueue)
;
bp::def("synchronize", &atd::cl_ext::synchronize);
bp::def("get_platforms", &detail::get_platforms);
bp::def("flush", &detail::flush, (bp::arg("expression"), bp::arg("queue_id") = 0, bp::arg("dependencies")=bp::list(), bp::arg("tune") = false, bp::arg("label")=-1, bp::arg("program_name")="", bp::arg("recompile") = false));
bp::class_<state_type>("state_type")
.def_readwrite("queue_properties",&atd::cl_ext::queue_properties)
;
bp::scope().attr("state") = bp::object(bp::ptr(&state));
bp::scope().attr("CL_QUEUE_PROFILING_ENABLE") = CL_QUEUE_PROFILING_ENABLE;
bp::scope().attr("CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE") = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
}
namespace detail
{
boost::shared_ptr<atd::array>
ndarray_to_atdarray(const np::ndarray& array, const cl::Context& ctx)
{
int d = array.get_nd();
if (d > 2) {
PyErr_SetString(PyExc_TypeError, "Only 1-D and 2-D arrays are supported!");
bp::throw_error_already_set();
}
atd::numeric_type dtype = to_atd_dtype(array.get_dtype());
atd::int_t size = (atd::int_t)array.shape(0);
atd::array* v = new atd::array(size, dtype, ctx);
void* data = (void*)array.get_data();
atd::copy(data, *v);
return boost::shared_ptr<atd::array>(v);
}
boost::shared_ptr<atd::array> create_array(bp::object const & obj, bp::object odtype, cl::Context context)
{
return ndarray_to_atdarray(np::from_object(obj, to_np_dtype(extract_dtype(odtype))), context);
}
boost::shared_ptr<atd::array> create_empty_array(bp::object sizes, bp::object odtype, cl::Context context)
{
typedef boost::shared_ptr<atd::array> result_type;
std::size_t len;
int size1;
int size2;
try{
len = bp::len(sizes);
size1 = bp::extract<int>(sizes[0])();
size2 = bp::extract<int>(sizes[1])();
}catch(bp::error_already_set const &){
PyErr_Clear();
len = 1;
size1 = bp::extract<int>(sizes)();
}
atd::numeric_type dtype = extract_dtype(odtype);
if(len < 1 || len > 2)
{
PyErr_SetString(PyExc_TypeError, "Only 1-D and 2-D arrays are supported!");
bp::throw_error_already_set();
}
if(len==1)
return result_type(new atd::array(size1, dtype, context));
return result_type(new atd::array(size1, size2, dtype, context));
}
std::string type_name(bp::object const & obj)
{
std::string name = bp::extract<std::string>(obj.attr("__class__").attr("__name__"))();
if(name=="class")
return bp::extract<std::string>(obj.attr("__name__"))();
else
return bp::extract<std::string>(obj.attr("__class__").attr("__name__"))();
}
boost::shared_ptr<atd::scalar> construct_scalar(bp::object obj, cl::Context const & context)
{
typedef boost::shared_ptr<atd::scalar> result_type;
std::string name = type_name(obj);
if(name=="int") return result_type(new atd::scalar(bp::extract<int>(obj)(), context));
else if(name=="float") return result_type(new atd::scalar(bp::extract<double>(obj)(), context));
else if(name=="long") return result_type(new atd::scalar(bp::extract<long>(obj)(), context));
else if(name=="int") return result_type(new atd::scalar(bp::extract<int>(obj)(), context));
else if(name=="int8") return result_type(new atd::scalar(atd::CHAR_TYPE, context));
else if(name=="uint8") return result_type(new atd::scalar(atd::UCHAR_TYPE, context));
else if(name=="int16") return result_type(new atd::scalar(atd::SHORT_TYPE, context));
else if(name=="uint16") return result_type(new atd::scalar(atd::USHORT_TYPE, context));
else if(name=="int32") return result_type(new atd::scalar(atd::INT_TYPE, context));
else if(name=="uint32") return result_type(new atd::scalar(atd::UINT_TYPE, context));
else if(name=="int64") return result_type(new atd::scalar(atd::LONG_TYPE, context));
else if(name=="uint64") return result_type(new atd::scalar(atd::ULONG_TYPE, context));
else if(name=="float32") return result_type(new atd::scalar(atd::FLOAT_TYPE, context));
else if(name=="float64") return result_type(new atd::scalar(atd::DOUBLE_TYPE, context));
else{
PyErr_SetString(PyExc_TypeError, "Data type not understood");
bp::throw_error_already_set();
throw;
}
}
}
void export_array()
{
#define ADD_SCALAR_HANDLING(OP)\
.def(bp::self OP int())\
.def(bp::self OP long())\
.def(bp::self OP double())\
.def(bp::self OP bp::other<atd::value_scalar>())\
.def(int() OP bp::self)\
.def(long() OP bp::self)\
.def(double() OP bp::self)\
.def(bp::other<atd::value_scalar>() OP bp::self)
#define ADD_ARRAY_OPERATOR(OP)\
.def(bp::self OP bp::self)\
ADD_SCALAR_HANDLING(OP)
bp::class_<atd::expressions_tuple>
("array_expression_container", bp::init<atd::array_expression const &>())
;
bp::class_<atd::array_expression >("array_expression", bp::no_init)
ADD_ARRAY_OPERATOR(+)
ADD_ARRAY_OPERATOR(-)
ADD_ARRAY_OPERATOR(*)
ADD_ARRAY_OPERATOR(/)
ADD_ARRAY_OPERATOR(>)
ADD_ARRAY_OPERATOR(>=)
ADD_ARRAY_OPERATOR(<)
ADD_ARRAY_OPERATOR(<=)
ADD_ARRAY_OPERATOR(==)
ADD_ARRAY_OPERATOR(!=)
.add_property("context", bp::make_function(&atd::array_expression::context, bp::return_internal_reference<>()))
.def(bp::self_ns::abs(bp::self))
// .def(bp::self_ns::pow(bp::self))
;
#undef ADD_ARRAY_OPERATOR
#define ADD_ARRAY_OPERATOR(OP) \
.def(bp::self OP bp::self)\
.def(bp::self OP bp::other<atd::array_expression>())\
.def(bp::other<atd::array_expression>() OP bp::self) \
ADD_SCALAR_HANDLING(OP)
bp::class_<atd::array,
boost::shared_ptr<atd::array> >
( "array", bp::no_init)
.def("__init__", bp::make_constructor(detail::create_array, bp::default_call_policies(), (bp::arg("obj"), bp::arg("dtype") = bp::scope().attr("float32"), bp::arg("context")=atd::cl_ext::default_context())))
.def(bp::init<atd::array_expression>())
.add_property("dtype", &atd::array::dtype)
.add_property("context", bp::make_function(&atd::array::context, bp::return_internal_reference<>()))
.add_property("T", &atd::array::T)
// .add_property("shape", &detail::get_shape, &detail::set_shape)
ADD_ARRAY_OPERATOR(+)
ADD_ARRAY_OPERATOR(-)
ADD_ARRAY_OPERATOR(*)
ADD_ARRAY_OPERATOR(/)
ADD_ARRAY_OPERATOR(>)
ADD_ARRAY_OPERATOR(>=)
ADD_ARRAY_OPERATOR(<)
ADD_ARRAY_OPERATOR(<=)
ADD_ARRAY_OPERATOR(==)
ADD_ARRAY_OPERATOR(!=)
.def(bp::self_ns::abs(bp::self))
// .def(bp::self_ns::pow(bp::self))
.def(bp::self_ns::str(bp::self_ns::self))
;
bp::class_<atd::scalar, bp::bases<atd::array> >
("scalar", bp::no_init)
.def("__init__", bp::make_constructor(detail::construct_scalar, bp::default_call_policies(), (bp::arg(""), bp::arg("context")=atd::cl_ext::default_context())))
;
//Other numpy-like initializers
bp::def("empty", &detail::create_empty_array, (bp::arg("shape"), bp::arg("dtype") = bp::scope().attr("float32"), bp::arg("context")=atd::cl_ext::default_context()));
//Binary
#define MAP_FUNCTION(name) \
bp::def(#name, static_cast<atd::array_expression (*)(atd::array const &, atd::array const &)>(&atd::name));\
bp::def(#name, static_cast<atd::array_expression (*)(atd::array_expression const &, atd::array const &)>(&atd::name));\
bp::def(#name, static_cast<atd::array_expression (*)(atd::array const &, atd::array_expression const &)>(&atd::name));\
bp::def(#name, static_cast<atd::array_expression (*)(atd::array_expression const &, atd::array_expression const &)>(&atd::name));
MAP_FUNCTION(maximum)
MAP_FUNCTION(minimum)
MAP_FUNCTION(pow)
MAP_FUNCTION(dot)
#undef MAP_FUNCTION
//Unary
#define MAP_FUNCTION(name) \
bp::def(#name, static_cast<atd::array_expression (*)(atd::array const &)>(&atd::name));\
bp::def(#name, static_cast<atd::array_expression (*)(atd::array_expression const &)>(&atd::name));
MAP_FUNCTION(abs)
MAP_FUNCTION(acos)
MAP_FUNCTION(asin)
MAP_FUNCTION(atan)
MAP_FUNCTION(ceil)
MAP_FUNCTION(cos)
MAP_FUNCTION(cosh)
MAP_FUNCTION(exp)
MAP_FUNCTION(floor)
MAP_FUNCTION(log)
MAP_FUNCTION(log10)
MAP_FUNCTION(sin)
MAP_FUNCTION(sinh)
MAP_FUNCTION(sqrt)
MAP_FUNCTION(tan)
MAP_FUNCTION(tanh)
#undef MAP_FUNCTION
/*--- Reduction operators----*/
//---------------------------------------
#define MAP_FUNCTION(name) \
bp::def(#name, static_cast<atd::array_expression (*)(atd::array const &, atd::int_t)>(&atd::name));\
bp::def(#name, static_cast<atd::array_expression (*)(atd::array_expression const &, atd::int_t)>(&atd::name));
MAP_FUNCTION(sum)
MAP_FUNCTION(max)
MAP_FUNCTION(min)
MAP_FUNCTION(argmax)
MAP_FUNCTION(argmin)
#undef MAP_FUNCTION
}
void export_scalar()
{
bp::class_<atd::value_scalar>("value_scalar", bp::no_init)
.add_property("dtype", &atd::value_scalar::dtype);
}
void export_model()
{
bp::class_<atidlas::model>("model", bp::init<atd::base const &, cl::CommandQueue&>())
.def("execute", &atd::model::execute);
bp::enum_<atidlas::fetching_policy_type>
("fetching_policy_type")
.value("FETCH_FROM_LOCAL", atd::FETCH_FROM_LOCAL)
.value("FETCH_FROM_GLOBAL_STRIDED", atd::FETCH_FROM_GLOBAL_STRIDED)
.value("FETCH_FROM_GLOBAL_CONTIGUOUS", atd::FETCH_FROM_GLOBAL_CONTIGUOUS)
;
//Base
{
#define __PROP(name) .def_readonly(#name, &atidlas::base::parameters_type::name)
bp::class_<atidlas::base, boost::noncopyable>("base", bp::no_init)
.def("lmem_usage", &atidlas::base::lmem_usage)
.def("registers_usage", &atidlas::base::registers_usage)
.def("check_invalid", &atidlas::base::check_invalid)
;
#undef __PROP
}
#define WRAP_BASE(name) bp::class_<atidlas::base_impl<atidlas::name, atidlas::name::parameters_type>, bp::bases<atidlas::base>, boost::noncopyable>(#name "_base_impl", bp::no_init);
#define WRAP_TEMPLATE(name, basename, ...) bp::class_<atidlas::name, bp::bases<atidlas::base_impl<atidlas::basename, atidlas::basename::parameters_type> > >(#name, bp::init<__VA_ARGS__>())\
.add_property("local_size_0", &atd::name::local_size_0)\
.add_property("local_size_1", &atd::name::local_size_1);
#define WRAP_SINGLE_TEMPLATE(name, ...) WRAP_BASE(name) WRAP_TEMPLATE(name, name, __VA_ARGS__)
//Vector AXPY
WRAP_SINGLE_TEMPLATE(vaxpy, uint, uint, uint, atidlas::fetching_policy_type)
WRAP_SINGLE_TEMPLATE(maxpy, uint, uint, uint, uint, uint, atidlas::fetching_policy_type)
WRAP_SINGLE_TEMPLATE(reduction, uint, uint, uint, atidlas::fetching_policy_type)
WRAP_BASE(mreduction)
WRAP_TEMPLATE(mreduction_rows, mreduction, uint, uint, uint, uint, uint, atidlas::fetching_policy_type)
WRAP_TEMPLATE(mreduction_cols, mreduction, uint, uint, uint, uint, uint, atidlas::fetching_policy_type)
WRAP_BASE(mproduct)
WRAP_TEMPLATE(mproduct_nn, mproduct, uint, uint, uint, uint, uint, uint, uint, atidlas::fetching_policy_type, atidlas::fetching_policy_type, uint, uint)
WRAP_TEMPLATE(mproduct_tn, mproduct, uint, uint, uint, uint, uint, uint, uint, atidlas::fetching_policy_type, atidlas::fetching_policy_type, uint, uint)
WRAP_TEMPLATE(mproduct_nt, mproduct, uint, uint, uint, uint, uint, uint, uint, atidlas::fetching_policy_type, atidlas::fetching_policy_type, uint, uint)
WRAP_TEMPLATE(mproduct_tt, mproduct, uint, uint, uint, uint, uint, uint, uint, atidlas::fetching_policy_type, atidlas::fetching_policy_type, uint, uint)
}
BOOST_PYTHON_MODULE(_atidlas)
{
Py_Initialize();
np::initialize();
// specify that this module is actually a package
bp::object package = bp::scope();
package.attr("__path__") = "_atidlas";
export_scalar();
export_core();
export_cl();
export_model();
export_array();
}