Bugfix in autotuner

This commit is contained in:
Philippe Tillet
2015-01-21 20:08:52 -05:00
parent d285bd81e0
commit 9a76be3edc
7 changed files with 191 additions and 117 deletions

View File

@@ -19,14 +19,14 @@ class array: public obj_base
public:
//1D Constructors
array(int_t size1, numeric_type dtype, cl::Context context = cl::default_context());
template<typename T>
array(std::vector<T> const & data, cl::Context context = cl::default_context());
template<typename DT>
array(std::vector<DT> const & data, cl::Context context = cl::default_context());
array(array & v, slice const & s1);
//2D Constructors
array(int_t size1, int_t size2, numeric_type dtype, cl::Context context = cl::default_context());
template<typename T>
array(int_t size1, int_t size2, std::vector<T> const & data, cl::Context context = cl::default_context());
template<typename DT>
array(int_t size1, int_t size2, std::vector<DT> const & data, cl::Context context = cl::default_context());
array(array & M, slice const & s1, slice const & s2);
//General constructor
@@ -72,6 +72,8 @@ public:
scalar operator[](int_t);
array operator[](slice const &);
array operator()(slice const &, slice const &);
array_expression T() const;
protected:
numeric_type dtype_;
@@ -113,9 +115,7 @@ public:
};
atidlas::array_expression eye(std::size_t, std::size_t, atidlas::numeric_type, cl::Context ctx = cl::default_context());
array_expression zeros(std::size_t M, std::size_t N, numeric_type dtype, cl::Context ctx = cl::default_context());
array reshape(array const &, int_t, int_t);
//copy
@@ -209,6 +209,10 @@ ATIDLAS_DECLARE_REDUCTION(max)
ATIDLAS_DECLARE_REDUCTION(min)
ATIDLAS_DECLARE_REDUCTION(argmin)
atidlas::array_expression eye(std::size_t, std::size_t, atidlas::numeric_type, cl::Context ctx = cl::default_context());
array_expression zeros(std::size_t M, std::size_t N, numeric_type dtype, cl::Context ctx = cl::default_context());
array reshape(array const &, int_t, int_t);
//
std::ostream& operator<<(std::ostream &, array const &);
std::ostream& operator<<(std::ostream & os, scalar const & s);

View File

@@ -104,6 +104,7 @@ enum operation_node_type
OPERATOR_MATRIX_ROW_TYPE,
OPERATOR_MATRIX_COLUMN_TYPE,
OPERATOR_REPEAT_TYPE,
OPERATOR_SHIFT_TYPE,
OPERATOR_VDIAG_TYPE,
OPERATOR_MATRIX_PRODUCT_NN_TYPE,

View File

@@ -19,9 +19,9 @@ array::array(int_t size1, numeric_type dtype, cl::Context context) :
context_(context), data_(context_, CL_MEM_READ_WRITE, size_of(dtype)*dsize())
{ }
template<class T>
array::array(std::vector<T> const & x, cl::Context context):
dtype_(to_numeric_type<T>::value), shape_(x.size(), 1), start_(0, 0), stride_(1, 1), ld_(shape_._1),
template<class DT>
array::array(std::vector<DT> const & x, cl::Context context):
dtype_(to_numeric_type<DT>::value), shape_(x.size(), 1), start_(0, 0), stride_(1, 1), ld_(shape_._1),
context_(context), data_(context, CL_MEM_READ_WRITE, size_of(dtype_)*dsize())
{ *this = x; }
@@ -53,9 +53,9 @@ array::array(array & M, slice const & s1, slice const & s2) : dtype_(M.dtype_),
context_(M.data_.getInfo<CL_MEM_CONTEXT>()), data_(M.data_)
{ }
template<typename T>
array::array(int_t size1, int_t size2, std::vector<T> const & data, cl::Context context)
: dtype_(to_numeric_type<T>::value),
template<typename DT>
array::array(int_t size1, int_t size2, std::vector<DT> const & data, cl::Context context)
: dtype_(to_numeric_type<DT>::value),
shape_(size1, size2), start_(0, 0), stride_(1, 1), ld_(size1),
context_(context), data_(context_, CL_MEM_READ_WRITE, size_of(dtype_)*dsize())
{
@@ -146,8 +146,8 @@ array & array::operator=(array_expression const & rhs)
return *this;
}
template<class T>
array & array::operator=(std::vector<T> const & rhs)
template<class DT>
array & array::operator=(std::vector<DT> const & rhs)
{
assert(nshape()==1);
atidlas::copy(rhs, *this);
@@ -208,6 +208,9 @@ array & array::operator/=(array const & rhs)
array & array::operator/=(array_expression const & rhs)
{ return *this = array_expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_DIV_TYPE), shape_); }
// Transpose accessor: forwards this array to atidlas::trans and returns
// the resulting array_expression.
array_expression array::T() const
{
  array const & self = *this;
  return atidlas::trans(self);
}
/*--- Indexing operators -----*/
//---------------------------------------
scalar array::operator [](int_t idx)
@@ -481,17 +484,17 @@ atidlas::array_expression zeros(std::size_t M, std::size_t N, atidlas::numeric_t
return array_expression(value_scalar(0), lhs_rhs_element(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_ADD_TYPE), ctx, dtype, size4(M, N));
}
inline size4 trans(size4 const & shape)
// Returns a shape whose first two extents are exchanged: (_1, _2) -> (_2, _1).
inline size4 flip(size4 const & shape)
{
  size4 swapped(shape._2, shape._1);
  return swapped;
}
// Multiplies two shapes extent by extent: (_1*_1, _2*_2).
inline size4 prod(size4 const & shape1, size4 const & shape2)
{
  size4 scaled(shape1._1 * shape2._1,
               shape1._2 * shape2._2);
  return scaled;
}
array_expression trans(array const & x) \
{ return array_expression(x, lhs_rhs_element(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_TRANS_TYPE), x.context(), x.dtype(), trans(x.shape())); }\
{ return array_expression(x, lhs_rhs_element(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_TRANS_TYPE), x.context(), x.dtype(), flip(x.shape())); }\
\
array_expression trans(array_expression const & x) \
{ return array_expression(x, lhs_rhs_element(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_TRANS_TYPE), trans(x.shape())); }
{ return array_expression(x, lhs_rhs_element(), op_element(OPERATOR_UNARY_TYPE_FAMILY, OPERATOR_TRANS_TYPE), flip(x.shape())); }
array_expression repmat(array const & A, int_t const & rep1, int_t const & rep2)
{

View File

@@ -3,7 +3,6 @@ from __future__ import division
import argparse, itertools, os, sys, json
import misc_tools, optimize, dataset
import pyatidlas as atd
import pyopencl as cl
import numpy as np
from numpy import random
@@ -34,7 +33,8 @@ TYPES = { 'vaxpy': {'template':atd.vaxpy,
def do_tuning(args):
device = args.device
context = atd.context(device)
context.queues.append(atd.command_queue(context, device))
if os.path.isfile(args.json_file):
json_out = json.load(open(args.json_file, 'r'))
else:
@@ -98,7 +98,7 @@ def do_tuning(args):
D = json_out[full_operation][dtypestr]
if args.method == 'simple':
print default_tuning_sizes[operation]
print 'Size : ', ','.join(map(str, default_tuning_sizes[operation]))
profiles = [execution_handler(map(int,default_tuning_sizes[operation]))]
else:
def compute_perf(x, t):
@@ -125,48 +125,48 @@ def do_tuning(args):
#Vector AXPY
if operation=='vaxpy':
def execution_handler(sizes, fname=os.devnull, parameters=None):
x = atd.empty(sizes[0], datatype)
y = atd.empty(sizes[0], datatype)
x = atd.empty(sizes[0], datatype, context=context)
y = atd.empty(sizes[0], datatype, context=context)
return execute(x + y, sizes, Template, parameters, fname)
tune(execution_handler, 1e3, 2e7, 1, (),'log', 'log')
#dot
if operation=='dot':
def execution_handler(sizes, fname=os.devnull, parameters=None):
x = atd.empty(sizes[0], datatype)
y = atd.empty(sizes[0], datatype)
x = atd.empty(sizes[0], datatype, context=context)
y = atd.empty(sizes[0], datatype, context=context)
s = atd.scalar(datatype)
return execute(atd.dot(x, y), sizes, Template, parameters, fname)
tune(execution_handler, 1e3, 2e7, 1, (),'log', 'log')
#Matrix AXPY
if operation=='maxpy':
def execution_handler(sizes, fname=os.devnull, parameters=None):
A = atd.empty(sizes, datatype)
C = atd.empty(sizes, datatype)
A = atd.empty(sizes, datatype, context=context)
C = atd.empty(sizes, datatype, context=context)
return execute(A + C, sizes, Template, parameters, fname)
tune(execution_handler, 100, 5000, 2, (),'log', 'log')
#Row-wise dot
if operation=='gemv':
for A_trans in args.gemv_layouts:
def execution_handler(sizes, fname=os.devnull, parameters=None):
Template = Template[A_trans]
A = atd.empty(sizes if A_trans=='N' else sizes[::-1], datatype)
x = atd.empty(sizes[1], datatype)
def execution_handler(sizes, fname=os.devnull, parameters=None):
A = atd.empty(sizes if A_trans=='N' else sizes[::-1], datatype, context=context)
x = atd.empty(sizes[1], datatype, context=context)
LHS = A if A_trans=='N' else A.T
return execute(device, atd.dot(LHS, x), sizes, Template, parameters, fname)
return execute(atd.dot(LHS, x), sizes, Template, parameters, fname)
tune(execution_handler, 100, 5000, 2, (A_trans,),'log', 'log')
#Matrix Product
if operation=='gemm':
for L in args.gemm_layouts:
A_trans = L[0]
B_trans = L[1]
Template = Template[(A_trans, B_trans)]
def execution_handler(sizes, fname=os.devnull, parameters=None):
Template = Template[A_trans, B_trans]
A = atd.empty((sizes[0], sizes[2]) if A_trans=='N' else (sizes[2], sizes[0]), datatype)
B = atd.empty((sizes[2], sizes[1]) if B_trans=='N' else (sizes[1], sizes[2]), datatype)
A = atd.empty((sizes[0], sizes[2]) if A_trans=='N' else (sizes[2], sizes[0]), datatype, context=context)
B = atd.empty((sizes[2], sizes[1]) if B_trans=='N' else (sizes[1], sizes[2]), datatype, context=context)
LHS = A if A_trans=='N' else A.T
RHS = B if B_trans=='N' else B.T
return execute(device, atd.dot(LHS, RHS),(A_trans,B_trans), sizes, fname, parameters)
tune(execution_handler, 100, 2000, 3,(A_trans,B_trans), 'linear')
return execute(atd.dot(LHS, RHS), sizes, Template, parameters, fname)
tune(execution_handler, 100, 2000, 3,(A_trans,B_trans), 'linear', 'linear')
json.dump(json_out, open(args.json_file,'w'))
@@ -177,25 +177,6 @@ class ArgumentsHandler:
def __init__(self):
#No action argument -> interactive tuning
if len(sys.argv)==1:
def add_input(help, default):
return raw_input(help + "[" + default + "] : ") or default
self.device = add_input('Device to tune for','0')
self.operations = add_input('Operations to tune for','vaxpy,maxpy,dot,gemv,gemm-float32')
self.gemm_layouts = add_input('GEMV Layouts', 'NN,NT,TN,TT')
self.gemv_layouts = add_input('GEMV Layouts', 'N,T')
self.json_file = add_input('JSON File', misc_tools.sanitize_string(devices[int(self.device)].name) + '.json')
self.method = add_input('Tuning type', 'simple')
if self.method == 'simple':
self.blas1_size = add_input('BLAS1 size', '10e6')
self.blas2_size = add_input('BLAS2 sizes (M,N)', '2560,2560').split(',')
self.blas3_size = add_input('BLAS3 sizes (M,N,K)', '1024,1024,1024').split(',')
else:
self.build_model = True
self.sample_size = 30
else:
#Command line arguments
parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers(dest='action')
@@ -236,12 +217,13 @@ class ArgumentsHandler:
if __name__ == "__main__":
devices = [d for platform in cl.get_platforms() for d in platform.get_devices()]
platforms = atd.get_platforms()
devices = [d for platform in platforms for d in platform.get_devices()]
print("----------------")
print("Devices available:")
print("----------------")
for (i, d) in enumerate(devices):
print 'Device', i, '|', cl.device_type.to_string(d.type), '|', d.name, 'on', d.platform.name
print 'Device', i, '|', atd.device_type_to_string(d.type), '|', d.name, 'on', d.platform.name
print("----------------")
args = ArgumentsHandler()

View File

@@ -1,6 +1,5 @@
from __future__ import division
import pyopencl
import time
import os
import sys
@@ -186,10 +185,13 @@ class OccupancyRecord:
def __init__(self, dev, threads, shared_mem=0, registers=0):
if 'advanced micro devices' in dev.vendor.lower():
vendor = dev.vendor.lower()
if any(X in vendor for X in ['advanced micro devices', 'amd']):
self.init_amd(dev, threads, shared_mem, registers)
elif 'nvidia' in dev.vendor.lower():
elif 'nvidia' in vendor:
self.init_nvidia(dev, threads, shared_mem, registers)
elif 'intel' in vendor:
self.occupancy = 100

Binary file not shown.

View File

@@ -85,13 +85,13 @@ bp::tuple get_shape(atd::array const & x)
return bp::make_tuple(x.shape()._1, x.shape()._2);
}
void set_shape(atd::array & x, bp::tuple const & t)
{
unsigned int len = bp::len(t);
atd::int_t size1 = bp::extract<atd::int_t>(t[0]);
atd::int_t size2 = len<2?1:bp::extract<atd::int_t>(t[1]);
x.reshape(size1, size2);
}
//void set_shape(atd::array & x, bp::tuple const & t)
//{
// unsigned int len = bp::len(t);
// atd::int_t size1 = bp::extract<atd::int_t>(t[0]);
// atd::int_t size2 = len<2?1:bp::extract<atd::int_t>(t[1]);
// x.reshape(size1, size2);
//}
boost::python::dict create_queues(atd::cl::queues_t queues)
{
@@ -182,6 +182,15 @@ void export_symbolic()
namespace detail
{
// Appends every element of the range [begin, end) to a fresh Python list,
// in iteration order, and returns that list.
template<class IT>
bp::list to_list(IT const & begin, IT const & end)
{
  bp::list out;
  IT cursor = begin;
  while (cursor != end)
  {
    out.append(*cursor);
    ++cursor;
  }
  return out;
}
bp::list nv_compute_capability(atd::cl::Device const & device)
{
bp::list res;
@@ -190,16 +199,23 @@ namespace detail
return res;
}
std::string vendor(atd::cl::Device const & device){
return device.getInfo<CL_DEVICE_VENDOR>();
// Collects the platforms reported by atd::cl::Platform::get and hands them
// back as a Python list.
// NOTE(review): any status returned by Platform::get is not inspected here.
bp::list get_platforms()
{
  typedef std::vector<atd::cl::Platform> platform_vec;

  platform_vec found;
  atd::cl::Platform::get(&found);

  platform_vec::iterator first = found.begin();
  platform_vec::iterator last = found.end();
  return to_list(first, last);
}
// Lists every device of the given platform (queried with CL_DEVICE_TYPE_ALL)
// as a Python list.
// NOTE(review): any status returned by getDevices is not inspected here.
bp::list get_devices(atd::cl::Platform const & platform)
{
  typedef std::vector<atd::cl::Device> device_vec;

  device_vec found;
  platform.getDevices(CL_DEVICE_TYPE_ALL, &found);

  device_vec::iterator first = found.begin();
  device_vec::iterator last = found.end();
  return to_list(first, last);
}
// Returns, by reference, the entry of atd::cl::queues indexed by the given context.
std::vector<atd::cl::CommandQueue> & get_queue(atd::cl::Context const & ctx)
{
  std::vector<atd::cl::CommandQueue> & attached = atd::cl::queues[ctx];
  return attached;
}
// Returns the device reported by the queue's CL_QUEUE_DEVICE info query.
atd::cl::Device get_device(atd::cl::CommandQueue & queue)
{
  atd::cl::Device target = queue.getInfo<CL_QUEUE_DEVICE>();
  return target;
}
atd::numeric_type extract_dtype(bp::object const & odtype)
{
std::string name = bp::extract<std::string>(odtype.attr("__class__").attr("__name__"))();
@@ -272,20 +288,50 @@ namespace detail
}
};
// Builds a Platform object from the device's CL_DEVICE_PLATFORM info query.
atd::cl::Platform get_platform(atd::cl::Device const & device)
{
  atd::cl::Platform owner(device.getInfo<CL_DEVICE_PLATFORM>());
  return owner;
}
// Free-function adapter around Device::getInfo<INFO>, so that a single info
// query can be passed around as a plain function pointer. The error
// out-parameter of getInfo is passed as NULL.
template<cl_int INFO>
typename atd::cl::detail::param_traits<atd::cl::detail::cl_device_info, INFO>::param_type
wrap_device_info(atd::cl::Device const & x)
{
  typedef typename atd::cl::detail::param_traits<atd::cl::detail::cl_device_info, INFO>::param_type info_t;
  info_t value = x.getInfo<INFO>(NULL);
  return value;
}
// Free-function adapter around Context::getInfo<INFO>, so that a single info
// query can be passed around as a plain function pointer. The error
// out-parameter of getInfo is passed as NULL.
template<cl_int INFO>
typename atd::cl::detail::param_traits<atd::cl::detail::cl_context_info, INFO>::param_type
wrap_context_info(atd::cl::Context const & x)
{
  typedef typename atd::cl::detail::param_traits<atd::cl::detail::cl_context_info, INFO>::param_type info_t;
  info_t value = x.getInfo<INFO>(NULL);
  return value;
}
// Free-function adapter around Platform::getInfo<INFO>, so that a single info
// query can be passed around as a plain function pointer. The error
// out-parameter of getInfo is passed as NULL.
template<cl_int INFO>
typename atd::cl::detail::param_traits<atd::cl::detail::cl_platform_info, INFO>::param_type
wrap_platform_info(atd::cl::Platform const & x)
{
  typedef typename atd::cl::detail::param_traits<atd::cl::detail::cl_platform_info, INFO>::param_type info_t;
  info_t value = x.getInfo<INFO>(NULL);
  return value;
}
// Free-function adapter around CommandQueue::getInfo<INFO>, so that a single
// info query can be passed around as a plain function pointer. The error
// out-parameter of getInfo is passed as NULL.
template<cl_int INFO>
typename atd::cl::detail::param_traits<atd::cl::detail::cl_command_queue_info, INFO>::param_type
wrap_command_queue_info(atd::cl::CommandQueue const & x)
{
  typedef typename atd::cl::detail::param_traits<atd::cl::detail::cl_command_queue_info, INFO>::param_type info_t;
  info_t value = x.getInfo<INFO>(NULL);
  return value;
}
// Maps an OpenCL device type to a short human-readable label.
//
// @param type  Value of a CL_DEVICE_TYPE query. This is a bitfield, so it may
//              carry several bits at once (e.g. GPU combined with DEFAULT).
// @return      "ALL" when the value is exactly CL_DEVICE_TYPE_ALL; otherwise
//              the label of the first matching bit among CPU, GPU and
//              ACCELERATOR; "UNKNOWN" when none of them is set.
//
// The previous fallback was a bare `throw;`. With no exception in flight that
// calls std::terminate, which would take down the host process for any value
// not listed here instead of reporting it.
std::string to_string(cl_device_type type)
{
  // Exact match first: CL_DEVICE_TYPE_ALL has every bit set and would
  // otherwise satisfy each of the bit tests below.
  if(type==CL_DEVICE_TYPE_ALL) return "ALL";
  // Bit tests rather than equality, so combined values still resolve.
  if(type & CL_DEVICE_TYPE_CPU) return "CPU";
  if(type & CL_DEVICE_TYPE_GPU) return "GPU";
  if(type & CL_DEVICE_TYPE_ACCELERATOR) return "ACCELERATOR";
  return "UNKNOWN";
}
}
void export_cl()
{
typedef std::vector<atd::cl::CommandQueue> queues_t;
bp::class_<queues_t>("queues")
.def("__len__", &queues_t::size)
.def("__getitem__", &bp::vector_indexing_suite<queues_t>::get_item, bp::return_internal_reference<>())
.def("__setitem__", &bp::vector_indexing_suite<queues_t>::set_item, bp::with_custodian_and_ward<1,2>())
;
.def("append", &bp::vector_indexing_suite<queues_t>::append)
bp::class_<atd::cl::Device>("device", bp::no_init)
.add_property("nv_compute_capability", &detail::nv_compute_capability)
.add_property("vendor", &detail::vendor)
;
bp::class_<atd::model_map_t>("models")
@@ -293,18 +339,50 @@ void export_cl()
.def("__setitem__", &detail::model_map_indexing::set_item, bp::with_custodian_and_ward<1,2>())
;
bp::class_<atd::cl::Context>("context", bp::no_init)
bp::enum_<cl_device_type>("device_type")
.value("CL_DEVICE_TYPE_ALL", CL_DEVICE_TYPE_ALL)
.value("CL_DEVICE_TYPE_CPU", CL_DEVICE_TYPE_CPU)
.value("CL_DEVICE_TYPE_GPU", CL_DEVICE_TYPE_GPU)
.value("CL_DEVICE_TYPE_ACCELERATOR", CL_DEVICE_TYPE_ACCELERATOR)
;
bp::def("device_type_to_string", &detail::to_string);
bp::class_<atd::cl::Platform>("platform", bp::no_init)
#define WRAP(PYNAME, NAME) .add_property(PYNAME, &detail::wrap_platform_info<NAME>)
WRAP("name", CL_PLATFORM_NAME)
#undef WRAP
.def("get_devices", &detail::get_devices)
;
bp::class_<atd::cl::Device>("device", bp::no_init)
#define WRAP(PYNAME, NAME) .add_property(PYNAME, &detail::wrap_device_info<NAME>)
.add_property("nv_compute_capability", &detail::nv_compute_capability)
.add_property("platform", &detail::get_platform)
WRAP("double_fp_config", CL_DEVICE_DOUBLE_FP_CONFIG)
WRAP("name", CL_DEVICE_NAME)
WRAP("type", CL_DEVICE_TYPE)
WRAP("vendor", CL_DEVICE_VENDOR)
#undef WRAP
;
bp::class_<atd::cl::Context>("context", bp::init<atd::cl::Device>())
#define WRAP(PYNAME, NAME) .add_property(PYNAME, &detail::wrap_context_info<NAME>)
#undef WRAP
.add_property("queues", bp::make_function(&detail::get_queue, bp::return_internal_reference<>()))
;
bp::class_<atd::cl::CommandQueue>("command_queue", bp::no_init)
.add_property("device", &detail::get_device)
bp::class_<atd::cl::CommandQueue>("command_queue", bp::init<atd::cl::Context, atd::cl::Device>())
#define WRAP(PYNAME, NAME) .add_property(PYNAME, &detail::wrap_command_queue_info<NAME>)
WRAP("device", CL_QUEUE_DEVICE)
#undef WRAP
.add_property("models", bp::make_function(&atd::get_model_map, bp::return_internal_reference<>()));
;
bp::def("synchronize", &atd::cl::synchronize);
bp::def("get_platforms", &detail::get_platforms);
}
namespace detail
@@ -446,6 +524,7 @@ void export_array()
.def(bp::init<atd::array_expression>())
.add_property("dtype", &atd::array::dtype)
.add_property("context", bp::make_function(&atd::array::context, bp::return_internal_reference<>()))
.add_property("T", &atd::array::T)
// .add_property("shape", &detail::get_shape, &detail::set_shape)
ADD_ARRAY_OPERATOR(+)
ADD_ARRAY_OPERATOR(-)
@@ -477,8 +556,8 @@ void export_array()
bp::def(#name, static_cast<atd::array_expression (*)(atd::array const &, atd::array_expression const &)>(&atd::name));\
bp::def(#name, static_cast<atd::array_expression (*)(atd::array_expression const &, atd::array_expression const &)>(&atd::name));
MAP_FUNCTION(max)
MAP_FUNCTION(min)
MAP_FUNCTION(maximum)
MAP_FUNCTION(minimum)
MAP_FUNCTION(pow)
MAP_FUNCTION(dot)
#undef MAP_FUNCTION
@@ -551,21 +630,24 @@ void export_model()
#undef __PROP
}
#define WRAP_TEMPLATE(name, ...) bp::class_<atidlas::base_impl<atidlas::name, atidlas::name::parameters_type>, bp::bases<atidlas::base>, boost::noncopyable>(#name "_base_impl", bp::no_init);\
bp::class_<atidlas::name, bp::bases<atidlas::base_impl<atidlas::name, atidlas::name::parameters_type> > >(#name, bp::init<__VA_ARGS__>())\
#define WRAP_BASE(name) bp::class_<atidlas::base_impl<atidlas::name, atidlas::name::parameters_type>, bp::bases<atidlas::base>, boost::noncopyable>(#name "_base_impl", bp::no_init);
#define WRAP_TEMPLATE(name, basename, ...) bp::class_<atidlas::name, bp::bases<atidlas::base_impl<atidlas::basename, atidlas::basename::parameters_type> > >(#name, bp::init<__VA_ARGS__>())\
.add_property("local_size_0", &atd::name::local_size_0)\
.add_property("local_size_1", &atd::name::local_size_1);
#define WRAP_SINGLE_TEMPLATE(name, ...) WRAP_BASE(name) WRAP_TEMPLATE(name, name, __VA_ARGS__)
//Vector AXPY
WRAP_TEMPLATE(vaxpy, uint, uint, uint, atidlas::fetching_policy_type)
WRAP_TEMPLATE(maxpy, uint, uint, uint, uint, uint, atidlas::fetching_policy_type)
WRAP_TEMPLATE(reduction, uint, uint, uint, atidlas::fetching_policy_type)
WRAP_TEMPLATE(mreduction_rows, uint, uint, uint, uint, atidlas::fetching_policy_type)
WRAP_TEMPLATE(mreduction_cols, uint, uint, uint, uint, atidlas::fetching_policy_type)
WRAP_TEMPLATE(mproduct_nn, uint, uint, uint, uint, uint, uint, uint, atidlas::fetching_policy_type, atidlas::fetching_policy_type, uint, uint)
WRAP_TEMPLATE(mproduct_tn, uint, uint, uint, uint, uint, uint, uint, atidlas::fetching_policy_type, atidlas::fetching_policy_type, uint, uint)
WRAP_TEMPLATE(mproduct_nt, uint, uint, uint, uint, uint, uint, uint, atidlas::fetching_policy_type, atidlas::fetching_policy_type, uint, uint)
WRAP_TEMPLATE(mproduct_tt, uint, uint, uint, uint, uint, uint, uint, atidlas::fetching_policy_type, atidlas::fetching_policy_type, uint, uint)
WRAP_SINGLE_TEMPLATE(vaxpy, uint, uint, uint, atidlas::fetching_policy_type)
WRAP_SINGLE_TEMPLATE(maxpy, uint, uint, uint, uint, uint, atidlas::fetching_policy_type)
WRAP_SINGLE_TEMPLATE(reduction, uint, uint, uint, atidlas::fetching_policy_type)
WRAP_BASE(mreduction)
WRAP_TEMPLATE(mreduction_rows, mreduction, uint, uint, uint, uint, atidlas::fetching_policy_type)
WRAP_TEMPLATE(mreduction_cols, mreduction, uint, uint, uint, uint, atidlas::fetching_policy_type)
WRAP_BASE(mproduct)
WRAP_TEMPLATE(mproduct_nn, mproduct, uint, uint, uint, uint, uint, uint, uint, atidlas::fetching_policy_type, atidlas::fetching_policy_type, uint, uint)
WRAP_TEMPLATE(mproduct_tn, mproduct, uint, uint, uint, uint, uint, uint, uint, atidlas::fetching_policy_type, atidlas::fetching_policy_type, uint, uint)
WRAP_TEMPLATE(mproduct_nt, mproduct, uint, uint, uint, uint, uint, uint, uint, atidlas::fetching_policy_type, atidlas::fetching_policy_type, uint, uint)
WRAP_TEMPLATE(mproduct_tt, mproduct, uint, uint, uint, uint, uint, uint, uint, atidlas::fetching_policy_type, atidlas::fetching_policy_type, uint, uint)
}