JIT: No longer using fallbacks for stride[0] > 1

It was pretty messy.
This commit is contained in:
Philippe Tillet
2016-04-10 16:31:29 -04:00
parent 81139e0642
commit 1e439ad5bc
20 changed files with 5232 additions and 113 deletions

View File

@@ -45,9 +45,10 @@ public:
enum Type
{
GPU = CL_DEVICE_TYPE_GPU,
CPU = CL_DEVICE_TYPE_CPU,
ACCELERATOR = CL_DEVICE_TYPE_ACCELERATOR
GPU = CL_DEVICE_TYPE_GPU,
CPU = CL_DEVICE_TYPE_CPU,
ACCELERATOR = CL_DEVICE_TYPE_ACCELERATOR,
UNKNOWN
};
enum class Vendor

View File

@@ -81,8 +81,6 @@ public:
unsigned int local_size_1;
unsigned int num_kernels;
};
protected:
static bool requires_fallback(expression_tree const & expressions);
private:
virtual std::string generate_impl(std::string const & suffix, expression_tree const & expressions, driver::Device const & device, symbolic::symbols_table const & mapping) const = 0;
public:
@@ -94,7 +92,7 @@ public:
virtual ~base();
std::string generate(std::string const & suffix, expression_tree const & expressions, driver::Device const & device);
virtual int is_invalid(expression_tree const & expressions, driver::Device const & device) const = 0;
virtual void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, runtime::execution_handler const & expressions) = 0;
virtual void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, runtime::execution_handler const & expressions) = 0;
virtual std::shared_ptr<base> clone() const = 0;
private:
fusion_policy_t fusion_policy_;

View File

@@ -46,7 +46,7 @@ public:
elementwise_1d(elementwise_1d::parameters_type const & parameters, fusion_policy_t fusion_policy = FUSE_INDEPENDENT);
elementwise_1d(unsigned int _simd_width, unsigned int _group_size, unsigned int _num_groups, fetching_policy_type _fetching_policy, fusion_policy_t fusion_policy = FUSE_INDEPENDENT);
std::vector<int_t> input_sizes(expression_tree const & expressions) const;
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, runtime::execution_handler const &);
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, runtime::execution_handler const &);
};
}

View File

@@ -49,7 +49,7 @@ public:
elementwise_2d(parameters_type const & parameters, fusion_policy_t fusion_policy = FUSE_INDEPENDENT);
elementwise_2d(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2, fetching_policy_type fetch, fusion_policy_t bind = FUSE_INDEPENDENT);
std::vector<int_t> input_sizes(expression_tree const & expressions) const;
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, runtime::execution_handler const &);
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, runtime::execution_handler const &);
};
}

View File

@@ -71,14 +71,13 @@ private:
value_scalar const &alpha, value_scalar const &beta, driver::Program const & program, std::string const & suffix, runtime::execution_options_type const & options);
std::vector<int_t> infos(expression_tree const & expressions, isaac::symbolic::preset::matrix_product::args &arguments) const;
public:
matrix_product(matrix_product::parameters_type const & parameters, bool check_bound, char A_trans, char B_trans);
matrix_product(matrix_product::parameters_type const & parameters, char A_trans, char B_trans);
std::vector<int_t> input_sizes(expression_tree const & expressions) const;
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, runtime::execution_handler const &ctr);
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, runtime::execution_handler const &ctr);
private:
const char A_trans_;
const char B_trans_;
expression_type type_;
bool check_bounds_;
};
class matrix_product_nn : public matrix_product
@@ -86,7 +85,7 @@ class matrix_product_nn : public matrix_product
public:
matrix_product_nn(unsigned int simd, int_t ls0, int_t KL, int_t ls1, int_t D
, int_t ms, int_t ks, int_t ns, fetching_policy_type Afetch , fetching_policy_type Bfetch
, int_t lfetch0, int_t lfetch1, bool check_bound = false);
, int_t lfetch0, int_t lfetch1);
};
class matrix_product_tn : public matrix_product
@@ -94,7 +93,7 @@ class matrix_product_tn : public matrix_product
public:
matrix_product_tn(unsigned int simd, int_t ls0, int_t KL, int_t ls1, int_t D
, int_t ms, int_t ks, int_t ns, fetching_policy_type Afetch , fetching_policy_type Bfetch
, int_t lfetch0, int_t lfetch1, bool check_bound = false);
, int_t lfetch0, int_t lfetch1);
};
@@ -103,7 +102,7 @@ class matrix_product_nt : public matrix_product
public:
matrix_product_nt(unsigned int simd, int_t ls0, int_t KL, int_t ls1, int_t D
, int_t ms, int_t ks, int_t ns, fetching_policy_type Afetch , fetching_policy_type Bfetch
, int_t lfetch0, int_t lfetch1, bool check_bound = false);
, int_t lfetch0, int_t lfetch1);
};
@@ -112,7 +111,7 @@ class matrix_product_tt : public matrix_product
public:
matrix_product_tt(unsigned int simd, int_t ls0, int_t KL, int_t ls1, int_t D
, int_t ms, int_t ks, int_t ns, fetching_policy_type Afetch , fetching_policy_type Bfetch
, int_t lfetch0, int_t lfetch1, bool check_bound = false);
, int_t lfetch0, int_t lfetch1);
};
}

View File

@@ -52,7 +52,7 @@ public:
reduce_1d(reduce_1d::parameters_type const & parameters, fusion_policy_t fusion_policy = FUSE_INDEPENDENT);
reduce_1d(unsigned int simd, unsigned int ls, unsigned int ng, fetching_policy_type fetch, fusion_policy_t bind = FUSE_INDEPENDENT);
std::vector<int_t> input_sizes(expression_tree const & expressions) const;
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, runtime::execution_handler const &);
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, runtime::execution_handler const &);
private:
std::vector< driver::Buffer > tmp_;
std::vector< driver::Buffer > tmpidx_;

View File

@@ -53,7 +53,7 @@ private:
std::string generate_impl(std::string const & suffix, expression_tree const &, driver::Device const & device, symbolic::symbols_table const &) const;
public:
virtual std::vector<int_t> input_sizes(expression_tree const & expressions) const;
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, runtime::execution_handler const &);
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, runtime::execution_handler const &);
private:
operation_type_family reduction_type_;
};

View File

@@ -59,7 +59,6 @@ public:
private:
templates_container templates_;
template_pointer fallback_;
std::shared_ptr<predictors::random_forest> predictor_;
std::map<std::vector<int_t>, int> hardcoded_;
driver::CommandQueue queue_;
@@ -80,8 +79,6 @@ private:
static std::map<driver::CommandQueue, map_type> cache_;
};
extern std::map<std::pair<expression_type, numeric_type>, std::shared_ptr<templates::base> > fallbacks;
}
}

View File

@@ -12,7 +12,7 @@ endif()
#Database
if(NOT ANDROID)
#Presets
foreach(VENDOR amd intel nvidia)
foreach(VENDOR unknown amd intel nvidia)
set(DATABASE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/runtime/inference/database/${VENDOR}/")
file(GLOB_RECURSE JSON_FILES "${DATABASE_PATH}/json/*.json")
CODE_TO_H(SOURCES ${JSON_FILES} VARNAME database EXTENSION "hpp" OUTPUT_DIR "${DATABASE_PATH}"

View File

@@ -43,15 +43,6 @@ namespace templates
base::parameters_type::parameters_type(unsigned int _simd_width, int_t _local_size_1, int_t _local_size_2, int_t _num_kernels) : simd_width(_simd_width), local_size_0(_local_size_1), local_size_1(_local_size_2), num_kernels(_num_kernels)
{ }
// Scans every node of the expression tree and reports whether at least one
// dense-array node has ld[0] > 1 or a strictly positive array.start.
// Returns false when no node matches (including an empty tree).
bool base::requires_fallback(expression_tree const & expression)
{
  for(expression_tree::node const & current: expression.data())
  {
    // Only dense arrays are inspected; every other node kind is skipped.
    if(current.type != DENSE_ARRAY_TYPE)
      continue;
    if(current.ld[0] > 1 || current.array.start > 0)
      return true;
  }
  return false;
}
base::base(fusion_policy_t fusion_policy) : fusion_policy_(fusion_policy)
{}

View File

@@ -134,7 +134,7 @@ std::vector<int_t> elementwise_1d::input_sizes(expression_tree const & expressio
return {max(expressions.shape())};
}
void elementwise_1d::enqueue(driver::CommandQueue &, driver::Program const & program, std::string const & suffix, base &, runtime::execution_handler const & control)
void elementwise_1d::enqueue(driver::CommandQueue &, driver::Program const & program, std::string const & suffix, runtime::execution_handler const & control)
{
expression_tree const & expressions = control.x();
//Size

View File

@@ -125,7 +125,7 @@ std::vector<int_t> elementwise_2d::input_sizes(expression_tree const & expressi
return expression.shape();
}
void elementwise_2d::enqueue(driver::CommandQueue & /*queue*/, driver::Program const & program, std::string const & suffix, base &, runtime::execution_handler const & control)
void elementwise_2d::enqueue(driver::CommandQueue & /*queue*/, driver::Program const & program, std::string const & suffix, runtime::execution_handler const & control)
{
expression_tree const & expressions = control.x();
std::string name = "elementwise_2d";

View File

@@ -132,11 +132,12 @@ matrix_product_parameters::matrix_product_parameters(unsigned int simd_width
#define VLOAD(offset, ptr) vload(p_.simd_width, sdtype, offset, ptr, "1", backend, true)
#define VLOAD_MISALIGNED(offset, ptr) vload(p_.simd_width, sdtype, offset, ptr, "1", backend, false)
#define VSTORE(value, offset, ptr) vstore(p_.simd_width, sdtype, value, offset, ptr, "1", backend)
#define ASTRIDE1 string(check_bounds_?"*Astride1":"")
#define BSTRIDE1 string(check_bounds_?"*Bstride1":"")
#define CSTRIDE1 string(check_bounds_?"*Cstride1":"")
symbolic::preset::matrix_product::args args;
infos(tree, args);
std::string ASTRIDE1 = (args.A->ld[0] > 1)?"*Astride1":"";
std::string BSTRIDE1 = (args.B->ld[0] > 1)?"*Bstride1":"";
std::string CSTRIDE1 = (args.C->ld[0] > 1)?"*Cstride1":"";
//////////////////
/// INIT
@@ -681,7 +682,7 @@ matrix_product_parameters::matrix_product_parameters(unsigned int simd_width
return {M, N, K};
}
matrix_product::matrix_product(matrix_product_parameters const & parameters, bool check_bounds, char A_trans, char B_trans) : base_impl<matrix_product, matrix_product_parameters>(parameters, FUSE_INDEPENDENT), A_trans_(A_trans), B_trans_(B_trans), check_bounds_(check_bounds)
matrix_product::matrix_product(matrix_product_parameters const & parameters, char A_trans, char B_trans) : base_impl<matrix_product, matrix_product_parameters>(parameters, FUSE_INDEPENDENT), A_trans_(A_trans), B_trans_(B_trans)
{
if(A_trans_=='N' && B_trans_=='N') type_ = MATRIX_PRODUCT_NN;
else if(A_trans_=='T' && B_trans_=='N') type_ = MATRIX_PRODUCT_TN;
@@ -696,14 +697,9 @@ matrix_product_parameters::matrix_product_parameters(unsigned int simd_width
return infos((expression_tree&)expressions, dummy);
}
void matrix_product::enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback_base, runtime::execution_handler const & control)
void matrix_product::enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, runtime::execution_handler const & control)
{
using namespace tools;
matrix_product & fallback = (matrix_product&)fallback_base;
expression_tree const & expressions = control.x();
symbolic::preset::matrix_product::args args;
std::vector<int_t> MNK = infos(expressions, args);
int_t M = MNK[0];
@@ -714,10 +710,7 @@ matrix_product_parameters::matrix_product_parameters(unsigned int simd_width
return;
//Enqueue
runtime::execution_options_type const & options = control.execution_options();
if (args.A->ld[0] > 1 || args.B->ld[0] > 1 || args.C->ld[0] > 1)
fallback.enqueue_block(queue, M, N, K, *args.A, *args.B, *args.C, args.alpha, args.beta, program, "fallback", options);
else
enqueue_block(queue, M, N, K, *args.A, *args.B, *args.C, args.alpha, args.beta, program, suffix, options);
enqueue_block(queue, M, N, K, *args.A, *args.B, *args.C, args.alpha, args.beta, program, suffix, options);
}
//
@@ -725,8 +718,8 @@ matrix_product_parameters::matrix_product_parameters(unsigned int simd_width
, int_t ls0, int_t KL, int_t ls1, int_t D
, int_t ms, int_t ks, int_t ns
, fetching_policy_type Afetch , fetching_policy_type Bfetch
, int_t lfetch0, int_t lfetch1, bool check_bound) :
matrix_product(matrix_product_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), check_bound, 'N', 'N')
, int_t lfetch0, int_t lfetch1) :
matrix_product(matrix_product_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), 'N', 'N')
{
}
@@ -735,8 +728,8 @@ matrix_product_parameters::matrix_product_parameters(unsigned int simd_width
, int_t ls0, int_t KL, int_t ls1, int_t D
, int_t ms, int_t ks, int_t ns
, fetching_policy_type Afetch , fetching_policy_type Bfetch
, int_t lfetch0, int_t lfetch1, bool check_bound) :
matrix_product(matrix_product_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), check_bound, 'T', 'N')
, int_t lfetch0, int_t lfetch1) :
matrix_product(matrix_product_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), 'T', 'N')
{ }
//
@@ -744,8 +737,8 @@ matrix_product_parameters::matrix_product_parameters(unsigned int simd_width
, int_t ls0, int_t KL, int_t ls1, int_t D
, int_t ms, int_t ks, int_t ns
, fetching_policy_type Afetch , fetching_policy_type Bfetch
, int_t lfetch0, int_t lfetch1, bool check_bound) :
matrix_product(matrix_product_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), check_bound, 'N', 'T')
, int_t lfetch0, int_t lfetch1) :
matrix_product(matrix_product_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), 'N', 'T')
{ }
//
@@ -753,8 +746,8 @@ matrix_product_parameters::matrix_product_parameters(unsigned int simd_width
, int_t ls0, int_t KL, int_t ls1, int_t D
, int_t ms, int_t ks, int_t ns
, fetching_policy_type Afetch , fetching_policy_type Bfetch
, int_t lfetch0, int_t lfetch1, bool check_bound) :
matrix_product(matrix_product_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), check_bound, 'T', 'T')
, int_t lfetch0, int_t lfetch1) :
matrix_product(matrix_product_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), 'T', 'T')
{ }
}

View File

@@ -269,7 +269,7 @@ std::vector<int_t> reduce_1d::input_sizes(expression_tree const & x) const
return {max(x[lhs].shape)};
}
void reduce_1d::enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base &, runtime::execution_handler const & control)
void reduce_1d::enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, runtime::execution_handler const & control)
{
expression_tree const & x = control.x();

View File

@@ -313,7 +313,7 @@ std::vector<int_t> reduce_2d::input_sizes(expression_tree const & tree) const
return {shape[0], shape[1]};
}
void reduce_2d::enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base &, runtime::execution_handler const & control)
void reduce_2d::enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, runtime::execution_handler const & control)
{
expression_tree const & tree = control.x();
std::vector<int_t> MN = input_sizes(tree);

View File

@@ -22,6 +22,9 @@
#include "isaac/driver/device.h"
#include "isaac/runtime/inference/profiles.h"
//Default
#include "database/unknown/unknown.hpp"
//Intel
#include "database/intel/broadwell.hpp"
@@ -45,6 +48,8 @@ namespace runtime
const profiles::presets_type profiles::presets_ =
{
//DEFAULT
DATABASE_ENTRY(UNKNOWN, UNKNOWN, UNKNOWN, database::unknown::unknown),
//INTEL
DATABASE_ENTRY(GPU, INTEL, BROADWELL, database::intel::broadwell),
//NVIDIA

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

View File

@@ -65,21 +65,19 @@ driver::Program const & profiles::value_type::init(runtime::execution_handler co
return *program;
std::string srcs;
for(unsigned int i = 0 ; i < templates_.size() ; ++i){
for(unsigned int i = 0 ; i < templates_.size() ; ++i)
srcs += templates_[i]->generate(tools::to_string(i), expression.x(), context.device());
}
srcs += fallback_->generate("fallback", expression.x(), context.device());
return cache_.add(context, pname, srcs);
}
profiles::value_type::value_type(expression_type etype, numeric_type dtype, predictors::random_forest const & predictor, std::vector< std::shared_ptr<templates::base> > const & templates, driver::CommandQueue const & queue) :
templates_(templates), fallback_(fallbacks[std::make_pair(etype, dtype)]), predictor_(new predictors::random_forest(predictor)), queue_(queue), cache_(driver::backend::programs::get(queue,etype,dtype))
templates_(templates), predictor_(new predictors::random_forest(predictor)), queue_(queue), cache_(driver::backend::programs::get(queue,etype,dtype))
{
cache_.clear();
}
profiles::value_type::value_type(expression_type etype, numeric_type dtype, templates::base const & tp, driver::CommandQueue const & queue) : templates_(1,tp.clone()), fallback_(fallbacks[std::make_pair(etype, dtype)]), queue_(queue), cache_(driver::backend::programs::get(queue,etype,dtype))
profiles::value_type::value_type(expression_type etype, numeric_type dtype, templates::base const & tp, driver::CommandQueue const & queue) : templates_(1,tp.clone()), queue_(queue), cache_(driver::backend::programs::get(queue,etype,dtype))
{
cache_.clear();
}
@@ -102,7 +100,7 @@ void profiles::value_type::execute(runtime::execution_handler const & expr)
}
std::list<driver::Event> events;
try{
templates_[i]->enqueue(queue_, program, tools::to_string(i), *fallback_, runtime::execution_handler(expr.x(), runtime::execution_options_type(0, &events)));
templates_[i]->enqueue(queue_, program, tools::to_string(i), runtime::execution_handler(expr.x(), runtime::execution_options_type(0, &events)));
queue_.synchronize();
timings[i] = 1e-9*std::accumulate(events.begin(), events.end(), 0, &time_event);
}catch(...){
@@ -115,7 +113,6 @@ void profiles::value_type::execute(runtime::execution_handler const & expr)
}
//Prediction
int label = 0;
if(expr.dispatcher_options().label>=0)
label = expr.dispatcher_options().label;
@@ -134,7 +131,7 @@ void profiles::value_type::execute(runtime::execution_handler const & expr)
if(templates_[label]->temporary_workspace(expr.x()) > MAX_TEMPORARY_WORKSPACE)
throw operation_not_supported_exception("Running this operation would require an overly large temporary.");
return templates_[label]->enqueue(queue_, program, tools::to_string(label), *fallback_, expr);
return templates_[label]->enqueue(queue_, program, tools::to_string(label), expr);
}
profiles::value_type::templates_container const & profiles::value_type::templates() const
@@ -210,30 +207,26 @@ void profiles::import(std::string const & str, driver::CommandQueue const & queu
profiles::map_type& profiles::init(driver::CommandQueue const & queue)
{
map_type & result = cache_[queue];
numeric_type dtypes[] = {CHAR_TYPE, UCHAR_TYPE, SHORT_TYPE, USHORT_TYPE, INT_TYPE, UINT_TYPE, LONG_TYPE, ULONG_TYPE, FLOAT_TYPE, DOUBLE_TYPE};
expression_type etypes[] = {ELEMENTWISE_1D, REDUCE_1D, ELEMENTWISE_2D, REDUCE_2D_ROWS, REDUCE_2D_COLS, MATRIX_PRODUCT_NN, MATRIX_PRODUCT_NT, MATRIX_PRODUCT_TN, MATRIX_PRODUCT_TT};
for(numeric_type dtype: dtypes)
for(expression_type etype: etypes)
result[std::make_pair(etype, dtype)] = std::shared_ptr<value_type>(new value_type(etype, dtype, *fallbacks[std::make_pair(etype, dtype)], queue));
map_type & map = cache_[queue];
driver::Device const & device = queue.device();
presets_type::const_iterator it = presets_.find(std::make_tuple(device.type(), device.vendor(), device.architecture()));
/*-- Device not found in database --*/
if(it==presets_.end()){
//FIXME: Handle this case
// import(presets_.at(std::make_tuple(device.type(), device.vendor(), driver::Device::Architecture::UNKNOWN)), queue);
import(presets_.at(std::make_tuple(driver::Device::Type::UNKNOWN, driver::Device::Vendor::UNKNOWN, driver::Device::Architecture::UNKNOWN)), queue);
}
else
/*-- Device found in database --*/
else{
import(it->second, queue);
}
/*-- User-provided profile --*/
std::string homepath = tools::getenv("HOME");
if(homepath.size())
{
std::string json_path = homepath + "/.isaac/devices/device0.json";
std::ifstream t(json_path);
if(!t)
return result;
return map;
std::string str;
t.seekg(0, std::ios::end);
str.reserve(t.tellg());
@@ -242,7 +235,7 @@ profiles::map_type& profiles::init(driver::CommandQueue const & queue)
import(str, queue);
}
return result;
return map;
}
profiles::map_type& profiles::get(driver::CommandQueue const & queue)
@@ -254,43 +247,12 @@ profiles::map_type& profiles::get(driver::CommandQueue const & queue)
}
void profiles::set(driver::CommandQueue const & queue, expression_type operation, numeric_type dtype, std::shared_ptr<value_type> const & profile)
{
cache_[queue][std::make_pair(operation,dtype)] = profile;
}
{ cache_[queue][std::make_pair(operation,dtype)] = profile; }
void profiles::release()
{
cache_.clear();
}
{ cache_.clear(); }
std::map<driver::CommandQueue, profiles::map_type> profiles::cache_;
///////////////////
//
std::map<std::pair<expression_type, numeric_type>, std::shared_ptr<templates::base> > init_fallback()
{
typedef std::shared_ptr<templates::base> ptr_t;
std::map<std::pair<expression_type, numeric_type>, ptr_t > res;
numeric_type types[] = {CHAR_TYPE, UCHAR_TYPE, SHORT_TYPE, USHORT_TYPE, INT_TYPE, UINT_TYPE, LONG_TYPE, ULONG_TYPE, FLOAT_TYPE, DOUBLE_TYPE};
for(auto DTYPE : types)
{
res[std::make_pair(ELEMENTWISE_1D, DTYPE)] = ptr_t (new templates::elementwise_1d(1,64,128,templates::FETCH_FROM_GLOBAL_STRIDED));
res[std::make_pair(REDUCE_1D, DTYPE)] = ptr_t(new templates::reduce_1d(1,64,128,templates::FETCH_FROM_GLOBAL_STRIDED));
res[std::make_pair(ELEMENTWISE_2D, DTYPE)] = ptr_t(new templates::elementwise_2d(1,128,1,16,32,templates::FETCH_FROM_GLOBAL_STRIDED));
res[std::make_pair(REDUCE_2D_ROWS, DTYPE)] = ptr_t(new templates::reduce_2d_rows(1, 8, 8, 4, 16, templates::FETCH_FROM_GLOBAL_STRIDED));
res[std::make_pair(REDUCE_2D_COLS, DTYPE)] = ptr_t(new templates::reduce_2d_cols(1, 8, 8, 64, 8, templates::FETCH_FROM_GLOBAL_STRIDED));
res[std::make_pair(MATRIX_PRODUCT_NN, DTYPE)] = ptr_t(new templates::matrix_product_nn(1, 8, 16, 8, 1, 8, 1, 8, templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_LOCAL, 8, 8, true));
res[std::make_pair(MATRIX_PRODUCT_TN, DTYPE)] = ptr_t(new templates::matrix_product_tn(1, 8, 16, 8, 1, 8, 1, 8, templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_LOCAL, 8, 8, true));
res[std::make_pair(MATRIX_PRODUCT_NT, DTYPE)] = ptr_t(new templates::matrix_product_nt(1, 8, 16, 8, 1, 8, 1, 8, templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_LOCAL, 8, 8, true));
res[std::make_pair(MATRIX_PRODUCT_TT, DTYPE)] = ptr_t(new templates::matrix_product_tt(1, 8, 16, 8, 1, 8, 1, 8, templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_LOCAL, 8, 8, true));
}
return res;
}
std::map<std::pair<expression_type, numeric_type>, std::shared_ptr<templates::base> > fallbacks = init_fallback();
}
}

View File

@@ -73,7 +73,7 @@ def main():
libraries += ['gnustl_shared']
#Source files
src = 'src/lib/random/rand.cpp src/lib/jit/syntax/expression/preset.cpp src/lib/jit/syntax/expression/expression.cpp src/lib/jit/syntax/expression/operations.cpp src/lib/jit/syntax/engine/macro.cpp src/lib/jit/syntax/engine/object.cpp src/lib/jit/syntax/engine/process.cpp src/lib/jit/syntax/engine/binder.cpp src/lib/jit/generation/reduce_2d.cpp src/lib/jit/generation/elementwise_2d.cpp src/lib/jit/generation/engine/stream.cpp src/lib/jit/generation/engine/keywords.cpp src/lib/jit/generation/elementwise_1d.cpp src/lib/jit/generation/reduce_1d.cpp src/lib/jit/generation/matrix_product.cpp src/lib/jit/generation/base.cpp src/lib/runtime/execute.cpp src/lib/runtime/inference/database.cpp src/lib/runtime/inference/profiles.cpp src/lib/runtime/inference/predictors/random_forest.cpp src/lib/runtime/scheduler/dag.cpp src/lib/runtime/scheduler/strategies/heft.cpp src/lib/array.cpp src/lib/value_scalar.cpp src/lib/driver/backend.cpp src/lib/driver/device.cpp src/lib/driver/kernel.cpp src/lib/driver/buffer.cpp src/lib/driver/platform.cpp src/lib/driver/check.cpp src/lib/driver/program.cpp src/lib/driver/command_queue.cpp src/lib/driver/dispatch.cpp src/lib/driver/program_cache.cpp src/lib/driver/context.cpp src/lib/driver/event.cpp src/lib/driver/ndrange.cpp src/lib/driver/handle.cpp src/lib/api/blas/clBLAS.cpp src/lib/api/blas/cublas.cpp src/lib/exception/api.cpp src/lib/exception/driver.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']]
src = 'src/lib/array.cpp src/lib/value_scalar.cpp src/lib/jit/syntax/engine/process.cpp src/lib/jit/syntax/engine/binder.cpp src/lib/jit/syntax/engine/macro.cpp src/lib/jit/syntax/engine/object.cpp src/lib/jit/syntax/expression/preset.cpp src/lib/jit/syntax/expression/operations.cpp src/lib/jit/syntax/expression/expression.cpp src/lib/jit/generation/reduce_2d.cpp src/lib/jit/generation/base.cpp src/lib/jit/generation/engine/keywords.cpp src/lib/jit/generation/engine/stream.cpp src/lib/jit/generation/matrix_product.cpp src/lib/jit/generation/elementwise_2d.cpp src/lib/jit/generation/elementwise_1d.cpp src/lib/jit/generation/reduce_1d.cpp src/lib/runtime/inference/predictors/random_forest.cpp src/lib/runtime/inference/profiles.cpp src/lib/runtime/inference/database.cpp src/lib/runtime/execute.cpp src/lib/runtime/scheduler/strategies/heft.cpp src/lib/runtime/scheduler/dag.cpp src/lib/exception/api.cpp src/lib/exception/driver.cpp src/lib/api/blas/cublas.cpp src/lib/api/blas/clBLAS.cpp src/lib/random/rand.cpp src/lib/driver/dispatch.cpp src/lib/driver/device.cpp src/lib/driver/context.cpp src/lib/driver/buffer.cpp src/lib/driver/program.cpp src/lib/driver/backend.cpp src/lib/driver/command_queue.cpp src/lib/driver/event.cpp src/lib/driver/program_cache.cpp src/lib/driver/kernel.cpp src/lib/driver/platform.cpp src/lib/driver/handle.cpp src/lib/driver/check.cpp src/lib/driver/ndrange.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']]
boostsrc = 'external/boost/libs/'
for s in ['numpy','python','smart_ptr','system','thread']:
src = src + [x for x in recursive_glob('external/boost/libs/' + s + '/src/','.cpp') if 'win32' not in x and 'pthread' not in x]