JIT: No longer using fallbacks for stride[0] > 1
It was pretty messy.
This commit is contained in:
@@ -45,9 +45,10 @@ public:
|
||||
|
||||
enum Type
|
||||
{
|
||||
GPU = CL_DEVICE_TYPE_GPU,
|
||||
CPU = CL_DEVICE_TYPE_CPU,
|
||||
ACCELERATOR = CL_DEVICE_TYPE_ACCELERATOR
|
||||
GPU = CL_DEVICE_TYPE_GPU,
|
||||
CPU = CL_DEVICE_TYPE_CPU,
|
||||
ACCELERATOR = CL_DEVICE_TYPE_ACCELERATOR,
|
||||
UNKNOWN
|
||||
};
|
||||
|
||||
enum class Vendor
|
||||
|
@@ -81,8 +81,6 @@ public:
|
||||
unsigned int local_size_1;
|
||||
unsigned int num_kernels;
|
||||
};
|
||||
protected:
|
||||
static bool requires_fallback(expression_tree const & expressions);
|
||||
private:
|
||||
virtual std::string generate_impl(std::string const & suffix, expression_tree const & expressions, driver::Device const & device, symbolic::symbols_table const & mapping) const = 0;
|
||||
public:
|
||||
@@ -94,7 +92,7 @@ public:
|
||||
virtual ~base();
|
||||
std::string generate(std::string const & suffix, expression_tree const & expressions, driver::Device const & device);
|
||||
virtual int is_invalid(expression_tree const & expressions, driver::Device const & device) const = 0;
|
||||
virtual void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, runtime::execution_handler const & expressions) = 0;
|
||||
virtual void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, runtime::execution_handler const & expressions) = 0;
|
||||
virtual std::shared_ptr<base> clone() const = 0;
|
||||
private:
|
||||
fusion_policy_t fusion_policy_;
|
||||
|
@@ -46,7 +46,7 @@ public:
|
||||
elementwise_1d(elementwise_1d::parameters_type const & parameters, fusion_policy_t fusion_policy = FUSE_INDEPENDENT);
|
||||
elementwise_1d(unsigned int _simd_width, unsigned int _group_size, unsigned int _num_groups, fetching_policy_type _fetching_policy, fusion_policy_t fusion_policy = FUSE_INDEPENDENT);
|
||||
std::vector<int_t> input_sizes(expression_tree const & expressions) const;
|
||||
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, runtime::execution_handler const &);
|
||||
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, runtime::execution_handler const &);
|
||||
};
|
||||
|
||||
}
|
||||
|
@@ -49,7 +49,7 @@ public:
|
||||
elementwise_2d(parameters_type const & parameters, fusion_policy_t fusion_policy = FUSE_INDEPENDENT);
|
||||
elementwise_2d(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2, fetching_policy_type fetch, fusion_policy_t bind = FUSE_INDEPENDENT);
|
||||
std::vector<int_t> input_sizes(expression_tree const & expressions) const;
|
||||
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, runtime::execution_handler const &);
|
||||
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, runtime::execution_handler const &);
|
||||
};
|
||||
|
||||
}
|
||||
|
@@ -71,14 +71,13 @@ private:
|
||||
value_scalar const &alpha, value_scalar const &beta, driver::Program const & program, std::string const & suffix, runtime::execution_options_type const & options);
|
||||
std::vector<int_t> infos(expression_tree const & expressions, isaac::symbolic::preset::matrix_product::args &arguments) const;
|
||||
public:
|
||||
matrix_product(matrix_product::parameters_type const & parameters, bool check_bound, char A_trans, char B_trans);
|
||||
matrix_product(matrix_product::parameters_type const & parameters, char A_trans, char B_trans);
|
||||
std::vector<int_t> input_sizes(expression_tree const & expressions) const;
|
||||
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, runtime::execution_handler const &ctr);
|
||||
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, runtime::execution_handler const &ctr);
|
||||
private:
|
||||
const char A_trans_;
|
||||
const char B_trans_;
|
||||
expression_type type_;
|
||||
bool check_bounds_;
|
||||
};
|
||||
|
||||
class matrix_product_nn : public matrix_product
|
||||
@@ -86,7 +85,7 @@ class matrix_product_nn : public matrix_product
|
||||
public:
|
||||
matrix_product_nn(unsigned int simd, int_t ls0, int_t KL, int_t ls1, int_t D
|
||||
, int_t ms, int_t ks, int_t ns, fetching_policy_type Afetch , fetching_policy_type Bfetch
|
||||
, int_t lfetch0, int_t lfetch1, bool check_bound = false);
|
||||
, int_t lfetch0, int_t lfetch1);
|
||||
};
|
||||
|
||||
class matrix_product_tn : public matrix_product
|
||||
@@ -94,7 +93,7 @@ class matrix_product_tn : public matrix_product
|
||||
public:
|
||||
matrix_product_tn(unsigned int simd, int_t ls0, int_t KL, int_t ls1, int_t D
|
||||
, int_t ms, int_t ks, int_t ns, fetching_policy_type Afetch , fetching_policy_type Bfetch
|
||||
, int_t lfetch0, int_t lfetch1, bool check_bound = false);
|
||||
, int_t lfetch0, int_t lfetch1);
|
||||
};
|
||||
|
||||
|
||||
@@ -103,7 +102,7 @@ class matrix_product_nt : public matrix_product
|
||||
public:
|
||||
matrix_product_nt(unsigned int simd, int_t ls0, int_t KL, int_t ls1, int_t D
|
||||
, int_t ms, int_t ks, int_t ns, fetching_policy_type Afetch , fetching_policy_type Bfetch
|
||||
, int_t lfetch0, int_t lfetch1, bool check_bound = false);
|
||||
, int_t lfetch0, int_t lfetch1);
|
||||
};
|
||||
|
||||
|
||||
@@ -112,7 +111,7 @@ class matrix_product_tt : public matrix_product
|
||||
public:
|
||||
matrix_product_tt(unsigned int simd, int_t ls0, int_t KL, int_t ls1, int_t D
|
||||
, int_t ms, int_t ks, int_t ns, fetching_policy_type Afetch , fetching_policy_type Bfetch
|
||||
, int_t lfetch0, int_t lfetch1, bool check_bound = false);
|
||||
, int_t lfetch0, int_t lfetch1);
|
||||
};
|
||||
|
||||
}
|
||||
|
@@ -52,7 +52,7 @@ public:
|
||||
reduce_1d(reduce_1d::parameters_type const & parameters, fusion_policy_t fusion_policy = FUSE_INDEPENDENT);
|
||||
reduce_1d(unsigned int simd, unsigned int ls, unsigned int ng, fetching_policy_type fetch, fusion_policy_t bind = FUSE_INDEPENDENT);
|
||||
std::vector<int_t> input_sizes(expression_tree const & expressions) const;
|
||||
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, runtime::execution_handler const &);
|
||||
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, runtime::execution_handler const &);
|
||||
private:
|
||||
std::vector< driver::Buffer > tmp_;
|
||||
std::vector< driver::Buffer > tmpidx_;
|
||||
|
@@ -53,7 +53,7 @@ private:
|
||||
std::string generate_impl(std::string const & suffix, expression_tree const &, driver::Device const & device, symbolic::symbols_table const &) const;
|
||||
public:
|
||||
virtual std::vector<int_t> input_sizes(expression_tree const & expressions) const;
|
||||
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, runtime::execution_handler const &);
|
||||
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, runtime::execution_handler const &);
|
||||
private:
|
||||
operation_type_family reduction_type_;
|
||||
};
|
||||
|
@@ -59,7 +59,6 @@ public:
|
||||
|
||||
private:
|
||||
templates_container templates_;
|
||||
template_pointer fallback_;
|
||||
std::shared_ptr<predictors::random_forest> predictor_;
|
||||
std::map<std::vector<int_t>, int> hardcoded_;
|
||||
driver::CommandQueue queue_;
|
||||
@@ -80,8 +79,6 @@ private:
|
||||
static std::map<driver::CommandQueue, map_type> cache_;
|
||||
};
|
||||
|
||||
extern std::map<std::pair<expression_type, numeric_type>, std::shared_ptr<templates::base> > fallbacks;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -12,7 +12,7 @@ endif()
|
||||
#Database
|
||||
if(NOT ANDROID)
|
||||
#Presets
|
||||
foreach(VENDOR amd intel nvidia)
|
||||
foreach(VENDOR unknown amd intel nvidia)
|
||||
set(DATABASE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/runtime/inference/database/${VENDOR}/")
|
||||
file(GLOB_RECURSE JSON_FILES "${DATABASE_PATH}/json/*.json")
|
||||
CODE_TO_H(SOURCES ${JSON_FILES} VARNAME database EXTENSION "hpp" OUTPUT_DIR "${DATABASE_PATH}"
|
||||
|
@@ -43,15 +43,6 @@ namespace templates
|
||||
// Builds the parameter set shared by the generation templates.
// Note the index offset between arguments and members: _local_size_1 initializes
// local_size_0 and _local_size_2 initializes local_size_1.
base::parameters_type::parameters_type(unsigned int _simd_width, int_t _local_size_1, int_t _local_size_2, int_t _num_kernels) : simd_width(_simd_width), local_size_0(_local_size_1), local_size_1(_local_size_2), num_kernels(_num_kernels)
|
||||
{ }
|
||||
|
||||
|
||||
// Scans every node of the expression tree and reports whether a fallback is needed.
// Returns true as soon as a node of type DENSE_ARRAY_TYPE has ld[0] > 1 or
// array.start > 0; returns false when no node matches.
bool base::requires_fallback(expression_tree const & expression)
|
||||
{
|
||||
for(expression_tree::node const & node: expression.data())
|
||||
if(node.type==DENSE_ARRAY_TYPE && (node.ld[0]>1 || node.array.start>0))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Stores the given fusion policy in fusion_policy_; the body does nothing else.
base::base(fusion_policy_t fusion_policy) : fusion_policy_(fusion_policy)
|
||||
{}
|
||||
|
||||
|
@@ -134,7 +134,7 @@ std::vector<int_t> elementwise_1d::input_sizes(expression_tree const & expressio
|
||||
return {max(expressions.shape())};
|
||||
}
|
||||
|
||||
void elementwise_1d::enqueue(driver::CommandQueue &, driver::Program const & program, std::string const & suffix, base &, runtime::execution_handler const & control)
|
||||
void elementwise_1d::enqueue(driver::CommandQueue &, driver::Program const & program, std::string const & suffix, runtime::execution_handler const & control)
|
||||
{
|
||||
expression_tree const & expressions = control.x();
|
||||
//Size
|
||||
|
@@ -125,7 +125,7 @@ std::vector<int_t> elementwise_2d::input_sizes(expression_tree const & expressi
|
||||
return expression.shape();
|
||||
}
|
||||
|
||||
void elementwise_2d::enqueue(driver::CommandQueue & /*queue*/, driver::Program const & program, std::string const & suffix, base &, runtime::execution_handler const & control)
|
||||
void elementwise_2d::enqueue(driver::CommandQueue & /*queue*/, driver::Program const & program, std::string const & suffix, runtime::execution_handler const & control)
|
||||
{
|
||||
expression_tree const & expressions = control.x();
|
||||
std::string name = "elementwise_2d";
|
||||
|
@@ -132,11 +132,12 @@ matrix_product_parameters::matrix_product_parameters(unsigned int simd_width
|
||||
#define VLOAD(offset, ptr) vload(p_.simd_width, sdtype, offset, ptr, "1", backend, true)
|
||||
#define VLOAD_MISALIGNED(offset, ptr) vload(p_.simd_width, sdtype, offset, ptr, "1", backend, false)
|
||||
#define VSTORE(value, offset, ptr) vstore(p_.simd_width, sdtype, value, offset, ptr, "1", backend)
|
||||
#define ASTRIDE1 string(check_bounds_?"*Astride1":"")
|
||||
#define BSTRIDE1 string(check_bounds_?"*Bstride1":"")
|
||||
#define CSTRIDE1 string(check_bounds_?"*Cstride1":"")
|
||||
|
||||
|
||||
symbolic::preset::matrix_product::args args;
|
||||
infos(tree, args);
|
||||
std::string ASTRIDE1 = (args.A->ld[0] > 1)?"*Astride1":"";
|
||||
std::string BSTRIDE1 = (args.B->ld[0] > 1)?"*Bstride1":"";
|
||||
std::string CSTRIDE1 = (args.C->ld[0] > 1)?"*Cstride1":"";
|
||||
|
||||
//////////////////
|
||||
/// INIT
|
||||
@@ -681,7 +682,7 @@ matrix_product_parameters::matrix_product_parameters(unsigned int simd_width
|
||||
return {M, N, K};
|
||||
}
|
||||
|
||||
matrix_product::matrix_product(matrix_product_parameters const & parameters, bool check_bounds, char A_trans, char B_trans) : base_impl<matrix_product, matrix_product_parameters>(parameters, FUSE_INDEPENDENT), A_trans_(A_trans), B_trans_(B_trans), check_bounds_(check_bounds)
|
||||
matrix_product::matrix_product(matrix_product_parameters const & parameters, char A_trans, char B_trans) : base_impl<matrix_product, matrix_product_parameters>(parameters, FUSE_INDEPENDENT), A_trans_(A_trans), B_trans_(B_trans)
|
||||
{
|
||||
if(A_trans_=='N' && B_trans_=='N') type_ = MATRIX_PRODUCT_NN;
|
||||
else if(A_trans_=='T' && B_trans_=='N') type_ = MATRIX_PRODUCT_TN;
|
||||
@@ -696,14 +697,9 @@ matrix_product_parameters::matrix_product_parameters(unsigned int simd_width
|
||||
return infos((expression_tree&)expressions, dummy);
|
||||
}
|
||||
|
||||
void matrix_product::enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback_base, runtime::execution_handler const & control)
|
||||
void matrix_product::enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, runtime::execution_handler const & control)
|
||||
{
|
||||
using namespace tools;
|
||||
|
||||
matrix_product & fallback = (matrix_product&)fallback_base;
|
||||
expression_tree const & expressions = control.x();
|
||||
|
||||
|
||||
symbolic::preset::matrix_product::args args;
|
||||
std::vector<int_t> MNK = infos(expressions, args);
|
||||
int_t M = MNK[0];
|
||||
@@ -714,10 +710,7 @@ matrix_product_parameters::matrix_product_parameters(unsigned int simd_width
|
||||
return;
|
||||
//Enqueue
|
||||
runtime::execution_options_type const & options = control.execution_options();
|
||||
if (args.A->ld[0] > 1 || args.B->ld[0] > 1 || args.C->ld[0] > 1)
|
||||
fallback.enqueue_block(queue, M, N, K, *args.A, *args.B, *args.C, args.alpha, args.beta, program, "fallback", options);
|
||||
else
|
||||
enqueue_block(queue, M, N, K, *args.A, *args.B, *args.C, args.alpha, args.beta, program, suffix, options);
|
||||
enqueue_block(queue, M, N, K, *args.A, *args.B, *args.C, args.alpha, args.beta, program, suffix, options);
|
||||
}
|
||||
|
||||
//
|
||||
@@ -725,8 +718,8 @@ matrix_product_parameters::matrix_product_parameters(unsigned int simd_width
|
||||
, int_t ls0, int_t KL, int_t ls1, int_t D
|
||||
, int_t ms, int_t ks, int_t ns
|
||||
, fetching_policy_type Afetch , fetching_policy_type Bfetch
|
||||
, int_t lfetch0, int_t lfetch1, bool check_bound) :
|
||||
matrix_product(matrix_product_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), check_bound, 'N', 'N')
|
||||
, int_t lfetch0, int_t lfetch1) :
|
||||
matrix_product(matrix_product_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), 'N', 'N')
|
||||
{
|
||||
}
|
||||
|
||||
@@ -735,8 +728,8 @@ matrix_product_parameters::matrix_product_parameters(unsigned int simd_width
|
||||
, int_t ls0, int_t KL, int_t ls1, int_t D
|
||||
, int_t ms, int_t ks, int_t ns
|
||||
, fetching_policy_type Afetch , fetching_policy_type Bfetch
|
||||
, int_t lfetch0, int_t lfetch1, bool check_bound) :
|
||||
matrix_product(matrix_product_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), check_bound, 'T', 'N')
|
||||
, int_t lfetch0, int_t lfetch1) :
|
||||
matrix_product(matrix_product_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), 'T', 'N')
|
||||
{ }
|
||||
|
||||
//
|
||||
@@ -744,8 +737,8 @@ matrix_product_parameters::matrix_product_parameters(unsigned int simd_width
|
||||
, int_t ls0, int_t KL, int_t ls1, int_t D
|
||||
, int_t ms, int_t ks, int_t ns
|
||||
, fetching_policy_type Afetch , fetching_policy_type Bfetch
|
||||
, int_t lfetch0, int_t lfetch1, bool check_bound) :
|
||||
matrix_product(matrix_product_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), check_bound, 'N', 'T')
|
||||
, int_t lfetch0, int_t lfetch1) :
|
||||
matrix_product(matrix_product_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), 'N', 'T')
|
||||
{ }
|
||||
|
||||
//
|
||||
@@ -753,8 +746,8 @@ matrix_product_parameters::matrix_product_parameters(unsigned int simd_width
|
||||
, int_t ls0, int_t KL, int_t ls1, int_t D
|
||||
, int_t ms, int_t ks, int_t ns
|
||||
, fetching_policy_type Afetch , fetching_policy_type Bfetch
|
||||
, int_t lfetch0, int_t lfetch1, bool check_bound) :
|
||||
matrix_product(matrix_product_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), check_bound, 'T', 'T')
|
||||
, int_t lfetch0, int_t lfetch1) :
|
||||
matrix_product(matrix_product_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), 'T', 'T')
|
||||
{ }
|
||||
|
||||
}
|
||||
|
@@ -269,7 +269,7 @@ std::vector<int_t> reduce_1d::input_sizes(expression_tree const & x) const
|
||||
return {max(x[lhs].shape)};
|
||||
}
|
||||
|
||||
void reduce_1d::enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base &, runtime::execution_handler const & control)
|
||||
void reduce_1d::enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, runtime::execution_handler const & control)
|
||||
{
|
||||
expression_tree const & x = control.x();
|
||||
|
||||
|
@@ -313,7 +313,7 @@ std::vector<int_t> reduce_2d::input_sizes(expression_tree const & tree) const
|
||||
return {shape[0], shape[1]};
|
||||
}
|
||||
|
||||
void reduce_2d::enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base &, runtime::execution_handler const & control)
|
||||
void reduce_2d::enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, runtime::execution_handler const & control)
|
||||
{
|
||||
expression_tree const & tree = control.x();
|
||||
std::vector<int_t> MN = input_sizes(tree);
|
||||
|
@@ -22,6 +22,9 @@
|
||||
#include "isaac/driver/device.h"
|
||||
#include "isaac/runtime/inference/profiles.h"
|
||||
|
||||
//Default
|
||||
#include "database/unknown/unknown.hpp"
|
||||
|
||||
//Intel
|
||||
#include "database/intel/broadwell.hpp"
|
||||
|
||||
@@ -45,6 +48,8 @@ namespace runtime
|
||||
|
||||
const profiles::presets_type profiles::presets_ =
|
||||
{
|
||||
//DEFAULT
|
||||
DATABASE_ENTRY(UNKNOWN, UNKNOWN, UNKNOWN, database::unknown::unknown),
|
||||
//INTEL
|
||||
DATABASE_ENTRY(GPU, INTEL, BROADWELL, database::intel::broadwell),
|
||||
//NVIDIA
|
||||
|
1
lib/runtime/inference/database/unknown/json/unknown.json
Normal file
1
lib/runtime/inference/database/unknown/json/unknown.json
Normal file
File diff suppressed because one or more lines are too long
5172
lib/runtime/inference/database/unknown/unknown.hpp
Normal file
5172
lib/runtime/inference/database/unknown/unknown.hpp
Normal file
File diff suppressed because it is too large
Load Diff
@@ -65,21 +65,19 @@ driver::Program const & profiles::value_type::init(runtime::execution_handler co
|
||||
return *program;
|
||||
|
||||
std::string srcs;
|
||||
for(unsigned int i = 0 ; i < templates_.size() ; ++i){
|
||||
for(unsigned int i = 0 ; i < templates_.size() ; ++i)
|
||||
srcs += templates_[i]->generate(tools::to_string(i), expression.x(), context.device());
|
||||
}
|
||||
srcs += fallback_->generate("fallback", expression.x(), context.device());
|
||||
return cache_.add(context, pname, srcs);
|
||||
}
|
||||
|
||||
profiles::value_type::value_type(expression_type etype, numeric_type dtype, predictors::random_forest const & predictor, std::vector< std::shared_ptr<templates::base> > const & templates, driver::CommandQueue const & queue) :
|
||||
templates_(templates), fallback_(fallbacks[std::make_pair(etype, dtype)]), predictor_(new predictors::random_forest(predictor)), queue_(queue), cache_(driver::backend::programs::get(queue,etype,dtype))
|
||||
templates_(templates), predictor_(new predictors::random_forest(predictor)), queue_(queue), cache_(driver::backend::programs::get(queue,etype,dtype))
|
||||
{
|
||||
cache_.clear();
|
||||
}
|
||||
|
||||
|
||||
profiles::value_type::value_type(expression_type etype, numeric_type dtype, templates::base const & tp, driver::CommandQueue const & queue) : templates_(1,tp.clone()), fallback_(fallbacks[std::make_pair(etype, dtype)]), queue_(queue), cache_(driver::backend::programs::get(queue,etype,dtype))
|
||||
profiles::value_type::value_type(expression_type etype, numeric_type dtype, templates::base const & tp, driver::CommandQueue const & queue) : templates_(1,tp.clone()), queue_(queue), cache_(driver::backend::programs::get(queue,etype,dtype))
|
||||
{
|
||||
cache_.clear();
|
||||
}
|
||||
@@ -102,7 +100,7 @@ void profiles::value_type::execute(runtime::execution_handler const & expr)
|
||||
}
|
||||
std::list<driver::Event> events;
|
||||
try{
|
||||
templates_[i]->enqueue(queue_, program, tools::to_string(i), *fallback_, runtime::execution_handler(expr.x(), runtime::execution_options_type(0, &events)));
|
||||
templates_[i]->enqueue(queue_, program, tools::to_string(i), runtime::execution_handler(expr.x(), runtime::execution_options_type(0, &events)));
|
||||
queue_.synchronize();
|
||||
timings[i] = 1e-9*std::accumulate(events.begin(), events.end(), 0, &time_event);
|
||||
}catch(...){
|
||||
@@ -115,7 +113,6 @@ void profiles::value_type::execute(runtime::execution_handler const & expr)
|
||||
}
|
||||
|
||||
//Prediction
|
||||
|
||||
int label = 0;
|
||||
if(expr.dispatcher_options().label>=0)
|
||||
label = expr.dispatcher_options().label;
|
||||
@@ -134,7 +131,7 @@ void profiles::value_type::execute(runtime::execution_handler const & expr)
|
||||
if(templates_[label]->temporary_workspace(expr.x()) > MAX_TEMPORARY_WORKSPACE)
|
||||
throw operation_not_supported_exception("Running this operation would require an overly large temporary.");
|
||||
|
||||
return templates_[label]->enqueue(queue_, program, tools::to_string(label), *fallback_, expr);
|
||||
return templates_[label]->enqueue(queue_, program, tools::to_string(label), expr);
|
||||
}
|
||||
|
||||
profiles::value_type::templates_container const & profiles::value_type::templates() const
|
||||
@@ -210,30 +207,26 @@ void profiles::import(std::string const & str, driver::CommandQueue const & queu
|
||||
|
||||
profiles::map_type& profiles::init(driver::CommandQueue const & queue)
|
||||
{
|
||||
map_type & result = cache_[queue];
|
||||
|
||||
numeric_type dtypes[] = {CHAR_TYPE, UCHAR_TYPE, SHORT_TYPE, USHORT_TYPE, INT_TYPE, UINT_TYPE, LONG_TYPE, ULONG_TYPE, FLOAT_TYPE, DOUBLE_TYPE};
|
||||
expression_type etypes[] = {ELEMENTWISE_1D, REDUCE_1D, ELEMENTWISE_2D, REDUCE_2D_ROWS, REDUCE_2D_COLS, MATRIX_PRODUCT_NN, MATRIX_PRODUCT_NT, MATRIX_PRODUCT_TN, MATRIX_PRODUCT_TT};
|
||||
|
||||
for(numeric_type dtype: dtypes)
|
||||
for(expression_type etype: etypes)
|
||||
result[std::make_pair(etype, dtype)] = std::shared_ptr<value_type>(new value_type(etype, dtype, *fallbacks[std::make_pair(etype, dtype)], queue));
|
||||
|
||||
map_type & map = cache_[queue];
|
||||
driver::Device const & device = queue.device();
|
||||
presets_type::const_iterator it = presets_.find(std::make_tuple(device.type(), device.vendor(), device.architecture()));
|
||||
/*-- Device not found in database --*/
|
||||
if(it==presets_.end()){
|
||||
//FIXME: Handle this case
|
||||
// import(presets_.at(std::make_tuple(device.type(), device.vendor(), driver::Device::Architecture::UNKNOWN)), queue);
|
||||
import(presets_.at(std::make_tuple(driver::Device::Type::UNKNOWN, driver::Device::Vendor::UNKNOWN, driver::Device::Architecture::UNKNOWN)), queue);
|
||||
}
|
||||
else
|
||||
/*-- Device found in database --*/
|
||||
else{
|
||||
import(it->second, queue);
|
||||
}
|
||||
|
||||
/*-- User-provided profile --*/
|
||||
std::string homepath = tools::getenv("HOME");
|
||||
if(homepath.size())
|
||||
{
|
||||
std::string json_path = homepath + "/.isaac/devices/device0.json";
|
||||
std::ifstream t(json_path);
|
||||
if(!t)
|
||||
return result;
|
||||
return map;
|
||||
std::string str;
|
||||
t.seekg(0, std::ios::end);
|
||||
str.reserve(t.tellg());
|
||||
@@ -242,7 +235,7 @@ profiles::map_type& profiles::init(driver::CommandQueue const & queue)
|
||||
import(str, queue);
|
||||
}
|
||||
|
||||
return result;
|
||||
return map;
|
||||
}
|
||||
|
||||
profiles::map_type& profiles::get(driver::CommandQueue const & queue)
|
||||
@@ -254,43 +247,12 @@ profiles::map_type& profiles::get(driver::CommandQueue const & queue)
|
||||
}
|
||||
|
||||
void profiles::set(driver::CommandQueue const & queue, expression_type operation, numeric_type dtype, std::shared_ptr<value_type> const & profile)
|
||||
{
|
||||
cache_[queue][std::make_pair(operation,dtype)] = profile;
|
||||
}
|
||||
{ cache_[queue][std::make_pair(operation,dtype)] = profile; }
|
||||
|
||||
void profiles::release()
|
||||
{
|
||||
cache_.clear();
|
||||
}
|
||||
|
||||
{ cache_.clear(); }
|
||||
|
||||
std::map<driver::CommandQueue, profiles::map_type> profiles::cache_;
|
||||
|
||||
///////////////////
|
||||
|
||||
//
|
||||
|
||||
// Builds the table of fallback templates, keyed by (expression_type, numeric_type).
// For each of the ten numeric types listed below, one template instance is created
// per expression type (1D/2D element-wise, 1D/2D reductions, four matrix-product layouts).
// Returns the filled map by value.
std::map<std::pair<expression_type, numeric_type>, std::shared_ptr<templates::base> > init_fallback()
|
||||
{
|
||||
typedef std::shared_ptr<templates::base> ptr_t;
|
||||
std::map<std::pair<expression_type, numeric_type>, ptr_t > res;
|
||||
numeric_type types[] = {CHAR_TYPE, UCHAR_TYPE, SHORT_TYPE, USHORT_TYPE, INT_TYPE, UINT_TYPE, LONG_TYPE, ULONG_TYPE, FLOAT_TYPE, DOUBLE_TYPE};
|
||||
for(auto DTYPE : types)
|
||||
{
|
||||
// Element-wise and reduction fallbacks all use FETCH_FROM_GLOBAL_STRIDED with a first argument of 1.
res[std::make_pair(ELEMENTWISE_1D, DTYPE)] = ptr_t (new templates::elementwise_1d(1,64,128,templates::FETCH_FROM_GLOBAL_STRIDED));
|
||||
res[std::make_pair(REDUCE_1D, DTYPE)] = ptr_t(new templates::reduce_1d(1,64,128,templates::FETCH_FROM_GLOBAL_STRIDED));
|
||||
res[std::make_pair(ELEMENTWISE_2D, DTYPE)] = ptr_t(new templates::elementwise_2d(1,128,1,16,32,templates::FETCH_FROM_GLOBAL_STRIDED));
|
||||
res[std::make_pair(REDUCE_2D_ROWS, DTYPE)] = ptr_t(new templates::reduce_2d_rows(1, 8, 8, 4, 16, templates::FETCH_FROM_GLOBAL_STRIDED));
|
||||
res[std::make_pair(REDUCE_2D_COLS, DTYPE)] = ptr_t(new templates::reduce_2d_cols(1, 8, 8, 64, 8, templates::FETCH_FROM_GLOBAL_STRIDED));
|
||||
// The four matrix-product fallbacks share identical arguments; the trailing `true`
// is the last constructor argument (the bool check_bound parameter in the
// constructor signatures shown elsewhere in this diff).
res[std::make_pair(MATRIX_PRODUCT_NN, DTYPE)] = ptr_t(new templates::matrix_product_nn(1, 8, 16, 8, 1, 8, 1, 8, templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_LOCAL, 8, 8, true));
|
||||
res[std::make_pair(MATRIX_PRODUCT_TN, DTYPE)] = ptr_t(new templates::matrix_product_tn(1, 8, 16, 8, 1, 8, 1, 8, templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_LOCAL, 8, 8, true));
|
||||
res[std::make_pair(MATRIX_PRODUCT_NT, DTYPE)] = ptr_t(new templates::matrix_product_nt(1, 8, 16, 8, 1, 8, 1, 8, templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_LOCAL, 8, 8, true));
|
||||
res[std::make_pair(MATRIX_PRODUCT_TT, DTYPE)] = ptr_t(new templates::matrix_product_tt(1, 8, 16, 8, 1, 8, 1, 8, templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_LOCAL, 8, 8, true));
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
std::map<std::pair<expression_type, numeric_type>, std::shared_ptr<templates::base> > fallbacks = init_fallback();
|
||||
|
||||
}
|
||||
}
|
||||
|
@@ -73,7 +73,7 @@ def main():
|
||||
libraries += ['gnustl_shared']
|
||||
|
||||
#Source files
|
||||
src = 'src/lib/random/rand.cpp src/lib/jit/syntax/expression/preset.cpp src/lib/jit/syntax/expression/expression.cpp src/lib/jit/syntax/expression/operations.cpp src/lib/jit/syntax/engine/macro.cpp src/lib/jit/syntax/engine/object.cpp src/lib/jit/syntax/engine/process.cpp src/lib/jit/syntax/engine/binder.cpp src/lib/jit/generation/reduce_2d.cpp src/lib/jit/generation/elementwise_2d.cpp src/lib/jit/generation/engine/stream.cpp src/lib/jit/generation/engine/keywords.cpp src/lib/jit/generation/elementwise_1d.cpp src/lib/jit/generation/reduce_1d.cpp src/lib/jit/generation/matrix_product.cpp src/lib/jit/generation/base.cpp src/lib/runtime/execute.cpp src/lib/runtime/inference/database.cpp src/lib/runtime/inference/profiles.cpp src/lib/runtime/inference/predictors/random_forest.cpp src/lib/runtime/scheduler/dag.cpp src/lib/runtime/scheduler/strategies/heft.cpp src/lib/array.cpp src/lib/value_scalar.cpp src/lib/driver/backend.cpp src/lib/driver/device.cpp src/lib/driver/kernel.cpp src/lib/driver/buffer.cpp src/lib/driver/platform.cpp src/lib/driver/check.cpp src/lib/driver/program.cpp src/lib/driver/command_queue.cpp src/lib/driver/dispatch.cpp src/lib/driver/program_cache.cpp src/lib/driver/context.cpp src/lib/driver/event.cpp src/lib/driver/ndrange.cpp src/lib/driver/handle.cpp src/lib/api/blas/clBLAS.cpp src/lib/api/blas/cublas.cpp src/lib/exception/api.cpp src/lib/exception/driver.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']]
|
||||
src = 'src/lib/array.cpp src/lib/value_scalar.cpp src/lib/jit/syntax/engine/process.cpp src/lib/jit/syntax/engine/binder.cpp src/lib/jit/syntax/engine/macro.cpp src/lib/jit/syntax/engine/object.cpp src/lib/jit/syntax/expression/preset.cpp src/lib/jit/syntax/expression/operations.cpp src/lib/jit/syntax/expression/expression.cpp src/lib/jit/generation/reduce_2d.cpp src/lib/jit/generation/base.cpp src/lib/jit/generation/engine/keywords.cpp src/lib/jit/generation/engine/stream.cpp src/lib/jit/generation/matrix_product.cpp src/lib/jit/generation/elementwise_2d.cpp src/lib/jit/generation/elementwise_1d.cpp src/lib/jit/generation/reduce_1d.cpp src/lib/runtime/inference/predictors/random_forest.cpp src/lib/runtime/inference/profiles.cpp src/lib/runtime/inference/database.cpp src/lib/runtime/execute.cpp src/lib/runtime/scheduler/strategies/heft.cpp src/lib/runtime/scheduler/dag.cpp src/lib/exception/api.cpp src/lib/exception/driver.cpp src/lib/api/blas/cublas.cpp src/lib/api/blas/clBLAS.cpp src/lib/random/rand.cpp src/lib/driver/dispatch.cpp src/lib/driver/device.cpp src/lib/driver/context.cpp src/lib/driver/buffer.cpp src/lib/driver/program.cpp src/lib/driver/backend.cpp src/lib/driver/command_queue.cpp src/lib/driver/event.cpp src/lib/driver/program_cache.cpp src/lib/driver/kernel.cpp src/lib/driver/platform.cpp src/lib/driver/handle.cpp src/lib/driver/check.cpp src/lib/driver/ndrange.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']]
|
||||
boostsrc = 'external/boost/libs/'
|
||||
for s in ['numpy','python','smart_ptr','system','thread']:
|
||||
src = src + [x for x in recursive_glob('external/boost/libs/' + s + '/src/','.cpp') if 'win32' not in x and 'pthread' not in x]
|
||||
|
Reference in New Issue
Block a user