Elementwise: Bugfix for FETCH_LOCAL_CONTIGUOUS
This commit is contained in:
@@ -47,7 +47,6 @@ expression_type elementwise_2d::type() const
|
|||||||
|
|
||||||
std::string elementwise_2d::generate_impl(std::string const & suffix, expression_tree const & tree, driver::Device const & device, symbolic::symbols_table const & symbols) const
|
std::string elementwise_2d::generate_impl(std::string const & suffix, expression_tree const & tree, driver::Device const & device, symbolic::symbols_table const & symbols) const
|
||||||
{
|
{
|
||||||
std::string init0, upper_bound0, inc0, init1, upper_bound1, inc1;
|
|
||||||
driver::backend_type backend = device.backend();
|
driver::backend_type backend = device.backend();
|
||||||
kernel_generation_stream stream(backend);
|
kernel_generation_stream stream(backend);
|
||||||
|
|
||||||
@@ -70,35 +69,24 @@ std::string elementwise_2d::generate_impl(std::string const & suffix, expression
|
|||||||
stream << "{" << std::endl;
|
stream << "{" << std::endl;
|
||||||
stream.inc_tab();
|
stream.inc_tab();
|
||||||
|
|
||||||
|
element_wise_loop_1D(stream, fetch_, 1, "i", "M", "$GLOBAL_IDX_0", "$GLOBAL_SIZE_0", device, [&](unsigned int){
|
||||||
|
element_wise_loop_1D(stream, fetch_, 1, "j", "N", "$GLOBAL_IDX_1", "$GLOBAL_SIZE_1", device, [&](unsigned int){
|
||||||
|
//Declares register to store results
|
||||||
|
for(symbolic::leaf* sym: symbolic::extract<symbolic::leaf>(tree, symbols, assigned_left, false))
|
||||||
|
stream << sym->process("#scalartype #name;") << std::endl;
|
||||||
|
|
||||||
fetching_loop_info(fetch_, "M", stream, init0, upper_bound0, inc0, "$GLOBAL_IDX_0", "$GLOBAL_SIZE_0", device);
|
//Load to registers
|
||||||
stream << "for($SIZE_T i = " << init0 << "; i < " << upper_bound0 << "; i += " << inc0 << ")" << std::endl;
|
for(symbolic::leaf* sym: symbolic::extract<symbolic::leaf>(tree, symbols, assigned_right, false))
|
||||||
stream << "{" << std::endl;
|
stream << sym->process("#scalartype #name = at(i, j);") << std::endl;
|
||||||
stream.inc_tab();
|
|
||||||
fetching_loop_info(fetch_, "N", stream, init1, upper_bound1, inc1, "$GLOBAL_IDX_1", "$GLOBAL_SIZE_1", device);
|
|
||||||
stream << "for($SIZE_T j = " << init1 << "; j < " << upper_bound1 << "; j += " << inc1 << ")" << std::endl;
|
|
||||||
stream << "{" << std::endl;
|
|
||||||
stream.inc_tab();
|
|
||||||
|
|
||||||
//Declares register to store results
|
for(std::size_t idx: assigned)
|
||||||
for(symbolic::leaf* sym: symbolic::extract<symbolic::leaf>(tree, symbols, assigned_left, false))
|
stream << symbols.at(idx)->evaluate({{"leaf", "#name"}}) << ";" << std::endl;
|
||||||
stream << sym->process("#scalartype #name;") << std::endl;
|
|
||||||
|
|
||||||
//Load to registers
|
//Writes back
|
||||||
for(symbolic::leaf* sym: symbolic::extract<symbolic::leaf>(tree, symbols, assigned_right, false))
|
for(symbolic::leaf* sym: symbolic::extract<symbolic::leaf>(tree, symbols, assigned_left, false))
|
||||||
stream << sym->process("#scalartype #name = at(i, j);") << std::endl;
|
stream << sym->process("at(i, j) = #name;") << std::endl;
|
||||||
|
});
|
||||||
for(std::size_t idx: assigned)
|
});
|
||||||
stream << symbols.at(idx)->evaluate({{"leaf", "#name"}}) << ";" << std::endl;
|
|
||||||
|
|
||||||
//Writes back
|
|
||||||
for(symbolic::leaf* sym: symbolic::extract<symbolic::leaf>(tree, symbols, assigned_left, false))
|
|
||||||
stream << sym->process("at(i, j) = #name;") << std::endl;
|
|
||||||
|
|
||||||
stream.dec_tab();
|
|
||||||
stream << "}" << std::endl;
|
|
||||||
stream.dec_tab();
|
|
||||||
stream << "}" << std::endl;
|
|
||||||
|
|
||||||
|
|
||||||
stream.dec_tab();
|
stream.dec_tab();
|
||||||
|
@@ -29,13 +29,13 @@ namespace isaac
|
|||||||
namespace templates
|
namespace templates
|
||||||
{
|
{
|
||||||
|
|
||||||
inline void fetching_loop_info(fetch_type policy, std::string const & bound, kernel_generation_stream & stream, std::string & init, std::string & upper_bound, std::string & inc, std::string const & domain_id, std::string const & domain_size, driver::Device const &)
|
inline void fetching_loop_info(fetch_type policy, std::string const & bound, kernel_generation_stream & stream, std::string & init, std::string & upper_bound, std::string & inc, std::string const & domain_id, std::string const & domain_size, driver::Device const &, std::string const & vwidth)
|
||||||
{
|
{
|
||||||
if (policy==FETCH_FROM_GLOBAL_STRIDED)
|
if (policy==FETCH_FROM_GLOBAL_STRIDED)
|
||||||
{
|
{
|
||||||
init = domain_id;
|
init = domain_id + "*" + vwidth;
|
||||||
upper_bound = bound;
|
upper_bound = bound;
|
||||||
inc = domain_size;
|
inc = domain_size + "*" + vwidth;
|
||||||
}
|
}
|
||||||
else if (policy==FETCH_FROM_GLOBAL_CONTIGUOUS)
|
else if (policy==FETCH_FROM_GLOBAL_CONTIGUOUS)
|
||||||
{
|
{
|
||||||
@@ -43,12 +43,12 @@ inline void fetching_loop_info(fetch_type policy, std::string const & bound, ker
|
|||||||
std::string chunk_start = "chunk_start";
|
std::string chunk_start = "chunk_start";
|
||||||
std::string chunk_end = "chunk_end";
|
std::string chunk_end = "chunk_end";
|
||||||
|
|
||||||
stream << "$SIZE_T " << chunk_size << " = (" << bound << "+" << domain_size << "-1)/" << domain_size << ";" << std::endl;
|
stream << "$SIZE_T " << chunk_size << " = " << vwidth << "*(" << bound << "+" << domain_size << "-1)/(" << vwidth << ");" << std::endl;
|
||||||
stream << "$SIZE_T " << chunk_start << " =" << domain_id << "*" << chunk_size << ";" << std::endl;
|
stream << "$SIZE_T " << chunk_start << " =" << domain_id << "*" << chunk_size << ";" << std::endl;
|
||||||
stream << "$SIZE_T " << chunk_end << " = min(" << chunk_start << "+" << chunk_size << ", " << bound << ");" << std::endl;
|
stream << "$SIZE_T " << chunk_end << " = min(" << chunk_start << "+" << chunk_size << ", " << bound << ");" << std::endl;
|
||||||
init = chunk_start;
|
init = chunk_start;
|
||||||
upper_bound = chunk_end;
|
upper_bound = chunk_end;
|
||||||
inc = "1";
|
inc = vwidth;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -60,9 +60,9 @@ inline void element_wise_loop_1D(kernel_generation_stream & stream, fetch_type f
|
|||||||
std::string strwidth = tools::to_string(vwidth);
|
std::string strwidth = tools::to_string(vwidth);
|
||||||
|
|
||||||
std::string init, upper_bound, inc;
|
std::string init, upper_bound, inc;
|
||||||
fetching_loop_info(fetch, bound, stream, init, upper_bound, inc, domain_id, domain_size, device);
|
fetching_loop_info(fetch, bound, stream, init, upper_bound, inc, domain_id, domain_size, device, strwidth);
|
||||||
std::string boundround = upper_bound + "/" + strwidth + "*" + strwidth;
|
std::string boundround = upper_bound + "/" + strwidth + "*" + strwidth;
|
||||||
stream << "for(unsigned int " << i << " = " << init << "*" << strwidth << "; " << i << " < " << boundround << "; " << i << " += " << inc << "*" << strwidth << ")" << std::endl;
|
stream << "for(unsigned int " << i << " = " << init << "; " << i << " < " << boundround << "; " << i << " += " << inc << ")" << std::endl;
|
||||||
stream << "{" << std::endl;
|
stream << "{" << std::endl;
|
||||||
stream.inc_tab();
|
stream.inc_tab();
|
||||||
generate_body(vwidth);
|
generate_body(vwidth);
|
||||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
@@ -73,7 +73,7 @@ def main():
|
|||||||
libraries += ['gnustl_shared']
|
libraries += ['gnustl_shared']
|
||||||
|
|
||||||
#Source files
|
#Source files
|
||||||
src = 'src/lib/runtime/predictors/random_forest.cpp src/lib/runtime/profiles.cpp src/lib/runtime/database.cpp src/lib/runtime/execute.cpp src/lib/exception/driver.cpp src/lib/exception/api.cpp src/lib/random/rand.cpp src/lib/jit/generation/elementwise_1d.cpp src/lib/jit/generation/reduce_2d.cpp src/lib/jit/generation/reduce_1d.cpp src/lib/jit/generation/base.cpp src/lib/jit/generation/gemm.cpp src/lib/jit/generation/engine/keywords.cpp src/lib/jit/generation/engine/stream.cpp src/lib/jit/generation/elementwise_2d.cpp src/lib/jit/syntax/expression/expression.cpp src/lib/jit/syntax/expression/preset.cpp src/lib/jit/syntax/expression/operations.cpp src/lib/jit/syntax/engine/binder.cpp src/lib/jit/syntax/engine/macro.cpp src/lib/jit/syntax/engine/process.cpp src/lib/jit/syntax/engine/object.cpp src/lib/value_scalar.cpp src/lib/array.cpp src/lib/api/blas/cublas.cpp src/lib/api/blas/clBLAS.cpp src/lib/driver/dispatch.cpp src/lib/driver/kernel.cpp src/lib/driver/backend.cpp src/lib/driver/platform.cpp src/lib/driver/buffer.cpp src/lib/driver/event.cpp src/lib/driver/ndrange.cpp src/lib/driver/device.cpp src/lib/driver/program_cache.cpp src/lib/driver/check.cpp src/lib/driver/command_queue.cpp src/lib/driver/handle.cpp src/lib/driver/context.cpp src/lib/driver/program.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']]
|
src = 'src/lib/exception/api.cpp src/lib/exception/driver.cpp src/lib/value_scalar.cpp src/lib/random/rand.cpp src/lib/driver/check.cpp src/lib/driver/ndrange.cpp src/lib/driver/platform.cpp src/lib/driver/backend.cpp src/lib/driver/program.cpp src/lib/driver/command_queue.cpp src/lib/driver/event.cpp src/lib/driver/kernel.cpp src/lib/driver/handle.cpp src/lib/driver/device.cpp src/lib/driver/program_cache.cpp src/lib/driver/buffer.cpp src/lib/driver/context.cpp src/lib/driver/dispatch.cpp src/lib/jit/generation/engine/stream.cpp src/lib/jit/generation/engine/keywords.cpp src/lib/jit/generation/reduce_1d.cpp src/lib/jit/generation/elementwise_1d.cpp src/lib/jit/generation/base.cpp src/lib/jit/generation/elementwise_2d.cpp src/lib/jit/generation/reduce_2d.cpp src/lib/jit/generation/gemm.cpp src/lib/jit/syntax/engine/object.cpp src/lib/jit/syntax/engine/macro.cpp src/lib/jit/syntax/engine/process.cpp src/lib/jit/syntax/engine/binder.cpp src/lib/jit/syntax/expression/operations.cpp src/lib/jit/syntax/expression/expression.cpp src/lib/jit/syntax/expression/preset.cpp src/lib/api/blas/clBLAS.cpp src/lib/api/blas/cublas.cpp src/lib/runtime/execute.cpp src/lib/runtime/predictors/random_forest.cpp src/lib/runtime/profiles.cpp src/lib/runtime/database.cpp src/lib/array.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']]
|
||||||
boostsrc = 'external/boost/libs/'
|
boostsrc = 'external/boost/libs/'
|
||||||
for s in ['numpy','python','smart_ptr','system','thread']:
|
for s in ['numpy','python','smart_ptr','system','thread']:
|
||||||
src = src + [x for x in recursive_glob('external/boost/libs/' + s + '/src/','.cpp') if 'win32' not in x and 'pthread' not in x]
|
src = src + [x for x in recursive_glob('external/boost/libs/' + s + '/src/','.cpp') if 'win32' not in x and 'pthread' not in x]
|
||||||
|
@@ -128,6 +128,9 @@ class Tuner:
|
|||||||
profiles = [map(mmap,row) for v in row for row in csv.reader(f, delimiter=',')]
|
profiles = [map(mmap,row) for v in row for row in csv.reader(f, delimiter=',')]
|
||||||
with open(os.path.join(savepath, 'Y.csv')) as f:
|
with open(os.path.join(savepath, 'Y.csv')) as f:
|
||||||
Y = [map(float, row) for row in csv.reader(f, delimiter=',')]
|
Y = [map(float, row) for row in csv.reader(f, delimiter=',')]
|
||||||
|
#for x in X:
|
||||||
|
# tree, _ = tools.tree_of(operation, x, context)
|
||||||
|
# Y.append([performance(x, tools.benchmark(operation(*best), tree)) for best in profiles])
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@@ -181,7 +184,6 @@ class Tuner:
|
|||||||
row = Y[X.index(x)]
|
row = Y[X.index(x)]
|
||||||
self.progress_bar.update(1, 1, profiles[argmax(row)], max(row))
|
self.progress_bar.update(1, 1, profiles[argmax(row)], max(row))
|
||||||
self.progress_bar.set_finished()
|
self.progress_bar.set_finished()
|
||||||
|
|
||||||
#Adding external profiles
|
#Adding external profiles
|
||||||
for prof in tools.external_profiles(operation):
|
for prof in tools.external_profiles(operation):
|
||||||
for x, y in zip(X, Y):
|
for x, y in zip(X, Y):
|
||||||
|
Reference in New Issue
Block a user