Tune: Further file hierarchy improvements
This commit is contained in:
@@ -58,6 +58,8 @@ public:
|
||||
Vendor vendor() const;
|
||||
Architecture architecture() const;
|
||||
|
||||
std::string infos() const;
|
||||
|
||||
backend_type backend() const;
|
||||
size_t clock_rate() const;
|
||||
unsigned int address_bits() const;
|
||||
|
@@ -1,5 +1,7 @@
|
||||
#include "isaac/driver/device.h"
|
||||
#include <algorithm>
|
||||
#include <sstream>
|
||||
|
||||
#include "isaac/driver/device.h"
|
||||
#include "helpers/ocl/infos.hpp"
|
||||
|
||||
namespace isaac
|
||||
@@ -19,13 +21,17 @@ int Device::cuGetInfo() const
|
||||
}
|
||||
|
||||
Device::Device(int ordinal): backend_(CUDA), h_(backend_, true)
|
||||
{ cuda::check(cuDeviceGet(h_.cu.get(), ordinal)); }
|
||||
{
|
||||
cuda::check(cuDeviceGet(h_.cu.get(), ordinal));
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
Device::Device(cl_device_id const & device, bool take_ownership) : backend_(OPENCL), h_(backend_, take_ownership)
|
||||
{ h_.cl() = device; }
|
||||
{
|
||||
h_.cl() = device;
|
||||
}
|
||||
|
||||
|
||||
bool Device::operator==(Device const & other) const
|
||||
@@ -205,6 +211,22 @@ bool Device::fp64_support() const
|
||||
}
|
||||
}
|
||||
|
||||
// Builds a human-readable, multi-line summary of this device and returns it
// as a string. One line each is written for: platform name, vendor string,
// device name, maximum total work-group size, the first three maximum
// work-item sizes, and local memory size.
std::string Device::infos() const
|
||||
{
|
||||
std::ostringstream oss;
|
||||
std::vector<size_t> max_wi_sizes = max_work_item_sizes();
|
||||
|
||||
oss << "Platform: " << platform().name() << std::endl;
|
||||
oss << "Vendor: " << vendor_str() << std::endl;
|
||||
oss << "Name: " << name() << std::endl;
|
||||
oss << "Maximum total work-group size: " << max_work_group_size() << std::endl;
|
||||
// NOTE(review): indexes [0]..[2] without checking the vector's size; assumes
// max_work_item_sizes() always yields at least three entries -- TODO confirm.
oss << "Maximum individual work-group sizes: " << max_wi_sizes[0] << ", " << max_wi_sizes[1] << ", " << max_wi_sizes[2] << std::endl;
|
||||
oss << "Local memory size: " << local_mem_size() << std::endl;
|
||||
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
// Properties
|
||||
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
#define CUDACASE(CUNAME) case CUDA: return cuGetInfo<CUNAME>();
|
||||
|
@@ -77,13 +77,13 @@ unsigned int base::temporary_workspace(expressions_tuple const &) const
|
||||
{ return 0; }
|
||||
|
||||
base::~base()
|
||||
{ }
|
||||
{
|
||||
}
|
||||
|
||||
std::string base::generate(std::string const & suffix, expressions_tuple const & expressions, driver::Device const & device)
|
||||
{
|
||||
expressions_tuple::data_type::const_iterator sit;
|
||||
std::vector<mapping_type>::iterator mit;
|
||||
|
||||
int err = is_invalid(expressions, device);
|
||||
if(err != 0)
|
||||
throw operation_not_supported_exception("The supplied parameters for this template are invalid : err " + tools::to_string(err));
|
||||
|
@@ -22,7 +22,9 @@ gemm_parameters::gemm_parameters(unsigned int simd_width
|
||||
, unsigned int local_fetch_0, unsigned int local_fetch_1): base::parameters_type(simd_width, local_size_0, local_size_1, 1),
|
||||
kL(KL), depth(D), mS(ms), kS(ks), nS(ns), A_fetching_policy(A_fetching_policy), B_fetching_policy(B_fetching_policy),
|
||||
local_fetch_0(local_fetch_0), local_fetch_1(local_fetch_1),
|
||||
mL(ms*local_size_0), nL(ns*local_size_1){}
|
||||
mL(ms*local_size_0), nL(ns*local_size_1)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
unsigned int gemm::lmem_usage(expressions_tuple const & expressions) const
|
||||
@@ -59,7 +61,6 @@ gemm_parameters::gemm_parameters(unsigned int simd_width
|
||||
if(p_.A_fetching_policy!=FETCH_FROM_LOCAL || p_.B_fetching_policy!=FETCH_FROM_LOCAL)
|
||||
return TEMPLATE_INVALID_FETCHING_POLICY_TYPE;
|
||||
|
||||
|
||||
if ((p_.mS % p_.simd_width) > 0 || (p_.nS % p_.simd_width) > 0)
|
||||
return TEMPLATE_MS_NS_MUST_BE_SIMD_WIDTH_MULTIPLE;
|
||||
|
||||
@@ -459,23 +460,23 @@ gemm_parameters::gemm_parameters(unsigned int simd_width
|
||||
stream << "}" << std::endl;
|
||||
|
||||
|
||||
if(A_trans_=='N' || B_trans_=='T')
|
||||
stream << "int Ky = K - idT.y;" << std::endl;
|
||||
if(A_trans_=='T' || B_trans_=='N')
|
||||
stream << "int Kx = K - idT.x;" << std::endl;
|
||||
// if(A_trans_=='N' || B_trans_=='T')
|
||||
// stream << "int Ky = K - idT.y;" << std::endl;
|
||||
// if(A_trans_=='T' || B_trans_=='N')
|
||||
// stream << "int Kx = K - idT.x;" << std::endl;
|
||||
|
||||
if(A_trans_=='N' || B_trans_=='T')
|
||||
for(unsigned int k = 0; k < p_.kL; k += p_.local_fetch_1)
|
||||
stream << "int condy" << k << " = " << k << " < Ky;" << std::endl;
|
||||
// if(A_trans_=='N' || B_trans_=='T')
|
||||
// for(unsigned int k = 0; k < p_.kL; k += p_.local_fetch_1)
|
||||
// stream << "int condy" << k << " = " << k << " < Ky;" << std::endl;
|
||||
|
||||
if(A_trans_=='T' || B_trans_=='N')
|
||||
{
|
||||
for(unsigned int k = 0 ; k < p_.kL ; k += p_.local_fetch_0*p_.simd_width)
|
||||
for(unsigned int s = 0 ; s < p_.simd_width ; ++s)
|
||||
stream << "int condx" << k + s << " = " << k + s << " < Kx;" << std::endl;
|
||||
}
|
||||
// if(A_trans_=='T' || B_trans_=='N')
|
||||
// {
|
||||
// for(unsigned int k = 0 ; k < p_.kL ; k += p_.local_fetch_0*p_.simd_width)
|
||||
// for(unsigned int s = 0 ; s < p_.simd_width ; ++s)
|
||||
// stream << "int condx" << k + s << " = " << k + s << " < Kx;" << std::endl;
|
||||
// }
|
||||
|
||||
fetch_to_lds(true);
|
||||
// fetch_to_lds(true);
|
||||
|
||||
stream << "//Write back C" << std::endl;
|
||||
stream << "M += ids.x;" << std::endl;
|
||||
@@ -567,9 +568,6 @@ gemm_parameters::gemm_parameters(unsigned int simd_width
|
||||
stream << "}" << std::endl;
|
||||
}
|
||||
|
||||
|
||||
// if(p_.simd_width>1)
|
||||
// std::cout << stream.str() << std::endl;
|
||||
return stream.str();
|
||||
|
||||
#undef VLOAD
|
||||
@@ -746,7 +744,8 @@ gemm_parameters::gemm_parameters(unsigned int simd_width
|
||||
, fetching_policy_type Afetch , fetching_policy_type Bfetch
|
||||
, int_t lfetch0, int_t lfetch1, bool check_bound) :
|
||||
gemm(gemm_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), check_bound, 'N', 'N')
|
||||
{ }
|
||||
{
|
||||
}
|
||||
|
||||
//
|
||||
gemm_tn::gemm_tn(unsigned int simd
|
||||
|
@@ -217,6 +217,7 @@ profiles::map_type& profiles::init(driver::CommandQueue const & queue)
|
||||
driver::Device const & device = queue.device();
|
||||
presets_type::const_iterator it = presets_.find(std::make_tuple(device.type(), device.vendor(), device.architecture()));
|
||||
if(it==presets_.end()){
|
||||
//FIXME: Handle this case
|
||||
// import(presets_.at(std::make_tuple(device.type(), device.vendor(), driver::Device::Architecture::UNKNOWN)), queue);
|
||||
}
|
||||
else
|
||||
|
BIN
python/isaac/autotuning/external/deap/__init__.pyc
vendored
BIN
python/isaac/autotuning/external/deap/__init__.pyc
vendored
Binary file not shown.
BIN
python/isaac/autotuning/external/deap/base.pyc
vendored
BIN
python/isaac/autotuning/external/deap/base.pyc
vendored
Binary file not shown.
BIN
python/isaac/autotuning/external/deap/creator.pyc
vendored
BIN
python/isaac/autotuning/external/deap/creator.pyc
vendored
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
python/isaac/autotuning/external/deap/tools/emo.pyc
vendored
BIN
python/isaac/autotuning/external/deap/tools/emo.pyc
vendored
Binary file not shown.
BIN
python/isaac/autotuning/external/deap/tools/init.pyc
vendored
BIN
python/isaac/autotuning/external/deap/tools/init.pyc
vendored
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -143,7 +143,7 @@ def main():
|
||||
libraries=libraries)]
|
||||
|
||||
#External
|
||||
extensions += [Extension('autotuning.external.sklearn._tree',
|
||||
extensions += [Extension('external.sklearn._tree',
|
||||
['external/sklearn/_tree.c'],
|
||||
include_dirs = [numpy_include])]
|
||||
|
||||
@@ -155,7 +155,7 @@ def main():
|
||||
author='Philippe Tillet',
|
||||
author_email='ptillet@g.harvard.edu',
|
||||
license='MPL 2.0',
|
||||
packages=['isaac','isaac.autotuning', 'isaac.autotuning.external', 'isaac.autotuning.external.deap', 'isaac.autotuning.external.deap.tools', 'isaac.autotuning.external.sklearn'],
|
||||
packages=['isaac','isaac.external','isaac.external.sklearn'],
|
||||
ext_package="isaac",
|
||||
ext_modules=extensions,
|
||||
cmdclass={'build_py': build_py, 'build_ext': build_ext_subclass},
|
||||
|
@@ -139,6 +139,7 @@ void export_driver()
|
||||
.add_property("platform", &sc::driver::Device::platform)
|
||||
.add_property("vendor", &sc::driver::Device::vendor)
|
||||
.add_property("nv_compute_capability", &detail::nv_compute_capability)
|
||||
.add_property("infos", &sc::driver::Device::infos)
|
||||
;
|
||||
|
||||
bp::class_<sc::driver::Context, boost::noncopyable>("context", bp::no_init)
|
||||
|
@@ -92,4 +92,16 @@ void export_exceptions()
|
||||
wrap::exception<isaac::driver::ocl::exception::mem_object_allocation_failure>("MemObjectAllocationFailure")
|
||||
.def("__str__", &isaac::driver::ocl::exception::mem_object_allocation_failure::what)
|
||||
;
|
||||
|
||||
wrap::exception<isaac::driver::ocl::exception::out_of_host_memory>("OutOfHostMemory")
|
||||
.def("__str__", &isaac::driver::ocl::exception::out_of_host_memory::what)
|
||||
;
|
||||
|
||||
wrap::exception<isaac::driver::ocl::exception::invalid_work_group_size>("InvalidWorkGroupSize")
|
||||
.def("__str__", &isaac::driver::ocl::exception::invalid_work_group_size::what)
|
||||
;
|
||||
|
||||
wrap::exception<isaac::driver::ocl::exception::invalid_value>("InvalidValue")
|
||||
.def("__str__", &isaac::driver::ocl::exception::invalid_value::what)
|
||||
;
|
||||
}
|
||||
|
@@ -29,11 +29,11 @@ void export_templates()
|
||||
|
||||
|
||||
bp::enum_<tpt::fetching_policy_type>
|
||||
("fetching_policy_type");
|
||||
("fetching_policy_type")
|
||||
.value("FETCH_FROM_LOCAL", tpt::FETCH_FROM_LOCAL)
|
||||
.value("FETCH_FROM_GLOBAL_STRIDED", tpt::FETCH_FROM_GLOBAL_STRIDED)
|
||||
.value("FETCH_FROM_GLOBAL_CONTIGUOUS", tpt::FETCH_FROM_GLOBAL_CONTIGUOUS);
|
||||
|
||||
bp::scope().attr("FETCH_FROM_LOCAL") = tpt::FETCH_FROM_LOCAL;
|
||||
bp::scope().attr("FETCH_FROM_GLOBAL_STRIDED") = tpt::FETCH_FROM_GLOBAL_CONTIGUOUS;
|
||||
bp::scope().attr("FETCH_FROM_GLOBAL_CONTIGUOUS") = tpt::FETCH_FROM_GLOBAL_STRIDED;
|
||||
|
||||
//Base
|
||||
{
|
||||
|
Binary file not shown.
@@ -13,7 +13,7 @@ from kivy.uix.settings import SettingsWithNoMenu
|
||||
import isaac as sc
|
||||
import json
|
||||
|
||||
from isaac.autotuning.tune import tune
|
||||
from tune.tune import do_tuning
|
||||
|
||||
__version__ = '1.0'
|
||||
|
||||
@@ -62,7 +62,8 @@ class IsaacApp(App):
|
||||
json_path = ''
|
||||
#FIXME: Move profiling logic into tuning
|
||||
sc.driver.default.queue_properties = sc.driver.PROFILING_ENABLE
|
||||
tune(device, operation, json_path)
|
||||
print device.infos
|
||||
do_tuning(device, operation, json_path)
|
||||
|
||||
def show_benchmark(self):
|
||||
pass
|
||||
|
Binary file not shown.
Binary file not shown.
0
tune/android/tune/external/__init__.py
vendored
Normal file
0
tune/android/tune/external/__init__.py
vendored
Normal file
@@ -1,4 +1,4 @@
|
||||
from external.sklearn.forest import RandomForestRegressor
|
||||
from isaac.external.sklearn.forest import RandomForestRegressor
|
||||
import numpy as np
|
||||
|
||||
def gmean(a, axis=0, dtype=None):
|
@@ -13,10 +13,12 @@ from numpy import cumsum
|
||||
|
||||
import tools
|
||||
|
||||
fetch_types = [sc.templates.FETCH_FROM_GLOBAL_CONTIGUOUS,
|
||||
sc.templates.FETCH_FROM_GLOBAL_STRIDED,
|
||||
sc.templates.FETCH_FROM_LOCAL,
|
||||
sc.templates.FETCH_FROM_LOCAL]
|
||||
fetch_types = [sc.templates.fetching_policy_type.FETCH_FROM_GLOBAL_CONTIGUOUS,
|
||||
sc.templates.fetching_policy_type.FETCH_FROM_GLOBAL_STRIDED,
|
||||
sc.templates.fetching_policy_type.FETCH_FROM_LOCAL,
|
||||
sc.templates.fetching_policy_type.FETCH_FROM_LOCAL]
|
||||
|
||||
to_catch = (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure, sc.InvalidWorkGroupSize, sc.OutOfHostMemory, sc.InvalidValue)
|
||||
|
||||
def exhaustive(template, sizes, context):
|
||||
tree, _ = tools.tree_of(template, sizes, context)
|
||||
@@ -33,7 +35,7 @@ def exhaustive(template, sizes, context):
|
||||
time = tools.benchmark(template, parameters, tree)
|
||||
if not best or time < best[1]:
|
||||
best = parameters, time
|
||||
except (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure):
|
||||
except to_catch:
|
||||
pass
|
||||
if best:
|
||||
stdout.write('%.2f %% | Best %.2f [ for %s ]\r'%(float(idx*100)/len(ranges),metric(sizes, best[1]), best[0]))
|
||||
@@ -73,7 +75,6 @@ def genetic(template, sizes, context, naccept=200, niter = 1000, cxpb=0.4, mutpb
|
||||
def evaluate(genome):
|
||||
idx = tuple(genome)
|
||||
if idx not in cache:
|
||||
print decode(genome)
|
||||
cache[idx] = tools.benchmark(template, decode(genome), tree)
|
||||
return cache[idx],
|
||||
|
||||
@@ -100,7 +101,7 @@ def genetic(template, sizes, context, naccept=200, niter = 1000, cxpb=0.4, mutpb
|
||||
try:
|
||||
individual.fitness.values = toolbox.evaluate(genome)
|
||||
population += [individual]
|
||||
except (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure ):
|
||||
except to_catch:
|
||||
pass
|
||||
genome = encode(list(initializer.next()))
|
||||
hof.update(population)
|
||||
@@ -134,7 +135,7 @@ def genetic(template, sizes, context, naccept=200, niter = 1000, cxpb=0.4, mutpb
|
||||
#Reproduction
|
||||
else:
|
||||
offspring += [random.choice(population)]
|
||||
except (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure):
|
||||
except to_catch:
|
||||
pass
|
||||
|
||||
|
||||
@@ -173,7 +174,7 @@ def is_local_optimum(parameters, template, sizes, context):
|
||||
#Evaluate the provided parameters guess
|
||||
try:
|
||||
reference = tools.benchmark(template, parameters, tree)
|
||||
except (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure):
|
||||
except to_catch:
|
||||
return False
|
||||
|
||||
#Latency bound -- ignore
|
||||
@@ -190,7 +191,7 @@ def is_local_optimum(parameters, template, sizes, context):
|
||||
time = tools.benchmark(template, x, tree)
|
||||
if time/reference < .97:
|
||||
return False
|
||||
except (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure):
|
||||
except to_catch:
|
||||
pass
|
||||
return True
|
||||
|
@@ -4,7 +4,7 @@ from itertools import chain, product
|
||||
from numpy import argsort, argmax
|
||||
from operator import mul
|
||||
import isaac as sc
|
||||
from external.sklearn.forest import RandomForestRegressor
|
||||
from isaac.external.sklearn.forest import RandomForestRegressor
|
||||
import optimize, tools, model
|
||||
from json import encoder
|
||||
import json
|
||||
@@ -21,7 +21,7 @@ def pow2range(a, b):
|
||||
return [2**x for x in range(a, b)]
|
||||
|
||||
|
||||
def tune(device, operation, json_path):
|
||||
def do_tuning(device, operation, json_path):
|
||||
#Context
|
||||
context = sc.driver.context(device)
|
||||
|
@@ -1,6 +1,6 @@
|
||||
import argparse
|
||||
import isaac as sc
|
||||
from isaac.autotuning.tune import tune
|
||||
from tune.tune import do_tuning
|
||||
|
||||
def parse_arguments():
|
||||
platforms = sc.driver.get_platforms()
|
||||
@@ -32,4 +32,4 @@ def parse_arguments():
|
||||
if __name__ == "__main__":
|
||||
sc.driver.default.queue_properties = sc.driver.PROFILING_ENABLE
|
||||
args = parse_arguments()
|
||||
tune(*args)
|
||||
do_tuning(*args)
|
||||
|
Reference in New Issue
Block a user