Tune: Further file hierarchy improvements

This commit is contained in:
Philippe Tillet
2015-08-17 18:01:17 -07:00
parent a23e976ca7
commit da20db711b
59 changed files with 87 additions and 48 deletions

View File

@@ -58,6 +58,8 @@ public:
Vendor vendor() const; Vendor vendor() const;
Architecture architecture() const; Architecture architecture() const;
std::string infos() const;
backend_type backend() const; backend_type backend() const;
size_t clock_rate() const; size_t clock_rate() const;
unsigned int address_bits() const; unsigned int address_bits() const;

View File

@@ -1,5 +1,7 @@
#include "isaac/driver/device.h"
#include <algorithm> #include <algorithm>
#include <sstream>
#include "isaac/driver/device.h"
#include "helpers/ocl/infos.hpp" #include "helpers/ocl/infos.hpp"
namespace isaac namespace isaac
@@ -19,13 +21,17 @@ int Device::cuGetInfo() const
} }
Device::Device(int ordinal): backend_(CUDA), h_(backend_, true) Device::Device(int ordinal): backend_(CUDA), h_(backend_, true)
{ cuda::check(cuDeviceGet(h_.cu.get(), ordinal)); } {
cuda::check(cuDeviceGet(h_.cu.get(), ordinal));
}
#endif #endif
Device::Device(cl_device_id const & device, bool take_ownership) : backend_(OPENCL), h_(backend_, take_ownership) Device::Device(cl_device_id const & device, bool take_ownership) : backend_(OPENCL), h_(backend_, take_ownership)
{ h_.cl() = device; } {
h_.cl() = device;
}
bool Device::operator==(Device const & other) const bool Device::operator==(Device const & other) const
@@ -205,6 +211,22 @@ bool Device::fp64_support() const
} }
} }
/**
 * Builds a human-readable, multi-line summary of this device.
 *
 * One "Label: value" line is written for each of: the platform name, the
 * vendor string, the device name, the maximum total work-group size, the
 * per-dimension maximum sizes (comma-separated) and the local memory size.
 *
 * @return The summary text; every line is terminated by '\n'.
 */
std::string Device::infos() const
{
std::ostringstream oss;
std::vector<size_t> max_wi_sizes = max_work_item_sizes();
// '\n' rather than std::endl: the stream is an in-memory buffer, so the
// flush performed by std::endl buys nothing. The produced text is the same.
oss << "Platform: " << platform().name() << '\n';
oss << "Vendor: " << vendor_str() << '\n';
oss << "Name: " << name() << '\n';
oss << "Maximum total work-group size: " << max_work_group_size() << '\n';
// Print every dimension that was actually reported instead of hard-coding
// indices 0..2: this avoids reading past the end of the vector when fewer
// than three entries come back, and yields identical output for three.
oss << "Maximum individual work-group sizes: ";
for(size_t dim = 0; dim < max_wi_sizes.size(); ++dim)
{
if(dim > 0)
oss << ", ";
oss << max_wi_sizes[dim];
}
oss << '\n';
oss << "Local memory size: " << local_mem_size() << '\n';
return oss.str();
}
// Properties
#ifdef ISAAC_WITH_CUDA #ifdef ISAAC_WITH_CUDA
#define CUDACASE(CUNAME) case CUDA: return cuGetInfo<CUNAME>(); #define CUDACASE(CUNAME) case CUDA: return cuGetInfo<CUNAME>();

View File

@@ -77,13 +77,13 @@ unsigned int base::temporary_workspace(expressions_tuple const &) const
{ return 0; } { return 0; }
base::~base() base::~base()
{ } {
}
std::string base::generate(std::string const & suffix, expressions_tuple const & expressions, driver::Device const & device) std::string base::generate(std::string const & suffix, expressions_tuple const & expressions, driver::Device const & device)
{ {
expressions_tuple::data_type::const_iterator sit; expressions_tuple::data_type::const_iterator sit;
std::vector<mapping_type>::iterator mit; std::vector<mapping_type>::iterator mit;
int err = is_invalid(expressions, device); int err = is_invalid(expressions, device);
if(err != 0) if(err != 0)
throw operation_not_supported_exception("The supplied parameters for this template are invalid : err " + tools::to_string(err)); throw operation_not_supported_exception("The supplied parameters for this template are invalid : err " + tools::to_string(err));

View File

@@ -22,7 +22,9 @@ gemm_parameters::gemm_parameters(unsigned int simd_width
, unsigned int local_fetch_0, unsigned int local_fetch_1): base::parameters_type(simd_width, local_size_0, local_size_1, 1), , unsigned int local_fetch_0, unsigned int local_fetch_1): base::parameters_type(simd_width, local_size_0, local_size_1, 1),
kL(KL), depth(D), mS(ms), kS(ks), nS(ns), A_fetching_policy(A_fetching_policy), B_fetching_policy(B_fetching_policy), kL(KL), depth(D), mS(ms), kS(ks), nS(ns), A_fetching_policy(A_fetching_policy), B_fetching_policy(B_fetching_policy),
local_fetch_0(local_fetch_0), local_fetch_1(local_fetch_1), local_fetch_0(local_fetch_0), local_fetch_1(local_fetch_1),
mL(ms*local_size_0), nL(ns*local_size_1){} mL(ms*local_size_0), nL(ns*local_size_1)
{
}
unsigned int gemm::lmem_usage(expressions_tuple const & expressions) const unsigned int gemm::lmem_usage(expressions_tuple const & expressions) const
@@ -59,7 +61,6 @@ gemm_parameters::gemm_parameters(unsigned int simd_width
if(p_.A_fetching_policy!=FETCH_FROM_LOCAL || p_.B_fetching_policy!=FETCH_FROM_LOCAL) if(p_.A_fetching_policy!=FETCH_FROM_LOCAL || p_.B_fetching_policy!=FETCH_FROM_LOCAL)
return TEMPLATE_INVALID_FETCHING_POLICY_TYPE; return TEMPLATE_INVALID_FETCHING_POLICY_TYPE;
if ((p_.mS % p_.simd_width) > 0 || (p_.nS % p_.simd_width) > 0) if ((p_.mS % p_.simd_width) > 0 || (p_.nS % p_.simd_width) > 0)
return TEMPLATE_MS_NS_MUST_BE_SIMD_WIDTH_MULTIPLE; return TEMPLATE_MS_NS_MUST_BE_SIMD_WIDTH_MULTIPLE;
@@ -459,23 +460,23 @@ gemm_parameters::gemm_parameters(unsigned int simd_width
stream << "}" << std::endl; stream << "}" << std::endl;
if(A_trans_=='N' || B_trans_=='T') // if(A_trans_=='N' || B_trans_=='T')
stream << "int Ky = K - idT.y;" << std::endl; // stream << "int Ky = K - idT.y;" << std::endl;
if(A_trans_=='T' || B_trans_=='N') // if(A_trans_=='T' || B_trans_=='N')
stream << "int Kx = K - idT.x;" << std::endl; // stream << "int Kx = K - idT.x;" << std::endl;
if(A_trans_=='N' || B_trans_=='T') // if(A_trans_=='N' || B_trans_=='T')
for(unsigned int k = 0; k < p_.kL; k += p_.local_fetch_1) // for(unsigned int k = 0; k < p_.kL; k += p_.local_fetch_1)
stream << "int condy" << k << " = " << k << " < Ky;" << std::endl; // stream << "int condy" << k << " = " << k << " < Ky;" << std::endl;
if(A_trans_=='T' || B_trans_=='N') // if(A_trans_=='T' || B_trans_=='N')
{ // {
for(unsigned int k = 0 ; k < p_.kL ; k += p_.local_fetch_0*p_.simd_width) // for(unsigned int k = 0 ; k < p_.kL ; k += p_.local_fetch_0*p_.simd_width)
for(unsigned int s = 0 ; s < p_.simd_width ; ++s) // for(unsigned int s = 0 ; s < p_.simd_width ; ++s)
stream << "int condx" << k + s << " = " << k + s << " < Kx;" << std::endl; // stream << "int condx" << k + s << " = " << k + s << " < Kx;" << std::endl;
} // }
fetch_to_lds(true); // fetch_to_lds(true);
stream << "//Write back C" << std::endl; stream << "//Write back C" << std::endl;
stream << "M += ids.x;" << std::endl; stream << "M += ids.x;" << std::endl;
@@ -567,9 +568,6 @@ gemm_parameters::gemm_parameters(unsigned int simd_width
stream << "}" << std::endl; stream << "}" << std::endl;
} }
// if(p_.simd_width>1)
// std::cout << stream.str() << std::endl;
return stream.str(); return stream.str();
#undef VLOAD #undef VLOAD
@@ -746,7 +744,8 @@ gemm_parameters::gemm_parameters(unsigned int simd_width
, fetching_policy_type Afetch , fetching_policy_type Bfetch , fetching_policy_type Afetch , fetching_policy_type Bfetch
, int_t lfetch0, int_t lfetch1, bool check_bound) : , int_t lfetch0, int_t lfetch1, bool check_bound) :
gemm(gemm_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), check_bound, 'N', 'N') gemm(gemm_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), check_bound, 'N', 'N')
{ } {
}
// //
gemm_tn::gemm_tn(unsigned int simd gemm_tn::gemm_tn(unsigned int simd

View File

@@ -217,6 +217,7 @@ profiles::map_type& profiles::init(driver::CommandQueue const & queue)
driver::Device const & device = queue.device(); driver::Device const & device = queue.device();
presets_type::const_iterator it = presets_.find(std::make_tuple(device.type(), device.vendor(), device.architecture())); presets_type::const_iterator it = presets_.find(std::make_tuple(device.type(), device.vendor(), device.architecture()));
if(it==presets_.end()){ if(it==presets_.end()){
//FIXME: Handle this case
// import(presets_.at(std::make_tuple(device.type(), device.vendor(), driver::Device::Architecture::UNKNOWN)), queue); // import(presets_.at(std::make_tuple(device.type(), device.vendor(), driver::Device::Architecture::UNKNOWN)), queue);
} }
else else

Binary file not shown.

Binary file not shown.

View File

@@ -143,7 +143,7 @@ def main():
libraries=libraries)] libraries=libraries)]
#External #External
extensions += [Extension('autotuning.external.sklearn._tree', extensions += [Extension('external.sklearn._tree',
['external/sklearn/_tree.c'], ['external/sklearn/_tree.c'],
include_dirs = [numpy_include])] include_dirs = [numpy_include])]
@@ -155,7 +155,7 @@ def main():
author='Philippe Tillet', author='Philippe Tillet',
author_email='ptillet@g.harvard.edu', author_email='ptillet@g.harvard.edu',
license='MPL 2.0', license='MPL 2.0',
packages=['isaac','isaac.autotuning', 'isaac.autotuning.external', 'isaac.autotuning.external.deap', 'isaac.autotuning.external.deap.tools', 'isaac.autotuning.external.sklearn'], packages=['isaac','isaac.external','isaac.external.sklearn'],
ext_package="isaac", ext_package="isaac",
ext_modules=extensions, ext_modules=extensions,
cmdclass={'build_py': build_py, 'build_ext': build_ext_subclass}, cmdclass={'build_py': build_py, 'build_ext': build_ext_subclass},

View File

@@ -139,6 +139,7 @@ void export_driver()
.add_property("platform", &sc::driver::Device::platform) .add_property("platform", &sc::driver::Device::platform)
.add_property("vendor", &sc::driver::Device::vendor) .add_property("vendor", &sc::driver::Device::vendor)
.add_property("nv_compute_capability", &detail::nv_compute_capability) .add_property("nv_compute_capability", &detail::nv_compute_capability)
.add_property("infos", &sc::driver::Device::infos)
; ;
bp::class_<sc::driver::Context, boost::noncopyable>("context", bp::no_init) bp::class_<sc::driver::Context, boost::noncopyable>("context", bp::no_init)

View File

@@ -92,4 +92,16 @@ void export_exceptions()
wrap::exception<isaac::driver::ocl::exception::mem_object_allocation_failure>("MemObjectAllocationFailure") wrap::exception<isaac::driver::ocl::exception::mem_object_allocation_failure>("MemObjectAllocationFailure")
.def("__str__", &isaac::driver::ocl::exception::mem_object_allocation_failure::what) .def("__str__", &isaac::driver::ocl::exception::mem_object_allocation_failure::what)
; ;
wrap::exception<isaac::driver::ocl::exception::out_of_host_memory>("OutOfHostMemory")
.def("__str__", &isaac::driver::ocl::exception::out_of_host_memory::what)
;
wrap::exception<isaac::driver::ocl::exception::invalid_work_group_size>("InvalidWorkGroupSize")
.def("__str__", &isaac::driver::ocl::exception::invalid_work_group_size::what)
;
wrap::exception<isaac::driver::ocl::exception::invalid_value>("InvalidValue")
.def("__str__", &isaac::driver::ocl::exception::invalid_value::what)
;
} }

View File

@@ -29,11 +29,11 @@ void export_templates()
bp::enum_<tpt::fetching_policy_type> bp::enum_<tpt::fetching_policy_type>
("fetching_policy_type"); ("fetching_policy_type")
.value("FETCH_FROM_LOCAL", tpt::FETCH_FROM_LOCAL)
.value("FETCH_FROM_GLOBAL_STRIDED", tpt::FETCH_FROM_GLOBAL_STRIDED)
.value("FETCH_FROM_GLOBAL_CONTIGUOUS", tpt::FETCH_FROM_GLOBAL_CONTIGUOUS);
bp::scope().attr("FETCH_FROM_LOCAL") = tpt::FETCH_FROM_LOCAL;
bp::scope().attr("FETCH_FROM_GLOBAL_STRIDED") = tpt::FETCH_FROM_GLOBAL_CONTIGUOUS;
bp::scope().attr("FETCH_FROM_GLOBAL_CONTIGUOUS") = tpt::FETCH_FROM_GLOBAL_STRIDED;
//Base //Base
{ {

Binary file not shown.

View File

@@ -13,7 +13,7 @@ from kivy.uix.settings import SettingsWithNoMenu
import isaac as sc import isaac as sc
import json import json
from isaac.autotuning.tune import tune from tune.tune import do_tuning
__version__ = '1.0' __version__ = '1.0'
@@ -62,7 +62,8 @@ class IsaacApp(App):
json_path = '' json_path = ''
#FIXME: Move profiling logics into tuning #FIXME: Move profiling logics into tuning
sc.driver.default.queue_properties = sc.driver.PROFILING_ENABLE sc.driver.default.queue_properties = sc.driver.PROFILING_ENABLE
tune(device, operation, json_path) print device.infos
do_tuning(device, operation, json_path)
def show_benchmark(self): def show_benchmark(self):
pass pass

Binary file not shown.

Binary file not shown.

View File

View File

@@ -1,4 +1,4 @@
from external.sklearn.forest import RandomForestRegressor from isaac.external.sklearn.forest import RandomForestRegressor
import numpy as np import numpy as np
def gmean(a, axis=0, dtype=None): def gmean(a, axis=0, dtype=None):

View File

@@ -13,10 +13,12 @@ from numpy import cumsum
import tools import tools
fetch_types = [sc.templates.FETCH_FROM_GLOBAL_CONTIGUOUS, fetch_types = [sc.templates.fetching_policy_type.FETCH_FROM_GLOBAL_CONTIGUOUS,
sc.templates.FETCH_FROM_GLOBAL_STRIDED, sc.templates.fetching_policy_type.FETCH_FROM_GLOBAL_STRIDED,
sc.templates.FETCH_FROM_LOCAL, sc.templates.fetching_policy_type.FETCH_FROM_LOCAL,
sc.templates.FETCH_FROM_LOCAL] sc.templates.fetching_policy_type.FETCH_FROM_LOCAL]
to_catch = (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure, sc.InvalidWorkGroupSize, sc.OutOfHostMemory, sc.InvalidValue)
def exhaustive(template, sizes, context): def exhaustive(template, sizes, context):
tree, _ = tools.tree_of(template, sizes, context) tree, _ = tools.tree_of(template, sizes, context)
@@ -33,7 +35,7 @@ def exhaustive(template, sizes, context):
time = tools.benchmark(template, parameters, tree) time = tools.benchmark(template, parameters, tree)
if not best or time < best[1]: if not best or time < best[1]:
best = parameters, time best = parameters, time
except (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure): except to_catch:
pass pass
if best: if best:
stdout.write('%.2f %% | Best %.2f [ for %s ]\r'%(float(idx*100)/len(ranges),metric(sizes, best[1]), best[0])) stdout.write('%.2f %% | Best %.2f [ for %s ]\r'%(float(idx*100)/len(ranges),metric(sizes, best[1]), best[0]))
@@ -73,7 +75,6 @@ def genetic(template, sizes, context, naccept=200, niter = 1000, cxpb=0.4, mutpb
def evaluate(genome): def evaluate(genome):
idx = tuple(genome) idx = tuple(genome)
if idx not in cache: if idx not in cache:
print decode(genome)
cache[idx] = tools.benchmark(template, decode(genome), tree) cache[idx] = tools.benchmark(template, decode(genome), tree)
return cache[idx], return cache[idx],
@@ -100,7 +101,7 @@ def genetic(template, sizes, context, naccept=200, niter = 1000, cxpb=0.4, mutpb
try: try:
individual.fitness.values = toolbox.evaluate(genome) individual.fitness.values = toolbox.evaluate(genome)
population += [individual] population += [individual]
except (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure ): except to_catch:
pass pass
genome = encode(list(initializer.next())) genome = encode(list(initializer.next()))
hof.update(population) hof.update(population)
@@ -134,7 +135,7 @@ def genetic(template, sizes, context, naccept=200, niter = 1000, cxpb=0.4, mutpb
#Reproduction #Reproduction
else: else:
offspring += [random.choice(population)] offspring += [random.choice(population)]
except (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure): except to_catch:
pass pass
@@ -173,7 +174,7 @@ def is_local_optimum(parameters, template, sizes, context):
#Evaluate the provided parameters guess #Evaluate the provided parameters guess
try: try:
reference = tools.benchmark(template, parameters, tree) reference = tools.benchmark(template, parameters, tree)
except (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure): except to_catch:
return False return False
#Latency bound -- ignore #Latency bound -- ignore
@@ -190,7 +191,7 @@ def is_local_optimum(parameters, template, sizes, context):
time = tools.benchmark(template, x, tree) time = tools.benchmark(template, x, tree)
if time/reference < .97: if time/reference < .97:
return False return False
except (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure): except to_catch:
pass pass
return True return True

View File

@@ -4,7 +4,7 @@ from itertools import chain, product
from numpy import argsort, argmax from numpy import argsort, argmax
from operator import mul from operator import mul
import isaac as sc import isaac as sc
from external.sklearn.forest import RandomForestRegressor from isaac.external.sklearn.forest import RandomForestRegressor
import optimize, tools, model import optimize, tools, model
from json import encoder from json import encoder
import json import json
@@ -21,7 +21,7 @@ def pow2range(a, b):
return [2**x for x in range(a, b)] return [2**x for x in range(a, b)]
def tune(device, operation, json_path): def do_tuning(device, operation, json_path):
#Context #Context
context = sc.driver.context(device) context = sc.driver.context(device)

View File

@@ -1,6 +1,6 @@
import argparse import argparse
import isaac as sc import isaac as sc
from isaac.autotuning.tune import tune from tune.tune import do_tuning
def parse_arguments(): def parse_arguments():
platforms = sc.driver.get_platforms() platforms = sc.driver.get_platforms()
@@ -32,4 +32,4 @@ def parse_arguments():
if __name__ == "__main__": if __name__ == "__main__":
sc.driver.default.queue_properties = sc.driver.PROFILING_ENABLE sc.driver.default.queue_properties = sc.driver.PROFILING_ENABLE
args = parse_arguments() args = parse_arguments()
tune(*args) do_tuning(*args)