Tune: Further file hierarchy improvements

Philippe Tillet
2015-08-17 18:01:17 -07:00
parent a23e976ca7
commit da20db711b
59 changed files with 87 additions and 48 deletions


@@ -58,6 +58,8 @@ public:
Vendor vendor() const;
Architecture architecture() const;
std::string infos() const;
backend_type backend() const;
size_t clock_rate() const;
unsigned int address_bits() const;


@@ -1,5 +1,7 @@
#include "isaac/driver/device.h"
#include <algorithm>
#include <sstream>
#include "isaac/driver/device.h"
#include "helpers/ocl/infos.hpp"
namespace isaac
@@ -19,13 +21,17 @@ int Device::cuGetInfo() const
}
Device::Device(int ordinal): backend_(CUDA), h_(backend_, true)
{ cuda::check(cuDeviceGet(h_.cu.get(), ordinal)); }
{
cuda::check(cuDeviceGet(h_.cu.get(), ordinal));
}
#endif
Device::Device(cl_device_id const & device, bool take_ownership) : backend_(OPENCL), h_(backend_, take_ownership)
{ h_.cl() = device; }
{
h_.cl() = device;
}
bool Device::operator==(Device const & other) const
@@ -205,6 +211,22 @@ bool Device::fp64_support() const
}
}
std::string Device::infos() const
{
std::ostringstream oss;
std::vector<size_t> max_wi_sizes = max_work_item_sizes();
oss << "Platform: " << platform().name() << std::endl;
oss << "Vendor: " << vendor_str() << std::endl;
oss << "Name: " << name() << std::endl;
oss << "Maximum total work-group size: " << max_work_group_size() << std::endl;
oss << "Maximum individual work-group sizes: " << max_wi_sizes[0] << ", " << max_wi_sizes[1] << ", " << max_wi_sizes[2] << std::endl;
oss << "Local memory size: " << local_mem_size() << std::endl;
return oss.str();
}
// Properties
#ifdef ISAAC_WITH_CUDA
#define CUDACASE(CUNAME) case CUDA: return cuGetInfo<CUNAME>();


@@ -77,13 +77,13 @@ unsigned int base::temporary_workspace(expressions_tuple const &) const
{ return 0; }
base::~base()
{ }
{
}
std::string base::generate(std::string const & suffix, expressions_tuple const & expressions, driver::Device const & device)
{
expressions_tuple::data_type::const_iterator sit;
std::vector<mapping_type>::iterator mit;
int err = is_invalid(expressions, device);
if(err != 0)
throw operation_not_supported_exception("The supplied parameters for this template are invalid : err " + tools::to_string(err));


@@ -22,7 +22,9 @@ gemm_parameters::gemm_parameters(unsigned int simd_width
, unsigned int local_fetch_0, unsigned int local_fetch_1): base::parameters_type(simd_width, local_size_0, local_size_1, 1),
kL(KL), depth(D), mS(ms), kS(ks), nS(ns), A_fetching_policy(A_fetching_policy), B_fetching_policy(B_fetching_policy),
local_fetch_0(local_fetch_0), local_fetch_1(local_fetch_1),
mL(ms*local_size_0), nL(ns*local_size_1){}
mL(ms*local_size_0), nL(ns*local_size_1)
{
}
unsigned int gemm::lmem_usage(expressions_tuple const & expressions) const
@@ -59,7 +61,6 @@ gemm_parameters::gemm_parameters(unsigned int simd_width
if(p_.A_fetching_policy!=FETCH_FROM_LOCAL || p_.B_fetching_policy!=FETCH_FROM_LOCAL)
return TEMPLATE_INVALID_FETCHING_POLICY_TYPE;
if ((p_.mS % p_.simd_width) > 0 || (p_.nS % p_.simd_width) > 0)
return TEMPLATE_MS_NS_MUST_BE_SIMD_WIDTH_MULTIPLE;
@@ -459,23 +460,23 @@ gemm_parameters::gemm_parameters(unsigned int simd_width
stream << "}" << std::endl;
if(A_trans_=='N' || B_trans_=='T')
stream << "int Ky = K - idT.y;" << std::endl;
if(A_trans_=='T' || B_trans_=='N')
stream << "int Kx = K - idT.x;" << std::endl;
// if(A_trans_=='N' || B_trans_=='T')
// stream << "int Ky = K - idT.y;" << std::endl;
// if(A_trans_=='T' || B_trans_=='N')
// stream << "int Kx = K - idT.x;" << std::endl;
if(A_trans_=='N' || B_trans_=='T')
for(unsigned int k = 0; k < p_.kL; k += p_.local_fetch_1)
stream << "int condy" << k << " = " << k << " < Ky;" << std::endl;
// if(A_trans_=='N' || B_trans_=='T')
// for(unsigned int k = 0; k < p_.kL; k += p_.local_fetch_1)
// stream << "int condy" << k << " = " << k << " < Ky;" << std::endl;
if(A_trans_=='T' || B_trans_=='N')
{
for(unsigned int k = 0 ; k < p_.kL ; k += p_.local_fetch_0*p_.simd_width)
for(unsigned int s = 0 ; s < p_.simd_width ; ++s)
stream << "int condx" << k + s << " = " << k + s << " < Kx;" << std::endl;
}
// if(A_trans_=='T' || B_trans_=='N')
// {
// for(unsigned int k = 0 ; k < p_.kL ; k += p_.local_fetch_0*p_.simd_width)
// for(unsigned int s = 0 ; s < p_.simd_width ; ++s)
// stream << "int condx" << k + s << " = " << k + s << " < Kx;" << std::endl;
// }
fetch_to_lds(true);
// fetch_to_lds(true);
stream << "//Write back C" << std::endl;
stream << "M += ids.x;" << std::endl;
@@ -567,9 +568,6 @@ gemm_parameters::gemm_parameters(unsigned int simd_width
stream << "}" << std::endl;
}
// if(p_.simd_width>1)
// std::cout << stream.str() << std::endl;
return stream.str();
#undef VLOAD
@@ -746,7 +744,8 @@ gemm_parameters::gemm_parameters(unsigned int simd_width
, fetching_policy_type Afetch , fetching_policy_type Bfetch
, int_t lfetch0, int_t lfetch1, bool check_bound) :
gemm(gemm_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), check_bound, 'N', 'N')
{ }
{
}
//
gemm_tn::gemm_tn(unsigned int simd


@@ -217,6 +217,7 @@ profiles::map_type& profiles::init(driver::CommandQueue const & queue)
driver::Device const & device = queue.device();
presets_type::const_iterator it = presets_.find(std::make_tuple(device.type(), device.vendor(), device.architecture()));
if(it==presets_.end()){
//FIXME: Handle this case
// import(presets_.at(std::make_tuple(device.type(), device.vendor(), driver::Device::Architecture::UNKNOWN)), queue);
}
else

Binary file not shown.

Binary file not shown.


@@ -143,7 +143,7 @@ def main():
libraries=libraries)]
#External
extensions += [Extension('autotuning.external.sklearn._tree',
extensions += [Extension('external.sklearn._tree',
['external/sklearn/_tree.c'],
include_dirs = [numpy_include])]
@@ -155,7 +155,7 @@ def main():
author='Philippe Tillet',
author_email='ptillet@g.harvard.edu',
license='MPL 2.0',
packages=['isaac','isaac.autotuning', 'isaac.autotuning.external', 'isaac.autotuning.external.deap', 'isaac.autotuning.external.deap.tools', 'isaac.autotuning.external.sklearn'],
packages=['isaac','isaac.external','isaac.external.sklearn'],
ext_package="isaac",
ext_modules=extensions,
cmdclass={'build_py': build_py, 'build_ext': build_ext_subclass},


@@ -139,6 +139,7 @@ void export_driver()
.add_property("platform", &sc::driver::Device::platform)
.add_property("vendor", &sc::driver::Device::vendor)
.add_property("nv_compute_capability", &detail::nv_compute_capability)
.add_property("infos", &sc::driver::Device::infos)
;
bp::class_<sc::driver::Context, boost::noncopyable>("context", bp::no_init)
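
For reference, a minimal Python sketch of the property exported above. It assumes a device object is already in hand (for instance the one handed to the tuner); how devices are enumerated is not part of this diff.

def describe(device):
    # 'infos' is the read-only property added by the binding above; it returns
    # the summary string assembled by Device::infos() on the C++ side.
    print(device.infos)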


@@ -92,4 +92,16 @@ void export_exceptions()
wrap::exception<isaac::driver::ocl::exception::mem_object_allocation_failure>("MemObjectAllocationFailure")
.def("__str__", &isaac::driver::ocl::exception::mem_object_allocation_failure::what)
;
wrap::exception<isaac::driver::ocl::exception::out_of_host_memory>("OutOfHostMemory")
.def("__str__", &isaac::driver::ocl::exception::out_of_host_memory::what)
;
wrap::exception<isaac::driver::ocl::exception::invalid_work_group_size>("InvalidWorkGroupSize")
.def("__str__", &isaac::driver::ocl::exception::invalid_work_group_size::what)
;
wrap::exception<isaac::driver::ocl::exception::invalid_value>("InvalidValue")
.def("__str__", &isaac::driver::ocl::exception::invalid_value::what)
;
}
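
With the additional OpenCL error types exported, tuner code can group them into a single tuple, as the optimize module does later in this commit. A minimal sketch, where 'run' is assumed to be any callable that may raise one of these driver errors:

import isaac as sc

# Same set as the 'to_catch' tuple introduced in the tuner's optimize module below.
to_catch = (sc.OperationNotSupported, sc.LaunchOutOfResources,
            sc.MemObjectAllocationFailure, sc.InvalidWorkGroupSize,
            sc.OutOfHostMemory, sc.InvalidValue)

def try_run(run, *args):
    # Return None instead of aborting when the driver rejects a configuration.
    try:
        return run(*args)
    except to_catch:
        return None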


@@ -29,11 +29,11 @@ void export_templates()
bp::enum_<tpt::fetching_policy_type>
("fetching_policy_type");
("fetching_policy_type")
.value("FETCH_FROM_LOCAL", tpt::FETCH_FROM_LOCAL)
.value("FETCH_FROM_GLOBAL_STRIDED", tpt::FETCH_FROM_GLOBAL_STRIDED)
.value("FETCH_FROM_GLOBAL_CONTIGUOUS", tpt::FETCH_FROM_GLOBAL_CONTIGUOUS);
bp::scope().attr("FETCH_FROM_LOCAL") = tpt::FETCH_FROM_LOCAL;
bp::scope().attr("FETCH_FROM_GLOBAL_STRIDED") = tpt::FETCH_FROM_GLOBAL_CONTIGUOUS;
bp::scope().attr("FETCH_FROM_GLOBAL_CONTIGUOUS") = tpt::FETCH_FROM_GLOBAL_STRIDED;
//Base
{
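
Because fetching_policy_type now carries its values, Python callers go through the enum type rather than the old module-level attributes (which, per the removed lines, mapped FETCH_FROM_GLOBAL_STRIDED and FETCH_FROM_GLOBAL_CONTIGUOUS to each other's values). A minimal sketch mirroring the updated tuner code:

import isaac as sc

# Fetching policies are addressed through the populated enum from now on.
fetch_types = [sc.templates.fetching_policy_type.FETCH_FROM_GLOBAL_CONTIGUOUS,
               sc.templates.fetching_policy_type.FETCH_FROM_GLOBAL_STRIDED,
               sc.templates.fetching_policy_type.FETCH_FROM_LOCAL]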

Binary file not shown.


@@ -13,7 +13,7 @@ from kivy.uix.settings import SettingsWithNoMenu
import isaac as sc
import json
from isaac.autotuning.tune import tune
from tune.tune import do_tuning
__version__ = '1.0'
@@ -62,7 +62,8 @@ class IsaacApp(App):
json_path = ''
#FIXME: Move profiling logics into tuning
sc.driver.default.queue_properties = sc.driver.PROFILING_ENABLE
tune(device, operation, json_path)
print device.infos
do_tuning(device, operation, json_path)
def show_benchmark(self):
pass

Binary file not shown.

Binary file not shown.


@@ -1,4 +1,4 @@
from external.sklearn.forest import RandomForestRegressor
from isaac.external.sklearn.forest import RandomForestRegressor
import numpy as np
def gmean(a, axis=0, dtype=None):


@@ -13,10 +13,12 @@ from numpy import cumsum
import tools
fetch_types = [sc.templates.FETCH_FROM_GLOBAL_CONTIGUOUS,
sc.templates.FETCH_FROM_GLOBAL_STRIDED,
sc.templates.FETCH_FROM_LOCAL,
sc.templates.FETCH_FROM_LOCAL]
fetch_types = [sc.templates.fetching_policy_type.FETCH_FROM_GLOBAL_CONTIGUOUS,
sc.templates.fetching_policy_type.FETCH_FROM_GLOBAL_STRIDED,
sc.templates.fetching_policy_type.FETCH_FROM_LOCAL,
sc.templates.fetching_policy_type.FETCH_FROM_LOCAL]
to_catch = (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure, sc.InvalidWorkGroupSize, sc.OutOfHostMemory, sc.InvalidValue)
def exhaustive(template, sizes, context):
tree, _ = tools.tree_of(template, sizes, context)
@@ -33,7 +35,7 @@ def exhaustive(template, sizes, context):
time = tools.benchmark(template, parameters, tree)
if not best or time < best[1]:
best = parameters, time
except (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure):
except to_catch:
pass
if best:
stdout.write('%.2f %% | Best %.2f [ for %s ]\r'%(float(idx*100)/len(ranges),metric(sizes, best[1]), best[0]))
@@ -73,7 +75,6 @@ def genetic(template, sizes, context, naccept=200, niter = 1000, cxpb=0.4, mutpb
def evaluate(genome):
idx = tuple(genome)
if idx not in cache:
print decode(genome)
cache[idx] = tools.benchmark(template, decode(genome), tree)
return cache[idx],
@@ -100,7 +101,7 @@ def genetic(template, sizes, context, naccept=200, niter = 1000, cxpb=0.4, mutpb
try:
individual.fitness.values = toolbox.evaluate(genome)
population += [individual]
except (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure ):
except to_catch:
pass
genome = encode(list(initializer.next()))
hof.update(population)
@@ -134,7 +135,7 @@ def genetic(template, sizes, context, naccept=200, niter = 1000, cxpb=0.4, mutpb
#Reproduction
else:
offspring += [random.choice(population)]
except (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure):
except to_catch:
pass
@@ -173,7 +174,7 @@ def is_local_optimum(parameters, template, sizes, context):
#Evaluate the provided parameters guess
try:
reference = tools.benchmark(template, parameters, tree)
except (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure):
except to_catch:
return False
#Latency bound -- ignore
@@ -190,7 +191,7 @@ def is_local_optimum(parameters, template, sizes, context):
time = tools.benchmark(template, x, tree)
if time/reference < .97:
return False
except (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure):
except to_catch:
pass
return True


@@ -4,7 +4,7 @@ from itertools import chain, product
from numpy import argsort, argmax
from operator import mul
import isaac as sc
from external.sklearn.forest import RandomForestRegressor
from isaac.external.sklearn.forest import RandomForestRegressor
import optimize, tools, model
from json import encoder
import json
@@ -21,7 +21,7 @@ def pow2range(a, b):
return [2**x for x in range(a, b)]
def tune(device, operation, json_path):
def do_tuning(device, operation, json_path):
#Context
context = sc.driver.context(device)


@@ -1,6 +1,6 @@
import argparse
import isaac as sc
from isaac.autotuning.tune import tune
from tune.tune import do_tuning
def parse_arguments():
platforms = sc.driver.get_platforms()
@@ -32,4 +32,4 @@ def parse_arguments():
if __name__ == "__main__":
sc.driver.default.queue_properties = sc.driver.PROFILING_ENABLE
args = parse_arguments()
tune(*args)
do_tuning(*args)
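
For completeness, a minimal sketch of driving the renamed entry point programmatically; 'device' and 'operation' are assumed to be resolved the same way parse_arguments() does, which is not shown in this diff.

import isaac as sc
from tune.tune import do_tuning

def run_tuning(device, operation, json_path=''):
    # Profiling must be enabled on the default queue before tuning,
    # exactly as the CLI wrapper and the Kivy app above do.
    sc.driver.default.queue_properties = sc.driver.PROFILING_ENABLE
    do_tuning(device, operation, json_path)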