Added exhaustive search backend
This commit is contained in:
@@ -48,10 +48,10 @@ TYPES = { 'vector-axpy': {'template':vcl.atidlas.VectorAxpyTemplate,
|
|||||||
def parameter_space(operation):
|
def parameter_space(operation):
|
||||||
simd = [1, 2, 4, 8]
|
simd = [1, 2, 4, 8]
|
||||||
pow2_1D = [2**k for k in range(12)]
|
pow2_1D = [2**k for k in range(12)]
|
||||||
pow2_2D = [2**k for k in range(10)]
|
pow2_2D = [8, 16]
|
||||||
pow2_2D_unrolled = [2**k for k in range(6)]
|
pow2_2D_unrolled = [1, 2, 4, 8]
|
||||||
FetchingPolicy = vcl.atidlas.FetchingPolicy
|
FetchingPolicy = vcl.atidlas.FetchingPolicy
|
||||||
fetch = [FetchingPolicy.FETCH_FROM_LOCAL, FetchingPolicy.FETCH_FROM_GLOBAL_STRIDED, FetchingPolicy.FETCH_FROM_GLOBAL_CONTIGUOUS]
|
fetch = [FetchingPolicy.FETCH_FROM_LOCAL]
|
||||||
if operation == 'vector-axpy': return [simd, pow2_1D, pow2_1D, fetch]
|
if operation == 'vector-axpy': return [simd, pow2_1D, pow2_1D, fetch]
|
||||||
if operation == 'reduction': return [simd, pow2_1D, pow2_1D, fetch]
|
if operation == 'reduction': return [simd, pow2_1D, pow2_1D, fetch]
|
||||||
if operation == 'matrix-axpy': return [simd, pow2_2D, pow2_2D, pow2_2D, pow2_2D, fetch]
|
if operation == 'matrix-axpy': return [simd, pow2_2D, pow2_2D, pow2_2D, pow2_2D, fetch]
|
||||||
@@ -97,7 +97,7 @@ def do_tuning(config_fname, spec_fname, viennacl_root):
|
|||||||
fname = os.devnull
|
fname = os.devnull
|
||||||
with open(fname, "w+") as archive:
|
with open(fname, "w+") as archive:
|
||||||
with vcl.Statement(node) as statement:
|
with vcl.Statement(node) as statement:
|
||||||
result = optimize.genetic(statement, ctx, TYPES[operation]['template'], lambda p: TYPES[operation]['template'](p, *other_params),
|
result = optimize.exhaustive(statement, ctx, TYPES[operation]['template'], lambda p: TYPES[operation]['template'](p, *other_params),
|
||||||
TYPES[operation]['parameter-names'], parameter_space(operation), lambda t: TYPES[operation]['perf-index']([datatype().itemsize, s, t]), TYPES[operation]['perf-measure'], archive)
|
TYPES[operation]['parameter-names'], parameter_space(operation), lambda t: TYPES[operation]['perf-index']([datatype().itemsize, s, t]), TYPES[operation]['perf-measure'], archive)
|
||||||
if result and viennacl_root:
|
if result and viennacl_root:
|
||||||
vclio.generate_viennacl_headers(viennacl_root, device, datatype, operation, other_params, result[1])
|
vclio.generate_viennacl_headers(viennacl_root, device, datatype, operation, other_params, result[1])
|
||||||
|
@@ -1,9 +1,12 @@
|
|||||||
import random
|
import random
|
||||||
import time
|
import time
|
||||||
|
import sys
|
||||||
import tools
|
import tools
|
||||||
import pyviennacl as vcl
|
import pyviennacl as vcl
|
||||||
import numpy
|
import numpy
|
||||||
|
|
||||||
|
from deap import algorithms
|
||||||
|
|
||||||
from collections import OrderedDict as odict
|
from collections import OrderedDict as odict
|
||||||
|
|
||||||
def closest_divisor(N, x):
|
def closest_divisor(N, x):
|
||||||
@@ -154,28 +157,51 @@ class GeneticOperators(object):
|
|||||||
return individual,
|
return individual,
|
||||||
|
|
||||||
def evaluate(self, individual):
|
def evaluate(self, individual):
|
||||||
tupindividual = tuple(individual)
|
if tuple(individual) not in self.cache:
|
||||||
if tupindividual not in self.cache:
|
|
||||||
template = self.build_template(self.TemplateType.Parameters(*individual))
|
template = self.build_template(self.TemplateType.Parameters(*individual))
|
||||||
registers_usage = template.registers_usage(vcl.atidlas.StatementsTuple(self.statement))/4
|
try:
|
||||||
lmem_usage = template.lmem_usage(vcl.atidlas.StatementsTuple(self.statement))
|
self.cache[tuple(individual)] = tools.benchmark(template, self.statement, self.device)
|
||||||
local_size = template.parameters.local_size_0*template.parameters.local_size_1
|
except:
|
||||||
occupancy_record = tools.OccupancyRecord(self.device, local_size, lmem_usage, registers_usage)
|
self.cache[tuple(individual)] = 10
|
||||||
if occupancy_record.occupancy < 15 :
|
return self.cache[tuple(individual)],
|
||||||
self.cache[tupindividual] = 10
|
|
||||||
else:
|
def eaMuPlusLambda(population, toolbox, mu, lambda_, cxpb, mutpb, maxtime, maxgen, halloffame, compute_perf, perf_metric):
|
||||||
try:
|
# Evaluate the individuals with an invalid fitness
|
||||||
template.execute(self.statement, True)
|
invalid_ind = [ind for ind in population if not ind.fitness.valid]
|
||||||
self.statement.result.context.finish_all_queues()
|
fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
|
||||||
N = 0
|
for ind, fit in zip(invalid_ind, fitnesses):
|
||||||
current_time = 0
|
ind.fitness.values = fit
|
||||||
while current_time < 1e-2:
|
|
||||||
time_before = time.time()
|
if halloffame is not None:
|
||||||
template.execute(self.statement,False)
|
halloffame.update(population)
|
||||||
self.statement.result.context.finish_all_queues()
|
|
||||||
current_time += time.time() - time_before
|
# Begin the generational process
|
||||||
N+=1
|
gen = 0
|
||||||
self.cache[tupindividual] = current_time/N
|
maxtime = time.strptime(maxtime, '%Mm%Ss')
|
||||||
except:
|
maxtime = maxtime.tm_min*60 + maxtime.tm_sec
|
||||||
self.cache[tupindividual] = 10
|
start_time = time.time()
|
||||||
return self.cache[tupindividual],
|
while time.time() - start_time < maxtime and gen < maxgen:
|
||||||
|
# Vary the population
|
||||||
|
offspring = algorithms.varOr(population, toolbox, lambda_, cxpb, mutpb)
|
||||||
|
|
||||||
|
# Evaluate the individuals with an invalid fitness
|
||||||
|
invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
|
||||||
|
fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
|
||||||
|
for ind, fit in zip(invalid_ind, fitnesses):
|
||||||
|
ind.fitness.values = fit
|
||||||
|
|
||||||
|
# Update the hall of fame with the generated individuals
|
||||||
|
if halloffame is not None:
|
||||||
|
halloffame.update(offspring)
|
||||||
|
|
||||||
|
# Select the next generation population
|
||||||
|
population[:] = toolbox.select(population + offspring, mu)
|
||||||
|
|
||||||
|
# Update the statistics with the new population
|
||||||
|
gen = gen + 1
|
||||||
|
|
||||||
|
best_profile = '(%s)'%','.join(map(str,halloffame[0]));
|
||||||
|
best_performance = compute_perf(halloffame[0].fitness.values[0])
|
||||||
|
sys.stdout.write('Generation %d | Time %d | Best %d %s [ for %s ]\n'%(gen, time.time() - start_time, best_performance, perf_metric, best_profile))
|
||||||
|
sys.stdout.write('\n')
|
||||||
|
return population
|
@@ -1,57 +1,39 @@
|
|||||||
import array
|
import array
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import random
|
import random
|
||||||
import time
|
|
||||||
import sys
|
|
||||||
|
|
||||||
from deap import algorithms
|
import itertools
|
||||||
|
|
||||||
|
import tools
|
||||||
|
import deap.tools
|
||||||
|
|
||||||
from deap import base
|
from deap import base
|
||||||
from deap import creator
|
from deap import creator
|
||||||
from deap import tools
|
from genetic import GeneticOperators
|
||||||
|
from genetic import eaMuPlusLambda
|
||||||
|
|
||||||
from genetic_operators import GeneticOperators
|
def exhaustive(statement, context, TemplateType, build_template, parameter_names, all_parameters, compute_perf, perf_metric, out):
    """Benchmark every combination of ``all_parameters`` and report the fastest.

    The search space is the Cartesian product of the value lists in
    ``all_parameters``.  Each combination is turned into a template with
    ``build_template(TemplateType.Parameters(*combination))`` and timed with
    ``tools.benchmark`` on the first device of ``context``.  A progress line
    is printed after every successful benchmark.

    ``parameter_names`` and ``out`` are accepted for signature parity with
    ``genetic`` but are not used here.

    NOTE(review): this function returns nothing (``None``).  The visible
    caller does ``if result and viennacl_root: ... result[1]``, so with this
    backend that branch can never run.  The return shape expected by the
    caller is not visible from here -- TODO confirm it and return the best
    configuration accordingly.
    """
    device = context.devices[0]

    # First pass: count the configurations tools.skip() does not reject.
    # The count is only used as the denominator of the progress display.
    nvalid = 0
    for individual in itertools.product(*all_parameters):
        template = build_template(TemplateType.Parameters(*individual))
        if not tools.skip(template, statement, device):
            nvalid = nvalid + 1

    # Second pass: time every configuration, keeping the smallest time seen.
    current = 0
    minT = float('inf')
    best = None  # defined up front so the progress line can never hit an unbound name
    for individual in itertools.product(*all_parameters):
        template = build_template(TemplateType.Parameters(*individual))
        try:
            T = tools.benchmark(template, statement, device)
            current = current + 1
            if T < minT:
                minT = T
                best = individual
            # Parenthesised single-argument form prints the same text on
            # Python 2 and is also valid Python 3.
            print('%d / %d , Best is %d %s for %s\r'%(current, nvalid, compute_perf(minT), perf_metric, best))
        except Exception:
            # Best effort: a configuration that cannot be benchmarked is
            # simply ignored.  Catching Exception (not a bare except) lets
            # KeyboardInterrupt / SystemExit abort the search.
            pass
|
|
||||||
|
|
||||||
|
|
||||||
def genetic(statement, context, TemplateType, build_template, parameter_names, all_parameters, compute_perf, perf_metric, out):
|
def genetic(statement, context, TemplateType, build_template, parameter_names, all_parameters, compute_perf, perf_metric, out):
|
||||||
gen = GeneticOperators(context.devices[0], statement, all_parameters, parameter_names, TemplateType, build_template)
|
gen = GeneticOperators(context.devices[0], statement, all_parameters, parameter_names, TemplateType, build_template)
|
||||||
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
|
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
|
||||||
@@ -68,12 +50,12 @@ def genetic(statement, context, TemplateType, build_template, parameter_names, a
|
|||||||
toolbox.register("select", tools.selBest)
|
toolbox.register("select", tools.selBest)
|
||||||
|
|
||||||
pop = toolbox.population(n=30)
|
pop = toolbox.population(n=30)
|
||||||
hof = tools.HallOfFame(1)
|
hof = deap.tools.HallOfFame(1)
|
||||||
|
|
||||||
best_performer = lambda x: max([compute_perf(hof[0].fitness.values[0]) for t in x])
|
best_performer = lambda x: max([compute_perf(hof[0].fitness.values[0]) for t in x])
|
||||||
best_profile = lambda x: '(%s)'%','.join(map(str,hof[0]))
|
best_profile = lambda x: '(%s)'%','.join(map(str,hof[0]))
|
||||||
|
|
||||||
stats = tools.Statistics(lambda ind: ind.fitness.values)
|
stats = deap.tools.Statistics(lambda ind: ind.fitness.values)
|
||||||
stats.register("max (" + perf_metric + ")", lambda x: max([compute_perf(hof[0].fitness.values[0]) for t in x]))
|
stats.register("max (" + perf_metric + ")", lambda x: max([compute_perf(hof[0].fitness.values[0]) for t in x]))
|
||||||
stats.register("profile ", lambda x: '(%s)'%','.join(map(str,hof[0])))
|
stats.register("profile ", lambda x: '(%s)'%','.join(map(str,hof[0])))
|
||||||
|
|
||||||
|
@@ -1,5 +1,7 @@
|
|||||||
from __future__ import division
|
from __future__ import division
|
||||||
import pyopencl
|
import pyopencl
|
||||||
|
import time
|
||||||
|
from pyviennacl.atidlas import StatementsTuple
|
||||||
|
|
||||||
class PhysicalLimits:
|
class PhysicalLimits:
|
||||||
def __init__(self, dev):
|
def __init__(self, dev):
|
||||||
@@ -100,5 +102,37 @@ class OccupancyRecord:
|
|||||||
self.warps_per_mp = self.limit*allocated_warps
|
self.warps_per_mp = self.limit*allocated_warps
|
||||||
self.occupancy = 100*self.warps_per_mp/physical_limits.warps_per_mp
|
self.occupancy = 100*self.warps_per_mp/physical_limits.warps_per_mp
|
||||||
|
|
||||||
|
|
||||||
|
def skip(template, statement, device):
    """Tell whether ``template`` should be left out when tuning ``statement``.

    Returns True when ``template.check(statement)`` is truthy or when the
    occupancy computed by ``OccupancyRecord`` for ``device`` is below 15
    (``OccupancyRecord.occupancy`` is expressed as a percentage); returns
    False otherwise.
    """
    wrapped = StatementsTuple(statement)
    # NOTE(review): the division by 4 is carried over unchanged; presumably a
    # unit conversion of the reported register usage -- TODO confirm.
    regs = template.registers_usage(wrapped)/4
    lmem = template.lmem_usage(wrapped)
    threads = template.parameters.local_size_0*template.parameters.local_size_1
    record = OccupancyRecord(device, threads, lmem, regs)
    rejected = template.check(statement) or record.occupancy < 15
    return True if rejected else False
|
||||||
|
|
||||||
|
def benchmark(template, statement, device):
    """Return the mean wall-clock time, in seconds, of one run of ``template``.

    The template is first executed once outside the timed region, then
    executed repeatedly (waiting for all queues after each run) until at
    least 10 ms of measured time has accumulated; the accumulated time
    divided by the number of timed runs is returned.

    Raises:
        ValueError: "Template has too low occupancy" when the occupancy
            computed by ``OccupancyRecord`` for ``device`` is below 15, or
            "Invalid template" when executing/timing the template fails.
    """
    statements = StatementsTuple(statement)
    # NOTE(review): the division by 4 is carried over unchanged; presumably a
    # unit conversion of the reported register usage -- TODO confirm.
    registers_usage = template.registers_usage(statements)/4
    lmem_usage = template.lmem_usage(statements)
    local_size = template.parameters.local_size_0*template.parameters.local_size_1
    occupancy_record = OccupancyRecord(device, local_size, lmem_usage, registers_usage)
    if occupancy_record.occupancy < 15:
        raise ValueError("Template has too low occupancy")

    try:
        # Untimed first run.  NOTE(review): the meaning of the boolean flag
        # (True here, False in the timed loop) is not visible from this file
        # -- presumably it forces kernel (re)generation; TODO confirm.
        template.execute(statement, True)
        statement.result.context.finish_all_queues()
        N = 0
        current_time = 0
        while current_time < 1e-2:
            time_before = time.time()
            template.execute(statement, False)
            # Wait for completion so the measured interval covers the run.
            statement.result.context.finish_all_queues()
            current_time += time.time() - time_before
            N += 1
        return current_time/N
    except Exception:
        # Exception rather than a bare except: KeyboardInterrupt and
        # SystemExit must propagate instead of being reported as an
        # invalid template.
        raise ValueError("Invalid template")
|
||||||
|
Reference in New Issue
Block a user