Added exhaustive search backend

This commit is contained in:
Philippe Tillet
2014-09-11 16:13:46 -04:00
parent bf48d69b17
commit 08d17aa58c
4 changed files with 119 additions and 77 deletions

View File

@@ -48,10 +48,10 @@ TYPES = { 'vector-axpy': {'template':vcl.atidlas.VectorAxpyTemplate,
def parameter_space(operation): def parameter_space(operation):
simd = [1, 2, 4, 8] simd = [1, 2, 4, 8]
pow2_1D = [2**k for k in range(12)] pow2_1D = [2**k for k in range(12)]
pow2_2D = [2**k for k in range(10)] pow2_2D = [8, 16]
pow2_2D_unrolled = [2**k for k in range(6)] pow2_2D_unrolled = [1, 2, 4, 8]
FetchingPolicy = vcl.atidlas.FetchingPolicy FetchingPolicy = vcl.atidlas.FetchingPolicy
fetch = [FetchingPolicy.FETCH_FROM_LOCAL, FetchingPolicy.FETCH_FROM_GLOBAL_STRIDED, FetchingPolicy.FETCH_FROM_GLOBAL_CONTIGUOUS] fetch = [FetchingPolicy.FETCH_FROM_LOCAL]
if operation == 'vector-axpy': return [simd, pow2_1D, pow2_1D, fetch] if operation == 'vector-axpy': return [simd, pow2_1D, pow2_1D, fetch]
if operation == 'reduction': return [simd, pow2_1D, pow2_1D, fetch] if operation == 'reduction': return [simd, pow2_1D, pow2_1D, fetch]
if operation == 'matrix-axpy': return [simd, pow2_2D, pow2_2D, pow2_2D, pow2_2D, fetch] if operation == 'matrix-axpy': return [simd, pow2_2D, pow2_2D, pow2_2D, pow2_2D, fetch]
@@ -97,7 +97,7 @@ def do_tuning(config_fname, spec_fname, viennacl_root):
fname = os.devnull fname = os.devnull
with open(fname, "w+") as archive: with open(fname, "w+") as archive:
with vcl.Statement(node) as statement: with vcl.Statement(node) as statement:
result = optimize.genetic(statement, ctx, TYPES[operation]['template'], lambda p: TYPES[operation]['template'](p, *other_params), result = optimize.exhaustive(statement, ctx, TYPES[operation]['template'], lambda p: TYPES[operation]['template'](p, *other_params),
TYPES[operation]['parameter-names'], parameter_space(operation), lambda t: TYPES[operation]['perf-index']([datatype().itemsize, s, t]), TYPES[operation]['perf-measure'], archive) TYPES[operation]['parameter-names'], parameter_space(operation), lambda t: TYPES[operation]['perf-index']([datatype().itemsize, s, t]), TYPES[operation]['perf-measure'], archive)
if result and viennacl_root: if result and viennacl_root:
vclio.generate_viennacl_headers(viennacl_root, device, datatype, operation, other_params, result[1]) vclio.generate_viennacl_headers(viennacl_root, device, datatype, operation, other_params, result[1])

View File

@@ -1,9 +1,12 @@
import random import random
import time import time
import sys
import tools import tools
import pyviennacl as vcl import pyviennacl as vcl
import numpy import numpy
from deap import algorithms
from collections import OrderedDict as odict from collections import OrderedDict as odict
def closest_divisor(N, x): def closest_divisor(N, x):
@@ -154,28 +157,51 @@ class GeneticOperators(object):
return individual, return individual,
def evaluate(self, individual): def evaluate(self, individual):
tupindividual = tuple(individual) if tuple(individual) not in self.cache:
if tupindividual not in self.cache:
template = self.build_template(self.TemplateType.Parameters(*individual)) template = self.build_template(self.TemplateType.Parameters(*individual))
registers_usage = template.registers_usage(vcl.atidlas.StatementsTuple(self.statement))/4 try:
lmem_usage = template.lmem_usage(vcl.atidlas.StatementsTuple(self.statement)) self.cache[tuple(individual)] = tools.benchmark(template, self.statement, self.device)
local_size = template.parameters.local_size_0*template.parameters.local_size_1 except:
occupancy_record = tools.OccupancyRecord(self.device, local_size, lmem_usage, registers_usage) self.cache[tuple(individual)] = 10
if occupancy_record.occupancy < 15 : return self.cache[tuple(individual)],
self.cache[tupindividual] = 10
else: def eaMuPlusLambda(population, toolbox, mu, lambda_, cxpb, mutpb, maxtime, maxgen, halloffame, compute_perf, perf_metric):
try: # Evaluate the individuals with an invalid fitness
template.execute(self.statement, True) invalid_ind = [ind for ind in population if not ind.fitness.valid]
self.statement.result.context.finish_all_queues() fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
N = 0 for ind, fit in zip(invalid_ind, fitnesses):
current_time = 0 ind.fitness.values = fit
while current_time < 1e-2:
time_before = time.time() if halloffame is not None:
template.execute(self.statement,False) halloffame.update(population)
self.statement.result.context.finish_all_queues()
current_time += time.time() - time_before # Begin the generational process
N+=1 gen = 0
self.cache[tupindividual] = current_time/N maxtime = time.strptime(maxtime, '%Mm%Ss')
except: maxtime = maxtime.tm_min*60 + maxtime.tm_sec
self.cache[tupindividual] = 10 start_time = time.time()
return self.cache[tupindividual], while time.time() - start_time < maxtime and gen < maxgen:
# Vary the population
offspring = algorithms.varOr(population, toolbox, lambda_, cxpb, mutpb)
# Evaluate the individuals with an invalid fitness
invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
for ind, fit in zip(invalid_ind, fitnesses):
ind.fitness.values = fit
# Update the hall of fame with the generated individuals
if halloffame is not None:
halloffame.update(offspring)
# Select the next generation population
population[:] = toolbox.select(population + offspring, mu)
# Update the statistics with the new population
gen = gen + 1
best_profile = '(%s)'%','.join(map(str,halloffame[0]));
best_performance = compute_perf(halloffame[0].fitness.values[0])
sys.stdout.write('Generation %d | Time %d | Best %d %s [ for %s ]\n'%(gen, time.time() - start_time, best_performance, perf_metric, best_profile))
sys.stdout.write('\n')
return population

View File

@@ -1,56 +1,38 @@
import array import array
import numpy as np import numpy as np
import random import random
import time
import sys
from deap import algorithms import itertools
import tools
import deap.tools
from deap import base from deap import base
from deap import creator from deap import creator
from deap import tools from genetic import GeneticOperators
from genetic import eaMuPlusLambda
from genetic_operators import GeneticOperators def exhaustive(statement, context, TemplateType, build_template, parameter_names, all_parameters, compute_perf, perf_metric, out):
device = context.devices[0]
nvalid = 0
current = 0
minT = float('inf')
for individual in itertools.product(*all_parameters):
template = build_template(TemplateType.Parameters(*individual))
if not tools.skip(template, statement, device):
nvalid = nvalid + 1
for individual in itertools.product(*all_parameters):
template = build_template(TemplateType.Parameters(*individual))
try:
T = tools.benchmark(template,statement,device)
current = current + 1
if T < minT:
minT = T
best = individual
print '%d / %d , Best is %d %s for %s\r'%(current, nvalid, compute_perf(minT), perf_metric, best)
except:
pass
def eaMuPlusLambda(population, toolbox, mu, lambda_, cxpb, mutpb, maxtime, maxgen, halloffame, compute_perf, perf_metric):
    """Run a (mu + lambda) evolutionary loop bounded by time and generations.

    Each generation produces ``lambda_`` offspring with ``algorithms.varOr``,
    evaluates the ones whose fitness is not valid, and keeps ``mu``
    individuals chosen by ``toolbox.select`` from parents plus offspring.
    One progress line is written to stdout per generation.

    Parameters:
        population: list of individuals; replaced in place each generation.
        toolbox: object providing ``map``, ``evaluate`` and ``select`` (plus
            whatever ``algorithms.varOr`` needs).
        mu: number of individuals kept for the next generation.
        lambda_: number of offspring produced per generation.
        cxpb, mutpb: forwarded unchanged to ``algorithms.varOr``.
        maxtime: time budget as a string in ``'%Mm%Ss'`` form, e.g. ``'2m30s'``.
        maxgen: maximum number of generations.
        halloffame: object with ``update`` and indexing, or ``None``.
        compute_perf: callable applied to the best fitness value for display.
        perf_metric: label printed next to the displayed performance.

    Returns:
        The ``population`` list that was passed in.
    """
    # Evaluate the individuals with an invalid fitness
    invalid_ind = [ind for ind in population if not ind.fitness.valid]
    fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
    for ind, fit in zip(invalid_ind, fitnesses):
        ind.fitness.values = fit

    if halloffame is not None:
        halloffame.update(population)

    # Begin the generational process
    gen = 0
    # Convert the 'MmSs' budget string into a number of seconds.
    parsed_budget = time.strptime(maxtime, '%Mm%Ss')
    maxtime = parsed_budget.tm_min*60 + parsed_budget.tm_sec
    start_time = time.time()
    while time.time() - start_time < maxtime and gen < maxgen:
        # Vary the population
        offspring = algorithms.varOr(population, toolbox, lambda_, cxpb, mutpb)

        # Evaluate the individuals with an invalid fitness
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
        fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit

        # Update the hall of fame with the generated individuals
        if halloffame is not None:
            halloffame.update(offspring)

        # Select the next generation population
        population[:] = toolbox.select(population + offspring, mu)

        gen = gen + 1

        # Progress reporting reads the best individual from the hall of fame,
        # so it is only possible when one was supplied and is not empty.
        if halloffame is not None and len(halloffame) > 0:
            best_profile = '(%s)'%','.join(map(str,halloffame[0]))
            best_performance = compute_perf(halloffame[0].fitness.values[0])
            sys.stdout.write('Generation %d | Time %d | Best %d %s [ for %s ]\n'%(gen, time.time() - start_time, best_performance, perf_metric, best_profile))
    sys.stdout.write('\n')
    return population
def genetic(statement, context, TemplateType, build_template, parameter_names, all_parameters, compute_perf, perf_metric, out): def genetic(statement, context, TemplateType, build_template, parameter_names, all_parameters, compute_perf, perf_metric, out):
gen = GeneticOperators(context.devices[0], statement, all_parameters, parameter_names, TemplateType, build_template) gen = GeneticOperators(context.devices[0], statement, all_parameters, parameter_names, TemplateType, build_template)
@@ -68,12 +50,12 @@ def genetic(statement, context, TemplateType, build_template, parameter_names, a
toolbox.register("select", tools.selBest) toolbox.register("select", tools.selBest)
pop = toolbox.population(n=30) pop = toolbox.population(n=30)
hof = tools.HallOfFame(1) hof = deap.tools.HallOfFame(1)
best_performer = lambda x: max([compute_perf(hof[0].fitness.values[0]) for t in x]) best_performer = lambda x: max([compute_perf(hof[0].fitness.values[0]) for t in x])
best_profile = lambda x: '(%s)'%','.join(map(str,hof[0])) best_profile = lambda x: '(%s)'%','.join(map(str,hof[0]))
stats = tools.Statistics(lambda ind: ind.fitness.values) stats = deap.tools.Statistics(lambda ind: ind.fitness.values)
stats.register("max (" + perf_metric + ")", lambda x: max([compute_perf(hof[0].fitness.values[0]) for t in x])) stats.register("max (" + perf_metric + ")", lambda x: max([compute_perf(hof[0].fitness.values[0]) for t in x]))
stats.register("profile ", lambda x: '(%s)'%','.join(map(str,hof[0]))) stats.register("profile ", lambda x: '(%s)'%','.join(map(str,hof[0])))

View File

@@ -1,5 +1,7 @@
from __future__ import division from __future__ import division
import pyopencl import pyopencl
import time
from pyviennacl.atidlas import StatementsTuple
class PhysicalLimits: class PhysicalLimits:
def __init__(self, dev): def __init__(self, dev):
@@ -101,4 +103,36 @@ class OccupancyRecord:
self.occupancy = 100*self.warps_per_mp/physical_limits.warps_per_mp self.occupancy = 100*self.warps_per_mp/physical_limits.warps_per_mp
def skip(template, statement, device):
    """Tell whether ``template`` should be left out for ``statement``.

    Builds an ``OccupancyRecord`` for ``device`` from the template's register
    usage, local-memory usage and work-group size, then reports ``True`` when
    ``template.check(statement)`` is truthy or the occupancy is below 15.

    Returns:
        ``True`` if the template should be skipped, ``False`` otherwise.
    """
    wrapped = StatementsTuple(statement)
    # NOTE(review): the division by 4 presumably converts a byte count into
    # 32-bit registers -- confirm against registers_usage().
    regs = template.registers_usage(wrapped)/4
    lmem = template.lmem_usage(wrapped)
    work_group = template.parameters.local_size_0*template.parameters.local_size_1
    record = OccupancyRecord(device, work_group, lmem, regs)
    # check() is consulted first; occupancy is only compared when it is falsy.
    return bool(template.check(statement) or record.occupancy < 15)
def benchmark(template, statement, device):
    """Measure the average time of one ``template.execute`` on ``statement``.

    The template is first rejected if the occupancy computed for ``device``
    is below 15. Otherwise it is executed once outside the timed region, then
    repeatedly until at least 1e-2 seconds of wall time have accumulated; the
    queues are drained after every execution so the timing covers completion.

    Parameters:
        template: object providing ``registers_usage``, ``lmem_usage``,
            ``parameters`` and ``execute``.
        statement: the statement to execute; its ``result.context`` is used
            to wait for completion.
        device: device handed to ``OccupancyRecord``.

    Returns:
        Accumulated wall time divided by the number of timed executions.

    Raises:
        ValueError: if the occupancy is too low, or if executing the template
            raises an ordinary exception.
    """
    statements = StatementsTuple(statement)
    # NOTE(review): the division by 4 presumably converts a byte count into
    # 32-bit registers -- confirm against registers_usage().
    registers_usage = template.registers_usage(statements)/4
    lmem_usage = template.lmem_usage(statements)
    local_size = template.parameters.local_size_0*template.parameters.local_size_1
    occupancy_record = OccupancyRecord(device, local_size, lmem_usage, registers_usage)
    if occupancy_record.occupancy < 15:
        raise ValueError("Template has too low occupancy")
    try:
        # Untimed first run. NOTE(review): the True flag presumably forces
        # (re)compilation -- confirm against the template's execute().
        template.execute(statement, True)
        statement.result.context.finish_all_queues()
        N = 0
        current_time = 0
        while current_time < 1e-2:
            time_before = time.time()
            template.execute(statement, False)
            statement.result.context.finish_all_queues()
            current_time += time.time() - time_before
            N += 1
        return current_time/N
    except Exception:
        # Only ordinary errors are reported as an invalid template;
        # KeyboardInterrupt and SystemExit are left to propagate so a long
        # tuning run can still be interrupted.
        raise ValueError("Invalid template")