No longer repair the GA; kill the invalid mutants instead

This commit is contained in:
Philippe Tillet
2014-09-13 17:06:47 -04:00
parent 5ee9e7f994
commit c4c8404d40
4 changed files with 64 additions and 132 deletions

View File

@@ -48,10 +48,10 @@ TYPES = { 'vector-axpy': {'template':vcl.atidlas.VectorAxpyTemplate,
def parameter_space(operation): def parameter_space(operation):
simd = [1, 2, 4, 8] simd = [1, 2, 4, 8]
pow2_1D = [2**k for k in range(12)] pow2_1D = [2**k for k in range(12)]
pow2_2D = [8, 16] pow2_2D = [2**i for i in range(8)]
pow2_2D_unrolled = [1, 2, 4, 8] pow2_2D_unrolled = [2**i for i in range(8)]
FetchingPolicy = vcl.atidlas.FetchingPolicy FetchingPolicy = vcl.atidlas.FetchingPolicy
fetch = [FetchingPolicy.FETCH_FROM_LOCAL] fetch = [FetchingPolicy.FETCH_FROM_LOCAL, FetchingPolicy.FETCH_FROM_GLOBAL_CONTIGUOUS, FetchingPolicy.FETCH_FROM_GLOBAL_STRIDED]
if operation == 'vector-axpy': return [simd, pow2_1D, pow2_1D, fetch] if operation == 'vector-axpy': return [simd, pow2_1D, pow2_1D, fetch]
if operation == 'reduction': return [simd, pow2_1D, pow2_1D, fetch] if operation == 'reduction': return [simd, pow2_1D, pow2_1D, fetch]
if operation == 'matrix-axpy': return [simd, pow2_2D, pow2_2D, pow2_2D, pow2_2D, fetch] if operation == 'matrix-axpy': return [simd, pow2_2D, pow2_2D, pow2_2D, pow2_2D, fetch]
@@ -97,7 +97,7 @@ def do_tuning(config_fname, spec_fname, viennacl_root):
fname = os.devnull fname = os.devnull
with open(fname, "w+") as archive: with open(fname, "w+") as archive:
with vcl.Statement(node) as statement: with vcl.Statement(node) as statement:
result = optimize.exhaustive(statement, ctx, TYPES[operation]['template'], lambda p: TYPES[operation]['template'](p, *other_params), result = optimize.genetic(statement, ctx, TYPES[operation]['template'], lambda p: TYPES[operation]['template'](p, *other_params),
TYPES[operation]['parameter-names'], parameter_space(operation), lambda t: TYPES[operation]['perf-index']([datatype().itemsize, s, t]), TYPES[operation]['perf-measure'], archive) TYPES[operation]['parameter-names'], parameter_space(operation), lambda t: TYPES[operation]['perf-index']([datatype().itemsize, s, t]), TYPES[operation]['perf-measure'], archive)
if result and viennacl_root: if result and viennacl_root:
vclio.generate_viennacl_headers(viennacl_root, device, datatype, operation, other_params, result[1]) vclio.generate_viennacl_headers(viennacl_root, device, datatype, operation, other_params, result[1])

View File

@@ -3,8 +3,8 @@ import time
import sys import sys
import tools import tools
import pyviennacl as vcl import pyviennacl as vcl
import numpy import numpy as np
import copy
from deap import algorithms from deap import algorithms
from collections import OrderedDict as odict from collections import OrderedDict as odict
@@ -28,6 +28,7 @@ class GeneticOperators(object):
self.ParameterType = TemplateType.Parameters self.ParameterType = TemplateType.Parameters
self.build_template = build_template self.build_template = build_template
self.cache = {} self.cache = {}
self.indpb = 0.15
def init(self): def init(self):
while True: while True:
@@ -40,121 +41,54 @@ class GeneticOperators(object):
if template.check(self.statement)==0 and occupancy_record.occupancy >= 10 : if template.check(self.statement)==0 and occupancy_record.occupancy >= 10 :
return result return result
@staticmethod def mutate(self, individual):
def min_to_hyperbol(a, tup): while True:
x = 1 new_individual = copy.deepcopy(individual)
for i in range(100): for i in new_individual:
dx = 2*(-a**2/x**3 + a*tup[1]/x**2 - tup[0] + x); if random.random() < self.indpb:
ddx = 6*a**2/x**4 - 4*a*tup[1]/x**3 + 2; coef = random.choice([1, 2])
if abs(dx) < 1e-7 or abs(ddx) < 1e-7: funs = [lambda x:max(1, x/coef), lambda x:x*coef]
break
x-=dx/ddx;
if x<1 or x>a:
x = max(1, min(x, a))
break
new_x = int(closest_divisor(a, x))
new_y = int(a / new_x)
return (new_x, new_y)
def repair(self,func):
def repair_impl(child):
D = odict(zip(self.parameter_names, child))
dummy_template = self.build_template(self.ParameterType(*D.values()))
FetchingPolicy = vcl.atidlas.FetchingPolicy;
D['local-size-0'] = max(1, D['local-size-0'])
D['local-size-1'] = max(1, D['local-size-1'])
if 'local-size-1' not in D:
D['local-size-0'] = min(D['local-size-0'], self.device.max_work_group_size)
elif D['local-size-0']*D['local-size-1'] > self.device.max_work_group_size:
res = GeneticOperators.min_to_hyperbol(self.device.max_work_group_size, (D['local-size-0'], D['local-size-1']))
D['local-size-0'] = res[0]
D['local-size-1'] = res[1]
if self.ParameterType is vcl.atidlas.MatrixProductTemplate.Parameters:
if dummy_template.A_trans != 'N' and dummy_template.B_trans != 'T':
D['simd-width'] = 1
D['kL'] = max(1, D['kL'])
D['kS'] = max(1, D['kS'])
D['mS'] = max(D['mS'], D['simd-width'])
D['nS'] = max(D['nS'], D['simd-width'])
D['mS'] = D['mS'] - D['mS']%D['simd-width']
D['nS'] = D['nS'] - D['nS']%D['simd-width']
if D['A-fetch-policy']!=FetchingPolicy.FETCH_FROM_LOCAL and D['B-fetch-policy']!=FetchingPolicy.FETCH_FROM_LOCAL:
D['local-fetch-size-0']=D['local-fetch-size-1']=0
else:
res = GeneticOperators.min_to_hyperbol(D['local-size-0']*D['local-size-1'], (D['local-fetch-size-0'], D['local-fetch-size-1']))
D['local-fetch-size-0'] = res[0]
D['local-fetch-size-1'] = res[1]
if D['A-fetch-policy']==FetchingPolicy.FETCH_FROM_LOCAL and dummy_template.A_trans=='N' and D['kL'] % D['local-fetch-size-1'] > 0:
D['kL'] = max(1,round(D['kL']/D['local-fetch-size-1']))*D['local-fetch-size-1']
if D['B-fetch-policy']==FetchingPolicy.FETCH_FROM_LOCAL and dummy_template.B_trans=='T' and D['kL'] % D['local-fetch-size-1'] > 0:
D['kL'] = max(1,round(D['kL']/D['local-fetch-size-1']))*D['local-fetch-size-1']
D['kS'] = min(D['kL'], D['kS'])
return D.values()
def wrappper(*args, **kargs):
offspring = func(*args, **kargs)
for child in offspring:
new_child = repair_impl(child)
for i in range(len(child)):
if child[i] != new_child[i]:
child[i] = new_child[i]
return offspring
return wrappper
def mutate(self, individual, indpb = 0.15):
for i in individual:
if random.random() < indpb:
coef = 2**(1 + numpy.random.poisson())
funs = [lambda x:x/coef, lambda x:x*coef]
F = random.choice(funs) F = random.choice(funs)
nF = funs[1] if F==funs[0] else funs[0] nF = funs[1] if F==funs[0] else funs[0]
#swapping-based mutations #swapping-based mutations
def m0(): def m0():
individual[1], individual[3] = individual[3], individual[1] new_individual[1], new_individual[3] = new_individual[3], new_individual[1]
def m1(): def m1():
individual[4], individual[6] = individual[6], individual[4] new_individual[4], new_individual[6] = new_individual[6], new_individual[4]
def m2(): def m2():
individual[9], individual[10] = individual[10], individual[9] new_individual[9], new_individual[10] = new_individual[10], new_individual[9]
#value modification mutations #value modification mutations
def m3(): def m3():
individual[0] = random.choice(self.parameters[0]) new_individual[0] = random.choice(self.parameters[0])
def m4(): def m4():
individual[1] = F(individual[1]) new_individual[1] = F(new_individual[1])
individual[9] = F(individual[9]) new_individual[9] = F(new_individual[9])
def m5(): def m5():
individual[2] = F(individual[2]) new_individual[2] = F(new_individual[2])
def m6(): def m6():
individual[3] = F(individual[3]) new_individual[3] = F(new_individual[3])
individual[10] = F(individual[10]) new_individual[10] = F(new_individual[10])
def m7(): def m7():
individual[4] = F(individual[4]) new_individual[4] = F(new_individual[4])
def m8(): def m8():
individual[5] = F(individual[5]) new_individual[5] = F(new_individual[5])
def m9(): def m9():
individual[6] = F(individual[6]) new_individual[6] = F(new_individual[6])
def m10(): def m10():
individual[7] = random.choice([x for x in self.parameters[7] if x!=individual[7]]) new_individual[7] = random.choice([x for x in self.parameters[7] if x!=new_individual[7]])
def m11(): def m11():
individual[8] = random.choice([x for x in self.parameters[8] if x!=individual[8]]) new_individual[8] = random.choice([x for x in self.parameters[8] if x!=new_individual[8]])
def m12(): def m12():
individual[9] = F(individual[9]) new_individual[9] = F(new_individual[9])
individual[10] = nF(individual[10]) new_individual[10] = nF(new_individual[10])
def m13(): def m13():
individual[10] = F(individual[10]) new_individual[10] = F(new_individual[10])
individual[9] = nF(individual[9]) new_individual[9] = nF(new_individual[9])
random.choice([m0, m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12, m13])() random.choice([m0, m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12, m13])()
return individual, template = self.build_template(self.TemplateType.Parameters(*new_individual))
if not tools.skip(template, self.statement, self.device):
break
return new_individual,
def evaluate(self, individual): def evaluate(self, individual):
if tuple(individual) not in self.cache: if tuple(individual) not in self.cache:

View File

@@ -38,21 +38,19 @@ def exhaustive(statement, context, TemplateType, build_template, parameter_names
def genetic(statement, context, TemplateType, build_template, parameter_names, all_parameters, compute_perf, perf_metric, out): def genetic(statement, context, TemplateType, build_template, parameter_names, all_parameters, compute_perf, perf_metric, out):
gen = GeneticOperators(context.devices[0], statement, all_parameters, parameter_names, TemplateType, build_template) GA = GeneticOperators(context.devices[0], statement, all_parameters, parameter_names, TemplateType, build_template)
creator.create("FitnessMin", base.Fitness, weights=(-1.0,)) creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Individual", list, fitness=creator.FitnessMin) creator.create("Individual", list, fitness=creator.FitnessMin)
toolbox = base.Toolbox() toolbox = base.Toolbox()
toolbox.register("individual", tools.initIterate, creator.Individual, gen.init) toolbox.register("individual", deap.tools.initIterate, creator.Individual, GA.init)
toolbox.register("population", tools.initRepeat, list, toolbox.individual) toolbox.register("population", deap.tools.initRepeat, list, toolbox.individual)
toolbox.register("evaluate", gen.evaluate) toolbox.register("evaluate", GA.evaluate)
toolbox.register("mate", tools.cxTwoPoint) toolbox.register("mate", deap.tools.cxTwoPoint)
toolbox.decorate("mate", gen.repair) toolbox.register("mutate", GA.mutate)
toolbox.register("mutate", gen.mutate) toolbox.register("select", deap.tools.selBest)
toolbox.decorate("mutate", gen.repair)
toolbox.register("select", tools.selBest)
pop = toolbox.population(n=30) pop = toolbox.population(n=50)
hof = deap.tools.HallOfFame(1) hof = deap.tools.HallOfFame(1)
best_performer = lambda x: max([compute_perf(hof[0].fitness.values[0]) for t in x]) best_performer = lambda x: max([compute_perf(hof[0].fitness.values[0]) for t in x])
@@ -62,4 +60,4 @@ def genetic(statement, context, TemplateType, build_template, parameter_names, a
stats.register("max (" + perf_metric + ")", lambda x: max([compute_perf(hof[0].fitness.values[0]) for t in x])) stats.register("max (" + perf_metric + ")", lambda x: max([compute_perf(hof[0].fitness.values[0]) for t in x]))
stats.register("profile ", lambda x: '(%s)'%','.join(map(str,hof[0]))) stats.register("profile ", lambda x: '(%s)'%','.join(map(str,hof[0])))
pop = eaMuPlusLambda(pop, toolbox, 30, 50, cxpb=0.2, mutpb=0.3, maxtime='3m0s', maxgen=200, halloffame=hof, compute_perf=compute_perf, perf_metric=perf_metric) pop = eaMuPlusLambda(pop, toolbox, 50, 70, cxpb=0.2, mutpb=0.3, maxtime='5m0s', maxgen=500, halloffame=hof, compute_perf=compute_perf, perf_metric=perf_metric)

View File

@@ -79,7 +79,7 @@ class OccupancyRecord:
def __init__(self, dev, threads, shared_mem=0, registers=0): def __init__(self, dev, threads, shared_mem=0, registers=0):
physical_limits = PhysicalLimits(dev) physical_limits = PhysicalLimits(dev)
limits = []; limits = [];
allocated_warps = _int_ceiling(threads/physical_limits.threads_per_warp) allocated_warps = max(1,_int_ceiling(threads/physical_limits.threads_per_warp))
max_warps_per_mp = physical_limits.warps_per_mp; max_warps_per_mp = physical_limits.warps_per_mp;
limits.append((min(physical_limits.thread_blocks_per_mp, _int_floor(max_warps_per_mp/allocated_warps)), 'warps')) limits.append((min(physical_limits.thread_blocks_per_mp, _int_floor(max_warps_per_mp/allocated_warps)), 'warps'))