Dataset generation

This commit is contained in:
Philippe Tillet
2014-09-27 20:54:17 -04:00
parent 02d39ed71b
commit 693b8b67b0
4 changed files with 210 additions and 127 deletions

View File

@@ -11,6 +11,7 @@ import pyviennacl as vcl
from pyviennacl import backend from pyviennacl import backend
from pyviennacl import opencl from pyviennacl import opencl
from pyviennacl import atidlas from pyviennacl import atidlas
from dataset import generate_dataset
import utils import utils
import vclio import vclio
@@ -45,73 +46,45 @@ TYPES = { 'vector-axpy': {'template':vcl.atidlas.VectorAxpyTemplate,
'perf-index': lambda x: 2*x[1][0]*x[1][1]*x[1][2]/x[2]*1e-9, 'perf-index': lambda x: 2*x[1][0]*x[1][1]*x[1][2]/x[2]*1e-9,
'perf-measure': 'GFLOP/s'} } 'perf-measure': 'GFLOP/s'} }
def parameter_space(operation):
    """Return the list of candidate values for each template parameter.

    The result is a list of lists: entry ``k`` enumerates the values that
    the ``k``-th parameter of the template for *operation* may take.
    ``None`` is returned when *operation* is not one of the known names.
    """
    simd_widths = [1, 2, 4, 8]
    sizes_1d = [2**k for k in range(12)]
    sizes_2d = [2**i for i in range(8)]
    sizes_2d_unrolled = [2**i for i in range(8)]
    policy = vcl.atidlas.FetchingPolicy
    fetching = [policy.FETCH_FROM_LOCAL,
                policy.FETCH_FROM_GLOBAL_CONTIGUOUS,
                policy.FETCH_FROM_GLOBAL_STRIDED]

    # Ordered (name, space) pairs; looked up with == exactly like an
    # if-chain, so the first matching name wins.
    spaces = [
        ('vector-axpy', [simd_widths, sizes_1d, sizes_1d, fetching]),
        ('reduction', [simd_widths, sizes_1d, sizes_1d, fetching]),
        ('matrix-axpy', [simd_widths, sizes_2d, sizes_2d, sizes_2d,
                         sizes_2d, fetching]),
        ('row-wise-reduction', [simd_widths, sizes_2d, sizes_2d, sizes_1d,
                                fetching]),
        ('matrix-product', [simd_widths, sizes_2d, sizes_2d, sizes_2d,
                            sizes_2d_unrolled, sizes_2d_unrolled,
                            sizes_2d_unrolled, fetching, fetching,
                            [0] + sizes_2d, [0] + sizes_2d]),
    ]
    for name, space in spaces:
        if operation == name:
            return space
    return None
def do_tuning(config_fname, spec_fname, viennacl_root): def do_tuning(config_fname, spec_fname, viennacl_root):
config = ConfigObj(config_fname, configspec=spec_fname) config = ConfigObj(config_fname, configspec=spec_fname)
map_to_list = lambda T: list(map(T[0], T[1] if isinstance(T[1], list) else [T[1]])) map_to_list = lambda T: list(map(T[0], T[1] if isinstance(T[1], list) else [T[1]]))
for operation in ['vector-axpy', 'matrix-axpy', 'row-wise-reduction', 'matrix-product']: for operation in ['vector-axpy', 'matrix-axpy', 'row-wise-reduction', 'matrix-product']:
tmp_folder = config['tmp-folder'] if 'tmp-folder' in config else ""
if operation in config: if operation in config:
p = config[operation] p = config[operation]
confdevices = p['devices'] confdevices = p['devices']
devices = utils.DEVICES_PRESETS[confdevices] if confdevices in utils.DEVICES_PRESETS else [utils.all_devices[int(i)] for i in confdevices] devices = utils.DEVICES_PRESETS[confdevices] if confdevices in utils.DEVICES_PRESETS else [utils.all_devices[int(i)] for i in confdevices]
precisions = map_to_list((str, p['precision'])) precisions = map_to_list((str, p['precision']))
datatypes = [DATATYPES[k] for k in precisions] datatypes = [DATATYPES[k] for k in precisions]
s = map_to_list((int, p['size'])) #Iterate through the datatypes and the devices
for datatype, device in itertools.product(datatypes, devices): for datatype, device in itertools.product(datatypes, devices):
ctx = cl.Context([device]) ctx = cl.Context([device])
ctx = vcl.backend.Context(ctx) ctx = vcl.backend.Context(ctx)
device = ctx.current_device device = ctx.current_device
#Check data-type
if datatype is vcl.float64 and not device.double_fp_config: if datatype is vcl.float64 and not device.double_fp_config:
sys.stderr.write('Warning : The device ' + device.name + ' does not support double precision! Skipping ...') sys.stderr.write('Warning : The device ' + device.name + ' does not support double precision! Skipping ...')
continue continue
#Helper
pairs = [] def execute(node, other_params, sizes, fname = os.devnull):
def execute(node, other_params):
print('-----') print('-----')
print(' '.join(map(str, ("Now tuning:", datatype.__name__, '-', operation, '-'.join(other_params), '[' + device.name, '(' + device.platform.name + ')]')))) print(' '.join(map(str, ("Now tuning:", datatype.__name__, '-', operation, '-'.join(other_params), '[' + device.name, '(' + device.platform.name + ')] for sizes', sizes))))
tmp_file = os.path.join(tmp_folder, utils.sanitize_string(device.name) + "-" + datatype.__name__ + "-" + operation + '-'.join(other_params) + ".dat")
if tmp_folder:
print('Saving history to ' + tmp_file)
fname = tmp_file
else:
fname = os.devnull
with open(fname, "w+") as archive: with open(fname, "w+") as archive:
with vcl.Statement(node) as statement: with vcl.Statement(node) as statement:
result = optimize.genetic(statement, ctx, TYPES[operation]['template'], lambda p: TYPES[operation]['template'](p, *other_params), return optimize.genetic(statement, ctx, TYPES[operation]['template'], lambda p: TYPES[operation]['template'](p, *other_params),
TYPES[operation]['parameter-names'], parameter_space(operation), lambda t: TYPES[operation]['perf-index']([datatype().itemsize, s, t]), TYPES[operation]['perf-measure'], archive) TYPES[operation]['parameter-names'], lambda t: TYPES[operation]['perf-index']([datatype().itemsize, sizes, t]), TYPES[operation]['perf-measure'], archive)
if result and viennacl_root: s = map_to_list((int, p['size']))
vclio.generate_viennacl_headers(viennacl_root, device, datatype, operation, other_params, result[1]) #Vector AXPY
if operation=='vector-axpy': if operation=='vector-axpy':
x = vcl.Vector(s[0], context=ctx, dtype=datatype) x = vcl.Vector(s[0], context=ctx, dtype=datatype)
y = vcl.Vector(s[0], context=ctx, dtype=datatype) y = vcl.Vector(s[0], context=ctx, dtype=datatype)
execute(vcl.ElementProd(vcl.exp(x + y),vcl.cos(x + y)), ()) execute(vcl.ElementProd(vcl.exp(x + y),vcl.cos(x + y)), ())
#Matrix AXPY
if operation=='matrix-axpy': if operation=='matrix-axpy':
A = vcl.Matrix(s, context=ctx, dtype=datatype) A = vcl.Matrix(s, context=ctx, dtype=datatype)
B = vcl.Matrix(s, context=ctx, dtype=datatype) B = vcl.Matrix(s, context=ctx, dtype=datatype)
execute(A+B, ()) execute(A+B, ())
#Row-wise reduction
if operation=='row-wise-reduction': if operation=='row-wise-reduction':
layouts = map_to_list((str,p['layout'])) layouts = map_to_list((str,p['layout']))
if 'all' in layouts: if 'all' in layouts:
@@ -121,23 +94,24 @@ def do_tuning(config_fname, spec_fname, viennacl_root):
x = vcl.Vector(s[1] if A_trans=='N' else s[0], context=ctx, dtype=datatype) x = vcl.Vector(s[1] if A_trans=='N' else s[0], context=ctx, dtype=datatype)
LHS = A if A_trans=='N' else A.T LHS = A if A_trans=='N' else A.T
execute(LHS*x, ()) execute(LHS*x, ())
#Matrix Product
if operation=='matrix-product': if operation=='matrix-product':
layouts = map_to_list((str,p['layout'])) layouts = map_to_list((str,p['layout']))
if 'all' in layouts: if 'all' in layouts:
layouts = ['NN', 'NT', 'TN', 'TT'] layouts = ['NN', 'NT', 'TN', 'TT']
for layout in layouts: for layout in layouts:
def execution_handler(sizes, fname):
A_trans = layout[0] A_trans = layout[0]
B_trans = layout[1] B_trans = layout[1]
A = vcl.Matrix((sizes[0], sizes[1]) if A_trans=='N' else (sizes[1],sizes[0]), context=ctx, dtype=datatype, layout=vcl.COL_MAJOR);
A = vcl.Matrix((s[0], s[1]) if A_trans=='N' else (s[1],s[0]), context=ctx, dtype=datatype, layout=vcl.COL_MAJOR); B = vcl.Matrix((sizes[1], sizes[2]) if B_trans=='N' else (sizes[2],sizes[1]), context=ctx, dtype=datatype, layout=vcl.COL_MAJOR);
B = vcl.Matrix((s[1], s[2]) if B_trans=='N' else (s[2],s[1]), context=ctx, dtype=datatype, layout=vcl.COL_MAJOR);
LHS = A if A_trans=='N' else A.T LHS = A if A_trans=='N' else A.T
RHS = B if B_trans=='N' else B.T RHS = B if B_trans=='N' else B.T
alpha = vcl.HostScalar(1.0, context=ctx, dtype = datatype) alpha = vcl.HostScalar(1.0, context=ctx, dtype = datatype)
beta = vcl.HostScalar(1.0, context=ctx, dtype = datatype) beta = vcl.HostScalar(1.0, context=ctx, dtype = datatype)
C = vcl.Matrix((s[0], s[2]), context=ctx, dtype = datatype, layout=vcl.COL_MAJOR) C = vcl.Matrix((sizes[0], sizes[2]), context=ctx, dtype = datatype, layout=vcl.COL_MAJOR)
execute(vcl.Assign(C,LHS*RHS*alpha + C*beta),(A_trans, B_trans)) execute(vcl.Assign(C,LHS*RHS*alpha + C*beta),(A_trans, B_trans), sizes, fname)
generate_dataset(operation, execution_handler)
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -0,0 +1,99 @@
import os
import re
import random
import numpy as np
from sklearn.neighbors.kde import KernelDensity;
# Data files are named "<M>-<N>-<K>.csv"; anything else in the data folder
# (exports, editor backups, ...) is ignored.
_DATA_FILE_PATTERN = re.compile(r"^([0-9]+)-([0-9]+)-([0-9]+)\.csv$")


def _list_data_files(path):
    """Return the names of the non-empty ``M-N-K.csv`` files in *path*."""
    return [fname for fname in os.listdir(path)
            if _DATA_FILE_PATTERN.match(fname)
            and os.path.getsize(os.path.join(path, fname)) > 0]


def _parse_sizes(fname):
    """Extract ``[M, N, K]`` (as floats) from a data file name."""
    return [float(group) for group in _DATA_FILE_PATTERN.match(fname).groups()]


def _load_measurements(fname):
    """Load a tuning history; always 2-D, even when it holds a single row."""
    return np.loadtxt(fname, delimiter=',', ndmin=2)


def _best_profile(measurements):
    """Return columns 1.. of the row whose column 0 is the smallest."""
    return tuple(measurements[np.argmin(measurements[:, 0]), 1:])


def generate_dataset(operation, execution_handler, path="./data",
                     export_path="./", pad=None):
    """Grow the tuning data set with new problem sizes, then export it.

    The function works in three phases:

    1. Every non-empty ``M-N-K.csv`` file in *path* is loaded.  Each row is
       read as ``value, param_1, ..., param_n``; the parameters of the row
       with the smallest first column form the "profile" (label) of that
       size.
    2. Five new sizes (multiples of 64) are drawn.  A size is either
       uniformly random or sampled from the kernel density estimate of a
       label, labels with fewer samples being more likely.  For each size
       ``execution_handler(sizes, fname)`` is called and must write its
       results as CSV to ``fname``; the file is then read back and the
       density estimate of its label is refitted.
    3. For every data file and every known profile, ``2e-9 * M*N*K / value``
       is computed and the tables ``X.csv`` (sizes), ``Y.csv`` (performance)
       and ``profiles.csv`` are written to *export_path*.

    Parameters
    ----------
    operation
        Accepted for the caller's convenience; not used by this function.
    execution_handler
        Callable ``(sizes, fname)``; *sizes* is a list of three ints.
    path
        Folder holding the ``M-N-K.csv`` histories (must exist).
    export_path
        Folder receiving the exported tables.
        NOTE(review): the export code used an undefined ``export_path``
        name; the default here is a reviewer choice -- confirm.
    pad
        When not ``None``, ``str(pad)`` is written to ``pad.csv``.
        NOTE(review): ``pad`` was an undefined name whose meaning cannot be
        recovered from this file -- confirm with the author.

    Exits through ``sys.exit`` when a data file lacks one of the profiles.
    """
    import sys  # not provided by this module's import block

    n_new = 5          # number of new sizes generated per call
    step = 64          # sizes are multiples of this value
    max_multiple = 40  # uniformly drawn sizes are at most max_multiple*step

    def new_kde():
        return KernelDensity(kernel='gaussian', bandwidth=2*step)

    # Retrieve the existing data.  Files are filtered *before* the arrays
    # are allocated so that no uninitialised row can reach the estimators.
    print("Retrieving data...")
    files = _list_data_files(path)
    n_old = len(files)
    X = np.empty((n_old + n_new, 3))
    t = np.zeros(n_old + n_new, dtype=int)
    profiles = []
    for i, fname in enumerate(files):
        y = _best_profile(_load_measurements(os.path.join(path, fname)))
        if y not in profiles:
            profiles.append(y)
        X[i, :] = _parse_sizes(fname)
        t[i] = profiles.index(y)

    # Generate new data.
    print("Generating new data...")
    kdes = [new_kde().fit(X[:n_old][t[:n_old] == label])
            for label in range(len(profiles))]
    for i in range(n_new):
        n_known = n_old + i
        known = set(tuple(float(v) for v in row) for row in X[:n_known])
        # Every label owns at least one known sample, so no count is zero.
        n_per_label = np.bincount(t[:n_known], minlength=len(kdes))
        r = random.random()
        while True:
            if not kdes or r <= 1.0/len(kdes):
                x = np.array([step*random.randint(1, max_multiple),
                              step*random.randint(1, max_multiple),
                              step*random.randint(1, max_multiple)])
            else:
                # Favour the labels that have few samples so far.
                probs = 1.0/n_per_label
                label = np.random.choice(n_per_label.size,
                                         p=probs/np.sum(probs))
                x = kdes[label].sample()[0]
                # Snap to a positive multiple of step; // keeps integer
                # semantics on both Python 2 and Python 3.
                x = np.maximum(1, (x - step//2).astype(int)//step + 1)*step
            if tuple(float(v) for v in x) not in known:
                break
        # NOTE(review): a hard-coded "x = [2048, 2048, 512]" used to
        # overwrite the sampled size here; removed as a debugging leftover.
        sizes = [int(v) for v in x]
        fname = os.path.join(path, "%d-%d-%d.csv" % tuple(sizes))

        # Execute the auto-tuning procedure, then read its history back.
        execution_handler(sizes, fname)
        y = _best_profile(_load_measurements(fname))

        # Update the labels and the density estimator p(M,N,K | t=idx).
        if y not in profiles:
            profiles.append(y)
            kdes.append(new_kde())
        idx = profiles.index(y)
        X[n_known, :] = sizes
        t[n_known] = idx
        kdes[idx].fit(X[:n_known + 1][t[:n_known + 1] == idx])

    print("Exporting data...")
    # Shuffle the list of files
    files = _list_data_files(path)
    random.shuffle(files)
    X = np.empty((len(files), 3))
    Y = np.zeros((len(files), len(profiles)))
    for i, fname in enumerate(files):
        X[i, :] = _parse_sizes(fname)
        A = _load_measurements(os.path.join(path, fname))
        for j, y in enumerate(profiles):
            rows = np.where(np.all(A[:, 1:] == y, axis=1))[0]
            if rows.size:
                Y[i, j] = 2*1e-9*X[i, 0]*X[i, 1]*X[i, 2]/A[rows[0], 0]
            else:
                sys.exit('Data invalid! Were all the data csv files generated using the same auto-tuner options?')
    np.savetxt(os.path.join(export_path, 'X.csv'), X)
    np.savetxt(os.path.join(export_path, 'Y.csv'), Y)
    np.savetxt(os.path.join(export_path, 'profiles.csv'), profiles)
    if pad is not None:
        with open(os.path.join(export_path, 'pad.csv'), 'w') as pad_file:
            pad_file.write(str(pad))

View File

@@ -13,12 +13,6 @@ from deap import tools as deap_tools
from collections import OrderedDict as odict from collections import OrderedDict as odict
def hamming_distance(ind1, ind2):
    """Return the number of positions at which *ind1* and *ind2* differ.

    The two sequences are compared element by element; when their lengths
    differ, the surplus elements of the longer one are ignored.
    """
    # zip pairs the elements up (enumerate would treat ind2 as a start index
    # and raise TypeError); a distance counts mismatches, not matches.
    return sum(1 for x, y in zip(ind1, ind2) if x != y)
def closest_divisor(N, x): def closest_divisor(N, x):
x_low=x_high=max(1,min(round(x),N)) x_low=x_high=max(1,min(round(x),N))
@@ -39,16 +33,16 @@ def b_gray_to_bin(A='00000000', endian='big'):
class GeneticOperators(object): class GeneticOperators(object):
def __init__(self, device, statement, parameters, parameter_names, TemplateType, build_template): def __init__(self, device, statement, parameter_names, TemplateType, build_template, out):
self.device = device self.device = device
self.statement = statement self.statement = statement
self.parameters = parameters
self.parameter_names = parameter_names self.parameter_names = parameter_names
self.TemplateType = TemplateType self.TemplateType = TemplateType
self.ParameterType = TemplateType.Parameters self.ParameterType = TemplateType.Parameters
self.build_template = build_template self.build_template = build_template
self.cache = {} self.cache = {}
self.indpb = 0.05 self.indpb = 0.05
self.out = out
creator.create("FitnessMin", base.Fitness, weights=(-1.0,)) creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Individual", list, fitness=creator.FitnessMin) creator.create("Individual", list, fitness=creator.FitnessMin)
@@ -108,7 +102,7 @@ class GeneticOperators(object):
while True: while True:
new_individual = copy.deepcopy(individual) new_individual = copy.deepcopy(individual)
for i in range(len(new_individual)): for i in range(len(new_individual)):
if i < 2 and random.random() < 0.2: if i < 2 and random.random() < self.indpb:
while new_individual[i] == individual[i]: while new_individual[i] == individual[i]:
new_individual[i] = random.randint(0, 2) new_individual[i] = random.randint(0, 2)
elif i >= 2 and random.random() < self.indpb: elif i >= 2 and random.random() < self.indpb:
@@ -125,7 +119,9 @@ class GeneticOperators(object):
parameters = self.decode(individual) parameters = self.decode(individual)
template = self.build_template(self.TemplateType.Parameters(*parameters)) template = self.build_template(self.TemplateType.Parameters(*parameters))
try: try:
self.cache[tuple(individual)] = tools.benchmark(template, self.statement, self.device) tt = tools.benchmark(template, self.statement, self.device)
self.out.write(','.join([str(tt)]+map(str,map(int,parameters)))+'\n')
self.cache[tuple(individual)] = tt
except: except:
self.cache[tuple(individual)] = 10 self.cache[tuple(individual)] = 10
return self.cache[tuple(individual)], return self.cache[tuple(individual)],

View File

@@ -9,31 +9,45 @@ import deap.tools
from genetic import GeneticOperators from genetic import GeneticOperators
def exhaustive(statement, context, TemplateType, build_template, parameter_names, all_parameters, compute_perf, perf_metric, out): #~ def parameter_space(operation):
device = context.devices[0] #~ simd = [1, 2, 4, 8]
nvalid = 0 #~ pow2_1D = [2**k for k in range(12)]
current = 0 #~ pow2_2D = [2**i for i in range(8)]
minT = float('inf') #~ pow2_2D_unrolled = [2**i for i in range(8)]
for individual in itertools.product(*all_parameters): #~ FetchingPolicy = vcl.atidlas.FetchingPolicy
template = build_template(TemplateType.Parameters(*individual)) #~ fetch = [FetchingPolicy.FETCH_FROM_LOCAL, FetchingPolicy.FETCH_FROM_GLOBAL_CONTIGUOUS, FetchingPolicy.FETCH_FROM_GLOBAL_STRIDED]
if not tools.skip(template, statement, device): #~ if operation == 'vector-axpy': return [simd, pow2_1D, pow2_1D, fetch]
nvalid = nvalid + 1 #~ if operation == 'reduction': return [simd, pow2_1D, pow2_1D, fetch]
for individual in itertools.product(*all_parameters): #~ if operation == 'matrix-axpy': return [simd, pow2_2D, pow2_2D, pow2_2D, pow2_2D, fetch]
template = build_template(TemplateType.Parameters(*individual)) #~ if operation == 'row-wise-reduction': return [simd, pow2_2D, pow2_2D, pow2_1D, fetch]
try: #~ if operation == 'matrix-product': return [simd, pow2_2D, pow2_2D, pow2_2D, pow2_2D_unrolled, pow2_2D_unrolled, pow2_2D_unrolled, fetch, fetch, [0] + pow2_2D, [0] + pow2_2D]
T = tools.benchmark(template,statement,device) #~
current = current + 1
if T < minT:
minT = T
best = individual
sys.stdout.write('%d / %d , Best is %d %s for %s\r'%(current, nvalid, compute_perf(minT), perf_metric, best))
sys.stdout.flush()
except:
pass
sys.stdout.write('\n')
sys.stdout.flush()
#~ def exhaustive(statement, context, TemplateType, build_template, parameter_names, all_parameters, compute_perf, perf_metric, out):
#~ device = context.devices[0]
#~ nvalid = 0
#~ current = 0
#~ minT = float('inf')
#~ for individual in itertools.product(*all_parameters):
#~ template = build_template(TemplateType.Parameters(*individual))
#~ if not tools.skip(template, statement, device):
#~ nvalid = nvalid + 1
#~ for individual in itertools.product(*all_parameters):
#~ template = build_template(TemplateType.Parameters(*individual))
#~ try:
#~ T = tools.benchmark(template,statement,device)
#~ current = current + 1
#~ if T < minT:
#~ minT = T
#~ best = individual
#~ sys.stdout.write('%d / %d , Best is %d %s for %s\r'%(current, nvalid, compute_perf(minT), perf_metric, best))
#~ sys.stdout.flush()
#~ except:
#~ pass
#~ sys.stdout.write('\n')
#~ sys.stdout.flush()
#~
def genetic(statement, context, TemplateType, build_template, parameter_names, all_parameters, compute_perf, perf_metric, out): def genetic(statement, context, TemplateType, build_template, parameter_names, compute_perf, perf_metric, out):
GA = GeneticOperators(context.devices[0], statement, all_parameters, parameter_names, TemplateType, build_template) GA = GeneticOperators(context.devices[0], statement, parameter_names, TemplateType, build_template, out)
GA.optimize(maxtime='5m0s', maxgen=1000, compute_perf=compute_perf, perf_metric=perf_metric) GA.optimize(maxtime='2m30s', maxgen=1000, compute_perf=compute_perf, perf_metric=perf_metric)