Port the genetic algorithm (GA) tuner to all operations
This commit is contained in:
@@ -1,10 +1,10 @@
|
||||
# The archive will be saved to /tmp/<name-of-operation>.dat
|
||||
tmp-folder = /tmp/
|
||||
|
||||
#~ [vector-axpy]
|
||||
#~ devices = 0
|
||||
#~ precision = all
|
||||
#~ size = 10000000
|
||||
[vector-axpy]
|
||||
devices = 0
|
||||
precision = single
|
||||
size = 10000000
|
||||
|
||||
#~ [matrix-axpy]
|
||||
#~ devices = 0
|
||||
|
@@ -24,39 +24,35 @@ DATATYPES = { 'single' : vcl.float32,
|
||||
'double' : vcl.float64 }
|
||||
|
||||
# Tuning metadata for each supported operation, keyed by operation name.
# Every entry carries:
#   'template'        - the vcl.atidlas template class for the operation
#   'parameter-names' - the template's parameter names, in order
#   'perf-index'      - callable taking a 3-element sequence x and returning
#                       the performance figure; callers pass
#                       [datatype().itemsize, sizes, t]
#                       (t is presumably the measured time - TODO confirm)
#   'perf-measure'    - unit label printed next to the performance figure
TYPES = {
    'vector-axpy': {
        'template': vcl.atidlas.VectorAxpyTemplate,
        'parameter-names': ['simd-width', 'local-size-0', 'num-groups-0', 'fetch'],
        'perf-index': lambda x: 3*x[0]*x[1][0]/x[2]*1e-9,
        'perf-measure': 'GB/s',
    },

    'matrix-axpy': {
        'template': vcl.atidlas.MatrixAxpyTemplate,
        'parameter-names': ['simd-width', 'local-size-0', 'local-size-1', 'num-groups-0', 'num-groups-1', 'fetch'],
        'perf-index': lambda x: 3*x[0]*x[1][0]*x[1][1]/x[2]*1e-9,
        'perf-measure': 'GB/s',
    },

    # NOTE(review): this index reads x[1][1] although the parameter names
    # look one-dimensional (same set as 'vector-axpy') - confirm the shape
    # of `sizes` used for this operation.
    'reduction': {
        'template': vcl.atidlas.ReductionTemplate,
        'parameter-names': ['simd-width', 'local-size-0', 'num-groups-0', 'fetch'],
        'perf-index': lambda x: 2*x[0]*x[1][0]*x[1][1]/x[2]*1e-9,
        'perf-measure': 'GB/s',
    },

    'row-wise-reduction': {
        'template': vcl.atidlas.RowWiseReductionTemplate,
        'parameter-names': ['simd-width', 'local-size-0', 'local-size-1', 'num-groups-0', 'fetch'],
        'perf-index': lambda x: x[0]*x[1][0]*x[1][1]/x[2]*1e-9,
        'perf-measure': 'GB/s',
    },

    # The matrix product is rated by arithmetic throughput rather than by
    # bandwidth, so its index does not use the item size x[0].
    'matrix-product': {
        'template': vcl.atidlas.MatrixProductTemplate,
        'parameter-names': ['simd-width', 'local-size-0', 'kL', 'local-size-1', 'mS', 'kS', 'nS', 'A-fetch-policy', 'B-fetch-policy', 'local-fetch-size-0', 'local-fetch-size-1'],
        'perf-index': lambda x: 2*x[1][0]*x[1][1]*x[1][2]/x[2]*1e-9,
        'perf-measure': 'GFLOP/s',
    },
}
|
||||
|
||||
def do_tuning(config_fname, spec_fname, viennacl_root):
|
||||
config = ConfigObj(config_fname, configspec=spec_fname)
|
||||
map_to_list = lambda T: list(map(T[0], T[1] if isinstance(T[1], list) else [T[1]]))
|
||||
def map_to_list(T, x):
|
||||
return list(map(T, x if isinstance(x, list) else [x]))
|
||||
for operation in ['vector-axpy', 'matrix-axpy', 'row-wise-reduction', 'matrix-product']:
|
||||
if operation in config:
|
||||
p = config[operation]
|
||||
confdevices = p['devices']
|
||||
devices = utils.DEVICES_PRESETS[confdevices] if confdevices in utils.DEVICES_PRESETS else [utils.all_devices[int(i)] for i in confdevices]
|
||||
precisions = map_to_list((str, p['precision']))
|
||||
precisions = map_to_list(str, p['precision'])
|
||||
datatypes = [DATATYPES[k] for k in precisions]
|
||||
#Iterate through the datatypes and the devices
|
||||
for datatype, device in itertools.product(datatypes, devices):
|
||||
@@ -68,18 +64,23 @@ def do_tuning(config_fname, spec_fname, viennacl_root):
|
||||
sys.stderr.write('Warning : The device ' + device.name + ' does not support double precision! Skipping ...')
|
||||
continue
|
||||
#Helper
|
||||
def execute(statement, other_params, sizes, fname = os.devnull):
|
||||
def execute(device, statement, other_params, sizes, fname = os.devnull, parameters = None):
|
||||
if parameters:
|
||||
TemplateType = TYPES[operation]['template']
|
||||
return tools.benchmark(TemplateType(TemplateType.Parameters(*parameters),*other_params), statement, device)
|
||||
print('-----')
|
||||
print(' '.join(map(str, ("Now tuning:", datatype.__name__, '-', operation, '-'.join(other_params), '[' + device.name, '(' + device.platform.name + ')] for sizes', sizes))))
|
||||
with open(fname, "w+") as archive:
|
||||
return optimize.genetic(statement, ctx, TYPES[operation]['template'], lambda p: TYPES[operation]['template'](p, *other_params),
|
||||
TYPES[operation]['parameter-names'], lambda t: TYPES[operation]['perf-index']([datatype().itemsize, sizes, t]), TYPES[operation]['perf-measure'], archive)
|
||||
s = map_to_list((int, p['size']))
|
||||
return optimize.genetic(statement, device, TYPES[operation]['template'], lambda p: TYPES[operation]['template'](p, *other_params),
|
||||
lambda t: TYPES[operation]['perf-index']([datatype().itemsize, sizes, t]), TYPES[operation]['perf-measure'], archive)
|
||||
#Vector AXPY
|
||||
if operation=='vector-axpy':
|
||||
x = vcl.Vector(s[0], context=ctx, dtype=datatype)
|
||||
y = vcl.Vector(s[0], context=ctx, dtype=datatype)
|
||||
execute(vcl.ElementProd(vcl.exp(x + y),vcl.cos(x + y)), ())
|
||||
def execution_handler(sizes, fname=os.devnull, parameters=None):
|
||||
x = vcl.Vector(sizes[0], context=ctx, dtype=datatype)
|
||||
y = vcl.Vector(sizes[0], context=ctx, dtype=datatype)
|
||||
return execute(device, vcl.Statement(vcl.ElementProd(vcl.exp(x + y),vcl.cos(x + y))), (), sizes, fname, parameters)
|
||||
if 'size' in p:
|
||||
profile = execution_handler(map_to_list(int, p['size']))
|
||||
#Matrix AXPY
|
||||
if operation=='matrix-axpy':
|
||||
A = vcl.Matrix(s, context=ctx, dtype=datatype)
|
||||
@@ -112,11 +113,10 @@ def do_tuning(config_fname, spec_fname, viennacl_root):
|
||||
beta = vcl.HostScalar(1.0, context=ctx, dtype = datatype)
|
||||
C = vcl.Matrix((sizes[0], sizes[2]), context=ctx, dtype = datatype, layout=vcl.COL_MAJOR)
|
||||
statement = vcl.Statement(vcl.Assign(C,LHS*RHS*alpha + C*beta))
|
||||
if parameters:
|
||||
TemplateType = TYPES[operation]['template']
|
||||
return tools.benchmark(TemplateType(TemplateType.Parameters(*parameters),A_trans,B_trans), statement, device)
|
||||
return execute(device, statement,(A_trans, B_trans), sizes, fname, parameters)
|
||||
if 'size' in p:
|
||||
profile = execution_handler(map(int, p['size']))
|
||||
else:
|
||||
return execute(statement,(A_trans, B_trans), sizes, fname)
|
||||
X, Y, profiles = generate_dataset(TYPES[operation]['template'], execution_handler)
|
||||
train_model(X, Y, profiles)
|
||||
|
||||
|
@@ -28,7 +28,7 @@ def resample(X, tbincount, densities, step):
|
||||
return x.astype(int)
|
||||
|
||||
def generate_dataset(TemplateType, execution_handler):
|
||||
I = 10
|
||||
I = 50
|
||||
step = 64
|
||||
path = "./data"
|
||||
|
||||
@@ -47,13 +47,13 @@ def generate_dataset(TemplateType, execution_handler):
|
||||
# densities = [KernelDensity(kernel='gaussian', bandwidth=2*step).fit(X[t==i,:]) for i in range(int(max(t))+1)];
|
||||
#
|
||||
# print "Generating the dataset..."
|
||||
# N = 1000
|
||||
# N = 10000
|
||||
# Y = np.empty((N, len(profiles)))
|
||||
# X = np.empty((N,3))
|
||||
# t = []
|
||||
#
|
||||
# for i in range(N):
|
||||
# x = resample(X, np.bincount(t), densities, step)
|
||||
# x = resample(X, [], [], step)
|
||||
# for j,y in enumerate(profiles):
|
||||
# T = execution_handler(x, os.devnull, decode(map(int, y)))
|
||||
# Y[i,j] = 2*1e-9*x[0]*x[1]*x[2]/T
|
||||
@@ -61,6 +61,9 @@ def generate_dataset(TemplateType, execution_handler):
|
||||
# X[i,:] = x
|
||||
# t = np.argmax(Y[:i+1,], axis=1)
|
||||
# densities[idx].fit(X[t==idx,:])
|
||||
# if i%10==0:
|
||||
# sys.stdout.write('%d data points generated\r'%i)
|
||||
# sys.stdout.flush()
|
||||
#
|
||||
# np.savetxt(os.path.join(path,"profiles.csv"), profiles)
|
||||
# np.savetxt(os.path.join(path,"X.csv"), X)
|
||||
|
@@ -33,10 +33,9 @@ def b_gray_to_bin(A='00000000', endian='big'):
|
||||
|
||||
class GeneticOperators(object):
|
||||
|
||||
def __init__(self, device, statement, parameter_names, TemplateType, build_template, out):
|
||||
def __init__(self, device, statement, TemplateType, build_template, out):
|
||||
self.device = device
|
||||
self.statement = statement
|
||||
self.parameter_names = parameter_names
|
||||
self.TemplateType = TemplateType
|
||||
self.ParameterType = TemplateType.Parameters
|
||||
self.build_template = build_template
|
||||
@@ -44,6 +43,11 @@ class GeneticOperators(object):
|
||||
self.indpb = 0.05
|
||||
self.out = out
|
||||
|
||||
self.genome_info = {
|
||||
vcl.atidlas.VectorAxpyTemplate: [3,4,4,vcl.atidlas.FetchingPolicy],
|
||||
vcl.atidlas.MatrixProductTemplate: [3,3,3,3,3,3,3,vcl.atidlas.FetchingPolicy,vcl.atidlas.FetchingPolicy,3]
|
||||
}[TemplateType]
|
||||
|
||||
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
|
||||
creator.create("Individual", list, fitness=creator.FitnessMin)
|
||||
|
||||
@@ -54,35 +58,39 @@ class GeneticOperators(object):
|
||||
self.toolbox.register("mutate", self.mutate)
|
||||
self.toolbox.register("select", deap_tools.selNSGA2)
|
||||
|
||||
def decode(self, genome):
    """Translate a genome into the parameter list for ``self.TemplateType``.

    The genome is read left to right following ``self.genome_info``:

    * an entry equal to ``vcl.atidlas.FetchingPolicy`` consumes ONE gene,
      an integer index (0, 1 or 2) into the list of fetching policies;
    * an integer entry ``x`` consumes ``x`` genes, each a ``'0'``/``'1'``
      character; they are joined, passed through ``b_gray_to_bin``, read
      as a base-2 integer ``k`` and decoded to ``2**k``.

    For ``MatrixProductTemplate`` one extra value (the second local fetch
    size) is derived and appended, so the result lines up with the
    template's parameter list.

    :param genome: sequence of genes laid out as described above.
    :return: list of decoded parameters, in ``genome_info`` order.
    """
    FetchingPolicy = vcl.atidlas.FetchingPolicy
    fetch = [FetchingPolicy.FETCH_FROM_LOCAL,
             FetchingPolicy.FETCH_FROM_GLOBAL_CONTIGUOUS,
             FetchingPolicy.FETCH_FROM_GLOBAL_STRIDED]
    decode_element = lambda x: 2**int(b_gray_to_bin(''.join(x)), 2)

    result = []
    # Tracked on the policy genes only. Searching the whole result list
    # would also compare the policy against decoded integer sizes.
    # NOTE(review): whether FetchingPolicy members can compare equal to
    # plain ints depends on the binding - confirm.
    uses_local_fetch = False
    offset = 0
    for x in self.genome_info:
        if x == vcl.atidlas.FetchingPolicy:
            policy = fetch[genome[offset]]
            if policy == FetchingPolicy.FETCH_FROM_LOCAL:
                uses_local_fetch = True
            result.append(policy)
            offset = offset + 1
        else:
            result.append(decode_element(genome[offset:offset+x]))
            offset = offset + x

    # GEMM peculiarities: result[1], result[3] and result[9] are the second,
    # fourth and tenth decoded values (local-size-0, local-size-1 and
    # local-fetch-size-0 in the matrix-product parameter order).
    if self.TemplateType == vcl.atidlas.MatrixProductTemplate:
        if uses_local_fetch:
            # Floor division keeps the derived size an integer whatever
            # the division semantics of the running interpreter.
            lf1 = result[1]*result[3]//result[9]
        else:
            # No operand is fetched through local memory: both local
            # fetch sizes are zeroed.
            result[9] = 0
            lf1 = 0
        result.append(lf1)
    return result
|
||||
|
||||
def init(self, N):
|
||||
result = []
|
||||
fetchcount = [0, 0, 0]
|
||||
while len(result) < N:
|
||||
while True:
|
||||
fetch = random.randint(0,2)
|
||||
bincode = [fetch, fetch] + [str(random.randint(0,1)) for i in range(23)]
|
||||
bincode = []
|
||||
for x in self.genome_info:
|
||||
if x==vcl.atidlas.FetchingPolicy:
|
||||
bincode = bincode + [random.randint(0,2)]
|
||||
else:
|
||||
bincode = bincode + [str(random.randint(0,1)) for i in range(x)]
|
||||
parameters = self.decode(bincode)
|
||||
template = self.build_template(self.TemplateType.Parameters(*parameters))
|
||||
registers_usage = template.registers_usage(vcl.atidlas.StatementsTuple(self.statement))/4
|
||||
@@ -90,22 +98,18 @@ class GeneticOperators(object):
|
||||
local_size = template.parameters.local_size_0*template.parameters.local_size_1
|
||||
occupancy_record = tools.OccupancyRecord(self.device, local_size, lmem_usage, registers_usage)
|
||||
if not tools.skip(template, self.statement, self.device):
|
||||
fetchcount[fetch] = fetchcount[fetch] + 1
|
||||
if max(fetchcount) - min(fetchcount) <= 1:
|
||||
result.append(creator.Individual(bincode))
|
||||
break
|
||||
else:
|
||||
fetchcount[fetch] = fetchcount[fetch] - 1
|
||||
return result
|
||||
|
||||
def mutate(self, individual):
|
||||
while True:
|
||||
new_individual = copy.deepcopy(individual)
|
||||
for i in range(len(new_individual)):
|
||||
if i < 2 and random.random() < self.indpb:
|
||||
if isinstance(individual[i], int) and random.random() < self.indpb:
|
||||
while new_individual[i] == individual[i]:
|
||||
new_individual[i] = random.randint(0, 2)
|
||||
elif i >= 2 and random.random() < self.indpb:
|
||||
elif not isinstance(individual[i], int) and random.random() < self.indpb:
|
||||
new_individual[i] = '1' if new_individual[i]=='0' else '0'
|
||||
parameters = self.decode(new_individual)
|
||||
template = self.build_template(self.TemplateType.Parameters(*parameters))
|
||||
@@ -176,7 +180,7 @@ class GeneticOperators(object):
|
||||
population[:] = self.toolbox.select(population + offspring, mu)
|
||||
#Update
|
||||
gen = gen + 1
|
||||
best_profile = '(%s)'%','.join(map(str,GeneticOperators.decode(hof[0])));
|
||||
best_profile = '(%s)'%','.join(map(str,self.decode(hof[0])));
|
||||
best_performance = compute_perf(hof[0].fitness.values[0])
|
||||
sys.stdout.write('Time %d | Best %d %s [ for %s ]\r'%(time.time() - start_time, best_performance, perf_metric, best_profile))
|
||||
sys.stdout.flush()
|
||||
|
@@ -13,12 +13,13 @@ def train_model(X, Y, profiles):
|
||||
Xmean = np.mean(X, axis=0)
|
||||
Xstd = np.std(X, axis=0)
|
||||
X = (X - Xmean)/Xstd
|
||||
|
||||
Ymax = np.max(Y)
|
||||
Y = Y/Ymax
|
||||
|
||||
ref = np.argmax(np.bincount(np.argmax(Y, axis=1))) #most common profile
|
||||
#Cross-validation data-sets
|
||||
cut = int(0.1*X.shape[0]+1)
|
||||
cut = int(0.800*X.shape[0]+1)
|
||||
XTr = X[0:cut, :]
|
||||
YTr = Y[0:cut, :]
|
||||
XTe = X[cut:,:]
|
||||
@@ -26,23 +27,15 @@ def train_model(X, Y, profiles):
|
||||
|
||||
#Train the model
|
||||
print("Training the model...")
|
||||
ds = SupervisedDataSet(X.shape[1], Y.shape[1])
|
||||
for idx, x in enumerate(X):
|
||||
ds.addSample(x, Y[idx,:])
|
||||
clf = buildNetwork(*[X.shape[1], 100, Y.shape[1]], hiddenclass = TanhLayer, outclass = LinearLayer)
|
||||
#print fnn;
|
||||
#trainer = RPropMinusTrainer( fnn, dataset=ds, verbose=True);
|
||||
trainer = BackpropTrainer( clf, dataset=ds, verbose=True, momentum=0.01, weightdecay=0.01, learningrate=0.002, batchlearning=False)
|
||||
trainer.trainUntilConvergence(maxEpochs=100)
|
||||
clf = ensemble.RandomForestRegressor(40).fit(XTr,YTr)
|
||||
|
||||
#Evaluate the model
|
||||
GFlops = np.empty(XTe.shape[0])
|
||||
speedups = np.empty(XTe.shape[0])
|
||||
optspeedups = np.empty(XTe.shape[0])
|
||||
for i,x in enumerate(XTe):
|
||||
predictions = clf.activate(x)
|
||||
predictions = clf.predict(x)
|
||||
label = np.argmax(predictions)
|
||||
# print YTe[i,label], YTe[i,ref], np.max(YTe[i,:])
|
||||
speedups[i] = YTe[i,label]/YTe[i,ref]
|
||||
optspeedups[i] = np.max(YTe[i,:])/YTe[i,ref]
|
||||
GFlops[i] = YTe[i,ref]*Ymax
|
||||
@@ -52,7 +45,5 @@ def train_model(X, Y, profiles):
|
||||
print("Average testing speedup : %f (Optimal : %f)"%(sp.stats.gmean(speedups), sp.stats.gmean(optspeedups)))
|
||||
print("Average GFLOP/s : %f (Default %f, Optimal %f)"%(np.mean(np.multiply(GFlops,speedups)), np.mean(GFlops), np.mean(np.multiply(GFlops,optspeedups))))
|
||||
print("Minimum speedup is %f wrt %i GFlops"%(np.min(speedups), GFlops[np.argmin(speedups)]))
|
||||
print("Maximum speedup is %f wrt %i GFlops"%(np.max(speedups), GFlops[np.argmax(speedups)]))
|
||||
print("Maximum speedup is %f wrt %i GFlops for %s"%(np.max(speedups), GFlops[np.argmax(speedups)], X[np.argmax(speedups)]*Xstd+Xmean))
|
||||
print("--------")
|
||||
|
||||
print clf
|
||||
|
@@ -48,6 +48,6 @@ from genetic import GeneticOperators
|
||||
#~ sys.stdout.flush()
|
||||
#~
|
||||
|
||||
def genetic(statement, device, TemplateType, build_template, compute_perf, perf_metric, out,
            maxtime='2m30s', maxgen=1000):
    """Run the genetic-algorithm search for one statement on one device.

    :param statement: statement to tune, forwarded to ``GeneticOperators``.
    :param device: device the candidates are evaluated on.
    :param TemplateType: template class whose parameters are searched.
    :param build_template: callable turning a parameter object into a
        template instance.
    :param compute_perf: callable converting a fitness value into the
        reported performance figure.
    :param perf_metric: unit label for the reported performance figure.
    :param out: output object forwarded to ``GeneticOperators``.
    :param maxtime: time budget forwarded to ``GeneticOperators.optimize``
        (defaults to the previously hard-coded ``'2m30s'``).
    :param maxgen: generation limit forwarded to
        ``GeneticOperators.optimize`` (defaults to the previously
        hard-coded ``1000``).
    :return: whatever ``GeneticOperators.optimize`` returns.
    """
    operators = GeneticOperators(device, statement, TemplateType, build_template, out)
    return operators.optimize(maxtime=maxtime, maxgen=maxgen,
                              compute_perf=compute_perf, perf_metric=perf_metric)
|
||||
|
Reference in New Issue
Block a user