Porting GA for all the operations
This commit is contained in:
@@ -1,10 +1,10 @@
|
|||||||
#will save the archive into /tmp/name-of-operation.dat
|
#will save the archive into /tmp/name-of-operation.dat
|
||||||
tmp-folder = /tmp/
|
tmp-folder = /tmp/
|
||||||
|
|
||||||
#~ [vector-axpy]
|
[vector-axpy]
|
||||||
#~ devices = 0
|
devices = 0
|
||||||
#~ precision = all
|
precision = single
|
||||||
#~ size = 10000000
|
size = 10000000
|
||||||
|
|
||||||
#~ [matrix-axpy]
|
#~ [matrix-axpy]
|
||||||
#~ devices = 0
|
#~ devices = 0
|
||||||
|
@@ -24,39 +24,35 @@ DATATYPES = { 'single' : vcl.float32,
|
|||||||
'double' : vcl.float64 }
|
'double' : vcl.float64 }
|
||||||
|
|
||||||
TYPES = { 'vector-axpy': {'template':vcl.atidlas.VectorAxpyTemplate,
|
TYPES = { 'vector-axpy': {'template':vcl.atidlas.VectorAxpyTemplate,
|
||||||
'parameter-names':['simd-width', 'local-size-0', 'num-groups-0', 'fetch'],
|
|
||||||
'perf-index':lambda x: 3*x[0]*x[1][0]/x[2]*1e-9,
|
'perf-index':lambda x: 3*x[0]*x[1][0]/x[2]*1e-9,
|
||||||
'perf-measure':'GB/s'},
|
'perf-measure':'GB/s'},
|
||||||
|
|
||||||
'matrix-axpy': {'template':vcl.atidlas.MatrixAxpyTemplate,
|
'matrix-axpy': {'template':vcl.atidlas.MatrixAxpyTemplate,
|
||||||
'parameter-names':['simd-width', 'local-size-0', 'local-size-1', 'num-groups-0', 'num-groups-1', 'fetch'],
|
|
||||||
'perf-index':lambda x: 3*x[0]*x[1][0]*x[1][1]/x[2]*1e-9,
|
'perf-index':lambda x: 3*x[0]*x[1][0]*x[1][1]/x[2]*1e-9,
|
||||||
'perf-measure':'GB/s'},
|
'perf-measure':'GB/s'},
|
||||||
|
|
||||||
'reduction': {'template':vcl.atidlas.ReductionTemplate,
|
'reduction': {'template':vcl.atidlas.ReductionTemplate,
|
||||||
'parameter-names':['simd-width', 'local-size-0', 'num-groups-0', 'fetch'],
|
|
||||||
'perf-index':lambda x: 2*x[0]*x[1][0]*x[1][1]/x[2]*1e-9,
|
'perf-index':lambda x: 2*x[0]*x[1][0]*x[1][1]/x[2]*1e-9,
|
||||||
'perf-measure':'GB/s'},
|
'perf-measure':'GB/s'},
|
||||||
|
|
||||||
'row-wise-reduction': {'template':vcl.atidlas.RowWiseReductionTemplate,
|
'row-wise-reduction': {'template':vcl.atidlas.RowWiseReductionTemplate,
|
||||||
'parameter-names':['simd-width', 'local-size-0', 'local-size-1', 'num-groups-0', 'fetch'],
|
|
||||||
'perf-index':lambda x: x[0]*x[1][0]*x[1][1]/x[2]*1e-9,
|
'perf-index':lambda x: x[0]*x[1][0]*x[1][1]/x[2]*1e-9,
|
||||||
'perf-measure':'GB/s'},
|
'perf-measure':'GB/s'},
|
||||||
|
|
||||||
'matrix-product': {'template':vcl.atidlas.MatrixProductTemplate,
|
'matrix-product': {'template':vcl.atidlas.MatrixProductTemplate,
|
||||||
'parameter-names':['simd-width', 'local-size-0', 'kL', 'local-size-1', 'mS', 'kS', 'nS', 'A-fetch-policy', 'B-fetch-policy', 'local-fetch-size-0', 'local-fetch-size-1'],
|
|
||||||
'perf-index': lambda x: 2*x[1][0]*x[1][1]*x[1][2]/x[2]*1e-9,
|
'perf-index': lambda x: 2*x[1][0]*x[1][1]*x[1][2]/x[2]*1e-9,
|
||||||
'perf-measure': 'GFLOP/s'} }
|
'perf-measure': 'GFLOP/s'} }
|
||||||
|
|
||||||
def do_tuning(config_fname, spec_fname, viennacl_root):
|
def do_tuning(config_fname, spec_fname, viennacl_root):
|
||||||
config = ConfigObj(config_fname, configspec=spec_fname)
|
config = ConfigObj(config_fname, configspec=spec_fname)
|
||||||
map_to_list = lambda T: list(map(T[0], T[1] if isinstance(T[1], list) else [T[1]]))
|
def map_to_list(T, x):
|
||||||
|
return list(map(T, x if isinstance(x, list) else [x]))
|
||||||
for operation in ['vector-axpy', 'matrix-axpy', 'row-wise-reduction', 'matrix-product']:
|
for operation in ['vector-axpy', 'matrix-axpy', 'row-wise-reduction', 'matrix-product']:
|
||||||
if operation in config:
|
if operation in config:
|
||||||
p = config[operation]
|
p = config[operation]
|
||||||
confdevices = p['devices']
|
confdevices = p['devices']
|
||||||
devices = utils.DEVICES_PRESETS[confdevices] if confdevices in utils.DEVICES_PRESETS else [utils.all_devices[int(i)] for i in confdevices]
|
devices = utils.DEVICES_PRESETS[confdevices] if confdevices in utils.DEVICES_PRESETS else [utils.all_devices[int(i)] for i in confdevices]
|
||||||
precisions = map_to_list((str, p['precision']))
|
precisions = map_to_list(str, p['precision'])
|
||||||
datatypes = [DATATYPES[k] for k in precisions]
|
datatypes = [DATATYPES[k] for k in precisions]
|
||||||
#Iterate through the datatypes and the devices
|
#Iterate through the datatypes and the devices
|
||||||
for datatype, device in itertools.product(datatypes, devices):
|
for datatype, device in itertools.product(datatypes, devices):
|
||||||
@@ -68,18 +64,23 @@ def do_tuning(config_fname, spec_fname, viennacl_root):
|
|||||||
sys.stderr.write('Warning : The device ' + device.name + ' does not support double precision! Skipping ...')
|
sys.stderr.write('Warning : The device ' + device.name + ' does not support double precision! Skipping ...')
|
||||||
continue
|
continue
|
||||||
#Helper
|
#Helper
|
||||||
def execute(statement, other_params, sizes, fname = os.devnull):
|
def execute(device, statement, other_params, sizes, fname = os.devnull, parameters = None):
|
||||||
|
if parameters:
|
||||||
|
TemplateType = TYPES[operation]['template']
|
||||||
|
return tools.benchmark(TemplateType(TemplateType.Parameters(*parameters),*other_params), statement, device)
|
||||||
print('-----')
|
print('-----')
|
||||||
print(' '.join(map(str, ("Now tuning:", datatype.__name__, '-', operation, '-'.join(other_params), '[' + device.name, '(' + device.platform.name + ')] for sizes', sizes))))
|
print(' '.join(map(str, ("Now tuning:", datatype.__name__, '-', operation, '-'.join(other_params), '[' + device.name, '(' + device.platform.name + ')] for sizes', sizes))))
|
||||||
with open(fname, "w+") as archive:
|
with open(fname, "w+") as archive:
|
||||||
return optimize.genetic(statement, ctx, TYPES[operation]['template'], lambda p: TYPES[operation]['template'](p, *other_params),
|
return optimize.genetic(statement, device, TYPES[operation]['template'], lambda p: TYPES[operation]['template'](p, *other_params),
|
||||||
TYPES[operation]['parameter-names'], lambda t: TYPES[operation]['perf-index']([datatype().itemsize, sizes, t]), TYPES[operation]['perf-measure'], archive)
|
lambda t: TYPES[operation]['perf-index']([datatype().itemsize, sizes, t]), TYPES[operation]['perf-measure'], archive)
|
||||||
s = map_to_list((int, p['size']))
|
|
||||||
#Vector AXPY
|
#Vector AXPY
|
||||||
if operation=='vector-axpy':
|
if operation=='vector-axpy':
|
||||||
x = vcl.Vector(s[0], context=ctx, dtype=datatype)
|
def execution_handler(sizes, fname=os.devnull, parameters=None):
|
||||||
y = vcl.Vector(s[0], context=ctx, dtype=datatype)
|
x = vcl.Vector(sizes[0], context=ctx, dtype=datatype)
|
||||||
execute(vcl.ElementProd(vcl.exp(x + y),vcl.cos(x + y)), ())
|
y = vcl.Vector(sizes[0], context=ctx, dtype=datatype)
|
||||||
|
return execute(device, vcl.Statement(vcl.ElementProd(vcl.exp(x + y),vcl.cos(x + y))), (), sizes, fname, parameters)
|
||||||
|
if 'size' in p:
|
||||||
|
profile = execution_handler(map_to_list(int, p['size']))
|
||||||
#Matrix AXPY
|
#Matrix AXPY
|
||||||
if operation=='matrix-axpy':
|
if operation=='matrix-axpy':
|
||||||
A = vcl.Matrix(s, context=ctx, dtype=datatype)
|
A = vcl.Matrix(s, context=ctx, dtype=datatype)
|
||||||
@@ -112,11 +113,10 @@ def do_tuning(config_fname, spec_fname, viennacl_root):
|
|||||||
beta = vcl.HostScalar(1.0, context=ctx, dtype = datatype)
|
beta = vcl.HostScalar(1.0, context=ctx, dtype = datatype)
|
||||||
C = vcl.Matrix((sizes[0], sizes[2]), context=ctx, dtype = datatype, layout=vcl.COL_MAJOR)
|
C = vcl.Matrix((sizes[0], sizes[2]), context=ctx, dtype = datatype, layout=vcl.COL_MAJOR)
|
||||||
statement = vcl.Statement(vcl.Assign(C,LHS*RHS*alpha + C*beta))
|
statement = vcl.Statement(vcl.Assign(C,LHS*RHS*alpha + C*beta))
|
||||||
if parameters:
|
return execute(device, statement,(A_trans, B_trans), sizes, fname, parameters)
|
||||||
TemplateType = TYPES[operation]['template']
|
if 'size' in p:
|
||||||
return tools.benchmark(TemplateType(TemplateType.Parameters(*parameters),A_trans,B_trans), statement, device)
|
profile = execution_handler(map(int, p['size']))
|
||||||
else:
|
else:
|
||||||
return execute(statement,(A_trans, B_trans), sizes, fname)
|
|
||||||
X, Y, profiles = generate_dataset(TYPES[operation]['template'], execution_handler)
|
X, Y, profiles = generate_dataset(TYPES[operation]['template'], execution_handler)
|
||||||
train_model(X, Y, profiles)
|
train_model(X, Y, profiles)
|
||||||
|
|
||||||
|
@@ -28,7 +28,7 @@ def resample(X, tbincount, densities, step):
|
|||||||
return x.astype(int)
|
return x.astype(int)
|
||||||
|
|
||||||
def generate_dataset(TemplateType, execution_handler):
|
def generate_dataset(TemplateType, execution_handler):
|
||||||
I = 10
|
I = 50
|
||||||
step = 64
|
step = 64
|
||||||
path = "./data"
|
path = "./data"
|
||||||
|
|
||||||
@@ -47,13 +47,13 @@ def generate_dataset(TemplateType, execution_handler):
|
|||||||
# densities = [KernelDensity(kernel='gaussian', bandwidth=2*step).fit(X[t==i,:]) for i in range(int(max(t))+1)];
|
# densities = [KernelDensity(kernel='gaussian', bandwidth=2*step).fit(X[t==i,:]) for i in range(int(max(t))+1)];
|
||||||
#
|
#
|
||||||
# print "Generating the dataset..."
|
# print "Generating the dataset..."
|
||||||
# N = 1000
|
# N = 10000
|
||||||
# Y = np.empty((N, len(profiles)))
|
# Y = np.empty((N, len(profiles)))
|
||||||
# X = np.empty((N,3))
|
# X = np.empty((N,3))
|
||||||
# t = []
|
# t = []
|
||||||
#
|
#
|
||||||
# for i in range(N):
|
# for i in range(N):
|
||||||
# x = resample(X, np.bincount(t), densities, step)
|
# x = resample(X, [], [], step)
|
||||||
# for j,y in enumerate(profiles):
|
# for j,y in enumerate(profiles):
|
||||||
# T = execution_handler(x, os.devnull, decode(map(int, y)))
|
# T = execution_handler(x, os.devnull, decode(map(int, y)))
|
||||||
# Y[i,j] = 2*1e-9*x[0]*x[1]*x[2]/T
|
# Y[i,j] = 2*1e-9*x[0]*x[1]*x[2]/T
|
||||||
@@ -61,6 +61,9 @@ def generate_dataset(TemplateType, execution_handler):
|
|||||||
# X[i,:] = x
|
# X[i,:] = x
|
||||||
# t = np.argmax(Y[:i+1,], axis=1)
|
# t = np.argmax(Y[:i+1,], axis=1)
|
||||||
# densities[idx].fit(X[t==idx,:])
|
# densities[idx].fit(X[t==idx,:])
|
||||||
|
# if i%10==0:
|
||||||
|
# sys.stdout.write('%d data points generated\r'%i)
|
||||||
|
# sys.stdout.flush()
|
||||||
#
|
#
|
||||||
# np.savetxt(os.path.join(path,"profiles.csv"), profiles)
|
# np.savetxt(os.path.join(path,"profiles.csv"), profiles)
|
||||||
# np.savetxt(os.path.join(path,"X.csv"), X)
|
# np.savetxt(os.path.join(path,"X.csv"), X)
|
||||||
|
@@ -33,10 +33,9 @@ def b_gray_to_bin(A='00000000', endian='big'):
|
|||||||
|
|
||||||
class GeneticOperators(object):
|
class GeneticOperators(object):
|
||||||
|
|
||||||
def __init__(self, device, statement, parameter_names, TemplateType, build_template, out):
|
def __init__(self, device, statement, TemplateType, build_template, out):
|
||||||
self.device = device
|
self.device = device
|
||||||
self.statement = statement
|
self.statement = statement
|
||||||
self.parameter_names = parameter_names
|
|
||||||
self.TemplateType = TemplateType
|
self.TemplateType = TemplateType
|
||||||
self.ParameterType = TemplateType.Parameters
|
self.ParameterType = TemplateType.Parameters
|
||||||
self.build_template = build_template
|
self.build_template = build_template
|
||||||
@@ -44,6 +43,11 @@ class GeneticOperators(object):
|
|||||||
self.indpb = 0.05
|
self.indpb = 0.05
|
||||||
self.out = out
|
self.out = out
|
||||||
|
|
||||||
|
self.genome_info = {
|
||||||
|
vcl.atidlas.VectorAxpyTemplate: [3,4,4,vcl.atidlas.FetchingPolicy],
|
||||||
|
vcl.atidlas.MatrixProductTemplate: [3,3,3,3,3,3,3,vcl.atidlas.FetchingPolicy,vcl.atidlas.FetchingPolicy,3]
|
||||||
|
}[TemplateType]
|
||||||
|
|
||||||
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
|
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
|
||||||
creator.create("Individual", list, fitness=creator.FitnessMin)
|
creator.create("Individual", list, fitness=creator.FitnessMin)
|
||||||
|
|
||||||
@@ -54,35 +58,39 @@ class GeneticOperators(object):
|
|||||||
self.toolbox.register("mutate", self.mutate)
|
self.toolbox.register("mutate", self.mutate)
|
||||||
self.toolbox.register("select", deap_tools.selNSGA2)
|
self.toolbox.register("select", deap_tools.selNSGA2)
|
||||||
|
|
||||||
@staticmethod
|
def decode(self, genome):
|
||||||
def decode(s):
|
|
||||||
FetchingPolicy = vcl.atidlas.FetchingPolicy
|
FetchingPolicy = vcl.atidlas.FetchingPolicy
|
||||||
fetch = [FetchingPolicy.FETCH_FROM_LOCAL, FetchingPolicy.FETCH_FROM_GLOBAL_CONTIGUOUS, FetchingPolicy.FETCH_FROM_GLOBAL_STRIDED]
|
fetch = [FetchingPolicy.FETCH_FROM_LOCAL, FetchingPolicy.FETCH_FROM_GLOBAL_CONTIGUOUS, FetchingPolicy.FETCH_FROM_GLOBAL_STRIDED]
|
||||||
fetchA = fetch[s[0]]
|
decode_element = lambda x:2**int(b_gray_to_bin(''.join(x)), 2)
|
||||||
fetchB = fetch[s[1]]
|
result = []
|
||||||
bincode = ''.join(s[2:])
|
offset = 0
|
||||||
decode_element = lambda x:2**int(b_gray_to_bin(x), 2)
|
for x in self.genome_info:
|
||||||
simd = decode_element(bincode[0:3])
|
if x==vcl.atidlas.FetchingPolicy:
|
||||||
ls0 = decode_element(bincode[2:5])
|
result.append(fetch[genome[offset]])
|
||||||
ls1 = decode_element(bincode[5:8])
|
offset = offset + 1
|
||||||
kL = decode_element(bincode[8:11])
|
|
||||||
mS = decode_element(bincode[11:14])
|
|
||||||
kS = decode_element(bincode[14:17])
|
|
||||||
nS = decode_element(bincode[17:20])
|
|
||||||
if fetchA==FetchingPolicy.FETCH_FROM_LOCAL or fetchB==FetchingPolicy.FETCH_FROM_LOCAL:
|
|
||||||
lf0 = decode_element(bincode[20:23])
|
|
||||||
lf1 = ls0*ls1/lf0
|
|
||||||
else:
|
else:
|
||||||
lf0, lf1 = 0, 0
|
result.append(decode_element(genome[offset:offset+x]))
|
||||||
return [simd, ls0, kL, ls1, mS, kS, nS, fetchA, fetchB, lf0, lf1]
|
offset = offset + x
|
||||||
|
#GEMM peculiarities
|
||||||
|
if self.TemplateType==vcl.atidlas.MatrixProductTemplate:
|
||||||
|
if FetchingPolicy.FETCH_FROM_LOCAL in result:
|
||||||
|
lf1 = result[1]*result[3]/result[9]
|
||||||
|
else:
|
||||||
|
result[9] = 0
|
||||||
|
lf1 = 0
|
||||||
|
result.append(lf1)
|
||||||
|
return result
|
||||||
|
|
||||||
def init(self, N):
|
def init(self, N):
|
||||||
result = []
|
result = []
|
||||||
fetchcount = [0, 0, 0]
|
|
||||||
while len(result) < N:
|
while len(result) < N:
|
||||||
while True:
|
while True:
|
||||||
fetch = random.randint(0,2)
|
bincode = []
|
||||||
bincode = [fetch, fetch] + [str(random.randint(0,1)) for i in range(23)]
|
for x in self.genome_info:
|
||||||
|
if x==vcl.atidlas.FetchingPolicy:
|
||||||
|
bincode = bincode + [random.randint(0,2)]
|
||||||
|
else:
|
||||||
|
bincode = bincode + [str(random.randint(0,1)) for i in range(x)]
|
||||||
parameters = self.decode(bincode)
|
parameters = self.decode(bincode)
|
||||||
template = self.build_template(self.TemplateType.Parameters(*parameters))
|
template = self.build_template(self.TemplateType.Parameters(*parameters))
|
||||||
registers_usage = template.registers_usage(vcl.atidlas.StatementsTuple(self.statement))/4
|
registers_usage = template.registers_usage(vcl.atidlas.StatementsTuple(self.statement))/4
|
||||||
@@ -90,22 +98,18 @@ class GeneticOperators(object):
|
|||||||
local_size = template.parameters.local_size_0*template.parameters.local_size_1
|
local_size = template.parameters.local_size_0*template.parameters.local_size_1
|
||||||
occupancy_record = tools.OccupancyRecord(self.device, local_size, lmem_usage, registers_usage)
|
occupancy_record = tools.OccupancyRecord(self.device, local_size, lmem_usage, registers_usage)
|
||||||
if not tools.skip(template, self.statement, self.device):
|
if not tools.skip(template, self.statement, self.device):
|
||||||
fetchcount[fetch] = fetchcount[fetch] + 1
|
|
||||||
if max(fetchcount) - min(fetchcount) <= 1:
|
|
||||||
result.append(creator.Individual(bincode))
|
result.append(creator.Individual(bincode))
|
||||||
break
|
break
|
||||||
else:
|
|
||||||
fetchcount[fetch] = fetchcount[fetch] - 1
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def mutate(self, individual):
|
def mutate(self, individual):
|
||||||
while True:
|
while True:
|
||||||
new_individual = copy.deepcopy(individual)
|
new_individual = copy.deepcopy(individual)
|
||||||
for i in range(len(new_individual)):
|
for i in range(len(new_individual)):
|
||||||
if i < 2 and random.random() < self.indpb:
|
if isinstance(individual[i], int) and random.random() < self.indpb:
|
||||||
while new_individual[i] == individual[i]:
|
while new_individual[i] == individual[i]:
|
||||||
new_individual[i] = random.randint(0, 2)
|
new_individual[i] = random.randint(0, 2)
|
||||||
elif i >= 2 and random.random() < self.indpb:
|
elif not isinstance(individual[i], int) and random.random() < self.indpb:
|
||||||
new_individual[i] = '1' if new_individual[i]=='0' else '0'
|
new_individual[i] = '1' if new_individual[i]=='0' else '0'
|
||||||
parameters = self.decode(new_individual)
|
parameters = self.decode(new_individual)
|
||||||
template = self.build_template(self.TemplateType.Parameters(*parameters))
|
template = self.build_template(self.TemplateType.Parameters(*parameters))
|
||||||
@@ -176,7 +180,7 @@ class GeneticOperators(object):
|
|||||||
population[:] = self.toolbox.select(population + offspring, mu)
|
population[:] = self.toolbox.select(population + offspring, mu)
|
||||||
#Update
|
#Update
|
||||||
gen = gen + 1
|
gen = gen + 1
|
||||||
best_profile = '(%s)'%','.join(map(str,GeneticOperators.decode(hof[0])));
|
best_profile = '(%s)'%','.join(map(str,self.decode(hof[0])));
|
||||||
best_performance = compute_perf(hof[0].fitness.values[0])
|
best_performance = compute_perf(hof[0].fitness.values[0])
|
||||||
sys.stdout.write('Time %d | Best %d %s [ for %s ]\r'%(time.time() - start_time, best_performance, perf_metric, best_profile))
|
sys.stdout.write('Time %d | Best %d %s [ for %s ]\r'%(time.time() - start_time, best_performance, perf_metric, best_profile))
|
||||||
sys.stdout.flush()
|
sys.stdout.flush()
|
||||||
|
@@ -13,12 +13,13 @@ def train_model(X, Y, profiles):
|
|||||||
Xmean = np.mean(X, axis=0)
|
Xmean = np.mean(X, axis=0)
|
||||||
Xstd = np.std(X, axis=0)
|
Xstd = np.std(X, axis=0)
|
||||||
X = (X - Xmean)/Xstd
|
X = (X - Xmean)/Xstd
|
||||||
|
|
||||||
Ymax = np.max(Y)
|
Ymax = np.max(Y)
|
||||||
Y = Y/Ymax
|
Y = Y/Ymax
|
||||||
|
|
||||||
ref = np.argmax(np.bincount(np.argmax(Y, axis=1))) #most common profile
|
ref = np.argmax(np.bincount(np.argmax(Y, axis=1))) #most common profile
|
||||||
#Cross-validation data-sets
|
#Cross-validation data-sets
|
||||||
cut = int(0.1*X.shape[0]+1)
|
cut = int(0.800*X.shape[0]+1)
|
||||||
XTr = X[0:cut, :]
|
XTr = X[0:cut, :]
|
||||||
YTr = Y[0:cut, :]
|
YTr = Y[0:cut, :]
|
||||||
XTe = X[cut:,:]
|
XTe = X[cut:,:]
|
||||||
@@ -26,23 +27,15 @@ def train_model(X, Y, profiles):
|
|||||||
|
|
||||||
#Train the model
|
#Train the model
|
||||||
print("Training the model...")
|
print("Training the model...")
|
||||||
ds = SupervisedDataSet(X.shape[1], Y.shape[1])
|
clf = ensemble.RandomForestRegressor(40).fit(XTr,YTr)
|
||||||
for idx, x in enumerate(X):
|
|
||||||
ds.addSample(x, Y[idx,:])
|
|
||||||
clf = buildNetwork(*[X.shape[1], 100, Y.shape[1]], hiddenclass = TanhLayer, outclass = LinearLayer)
|
|
||||||
#print fnn;
|
|
||||||
#trainer = RPropMinusTrainer( fnn, dataset=ds, verbose=True);
|
|
||||||
trainer = BackpropTrainer( clf, dataset=ds, verbose=True, momentum=0.01, weightdecay=0.01, learningrate=0.002, batchlearning=False)
|
|
||||||
trainer.trainUntilConvergence(maxEpochs=100)
|
|
||||||
|
|
||||||
#Evaluate the model
|
#Evaluate the model
|
||||||
GFlops = np.empty(XTe.shape[0])
|
GFlops = np.empty(XTe.shape[0])
|
||||||
speedups = np.empty(XTe.shape[0])
|
speedups = np.empty(XTe.shape[0])
|
||||||
optspeedups = np.empty(XTe.shape[0])
|
optspeedups = np.empty(XTe.shape[0])
|
||||||
for i,x in enumerate(XTe):
|
for i,x in enumerate(XTe):
|
||||||
predictions = clf.activate(x)
|
predictions = clf.predict(x)
|
||||||
label = np.argmax(predictions)
|
label = np.argmax(predictions)
|
||||||
# print YTe[i,label], YTe[i,ref], np.max(YTe[i,:])
|
|
||||||
speedups[i] = YTe[i,label]/YTe[i,ref]
|
speedups[i] = YTe[i,label]/YTe[i,ref]
|
||||||
optspeedups[i] = np.max(YTe[i,:])/YTe[i,ref]
|
optspeedups[i] = np.max(YTe[i,:])/YTe[i,ref]
|
||||||
GFlops[i] = YTe[i,ref]*Ymax
|
GFlops[i] = YTe[i,ref]*Ymax
|
||||||
@@ -52,7 +45,5 @@ def train_model(X, Y, profiles):
|
|||||||
print("Average testing speedup : %f (Optimal : %f)"%(sp.stats.gmean(speedups), sp.stats.gmean(optspeedups)))
|
print("Average testing speedup : %f (Optimal : %f)"%(sp.stats.gmean(speedups), sp.stats.gmean(optspeedups)))
|
||||||
print("Average GFLOP/s : %f (Default %f, Optimal %f)"%(np.mean(np.multiply(GFlops,speedups)), np.mean(GFlops), np.mean(np.multiply(GFlops,optspeedups))))
|
print("Average GFLOP/s : %f (Default %f, Optimal %f)"%(np.mean(np.multiply(GFlops,speedups)), np.mean(GFlops), np.mean(np.multiply(GFlops,optspeedups))))
|
||||||
print("Minimum speedup is %f wrt %i GFlops"%(np.min(speedups), GFlops[np.argmin(speedups)]))
|
print("Minimum speedup is %f wrt %i GFlops"%(np.min(speedups), GFlops[np.argmin(speedups)]))
|
||||||
print("Maximum speedup is %f wrt %i GFlops"%(np.max(speedups), GFlops[np.argmax(speedups)]))
|
print("Maximum speedup is %f wrt %i GFlops for %s"%(np.max(speedups), GFlops[np.argmax(speedups)], X[np.argmax(speedups)]*Xstd+Xmean))
|
||||||
print("--------")
|
print("--------")
|
||||||
|
|
||||||
print clf
|
|
||||||
|
@@ -48,6 +48,6 @@ from genetic import GeneticOperators
|
|||||||
#~ sys.stdout.flush()
|
#~ sys.stdout.flush()
|
||||||
#~
|
#~
|
||||||
|
|
||||||
def genetic(statement, context, TemplateType, build_template, parameter_names, compute_perf, perf_metric, out):
|
def genetic(statement, device, TemplateType, build_template, compute_perf, perf_metric, out):
|
||||||
GA = GeneticOperators(context.devices[0], statement, parameter_names, TemplateType, build_template, out)
|
GA = GeneticOperators(device, statement, TemplateType, build_template, out)
|
||||||
return GA.optimize(maxtime='2m30s', maxgen=1000, compute_perf=compute_perf, perf_metric=perf_metric)
|
return GA.optimize(maxtime='2m30s', maxgen=1000, compute_perf=compute_perf, perf_metric=perf_metric)
|
||||||
|
Reference in New Issue
Block a user