diff --git a/autotune/python/autotune.py b/autotune/python/autotune.py
index 31f270e29..6931384e1 100644
--- a/autotune/python/autotune.py
+++ b/autotune/python/autotune.py
@@ -27,120 +27,120 @@ TYPES = {
 'vector-axpy': {'template':vcl.atidlas.VectorAxpyTemplate, 'parameter-names':['simd-width', 'local-size-0', 'num-groups-0', 'fetch'], 'perf-index':lambda x: 3*x[0]*x[1][0]/x[2]*1e-9, 'perf-measure':'GB/s'},
-
+
 'matrix-axpy': {'template':vcl.atidlas.MatrixAxpyTemplate, 'parameter-names':['simd-width', 'local-size-0', 'local-size-1', 'num-groups-0', 'num-groups-1', 'fetch'], 'perf-index':lambda x: 3*x[0]*x[1][0]*x[1][1]/x[2]*1e-9, 'perf-measure':'GB/s'},
-
+
 'reduction': {'template':vcl.atidlas.ReductionTemplate, 'parameter-names':['simd-width', 'local-size-0', 'num-groups-0', 'fetch'], 'perf-index':lambda x: 2*x[0]*x[1][0]*x[1][1]/x[2]*1e-9, 'perf-measure':'GB/s'},
-
+
 'row-wise-reduction': {'template':vcl.atidlas.RowWiseReductionTemplate, 'parameter-names':['simd-width', 'local-size-0', 'local-size-1', 'num-groups-0', 'fetch'], 'perf-index':lambda x: x[0]*x[1][0]*x[1][1]/x[2]*1e-9, 'perf-measure':'GB/s'},
-
+
 'matrix-product': {'template':vcl.atidlas.MatrixProductTemplate, 'parameter-names':['simd-width', 'local-size-0', 'kL', 'local-size-1', 'mS', 'kS', 'nS', 'A-fetch-policy', 'B-fetch-policy', 'local-fetch-size-0', 'local-fetch-size-1'], 'perf-index': lambda x: 2*x[1][0]*x[1][1]*x[1][2]/x[2]*1e-9, 'perf-measure': 'GFLOP/s'} }
-
-def do_tuning(config_fname, spec_fname, viennacl_root):
-    config = ConfigObj(config_fname, configspec=spec_fname)
-    map_to_list = lambda T: list(map(T[0], T[1] if isinstance(T[1], list) else [T[1]]))
-    for operation in ['vector-axpy', 'matrix-axpy', 'row-wise-reduction', 'matrix-product']:
-        if operation in config:
-            p = config[operation]
-            confdevices = p['devices']
-            devices = utils.DEVICES_PRESETS[confdevices] if confdevices in utils.DEVICES_PRESETS else [utils.all_devices[int(i)] for i in confdevices]
-            precisions = map_to_list((str, p['precision']))
-            datatypes = [DATATYPES[k] for k in precisions]
-            #Iterate through the datatypes and the devices
-            for datatype, device in itertools.product(datatypes, devices):
-                ctx = cl.Context([device])
-                ctx = vcl.backend.Context(ctx)
-                device = ctx.current_device
-                #Check data-type
-                if datatype is vcl.float64 and not device.double_fp_config:
-                    sys.stderr.write('Warning : The device ' + device.name + ' does not support double precision! Skipping ...')
-                    continue
-                #Helper
-                def execute(statement, other_params, sizes, fname = os.devnull):
-                    print('-----')
-                    print(' '.join(map(str, ("Now tuning:", datatype.__name__, '-', operation, '-'.join(other_params), '[' + device.name, '(' + device.platform.name + ')] for sizes', sizes))))
-                    with open(fname, "w+") as archive:
-                        return optimize.genetic(statement, ctx, TYPES[operation]['template'], lambda p: TYPES[operation]['template'](p, *other_params),
-                                                TYPES[operation]['parameter-names'], lambda t: TYPES[operation]['perf-index']([datatype().itemsize, sizes, t]), TYPES[operation]['perf-measure'], archive)
-                s = map_to_list((int, p['size']))
-                #Vector AXPY
-                if operation=='vector-axpy':
-                    x = vcl.Vector(s[0], context=ctx, dtype=datatype)
-                    y = vcl.Vector(s[0], context=ctx, dtype=datatype)
-                    execute(vcl.ElementProd(vcl.exp(x + y),vcl.cos(x + y)), ())
-                #Matrix AXPY
-                if operation=='matrix-axpy':
-                    A = vcl.Matrix(s, context=ctx, dtype=datatype)
-                    B = vcl.Matrix(s, context=ctx, dtype=datatype)
-                    execute(A+B, ())
-                #Row-wise reduction
-                if operation=='row-wise-reduction':
-                    layouts = map_to_list((str,p['layout']))
-                    if 'all' in layouts:
-                        layouts = ['N', 'T']
-                    for A_trans in layouts:
-                        A = vcl.Matrix(s if A_trans=='N' else s[::-1], context=ctx, dtype=datatype, layout=vcl.COL_MAJOR)
-                        x = vcl.Vector(s[1] if A_trans=='N' else s[0], context=ctx, dtype=datatype)
-                        LHS = A if A_trans=='N' else A.T
-                        execute(LHS*x, ())
-                #Matrix Product
-                if operation=='matrix-product':
-                    layouts = map_to_list((str,p['layout']))
-                    if 'all' in layouts:
-                        layouts = ['NN', 'NT', 'TN', 'TT']
-                    for layout in layouts:
-                        def execution_handler(sizes, fname, parameters=None):
-                            A_trans = layout[0]
-                            B_trans = layout[1]
-                            A = vcl.Matrix((sizes[0], sizes[1]) if A_trans=='N' else (sizes[1],sizes[0]), context=ctx, dtype=datatype, layout=vcl.COL_MAJOR);
-                            B = vcl.Matrix((sizes[1], sizes[2]) if B_trans=='N' else (sizes[2],sizes[1]), context=ctx, dtype=datatype, layout=vcl.COL_MAJOR);
-                            LHS = A if A_trans=='N' else A.T
-                            RHS = B if B_trans=='N' else B.T
-                            alpha = vcl.HostScalar(1.0, context=ctx, dtype = datatype)
-                            beta = vcl.HostScalar(1.0, context=ctx, dtype = datatype)
-                            C = vcl.Matrix((sizes[0], sizes[2]), context=ctx, dtype = datatype, layout=vcl.COL_MAJOR)
-                            statement = vcl.Statement(vcl.Assign(C,LHS*RHS*alpha + C*beta))
-                            if parameters:
-                                TemplateType = TYPES[operation]['template']
-                                return tools.benchmark(TemplateType(TemplateType.Parameters(*parameters),A_trans,B_trans), statement, device)
-                            else:
-                                execute(statement,(A_trans, B_trans), sizes, fname)
-                        X, Y, profiles = generate_dataset(TYPES[operation]['template'], execution_handler)
-                        train_model(X, Y, profiles)
-
-
+
+def do_tuning(config_fname, spec_fname, viennacl_root):
+    config = ConfigObj(config_fname, configspec=spec_fname)
+    map_to_list = lambda T: list(map(T[0], T[1] if isinstance(T[1], list) else [T[1]]))
+    for operation in ['vector-axpy', 'matrix-axpy', 'row-wise-reduction', 'matrix-product']:
+        if operation in config:
+            p = config[operation]
+            confdevices = p['devices']
+            devices = utils.DEVICES_PRESETS[confdevices] if confdevices in utils.DEVICES_PRESETS else [utils.all_devices[int(i)] for i in confdevices]
+            precisions = map_to_list((str, p['precision']))
+            datatypes = [DATATYPES[k] for k in precisions]
+            #Iterate through the datatypes and the devices
+            for datatype, device in itertools.product(datatypes, devices):
+                ctx = cl.Context([device])
+                ctx = vcl.backend.Context(ctx)
+                device = ctx.current_device
+                #Check data-type
+                if datatype is vcl.float64 and not device.double_fp_config:
+                    sys.stderr.write('Warning : The device ' + device.name + ' does not support double precision! Skipping ...')
+                    continue
+                #Helper
+                def execute(statement, other_params, sizes, fname = os.devnull):
+                    print('-----')
+                    print(' '.join(map(str, ("Now tuning:", datatype.__name__, '-', operation, '-'.join(other_params), '[' + device.name, '(' + device.platform.name + ')] for sizes', sizes))))
+                    with open(fname, "w+") as archive:
+                        return optimize.genetic(statement, ctx, TYPES[operation]['template'], lambda p: TYPES[operation]['template'](p, *other_params),
+                                                TYPES[operation]['parameter-names'], lambda t: TYPES[operation]['perf-index']([datatype().itemsize, sizes, t]), TYPES[operation]['perf-measure'], archive)
+                s = map_to_list((int, p['size']))
+                #Vector AXPY
+                if operation=='vector-axpy':
+                    x = vcl.Vector(s[0], context=ctx, dtype=datatype)
+                    y = vcl.Vector(s[0], context=ctx, dtype=datatype)
+                    execute(vcl.ElementProd(vcl.exp(x + y),vcl.cos(x + y)), ())
+                #Matrix AXPY
+                if operation=='matrix-axpy':
+                    A = vcl.Matrix(s, context=ctx, dtype=datatype)
+                    B = vcl.Matrix(s, context=ctx, dtype=datatype)
+                    execute(A+B, ())
+                #Row-wise reduction
+                if operation=='row-wise-reduction':
+                    layouts = map_to_list((str,p['layout']))
+                    if 'all' in layouts:
+                        layouts = ['N', 'T']
+                    for A_trans in layouts:
+                        A = vcl.Matrix(s if A_trans=='N' else s[::-1], context=ctx, dtype=datatype, layout=vcl.COL_MAJOR)
+                        x = vcl.Vector(s[1] if A_trans=='N' else s[0], context=ctx, dtype=datatype)
+                        LHS = A if A_trans=='N' else A.T
+                        execute(LHS*x, ())
+                #Matrix Product
+                if operation=='matrix-product':
+                    layouts = map_to_list((str,p['layout']))
+                    if 'all' in layouts:
+                        layouts = ['NN', 'NT', 'TN', 'TT']
+                    for layout in layouts:
+                        def execution_handler(sizes, fname, parameters=None):
+                            A_trans = layout[0]
+                            B_trans = layout[1]
+                            A = vcl.Matrix((sizes[0], sizes[1]) if A_trans=='N' else (sizes[1],sizes[0]), context=ctx, dtype=datatype, layout=vcl.COL_MAJOR);
+                            B = vcl.Matrix((sizes[1], sizes[2]) if B_trans=='N' else (sizes[2],sizes[1]), context=ctx, dtype=datatype, layout=vcl.COL_MAJOR);
+                            LHS = A if A_trans=='N' else A.T
+                            RHS = B if B_trans=='N' else B.T
+                            alpha = vcl.HostScalar(1.0, context=ctx, dtype = datatype)
+                            beta = vcl.HostScalar(1.0, context=ctx, dtype = datatype)
+                            C = vcl.Matrix((sizes[0], sizes[2]), context=ctx, dtype = datatype, layout=vcl.COL_MAJOR)
+                            statement = vcl.Statement(vcl.Assign(C,LHS*RHS*alpha + C*beta))
+                            if parameters:
+                                TemplateType = TYPES[operation]['template']
+                                return tools.benchmark(TemplateType(TemplateType.Parameters(*parameters),A_trans,B_trans), statement, device)
+                            else:
+                                execute(statement,(A_trans, B_trans), sizes, fname)
+                        X, Y, profiles = generate_dataset(TYPES[operation]['template'], execution_handler)
+                        train_model(X, Y, profiles)
+
+
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser();
-    subparsers = parser.add_subparsers(dest='action')
-    print_devices_parser = subparsers.add_parser('list-devices', help='list the devices available')
-    tune_parser = subparsers.add_parser('tune', help='tune using a specific configuration file')
-    tune_parser.add_argument("--config", default="config.ini", required=False, type=str)
-    tune_parser.add_argument("--viennacl-root", default='', required=False, type=str)
-    args = parser.parse_args()
-
-    if(args.action=='list-devices'):
-        print("----------------")
-        print("Devices available:")
-        print("----------------")
-        devices = [d for platform in cl.get_platforms() for d in platform.get_devices()]
-        for (i, d) in enumerate(devices):
-            print('Device', i, ':', utils.DEVICE_TYPE_PREFIX[d.type].upper() + ':', d.name, 'on', d.platform.name)
-        print("----------------")
-    else:
-        print("------")
-        print("Auto-tuning")
-        print("------")
-        do_tuning(args.config, 'config_spec.ini', args.viennacl_root)
+    parser = argparse.ArgumentParser();
+    subparsers = parser.add_subparsers(dest='action')
+    print_devices_parser = subparsers.add_parser('list-devices', help='list the devices available')
+    tune_parser = subparsers.add_parser('tune', help='tune using a specific configuration file')
+    tune_parser.add_argument("--config", default="config.ini", required=False, type=str)
+    tune_parser.add_argument("--viennacl-root", default='', required=False, type=str)
+    args = parser.parse_args()
+
+    if(args.action=='list-devices'):
+        print("----------------")
+        print("Devices available:")
+        print("----------------")
+        devices = [d for platform in cl.get_platforms() for d in platform.get_devices()]
+        for (i, d) in enumerate(devices):
+            print('Device', i, ':', utils.DEVICE_TYPE_PREFIX[d.type].upper() + ':', d.name, 'on', d.platform.name)
+        print("----------------")
+    else:
+        print("------")
+        print("Auto-tuning")
+        print("------")
+        do_tuning(args.config, 'config_spec.ini', args.viennacl_root)
diff --git a/autotune/python/dataset.py b/autotune/python/dataset.py
index c4d6da4f5..c636be5e9 100644
--- a/autotune/python/dataset.py
+++ b/autotune/python/dataset.py
@@ -7,95 +7,95 @@ from sklearn.neighbors.kde import KernelDensity;
 from pyviennacl.atidlas import FetchingPolicy
 
 def decode(y):
-    fetch = [FetchingPolicy.FETCH_FROM_LOCAL, FetchingPolicy.FETCH_FROM_GLOBAL_CONTIGUOUS, FetchingPolicy.FETCH_FROM_GLOBAL_STRIDED]
-    y[7] = fetch[y[7]]
-    y[8] = fetch[y[8]]
-    return y
-
+    fetch = [FetchingPolicy.FETCH_FROM_LOCAL, FetchingPolicy.FETCH_FROM_GLOBAL_CONTIGUOUS, FetchingPolicy.FETCH_FROM_GLOBAL_STRIDED]
+    y[7] = fetch[y[7]]
+    y[8] = fetch[y[8]]
+    return y
+
 def generate_dataset(TemplateType, execution_handler):
-    I = 0
-    step = 64;
-    max_size = 4000;
-
-    #Retrieves the existing data
-    print "Retrieving data..."
-    path = "./data"
-    files = os.listdir(path)
-    X = np.empty((len(files),3))
-    t = np.empty(len(files))
-    profiles = []
-    nonemptyfiles = []
-    for i,fname in enumerate(files):
-        if os.path.getsize(os.path.join(path,fname))>0:
-            nonemptyfiles.append(fname)
-    files = nonemptyfiles
-
-    for i,fname in enumerate(files):
-        MNK = re.search(r"([0-9]+)-([0-9]+)-([0-9]+).csv", fname)
-        fl = open(os.path.join(path,fname),"rb")
-        A = np.loadtxt(fl,delimiter=',')
-        x = np.array([MNK.group(1), MNK.group(2), MNK.group(3)]).astype(float)
-        y = tuple(A[np.argmin(A[:,0]),1:])
-        if y not in profiles:
-            profiles.append(y)
-        idx = profiles.index(y)
-        X[i,:] = x
-        t[i] = idx
+    I = 0
+    step = 64;
+    max_size = 4000;
-    #Generates new data
-    print "Generating new data..."
-    kdes = [KernelDensity(kernel='gaussian', bandwidth=2*step).fit(X[t==i,:]) for i in range(int(max(t))+1)] if files else [];
-    X.resize((len(files)+I, 3), refcheck=False);
-    t.resize(len(files)+I, refcheck=False);
-
-    max_square = max_size/step
-    for i in range(I):
-        n_per_label = np.bincount(t[0:i+1].astype(int));
-        Xtuples = [tuple(x) for x in X];
-        r = random.random();
-        while(True):
-            if(len(kdes)==0 or r<=1.0/len(kdes)):
-                x = np.array([step*random.randint(1,40), step*random.randint(1,40), step*random.randint(1,40)]);
-            else:
-                probs = (1.0/n_per_label)
-                distr = np.random.choice(range(n_per_label.size), p = probs/np.sum(probs))
-                x = kdes[distr].sample()[0]
-                x = np.maximum(np.ones(x.shape),(x - step/2).astype(int)/step + 1)*step
-            if tuple(x) not in Xtuples:
-                break;
-        x = x.astype(int)
-        fname = os.path.join(path, `x[0]` +"-"+ `x[1]` +"-"+ `x[2]` +".csv")
-        #Execute auto-tuning procedure
-        execution_handler(x, fname)
-        #Load csv into matrix
-        fl = open(fname,"rb");
-        A = np.loadtxt(fl,delimiter=',');
-        #Update the kernel density estimators
-        y = tuple(A[np.argmin(A[:,0]),1:]);
-        if y not in profiles:
-            profiles.append(y);
-            kdes.append(KernelDensity(kernel='gaussian', bandwidth=2*step));
-        idx = profiles.index(y);
-        #Update data
-        X[len(files)+i,:] = x;
-        t[len(files)+i] = idx;
-        #Update density estimator p(M,N,K | t=idx)
-        kdes[idx].fit(X[t[0:len(files)+i+1]==idx,:]);
+    #Retrieves the existing data
+    print "Retrieving data..."
+    path = "./data"
+    files = os.listdir(path)
+    X = np.empty((len(files),3))
+    t = np.empty(len(files))
+    profiles = []
+    nonemptyfiles = []
+    for i,fname in enumerate(files):
+        if os.path.getsize(os.path.join(path,fname))>0:
+            nonemptyfiles.append(fname)
+    files = nonemptyfiles
-
-    print "Exporting data...";
-    #Shuffle the list of file
-    files = os.listdir(path)
-    X = np.empty((len(files),3))
-    Y = np.zeros((len(files), len(profiles)))
-    for i,fname in enumerate(files):
-        MNK = re.search(r"([0-9]+)-([0-9]+)-([0-9]+).csv", fname)
-        X[i,:] = map(float,[MNK.group(k) for k in range(1,4)])
-        fl = open(os.path.join(path,fname),"rb");
-        A = np.loadtxt(fl,delimiter=',')
-        for j,y in enumerate(profiles):
-            idx = np.where(np.all(A[:,1:]==y,axis=1))[0]
-            T = A[idx[0], 0] if idx.size else execution_handler(map(int,X[i,:]), '', decode(map(int, y)))
-            Y[i,j] = 2*1e-9*X[i,0]*X[i,1]*X[i,2]/T
-
-    return X, Y, profiles
+    for i,fname in enumerate(files):
+        MNK = re.search(r"([0-9]+)-([0-9]+)-([0-9]+).csv", fname)
+        fl = open(os.path.join(path,fname),"rb")
+        A = np.loadtxt(fl,delimiter=',')
+        x = np.array([MNK.group(1), MNK.group(2), MNK.group(3)]).astype(float)
+        y = tuple(A[np.argmin(A[:,0]),1:])
+        if y not in profiles:
+            profiles.append(y)
+        idx = profiles.index(y)
+        X[i,:] = x
+        t[i] = idx
+
+    #Generates new data
+    print "Generating new data..."
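Note: the generation loop continued below grows the training set by drawing candidate (M, N, K) sizes either uniformly on the 64-multiple grid or from per-profile Gaussian kernel-density estimates, preferring rarely-winning profiles and snapping each draw back onto the grid. A minimal self-contained sketch of that sampling scheme, with illustrative data and the modern sklearn import path; none of this is part of the patch:

    import numpy as np
    from sklearn.neighbors import KernelDensity

    step = 64
    rng = np.random.RandomState(0)

    # X holds observed (M, N, K) sizes; t holds the index of the winning profile per row
    X = np.array([[256, 256, 256], [1024, 512, 2048], [512, 512, 512]], dtype=float)
    t = np.array([0, 1, 0])

    # One Gaussian KDE per profile label, bandwidth 2*step as in the patch
    kdes = [KernelDensity(kernel='gaussian', bandwidth=2 * step).fit(X[t == i])
            for i in range(int(t.max()) + 1)]

    n_per_label = np.bincount(t)
    if rng.rand() <= 1.0 / len(kdes):
        # Uniform draw on the 64-multiple grid, mirroring step*random.randint(1,40)
        x = step * rng.randint(1, 41, size=3)
    else:
        # Pick a profile with probability proportional to 1/count, then sample
        # near sizes where that profile already wins
        probs = 1.0 / n_per_label
        label = rng.choice(len(kdes), p=probs / probs.sum())
        x = kdes[label].sample(random_state=rng)[0]
        # Snap the continuous KDE sample back onto the step grid, staying >= step
        x = np.maximum(1, (x - step / 2).astype(int) // step + 1) * step
    print(x.astype(int))  # the next (M, N, K) candidate to auto-tune

Weighting the label choice by 1/count keeps the dataset exploring sizes where unusual profiles win, instead of oversampling the dominant profile.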
+ kdes = [KernelDensity(kernel='gaussian', bandwidth=2*step).fit(X[t==i,:]) for i in range(int(max(t))+1)] if files else []; + X.resize((len(files)+I, 3), refcheck=False); + t.resize(len(files)+I, refcheck=False); + + max_square = max_size/step + for i in range(I): + n_per_label = np.bincount(t[0:i+1].astype(int)); + Xtuples = [tuple(x) for x in X]; + r = random.random(); + while(True): + if(len(kdes)==0 or r<=1.0/len(kdes)): + x = np.array([step*random.randint(1,40), step*random.randint(1,40), step*random.randint(1,40)]); + else: + probs = (1.0/n_per_label) + distr = np.random.choice(range(n_per_label.size), p = probs/np.sum(probs)) + x = kdes[distr].sample()[0] + x = np.maximum(np.ones(x.shape),(x - step/2).astype(int)/step + 1)*step + if tuple(x) not in Xtuples: + break; + x = x.astype(int) + fname = os.path.join(path, `x[0]` +"-"+ `x[1]` +"-"+ `x[2]` +".csv") + #Execute auto-tuning procedure + execution_handler(x, fname) + #Load csv into matrix + fl = open(fname,"rb"); + A = np.loadtxt(fl,delimiter=','); + #Update the kernel density estimators + y = tuple(A[np.argmin(A[:,0]),1:]); + if y not in profiles: + profiles.append(y); + kdes.append(KernelDensity(kernel='gaussian', bandwidth=2*step)); + idx = profiles.index(y); + #Update data + X[len(files)+i,:] = x; + t[len(files)+i] = idx; + #Update density estimator p(M,N,K | t=idx) + kdes[idx].fit(X[t[0:len(files)+i+1]==idx,:]); + + + print "Exporting data..."; + #Shuffle the list of file + files = os.listdir(path) + X = np.empty((len(files),3)) + Y = np.zeros((len(files), len(profiles))) + for i,fname in enumerate(files): + MNK = re.search(r"([0-9]+)-([0-9]+)-([0-9]+).csv", fname) + X[i,:] = map(float,[MNK.group(k) for k in range(1,4)]) + fl = open(os.path.join(path,fname),"rb"); + A = np.loadtxt(fl,delimiter=',') + for j,y in enumerate(profiles): + idx = np.where(np.all(A[:,1:]==y,axis=1))[0] + T = A[idx[0], 0] if idx.size else execution_handler(map(int,X[i,:]), '', decode(map(int, y))) + Y[i,j] = 2*1e-9*X[i,0]*X[i,1]*X[i,2]/T + + return X, Y, profiles diff --git a/autotune/python/external/configobj.py b/autotune/python/external/configobj.py index 9476b0b28..a1074a507 100644 --- a/autotune/python/external/configobj.py +++ b/autotune/python/external/configobj.py @@ -139,28 +139,28 @@ class UnknownType(Exception): class Builder(object): - + def build(self, o): if m is None: raise UnknownType(o.__class__.__name__) return m(o) - + def build_List(self, o): return list(map(self.build, o.getChildren())) - + def build_Const(self, o): return o.value - + def build_Dict(self, o): d = {} i = iter(map(self.build, o.getChildren())) for el in i: d[el] = next(i) return d - + def build_Tuple(self, o): return tuple(self.build_List(o)) - + def build_Name(self, o): if o.name == 'None': return None @@ -168,10 +168,10 @@ class Builder(object): return True if o.name == 'False': return False - + # An undefined Name raise UnknownType('Undefined Name') - + def build_Add(self, o): real, imag = list(map(self.build_Const, o.getChildren())) try: @@ -181,14 +181,14 @@ class Builder(object): if not isinstance(imag, complex) or imag.real != 0.0: raise UnknownType('Add') return real+imag - + def build_Getattr(self, o): parent = self.build(o.expr) return getattr(parent, o.attrname) - + def build_UnarySub(self, o): return -self.build_Const(o.getChildren()[0]) - + def build_UnaryAdd(self, o): return self.build_Const(o.getChildren()[0]) @@ -199,7 +199,7 @@ _builder = Builder() def unrepr(s): if not s: return s - + # this is supposed to be safe import ast return 
ast.literal_eval(s) @@ -304,7 +304,7 @@ class InterpolationEngine(object): # short-cut if not self._cookie in value: return value - + def recursive_interpolate(key, value, section, backtrail): """The function that does the actual work. @@ -404,7 +404,7 @@ class InterpolationEngine(object): (e.g., if we interpolated "$$" and returned "$"). """ raise NotImplementedError() - + class ConfigParserInterpolation(InterpolationEngine): @@ -453,27 +453,27 @@ interpolation_engines = { def __newobj__(cls, *args): # Hack for pickle - return cls.__new__(cls, *args) + return cls.__new__(cls, *args) class Section(dict): """ A dictionary-like object that represents a section in a config file. - + It does string interpolation if the 'interpolation' attribute of the 'main' object is set to True. - + Interpolation is tried first from this object, then from the 'DEFAULT' section of this object, next from the parent and its 'DEFAULT' section, and so on until the main object is reached. - + A Section will behave like an ordered dictionary - following the order of the ``scalars`` and ``sections`` attributes. You can use this to change the order of members. - + Iteration follows the order: scalars, then sections. """ - + def __setstate__(self, state): dict.update(self, state[0]) self.__dict__.update(state[1]) @@ -481,8 +481,8 @@ class Section(dict): def __reduce__(self): state = (dict(self), self.__dict__) return (__newobj__, (self.__class__,), state) - - + + def __init__(self, parent, depth, main, indict=None, name=None): """ * parent is the section above @@ -507,8 +507,8 @@ class Section(dict): # (rather than just passing to ``dict.__init__``) for entry, value in indict.items(): self[entry] = value - - + + def _initialise(self): # the sequence of scalar values in this Section self.scalars = [] @@ -552,7 +552,7 @@ class Section(dict): def __getitem__(self, key): """Fetch the item and do string interpolation.""" val = dict.__getitem__(self, key) - if self.main.interpolation: + if self.main.interpolation: if isinstance(val, six.string_types): return self._interpolate(key, val) if isinstance(val, list): @@ -569,20 +569,20 @@ class Section(dict): def __setitem__(self, key, value, unrepr=False): """ Correctly set a value. - + Making dictionary values Section instances. (We have to special case 'Section' instances - which are also dicts) - + Keys must be strings. Values need only be strings (or lists of strings) if ``main.stringify`` is set. - + ``unrepr`` must be set when setting a value to a dictionary, without creating a new sub-section. """ if not isinstance(key, six.string_types): raise ValueError('The key "%s" is not a string.' % key) - + # add the comment if key not in self.comments: self.comments[key] = [] @@ -683,7 +683,7 @@ class Section(dict): """ A version of clear that also affects scalars/sections Also clears comments and configspec. - + Leaves other attributes alone : depth/main/parent are not affected """ @@ -757,10 +757,10 @@ class Section(dict): def dict(self): """ Return a deepcopy of self as a dictionary. - + All members that are ``Section`` instances are recursively turned to ordinary dictionaries - by calling their ``dict`` method. - + >>> n = a.dict() >>> n == a 1 @@ -785,7 +785,7 @@ class Section(dict): def merge(self, indict): """ A recursive update - useful for merging config files. - + >>> a = '''[section1] ... option1 = True ... 
[[subsection]] @@ -805,17 +805,17 @@ class Section(dict): if (key in self and isinstance(self[key], dict) and isinstance(val, dict)): self[key].merge(val) - else: + else: self[key] = val def rename(self, oldkey, newkey): """ Change a keyname to another, without changing position in sequence. - + Implemented so that transformations can be made on keys, as well as on values. (used by encode and decode) - + Also renames comments. """ if oldkey in self.scalars: @@ -843,30 +843,30 @@ class Section(dict): call_on_sections=False, **keywargs): """ Walk every member and call a function on the keyword and value. - + Return a dictionary of the return values - + If the function raises an exception, raise the errror unless ``raise_errors=False``, in which case set the return value to ``False``. - + Any unrecognised keyword arguments you pass to walk, will be pased on to the function you pass in. - + Note: if ``call_on_sections`` is ``True`` then - on encountering a subsection, *first* the function is called for the *whole* subsection, and then recurses into it's members. This means your function must be able to handle strings, dictionaries and lists. This allows you to change the key of subsections as well as for ordinary members. The return value when called on the whole subsection has to be discarded. - + See the encode and decode methods for examples, including functions. - + .. admonition:: caution - + You can use ``walk`` to transform the names of members of a section but you mustn't add or delete members. - + >>> config = '''[XXXXsection] ... XXXXkey = XXXXvalue'''.splitlines() >>> cfg = ConfigObj(config) @@ -929,17 +929,17 @@ class Section(dict): Accepts a key as input. The corresponding value must be a string or the objects (``True`` or 1) or (``False`` or 0). We allow 0 and 1 to retain compatibility with Python 2.2. - - If the string is one of ``True``, ``On``, ``Yes``, or ``1`` it returns + + If the string is one of ``True``, ``On``, ``Yes``, or ``1`` it returns ``True``. - - If the string is one of ``False``, ``Off``, ``No``, or ``0`` it returns + + If the string is one of ``False``, ``Off``, ``No``, or ``0`` it returns ``False``. - + ``as_bool`` is not case sensitive. - + Any other input will raise a ``ValueError``. - + >>> a = ConfigObj() >>> a['a'] = 'fish' >>> a.as_bool('a') @@ -971,10 +971,10 @@ class Section(dict): def as_int(self, key): """ A convenience method which coerces the specified value to an integer. - + If the value is an invalid literal for ``int``, a ``ValueError`` will be raised. - + >>> a = ConfigObj() >>> a['a'] = 'fish' >>> a.as_int('a') @@ -994,10 +994,10 @@ class Section(dict): def as_float(self, key): """ A convenience method which coerces the specified value to a float. - + If the value is an invalid literal for ``float``, a ``ValueError`` will be raised. - + >>> a = ConfigObj() >>> a['a'] = 'fish' >>> a.as_float('a') #doctest: +IGNORE_EXCEPTION_DETAIL @@ -1011,13 +1011,13 @@ class Section(dict): 3.2... """ return float(self[key]) - - + + def as_list(self, key): """ A convenience method which fetches the specified value, guaranteeing that it is a list. - + >>> a = ConfigObj() >>> a['a'] = 1 >>> a.as_list('a') @@ -1033,15 +1033,15 @@ class Section(dict): if isinstance(result, (tuple, list)): return list(result) return [result] - + def restore_default(self, key): """ Restore (and return) default value for the specified key. - + This method will only work for a ConfigObj that was created with a configspec and has been validated. 
- + If there is no default value for this key, ``KeyError`` is raised. """ default = self.default_values[key] @@ -1050,20 +1050,20 @@ class Section(dict): self.defaults.append(key) return default - + def restore_defaults(self): """ Recursively restore default values to all members that have them. - + This method will only work for a ConfigObj that was created with a configspec and has been validated. - + It doesn't delete or modify entries without default values. """ for key in self.default_values: self.restore_default(key) - + for section in self.sections: self[section].restore_defaults() @@ -1178,7 +1178,7 @@ class ConfigObj(Section): write_empty_values=False, _inspec=False): """ Parse a config file or create a config file object. - + ``ConfigObj(infile=None, configspec=None, encoding=None, interpolation=True, raise_errors=False, list_values=True, create_empty=False, file_error=False, stringify=True, @@ -1188,9 +1188,9 @@ class ConfigObj(Section): self._inspec = _inspec # init the superclass Section.__init__(self, self, 0, self) - + infile = infile or [] - + _options = {'configspec': configspec, 'encoding': encoding, 'interpolation': interpolation, 'raise_errors': raise_errors, 'list_values': list_values, @@ -1206,7 +1206,7 @@ class ConfigObj(Section): warnings.warn('Passing in an options dictionary to ConfigObj() is ' 'deprecated. Use **options instead.', DeprecationWarning, stacklevel=2) - + # TODO: check the values too. for entry in options: if entry not in OPTION_DEFAULTS: @@ -1217,18 +1217,18 @@ class ConfigObj(Section): keyword_value = _options[entry] if value != keyword_value: options[entry] = keyword_value - + # XXXX this ignores an explicit list_values = True in combination # with _inspec. The user should *never* do that anyway, but still... 
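Note: this vendored module is what autotune.py above drives via ConfigObj(config_fname, configspec=spec_fname) before reading per-operation sections. A minimal sketch of that usage pattern, assuming the companion validate module is available; the section and key names are illustrative stand-ins mirroring what do_tuning() reads, not the project's real config_spec.ini:

    from configobj import ConfigObj
    from validate import Validator

    # Inline stand-ins for config.ini / config_spec.ini (hypothetical content)
    spec = ConfigObj(['[matrix-product]',
                      'devices = force_list',
                      'precision = force_list',
                      'size = int_list',
                      'layout = force_list'], _inspec=True)
    config = ConfigObj(['[matrix-product]',
                        'devices = 0',
                        'precision = single',
                        'size = 1024, 1024, 1024',
                        'layout = all'], configspec=spec)
    if config.validate(Validator()) is not True:  # returns True or a pass/fail dict
        raise SystemExit('configuration does not satisfy the spec')

    p = config['matrix-product']
    print(p['devices'], p['precision'], p['size'], p['layout'])
    # -> ['0'] ['single'] [1024, 1024, 1024] ['all']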
if _inspec: options['list_values'] = False - + self._initialise(options) configspec = options['configspec'] self._original_configspec = configspec self._load(infile, configspec) - - + + def _load(self, infile, configspec): if isinstance(infile, six.string_types): self.filename = infile @@ -1246,10 +1246,10 @@ class ConfigObj(Section): with open(infile, 'w') as h: h.write('') content = [] - + elif isinstance(infile, (list, tuple)): content = list(infile) - + elif isinstance(infile, dict): # initialise self # the Section class handles creating subsections @@ -1262,18 +1262,18 @@ class ConfigObj(Section): this_section[section] = {} set_section(in_section[section], this_section[section]) set_section(infile, self) - + else: for entry in infile: self[entry] = infile[entry] del self._errors - + if configspec is not None: self._handle_configspec(configspec) else: self.configspec = None return - + elif getattr(infile, 'read', MISSING) is not MISSING: # This supports file like objects content = infile.read() or [] @@ -1300,7 +1300,7 @@ class ConfigObj(Section): assert all(isinstance(line, six.string_types) for line in content), repr(content) content = [line.rstrip('\r\n') for line in content] - + self._parse(content) # if we had any errors, now is the time to raise them if self._errors: @@ -1318,17 +1318,17 @@ class ConfigObj(Section): raise error # delete private attributes del self._errors - + if configspec is None: self.configspec = None else: self._handle_configspec(configspec) - - + + def _initialise(self, options=None): if options is None: options = OPTION_DEFAULTS - + # initialise a few variables self.filename = None self._errors = [] @@ -1345,48 +1345,48 @@ class ConfigObj(Section): self.newlines = None self.write_empty_values = options['write_empty_values'] self.unrepr = options['unrepr'] - + self.initial_comment = [] self.final_comment = [] self.configspec = None - + if self._inspec: self.list_values = False - + # Clear section attributes as well Section._initialise(self) - - + + def __repr__(self): def _getval(key): try: return self[key] except MissingInterpolationOption: return dict.__getitem__(self, key) - return ('ConfigObj({%s})' % - ', '.join([('%s: %s' % (repr(key), repr(_getval(key)))) + return ('ConfigObj({%s})' % + ', '.join([('%s: %s' % (repr(key), repr(_getval(key)))) for key in (self.scalars + self.sections)])) - - + + def _handle_bom(self, infile): """ Handle any BOM, and decode if necessary. - + If an encoding is specified, that *must* be used - but the BOM should still be removed (and the BOM attribute set). - + (If the encoding is wrongly specified, then a BOM for an alternative encoding won't be discovered or removed.) - + If an encoding is not specified, UTF8 or UTF16 BOM will be detected and removed. The BOM attribute will be set. UTF16 will be decoded to unicode. - + NOTE: This method must not be called with an empty ``infile``. - + Specifying the *wrong* encoding is likely to cause a ``UnicodeDecodeError``. - + ``infile`` must always be returned as a list of lines, but may be passed in as a single string. 
""" @@ -1397,7 +1397,7 @@ class ConfigObj(Section): # the encoding specified doesn't have one # just decode return self._decode(infile, self.encoding) - + if isinstance(infile, (list, tuple)): line = infile[0] else: @@ -1426,18 +1426,18 @@ class ConfigObj(Section): ##self.BOM = True # Don't need to remove BOM return self._decode(infile, encoding) - + # If we get this far, will *probably* raise a DecodeError # As it doesn't appear to start with a BOM return self._decode(infile, self.encoding) - + # Must be UTF8 BOM = BOM_SET[enc] if not line.startswith(BOM): return self._decode(infile, self.encoding) - + newline = line[len(BOM):] - + # BOM removed if isinstance(infile, (list, tuple)): infile[0] = newline @@ -1445,7 +1445,7 @@ class ConfigObj(Section): infile = newline self.BOM = True return self._decode(infile, self.encoding) - + # No encoding specified - so we need to check for UTF8/UTF16 for BOM, (encoding, final_encoding) in list(BOMS.items()): if not isinstance(line, six.binary_type) or not line.startswith(BOM): @@ -1472,7 +1472,7 @@ class ConfigObj(Section): return self._decode(infile, 'utf-8') # UTF16 - have to decode return self._decode(infile, encoding) - + if six.PY2 and isinstance(line, str): # don't actually do any decoding, since we're on python 2 and @@ -1496,7 +1496,7 @@ class ConfigObj(Section): def _decode(self, infile, encoding): """ Decode infile to unicode. Using the specified encoding. - + if is a string, it also needs converting to a list. """ if isinstance(infile, six.string_types): @@ -1545,14 +1545,14 @@ class ConfigObj(Section): temp_list_values = self.list_values if self.unrepr: self.list_values = False - + comment_list = [] done_start = False this_section = self maxline = len(infile) - 1 cur_index = -1 reset_comment = False - + while cur_index < maxline: if reset_comment: comment_list = [] @@ -1564,13 +1564,13 @@ class ConfigObj(Section): reset_comment = False comment_list.append(line) continue - + if not done_start: # preserve initial comment self.initial_comment = comment_list comment_list = [] done_start = True - + reset_comment = True # first we check if it's a section marker mat = self._sectionmarker.match(line) @@ -1584,7 +1584,7 @@ class ConfigObj(Section): self._handle_error("Cannot compute the section depth at line %s.", NestingError, infile, cur_index) continue - + if cur_depth < this_section.depth: # the new section is dropping back to a previous level try: @@ -1603,13 +1603,13 @@ class ConfigObj(Section): else: self._handle_error("Section too nested at line %s.", NestingError, infile, cur_index) - + sect_name = self._unquote(sect_name) if sect_name in parent: self._handle_error('Duplicate section name at line %s.', DuplicateError, infile, cur_index) continue - + # create the new section this_section = Section( parent, @@ -1710,7 +1710,7 @@ class ConfigObj(Section): """ Given a section and a depth level, walk back through the sections parents to see if the depth level matches a previous section. - + Return a reference to the right section, or raise a SyntaxError. """ @@ -1728,7 +1728,7 @@ class ConfigObj(Section): def _handle_error(self, text, ErrorClass, infile, cur_index): """ Handle an error according to the error settings. - + Either raise the error or store it. The error will have occured at ``cur_index`` """ @@ -1757,19 +1757,19 @@ class ConfigObj(Section): def _quote(self, value, multiline=True): """ Return a safely quoted version of a value. - + Raise a ConfigObjError if the value cannot be safely quoted. 
If multiline is ``True`` (default) then use triple quotes if necessary. - + * Don't quote values that don't need it. * Recursively quote members of a list and return a comma joined list. * Multiline is ``False`` for lists. * Obey list syntax for empty and single member lists. - + If ``list_values=False`` then the value is only quoted if it contains a ``\\n`` (is multiline) or '#'. - + If ``write_empty_values`` is set, and the value is an empty string, it won't be quoted. """ @@ -1777,7 +1777,7 @@ class ConfigObj(Section): # Only if multiline is set, so that it is used for values not # keys, and not values that are part of a list return '' - + if multiline and isinstance(value, (list, tuple)): if not value: return ',' @@ -1795,12 +1795,12 @@ class ConfigObj(Section): if not value: return '""' - + no_lists_no_quotes = not self.list_values and '\n' not in value and '#' not in value need_triple = multiline and ((("'" in value) and ('"' in value)) or ('\n' in value )) hash_triple_quote = multiline and not need_triple and ("'" in value) and ('"' in value) and ('#' in value) check_for_single = (no_lists_no_quotes or not need_triple) and not hash_triple_quote - + if check_for_single: if not self.list_values: # we don't quote if ``list_values=False`` @@ -1818,13 +1818,13 @@ class ConfigObj(Section): else: # if value has '\n' or "'" *and* '"', it will need triple quotes quot = self._get_triple_quote(value) - + if quot == noquot and '#' in value and self.list_values: quot = self._get_single_quote(value) - + return quot % value - - + + def _get_single_quote(self, value): if ("'" in value) and ('"' in value): raise ConfigObjError('Value "%s" cannot be safely quoted.' % value) @@ -1833,15 +1833,15 @@ class ConfigObj(Section): else: quot = dquot return quot - - + + def _get_triple_quote(self, value): if (value.find('"""') != -1) and (value.find("'''") != -1): raise ConfigObjError('Value "%s" cannot be safely quoted.' % value) if value.find('"""') == -1: quot = tdquot else: - quot = tsquot + quot = tsquot return quot @@ -1931,7 +1931,7 @@ class ConfigObj(Section): def _handle_configspec(self, configspec): """Parse the configspec.""" - # FIXME: Should we check that the configspec was created with the + # FIXME: Should we check that the configspec was created with the # correct settings ? (i.e. ``list_values=False``) if not isinstance(configspec, ConfigObj): try: @@ -1945,11 +1945,11 @@ class ConfigObj(Section): raise ConfigspecError('Parsing configspec failed: %s' % e) except IOError as e: raise IOError('Reading configspec failed: %s' % e) - - self.configspec = configspec - - + self.configspec = configspec + + + def _set_configspec(self, section, copy): """ Called by validate. 
Handles setting the configspec on subsections @@ -1961,7 +1961,7 @@ class ConfigObj(Section): for entry in section.sections: if entry not in configspec: section[entry].configspec = many - + for entry in configspec.sections: if entry == '__many__': continue @@ -1972,11 +1972,11 @@ class ConfigObj(Section): # copy comments section.comments[entry] = configspec.comments.get(entry, []) section.inline_comments[entry] = configspec.inline_comments.get(entry, '') - + # Could be a scalar when we expect a section if isinstance(section[entry], Section): section[entry].configspec = configspec[entry] - + def _write_line(self, indent_string, entry, this_entry, comment): """Write an individual line, for the write method""" @@ -2016,9 +2016,9 @@ class ConfigObj(Section): def write(self, outfile=None, section=None): """ Write the current ConfigObj as a file - + tekNico: FIXME: use StringIO instead of real files - + >>> filename = a.filename >>> a.filename = 'test.ini' >>> a.write() @@ -2031,7 +2031,7 @@ class ConfigObj(Section): if self.indent_type is None: # this can be true if initialised from a dictionary self.indent_type = DEFAULT_INDENT_TYPE - + out = [] cs = self._a_to_u('#') csp = self._a_to_u('# ') @@ -2045,7 +2045,7 @@ class ConfigObj(Section): if stripped_line and not stripped_line.startswith(cs): line = csp + line out.append(line) - + indent_string = self.indent_type * section.depth for entry in (section.scalars + section.sections): if entry in section.defaults: @@ -2058,7 +2058,7 @@ class ConfigObj(Section): out.append(indent_string + comment_line) this_entry = section[entry] comment = self._handle_comment(section.inline_comments[entry]) - + if isinstance(this_entry, Section): # a section out.append(self._write_marker( @@ -2073,7 +2073,7 @@ class ConfigObj(Section): entry, this_entry, comment)) - + if section is self: for line in self.final_comment: line = self._decode_element(line) @@ -2082,10 +2082,10 @@ class ConfigObj(Section): line = csp + line out.append(line) self.interpolation = int_val - + if section is not self: return out - + if (self.filename is None) and (outfile is None): # output a list of lines # might need to encode @@ -2099,7 +2099,7 @@ class ConfigObj(Section): out.append('') out[0] = BOM_UTF8 + out[0] return out - + # Turn the list to a string, joined with correct newlines newline = self.newlines or os.linesep if (getattr(outfile, 'mode', None) is not None and outfile.mode == 'w' @@ -2131,34 +2131,34 @@ class ConfigObj(Section): section=None): """ Test the ConfigObj against a configspec. - + It uses the ``validator`` object from *validate.py*. - + To run ``validate`` on the current ConfigObj, call: :: - + test = config.validate(validator) - + (Normally having previously passed in the configspec when the ConfigObj was created - you can dynamically assign a dictionary of checks to the ``configspec`` attribute of a section though). - + It returns ``True`` if everything passes, or a dictionary of pass/fails (True/False). If every member of a subsection passes, it will just have the value ``True``. (It also returns ``False`` if all members fail). - + In addition, it converts the values from strings to their native types if their checks pass (and ``stringify`` is set). - + If ``preserve_errors`` is ``True`` (``False`` is default) then instead of a marking a fail with a ``False``, it will preserve the actual exception object. This can contain info about the reason for failure. For example the ``VdtValueTooSmallError`` indicates that the value supplied was too small. 
If a value (or section) is missing it will still be marked as ``False``. - + You must have the validate module to use ``preserve_errors=True``. - + You can then use the ``flatten_errors`` function to turn your nested results dictionary into a flattened list of failures - useful for displaying meaningful error messages. @@ -2171,7 +2171,7 @@ class ConfigObj(Section): # Which makes importing configobj faster from validate import VdtMissingValue self._vdtMissingValue = VdtMissingValue - + section = self if copy: @@ -2181,23 +2181,23 @@ class ConfigObj(Section): section.BOM = section.configspec.BOM section.newlines = section.configspec.newlines section.indent_type = section.configspec.indent_type - + # # section.default_values.clear() #?? configspec = section.configspec self._set_configspec(section, copy) - + def validate_entry(entry, spec, val, missing, ret_true, ret_false): section.default_values.pop(entry, None) - + try: section.default_values[entry] = validator.get_default_value(configspec[entry]) except (KeyError, AttributeError, validator.baseErrorClass): # No default, bad default or validator has no 'get_default_value' # (e.g. SimpleVal) pass - + try: check = validator.check(spec, val, @@ -2231,16 +2231,16 @@ class ConfigObj(Section): if not copy and missing and entry not in section.defaults: section.defaults.append(entry) return ret_true, ret_false - + # out = {} ret_true = True ret_false = True - + unvalidated = [k for k in section.scalars if k not in configspec] - incorrect_sections = [k for k in configspec.sections if k in section.scalars] + incorrect_sections = [k for k in configspec.sections if k in section.scalars] incorrect_scalars = [k for k in configspec.scalars if k in section.sections] - + for entry in configspec.scalars: if entry in ('__many__', '___many___'): # reserved names @@ -2260,16 +2260,16 @@ class ConfigObj(Section): else: missing = False val = section[entry] - - ret_true, ret_false = validate_entry(entry, configspec[entry], val, + + ret_true, ret_false = validate_entry(entry, configspec[entry], val, missing, ret_true, ret_false) - + many = None if '__many__' in configspec.scalars: many = configspec['__many__'] elif '___many___' in configspec.scalars: many = configspec['___many___'] - + if many is not None: for entry in unvalidated: val = section[entry] @@ -2293,7 +2293,7 @@ class ConfigObj(Section): ret_false = False msg = 'Section %r was provided as a single value' % entry out[entry] = validator.baseErrorClass(msg) - + # Missing sections will have been created as empty ones when the # configspec was read. for entry in section.sections: @@ -2314,7 +2314,7 @@ class ConfigObj(Section): ret_false = False else: ret_true = False - + section.extra_values = unvalidated if preserve_errors and not section._created: # If the section wasn't created (i.e. it wasn't missing) @@ -2343,12 +2343,12 @@ class ConfigObj(Section): self.configspec = None # Just to be sure ;-) self._original_configspec = None - - + + def reload(self): """ Reload a ConfigObj from file. - + This method raises a ``ReloadError`` if the ConfigObj doesn't have a filename attribute pointing to a file. """ @@ -2361,31 +2361,31 @@ class ConfigObj(Section): if entry == 'configspec': continue current_options[entry] = getattr(self, entry) - + configspec = self._original_configspec current_options['configspec'] = configspec - + self.clear() self._initialise(current_options) self._load(filename, configspec) - + class SimpleVal(object): """ A simple validator. 
Can be used to check that all members expected are present. - + To use it, provide a configspec with all your members in (the value given will be ignored). Pass an instance of ``SimpleVal`` to the ``validate`` method of your ``ConfigObj``. ``validate`` will return ``True`` if all members are present, or a dictionary with True/False meaning present/missing. (Whole missing sections will be replaced with ``False``) """ - + def __init__(self): self.baseErrorClass = ConfigObjError - + def check(self, check, member, missing=False): """A dummy check method, always returns the value unchanged.""" if missing: @@ -2397,32 +2397,32 @@ def flatten_errors(cfg, res, levels=None, results=None): """ An example function that will turn a nested dictionary of results (as returned by ``ConfigObj.validate``) into a flat list. - + ``cfg`` is the ConfigObj instance being checked, ``res`` is the results dictionary returned by ``validate``. - + (This is a recursive function, so you shouldn't use the ``levels`` or ``results`` arguments - they are used by the function.) - + Returns a list of keys that failed. Each member of the list is a tuple:: - + ([list of sections...], key, result) - + If ``validate`` was called with ``preserve_errors=False`` (the default) then ``result`` will always be ``False``. *list of sections* is a flattened list of sections that the key was found in. - + If the section was missing (or a section was expected and a scalar provided - or vice-versa) then key will be ``None``. - + If the value (or section) was missing then ``result`` will be ``False``. - + If ``validate`` was called with ``preserve_errors=True`` and a value was present, but failed the check, then ``result`` will be the exception object returned. You can use this as a string that describes the failure. - + For example *The value "3" is of the wrong type*. """ if levels is None: @@ -2457,21 +2457,21 @@ def get_extra_values(conf, _prepend=()): """ Find all the values and sections not in the configspec from a validated ConfigObj. - + ``get_extra_values`` returns a list of tuples where each tuple represents either an extra section, or an extra value. - - The tuples contain two values, a tuple representing the section the value + + The tuples contain two values, a tuple representing the section the value is in and the name of the extra values. For extra values in the top level section the first member will be an empty tuple. For values in the 'foo' section the first member will be ``('foo',)``. For members in the 'bar' subsection of the 'foo' section the first member will be ``('foo', 'bar')``. - + NOTE: If you call ``get_extra_values`` on a ConfigObj instance that hasn't been validated it will return an empty list. 
""" out = [] - + out.extend([(_prepend, name) for name in conf.extra_values]) for name in conf.sections: if name not in conf.extra_values: diff --git a/autotune/python/genetic.py b/autotune/python/genetic.py index d0ebd5750..ae6568fc9 100644 --- a/autotune/python/genetic.py +++ b/autotune/python/genetic.py @@ -13,15 +13,15 @@ from deap import tools as deap_tools from collections import OrderedDict as odict - + def closest_divisor(N, x): - x_low=x_high=max(1,min(round(x),N)) - while N % x_low > 0 and x_low>0: - x_low = x_low - 1 - while N % x_high > 0 and x_high < N: - x_high = x_high + 1 - return x_low if x - x_low < x_high - x else x_high - + x_low=x_high=max(1,min(round(x),N)) + while N % x_low > 0 and x_low>0: + x_low = x_low - 1 + while N % x_high > 0 and x_high < N: + x_high = x_high + 1 + return x_low if x - x_low < x_high - x else x_high + def b_gray_to_bin(A='00000000', endian='big'): assert type(endian) is str assert endian == 'little' or endian == 'big' @@ -30,157 +30,155 @@ def b_gray_to_bin(A='00000000', endian='big'): for i in range(1, len(A)): b += str( int(b[i-1] != A[i]) ) if endian == 'little': b = b[::-1] # Convert back to little endian if necessary return b - + class GeneticOperators(object): - - def __init__(self, device, statement, parameter_names, TemplateType, build_template, out): - self.device = device - self.statement = statement - self.parameter_names = parameter_names - self.TemplateType = TemplateType - self.ParameterType = TemplateType.Parameters - self.build_template = build_template - self.cache = {} - self.indpb = 0.05 - self.out = out - - creator.create("FitnessMin", base.Fitness, weights=(-1.0,)) - creator.create("Individual", list, fitness=creator.FitnessMin) - - self.toolbox = base.Toolbox() - self.toolbox.register("population", self.init) - self.toolbox.register("evaluate", self.evaluate) - self.toolbox.register("mate", deap_tools.cxTwoPoint) - self.toolbox.register("mutate", self.mutate) - self.toolbox.register("select", deap_tools.selNSGA2) - @staticmethod - def decode(s): - FetchingPolicy = vcl.atidlas.FetchingPolicy - fetch = [FetchingPolicy.FETCH_FROM_LOCAL, FetchingPolicy.FETCH_FROM_GLOBAL_CONTIGUOUS, FetchingPolicy.FETCH_FROM_GLOBAL_STRIDED] - fetchA = fetch[s[0]] - fetchB = fetch[s[1]] - bincode = ''.join(s[2:]) - decode_element = lambda x:2**int(b_gray_to_bin(x), 2) - simd = decode_element(bincode[0:3]) - ls0 = decode_element(bincode[2:5]) - ls1 = decode_element(bincode[5:8]) - kL = decode_element(bincode[8:11]) - mS = decode_element(bincode[11:14]) - kS = decode_element(bincode[14:17]) - nS = decode_element(bincode[17:20]) - if fetchA==FetchingPolicy.FETCH_FROM_LOCAL or fetchB==FetchingPolicy.FETCH_FROM_LOCAL: - lf0 = decode_element(bincode[20:23]) - lf1 = ls0*ls1/lf0 - else: - lf0, lf1 = 0, 0 - return [simd, ls0, kL, ls1, mS, kS, nS, fetchA, fetchB, lf0, lf1] - - def init(self, N): - result = [] - fetchcount = [0, 0, 0] - while len(result) < N: - while True: - fetch = random.randint(0,2) - bincode = [fetch, fetch] + [str(random.randint(0,1)) for i in range(23)] - parameters = self.decode(bincode) - template = self.build_template(self.TemplateType.Parameters(*parameters)) - registers_usage = template.registers_usage(vcl.atidlas.StatementsTuple(self.statement))/4 - lmem_usage = template.lmem_usage(vcl.atidlas.StatementsTuple(self.statement)) - local_size = template.parameters.local_size_0*template.parameters.local_size_1 - occupancy_record = tools.OccupancyRecord(self.device, local_size, lmem_usage, registers_usage) - if not 
tools.skip(template, self.statement, self.device): - fetchcount[fetch] = fetchcount[fetch] + 1 - if max(fetchcount) - min(fetchcount) <= 1: - result.append(creator.Individual(bincode)) - break - else: - fetchcount[fetch] = fetchcount[fetch] - 1 - return result + def __init__(self, device, statement, parameter_names, TemplateType, build_template, out): + self.device = device + self.statement = statement + self.parameter_names = parameter_names + self.TemplateType = TemplateType + self.ParameterType = TemplateType.Parameters + self.build_template = build_template + self.cache = {} + self.indpb = 0.05 + self.out = out - def mutate(self, individual): - while True: - new_individual = copy.deepcopy(individual) - for i in range(len(new_individual)): - if i < 2 and random.random() < self.indpb: - while new_individual[i] == individual[i]: - new_individual[i] = random.randint(0, 2) - elif i >= 2 and random.random() < self.indpb: - new_individual[i] = '1' if new_individual[i]=='0' else '0' - parameters = self.decode(new_individual) - template = self.build_template(self.TemplateType.Parameters(*parameters)) - #print tools.skip(template, self.statement, self.device), parameters - if not tools.skip(template, self.statement, self.device): - break - return new_individual, - - def evaluate(self, individual): - if tuple(individual) not in self.cache: - parameters = self.decode(individual) - template = self.build_template(self.TemplateType.Parameters(*parameters)) - try: - tt = tools.benchmark(template, self.statement, self.device) - self.out.write(','.join([str(tt)]+map(str,map(int,parameters)))+'\n') - self.cache[tuple(individual)] = tt - except: - self.cache[tuple(individual)] = 10 - return self.cache[tuple(individual)], - - def optimize(self, maxtime, maxgen, compute_perf, perf_metric): - hof = deap_tools.HallOfFame(1) - # Begin the generational process - gen = 0 - maxtime = time.strptime(maxtime, '%Mm%Ss') - maxtime = maxtime.tm_min*60 + maxtime.tm_sec - start_time = time.time() - - mu = 30 - cxpb = 0.2 - mutpb = 0.7 - - population = self.init(mu) - invalid_ind = [ind for ind in population if not ind.fitness.valid] - fitnesses = self.toolbox.map(self.evaluate, invalid_ind) - for ind, fit in zip(invalid_ind, fitnesses): - ind.fitness.values = fit - hof.update(population) - - while time.time() - start_time < maxtime: - # Vary the population - offspring = [] - for _ in xrange(mu): - op_choice = random.random() - if op_choice < cxpb: # Apply crossover - ind1, ind2 = map(self.toolbox.clone, random.sample(population, 2)) - ind1, ind2 = self.toolbox.mate(ind1, ind2) - del ind1.fitness.values - offspring.append(ind1) - elif op_choice < cxpb + mutpb: # Apply mutation - ind = self.toolbox.clone(random.choice(population)) - ind, = self.toolbox.mutate(ind) - del ind.fitness.values - offspring.append(ind) - else: # Apply reproduction - offspring.append(random.choice(population)) - - #~ for x in offspring: - #~ print self.decode(x) - # Evaluate the individuals with an invalid fitness - invalid_ind = [ind for ind in offspring if not ind.fitness.valid] + creator.create("FitnessMin", base.Fitness, weights=(-1.0,)) + creator.create("Individual", list, fitness=creator.FitnessMin) + + self.toolbox = base.Toolbox() + self.toolbox.register("population", self.init) + self.toolbox.register("evaluate", self.evaluate) + self.toolbox.register("mate", deap_tools.cxTwoPoint) + self.toolbox.register("mutate", self.mutate) + self.toolbox.register("select", deap_tools.selNSGA2) + + @staticmethod + def decode(s): + FetchingPolicy = 
vcl.atidlas.FetchingPolicy + fetch = [FetchingPolicy.FETCH_FROM_LOCAL, FetchingPolicy.FETCH_FROM_GLOBAL_CONTIGUOUS, FetchingPolicy.FETCH_FROM_GLOBAL_STRIDED] + fetchA = fetch[s[0]] + fetchB = fetch[s[1]] + bincode = ''.join(s[2:]) + decode_element = lambda x:2**int(b_gray_to_bin(x), 2) + simd = decode_element(bincode[0:3]) + ls0 = decode_element(bincode[2:5]) + ls1 = decode_element(bincode[5:8]) + kL = decode_element(bincode[8:11]) + mS = decode_element(bincode[11:14]) + kS = decode_element(bincode[14:17]) + nS = decode_element(bincode[17:20]) + if fetchA==FetchingPolicy.FETCH_FROM_LOCAL or fetchB==FetchingPolicy.FETCH_FROM_LOCAL: + lf0 = decode_element(bincode[20:23]) + lf1 = ls0*ls1/lf0 + else: + lf0, lf1 = 0, 0 + return [simd, ls0, kL, ls1, mS, kS, nS, fetchA, fetchB, lf0, lf1] + + def init(self, N): + result = [] + fetchcount = [0, 0, 0] + while len(result) < N: + while True: + fetch = random.randint(0,2) + bincode = [fetch, fetch] + [str(random.randint(0,1)) for i in range(23)] + parameters = self.decode(bincode) + template = self.build_template(self.TemplateType.Parameters(*parameters)) + registers_usage = template.registers_usage(vcl.atidlas.StatementsTuple(self.statement))/4 + lmem_usage = template.lmem_usage(vcl.atidlas.StatementsTuple(self.statement)) + local_size = template.parameters.local_size_0*template.parameters.local_size_1 + occupancy_record = tools.OccupancyRecord(self.device, local_size, lmem_usage, registers_usage) + if not tools.skip(template, self.statement, self.device): + fetchcount[fetch] = fetchcount[fetch] + 1 + if max(fetchcount) - min(fetchcount) <= 1: + result.append(creator.Individual(bincode)) + break + else: + fetchcount[fetch] = fetchcount[fetch] - 1 + return result + + def mutate(self, individual): + while True: + new_individual = copy.deepcopy(individual) + for i in range(len(new_individual)): + if i < 2 and random.random() < self.indpb: + while new_individual[i] == individual[i]: + new_individual[i] = random.randint(0, 2) + elif i >= 2 and random.random() < self.indpb: + new_individual[i] = '1' if new_individual[i]=='0' else '0' + parameters = self.decode(new_individual) + template = self.build_template(self.TemplateType.Parameters(*parameters)) + #print tools.skip(template, self.statement, self.device), parameters + if not tools.skip(template, self.statement, self.device): + break + return new_individual, + + def evaluate(self, individual): + if tuple(individual) not in self.cache: + parameters = self.decode(individual) + template = self.build_template(self.TemplateType.Parameters(*parameters)) + try: + tt = tools.benchmark(template, self.statement, self.device) + self.out.write(','.join([str(tt)]+map(str,map(int,parameters)))+'\n') + self.cache[tuple(individual)] = tt + except: + self.cache[tuple(individual)] = 10 + return self.cache[tuple(individual)], + + def optimize(self, maxtime, maxgen, compute_perf, perf_metric): + hof = deap_tools.HallOfFame(1) + # Begin the generational process + gen = 0 + maxtime = time.strptime(maxtime, '%Mm%Ss') + maxtime = maxtime.tm_min*60 + maxtime.tm_sec + start_time = time.time() + + mu = 30 + cxpb = 0.2 + mutpb = 0.7 + + population = self.init(mu) + invalid_ind = [ind for ind in population if not ind.fitness.valid] fitnesses = self.toolbox.map(self.evaluate, invalid_ind) for ind, fit in zip(invalid_ind, fitnesses): ind.fitness.values = fit - # Update the hall of fame with the generated individuals - hof.update(offspring) - # Select the next generation population - population[:] = self.toolbox.select(population + 
+
+    def optimize(self, maxtime, maxgen, compute_perf, perf_metric):
+        hof = deap_tools.HallOfFame(1)
+        # Begin the generational process
+        gen = 0
+        maxtime = time.strptime(maxtime, '%Mm%Ss')
+        maxtime = maxtime.tm_min*60 + maxtime.tm_sec
+        start_time = time.time()
+
+        mu = 30
+        cxpb = 0.2
+        mutpb = 0.7
+
+        population = self.init(mu)
+        invalid_ind = [ind for ind in population if not ind.fitness.valid]
         fitnesses = self.toolbox.map(self.evaluate, invalid_ind)
         for ind, fit in zip(invalid_ind, fitnesses):
             ind.fitness.values = fit
-            # Update the hall of fame with the generated individuals
-            hof.update(offspring)
-            # Select the next generation population
-            population[:] = self.toolbox.select(population + offspring, mu)
-            #Update
-            gen = gen + 1
-            best_profile = '(%s)'%','.join(map(str,GeneticOperators.decode(hof[0])));
-            best_performance = compute_perf(hof[0].fitness.values[0])
-            sys.stdout.write('Time %d | Best %d %s [ for %s ]\r'%(time.time() - start_time, best_performance, perf_metric, best_profile))
-            sys.stdout.flush()
-        sys.stdout.write('\n')
-        return population
-
-
+        hof.update(population)
+
+        # NB: maxgen is accepted for API compatibility but currently unused;
+        # the loop is bounded by wall-clock time only
+        while time.time() - start_time < maxtime:
+            # Vary the population
+            offspring = []
+            for _ in xrange(mu):
+                op_choice = random.random()
+                if op_choice < cxpb:  # Apply crossover
+                    ind1, ind2 = map(self.toolbox.clone, random.sample(population, 2))
+                    ind1, ind2 = self.toolbox.mate(ind1, ind2)
+                    del ind1.fitness.values
+                    offspring.append(ind1)
+                elif op_choice < cxpb + mutpb:  # Apply mutation
+                    ind = self.toolbox.clone(random.choice(population))
+                    ind, = self.toolbox.mutate(ind)
+                    del ind.fitness.values
+                    offspring.append(ind)
+                else:  # Apply reproduction
+                    offspring.append(random.choice(population))
+
+            #~ for x in offspring:
+            #~     print self.decode(x)
+            # Evaluate the individuals with an invalid fitness
+            invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
+            fitnesses = self.toolbox.map(self.evaluate, invalid_ind)
+            for ind, fit in zip(invalid_ind, fitnesses):
+                ind.fitness.values = fit
+            # Update the hall of fame with the generated individuals
+            hof.update(offspring)
+            # Select the next generation population
+            population[:] = self.toolbox.select(population + offspring, mu)
+            #Update
+            gen = gen + 1
+            best_profile = '(%s)'%','.join(map(str, GeneticOperators.decode(hof[0])))
+            best_performance = compute_perf(hof[0].fitness.values[0])
+            sys.stdout.write('Time %d | Best %d %s [ for %s ]\r'%(time.time() - start_time, best_performance, perf_metric, best_profile))
+            sys.stdout.flush()
+        sys.stdout.write('\n')
+        return population
diff --git a/autotune/python/model.py b/autotune/python/model.py
index c1eb3dbca..782ab4790 100644
--- a/autotune/python/model.py
+++ b/autotune/python/model.py
@@ -4,41 +4,41 @@ import numpy as np
 import scipy as sp
 
 def train_model(X, Y, profiles):
-    #Preprocessing
-    scaler = preprocessing.StandardScaler().fit(X);
-    X = scaler.transform(X);
-    ref = np.argmax(np.bincount(np.argmax(Y, axis=1))) #most common profile
-
-    print Y
-    print np.bincount(np.argmax(Y, axis=1))
-    #Cross-validation data-sets
-    cut = int(0.5*X.shape[0]+1);
-    XTr = X[0:cut, :];
-    YTr = Y[0:cut, :];
-    XTe = X[cut:,:];
-    YTe = Y[cut:,:];
-
-    #Train the model
-    print("Training the model...");
-    clf = linear_model.LinearRegression().fit(XTr,YTr);
-
-    #Evaluate the model
-    GFlops = np.empty(XTe.shape[0]);
-    speedups = np.empty(XTe.shape[0]);
-    optspeedups = np.empty(XTe.shape[0]);
-    for i,x in enumerate(XTe):
-        predictions = clf.predict(x);
-        label = np.argmax(predictions);
-        speedups[i] = YTe[i,label]/YTe[i,ref];
-        optspeedups[i] = np.max(YTe[i,:])/YTe[i,ref];
-        GFlops[i] = YTe[i,ref];
-
-    np.set_printoptions(precision=2);
-    print("-----------------");
-    print("Average testing speedup : %f (Optimal : %f)"%(sp.stats.gmean(speedups), sp.stats.gmean(optspeedups)));
-    print("Average GFLOP/s : %f (Default %f, Optimal %f)"%(np.mean(np.multiply(GFlops,speedups)), np.mean(GFlops), np.mean(np.multiply(GFlops,optspeedups))));
-    print("Minimum speedup is %f wrt %i GFlops"%(np.min(speedups), GFlops[np.argmin(speedups)]));
-    print("Maximum speedup is %f wrt %i GFlops"%(np.max(speedups), GFlops[np.argmax(speedups)]));
-    print("--------");
-
-    print clf
+    #Preprocessing
+    scaler = preprocessing.StandardScaler().fit(X)
+    X = scaler.transform(X)
+    ref = np.argmax(np.bincount(np.argmax(Y, axis=1)))  #most common profile
+
+    print Y
+    print np.bincount(np.argmax(Y, axis=1))
+    #Cross-validation data-sets
+    cut = int(0.5*X.shape[0]+1)
+    XTr = X[0:cut, :]
+    YTr = Y[0:cut, :]
+    XTe = X[cut:, :]
+    YTe = Y[cut:, :]
+
+    #Train the model
+    print("Training the model...")
+    clf = linear_model.LinearRegression().fit(XTr, YTr)
+
+    #Evaluate the model
+    GFlops = np.empty(XTe.shape[0])
+    speedups = np.empty(XTe.shape[0])
+    optspeedups = np.empty(XTe.shape[0])
+    for i, x in enumerate(XTe):
+        predictions = clf.predict(x)
+        label = np.argmax(predictions)
+        speedups[i] = YTe[i,label]/YTe[i,ref]
+        optspeedups[i] = np.max(YTe[i,:])/YTe[i,ref]
+        GFlops[i] = YTe[i,ref]
+
+    np.set_printoptions(precision=2)
+    print("-----------------")
+    print("Average testing speedup : %f (Optimal : %f)"%(sp.stats.gmean(speedups), sp.stats.gmean(optspeedups)))
+    print("Average GFLOP/s : %f (Default %f, Optimal %f)"%(np.mean(np.multiply(GFlops,speedups)), np.mean(GFlops), np.mean(np.multiply(GFlops,optspeedups))))
+    print("Minimum speedup is %f wrt %i GFlops"%(np.min(speedups), GFlops[np.argmin(speedups)]))
+    print("Maximum speedup is %f wrt %i GFlops"%(np.max(speedups), GFlops[np.argmax(speedups)]))
+    print("--------")
+
+    print clf
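
train_model() fits a single linear regressor mapping problem features to the measured performance of every profile, then picks the argmax profile at prediction time. An editor's toy version with made-up numbers (only the shapes match the real pipeline):

    # Editor's sketch, not part of the patch: rows of X are feature vectors
    # (e.g. problem sizes), columns of Y are per-profile performance figures.
    import numpy as np
    from sklearn import preprocessing, linear_model

    X = np.array([[256.], [512.], [1024.], [2048.]])
    Y = np.array([[1.0, 2.0], [1.5, 2.5], [4.0, 3.0], [6.0, 3.5]])

    scaler = preprocessing.StandardScaler().fit(X)
    clf = linear_model.LinearRegression().fit(scaler.transform(X), Y)

    pred = clf.predict(scaler.transform([[1536.]]))   # predicted perf per profile
    print(np.argmax(pred))                            # index of the profile to use
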
diff --git a/autotune/python/optimize.py b/autotune/python/optimize.py
index a2439b28f..710165a42 100644
--- a/autotune/python/optimize.py
+++ b/autotune/python/optimize.py
@@ -21,8 +21,8 @@ from genetic import GeneticOperators
 #~ if operation == 'matrix-axpy': return [simd, pow2_2D, pow2_2D, pow2_2D, pow2_2D, fetch]
 #~ if operation == 'row-wise-reduction': return [simd, pow2_2D, pow2_2D, pow2_1D, fetch]
 #~ if operation == 'matrix-product': return [simd, pow2_2D, pow2_2D, pow2_2D, pow2_2D_unrolled, pow2_2D_unrolled, pow2_2D_unrolled, fetch, fetch, [0] + pow2_2D, [0] + pow2_2D]
-#~ 
-
+#~
+
 #~ def exhaustive(statement, context, TemplateType, build_template, parameter_names, all_parameters, compute_perf, perf_metric, out):
 #~     device = context.devices[0]
 #~     nvalid = 0
@@ -46,8 +46,8 @@ from genetic import GeneticOperators
 #~         pass
 #~     sys.stdout.write('\n')
 #~     sys.stdout.flush()
-#~ 
-
+#~
+
 def genetic(statement, context, TemplateType, build_template, parameter_names, compute_perf, perf_metric, out):
-    GA = GeneticOperators(context.devices[0], statement, parameter_names, TemplateType, build_template, out)
-    GA.optimize(maxtime='2m30s', maxgen=1000, compute_perf=compute_perf, perf_metric=perf_metric)
+    GA = GeneticOperators(context.devices[0], statement, parameter_names, TemplateType, build_template, out)
+    GA.optimize(maxtime='2m30s', maxgen=1000, compute_perf=compute_perf, perf_metric=perf_metric)
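
The '2m30s' budget that genetic() hands to optimize() is parsed with time.strptime, where 'm' and 's' in the format string are literal separators:

    # Editor's note: how the maxtime string becomes a wall-clock budget.
    import time

    t = time.strptime('2m30s', '%Mm%Ss')
    budget = t.tm_min * 60 + t.tm_sec   # 2*60 + 30 = 150 seconds of tuning
    print(budget)                       # 150
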
diff --git a/autotune/python/tools.py b/autotune/python/tools.py
index 14a1d9b5b..3136b4ed8 100644
--- a/autotune/python/tools.py
+++ b/autotune/python/tools.py
@@ -5,134 +5,136 @@ from pyviennacl.atidlas import StatementsTuple
 
 class PhysicalLimits:
     def __init__(self, dev):
-        self.compute_capability = pyopencl.characterize.nv_compute_capability(dev)
-        if self.compute_capability[0]==1:
-            if self.compute_capability[1]<=1:
-                self.warps_per_mp = 24
-                self.threads_per_mp = 768
-                self.num_32b_reg_per_mp = 8192
-                self.reg_alloc_unit_size = 256
-            else:
-                self.warps_per_mp = 32
-                self.threads_per_mp = 1024
-                self.num_32b_reg_per_mp = 16384
-                self.reg_alloc_unit_size = 512
-            self.threads_per_warp = 32
-            self.thread_blocks_per_mp = 8
-            self.reg_alloc_granularity = 'block'
-            self.reg_per_thread = 124
-            self.shared_mem_per_mp = 16384
-            self.shared_mem_alloc_unit_size = 512
-            self.warp_alloc_granularity = 2
-            self.max_thread_block_size = 512
-
-        elif self.compute_capability[0]==2:
-            self.threads_per_warp = 32
-            self.warps_per_mp = 48
-            self.threads_per_mp = 1536
-            self.thread_blocks_per_mp = 8
-            self.num_32b_reg_per_mp = 32768
-            self.reg_alloc_unit_size = 64
-            self.reg_alloc_granularity = 'warp'
-            self.reg_per_thread = 63
-            self.shared_mem_per_mp = 49152
-            self.shared_mem_alloc_unit_size = 128
-            self.warp_alloc_granularity = 2
-            self.max_thread_block_size = 1024
-
-        elif self.compute_capability[0]==3:
-            self.threads_per_warp = 32
-            self.warps_per_mp = 64
-            self.threads_per_mp = 2048
-            self.thread_blocks_per_mp = 16
-            self.num_32b_reg_per_mp = 65536
-            self.reg_alloc_unit_size = 256
-            self.reg_alloc_granularity = 'warp'
-            if(self.compute_capability[1]==5):
-                self.reg_per_thread = 255
-            else:
-                self.reg_per_thread = 63
-            self.shared_mem_per_mp = 49152
-            self.shared_mem_alloc_unit_size = 256
-            self.warp_alloc_granularity = 4
-            self.max_thread_block_size = 1024
-
-        else:
-            raise Exception('Compute capability not supported!')
+        self.compute_capability = pyopencl.characterize.nv_compute_capability(dev)
+        if self.compute_capability[0]==1:
+            if self.compute_capability[1]<=1:
+                self.warps_per_mp = 24
+                self.threads_per_mp = 768
+                self.num_32b_reg_per_mp = 8192
+                self.reg_alloc_unit_size = 256
+            else:
+                self.warps_per_mp = 32
+                self.threads_per_mp = 1024
+                self.num_32b_reg_per_mp = 16384
+                self.reg_alloc_unit_size = 512
+            self.threads_per_warp = 32
+            self.thread_blocks_per_mp = 8
+            self.reg_alloc_granularity = 'block'
+            self.reg_per_thread = 124
+            self.shared_mem_per_mp = 16384
+            self.shared_mem_alloc_unit_size = 512
+            self.warp_alloc_granularity = 2
+            self.max_thread_block_size = 512
+
+        elif self.compute_capability[0]==2:
+            self.threads_per_warp = 32
+            self.warps_per_mp = 48
+            self.threads_per_mp = 1536
+            self.thread_blocks_per_mp = 8
+            self.num_32b_reg_per_mp = 32768
+            self.reg_alloc_unit_size = 64
+            self.reg_alloc_granularity = 'warp'
+            self.reg_per_thread = 63
+            self.shared_mem_per_mp = 49152
+            self.shared_mem_alloc_unit_size = 128
+            self.warp_alloc_granularity = 2
+            self.max_thread_block_size = 1024
+
+        elif self.compute_capability[0]==3:
+            self.threads_per_warp = 32
+            self.warps_per_mp = 64
+            self.threads_per_mp = 2048
+            self.thread_blocks_per_mp = 16
+            self.num_32b_reg_per_mp = 65536
+            self.reg_alloc_unit_size = 256
+            self.reg_alloc_granularity = 'warp'
+            if self.compute_capability[1]==5:
+                self.reg_per_thread = 255
+            else:
+                self.reg_per_thread = 63
+            self.shared_mem_per_mp = 49152
+            self.shared_mem_alloc_unit_size = 256
+            self.warp_alloc_granularity = 4
+            self.max_thread_block_size = 1024
+
+        else:
+            raise Exception('Compute capability not supported!')
 
-def _int_floor(value, multiple_of=1):
-    """Round C{value} down to be a C{multiple_of} something."""
-    # Mimicks the Excel "floor" function (for code stolen from occupancy calculator)
-    from math import floor
-    return int(floor(value/multiple_of))*multiple_of
-
-def _int_ceiling(value, multiple_of=1):
-    """Round C{value} up to be a C{multiple_of} something."""
-    # Mimicks the Excel "floor" function (for code stolen from occupancy calculator)
-    from math import ceil
-    return int(ceil(value/multiple_of))*multiple_of
-
 class OccupancyRecord:
 
+    # The rounding helpers move from module scope into the class; they are made
+    # static here so init_nvidia can reach them through self.
+    @staticmethod
+    def _int_floor(value, multiple_of=1):
+        """Round C{value} down to be a C{multiple_of} something."""
+        # Mimics the Excel "floor" function (for code stolen from occupancy calculator)
+        from math import floor
+        return int(floor(value/multiple_of))*multiple_of
+
+    @staticmethod
+    def _int_ceiling(value, multiple_of=1):
+        """Round C{value} up to be a C{multiple_of} something."""
+        # Mimics the Excel "ceiling" function (for code stolen from occupancy calculator)
+        from math import ceil
+        return int(ceil(value/multiple_of))*multiple_of
+
+    def init_nvidia(self, dev, threads, shared_mem, registers):
+        physical_limits = PhysicalLimits(dev)
+        limits = []
+        allocated_warps = max(1, self._int_ceiling(threads/physical_limits.threads_per_warp))
+        max_warps_per_mp = physical_limits.warps_per_mp
+        limits.append((min(physical_limits.thread_blocks_per_mp, self._int_floor(max_warps_per_mp/allocated_warps)), 'warps'))
+
+        if registers>0:
+            if registers > physical_limits.reg_per_thread:
+                limits.append((0, 'registers'))
+            else:
+                allocated_regs = {'warp': allocated_warps,
+                                  'block': self._int_ceiling(self._int_ceiling(allocated_warps, physical_limits.warp_alloc_granularity)*registers*physical_limits.threads_per_warp, allocated_warps)}[physical_limits.reg_alloc_granularity]
+                max_reg_per_mp = {'warp': self._int_floor(physical_limits.num_32b_reg_per_mp/self._int_ceiling(registers*physical_limits.threads_per_warp, physical_limits.reg_alloc_unit_size), physical_limits.warp_alloc_granularity),
+                                  'block': physical_limits.num_32b_reg_per_mp}[physical_limits.reg_alloc_granularity]
+                limits.append((self._int_floor(max_reg_per_mp/allocated_regs), 'registers'))
+
+        if shared_mem>0:
+            allocated_shared_mem = self._int_ceiling(shared_mem, physical_limits.shared_mem_alloc_unit_size)
+            max_shared_mem_per_mp = physical_limits.shared_mem_per_mp
+            limits.append((self._int_floor(max_shared_mem_per_mp/allocated_shared_mem), 'shared memory'))
+
+        self.limit, self.limited_by = min(limits)
+        self.warps_per_mp = self.limit*allocated_warps
+        self.occupancy = 100*self.warps_per_mp/physical_limits.warps_per_mp
+
     def __init__(self, dev, threads, shared_mem=0, registers=0):
-        physical_limits = PhysicalLimits(dev)
-        limits = [];
-        allocated_warps = max(1,_int_ceiling(threads/physical_limits.threads_per_warp))
-        max_warps_per_mp = physical_limits.warps_per_mp;
-        limits.append((min(physical_limits.thread_blocks_per_mp, _int_floor(max_warps_per_mp/allocated_warps)), 'warps'))
-
-        if registers>0:
-            if registers > physical_limits.reg_per_thread:
-                limits.append((0, 'registers'))
-            else:
-                allocated_regs = {'warp': allocated_warps,
-                                  'block': _int_ceiling(_int_ceiling(allocated_warps, physical_limits.warp_alloc_granularity)*registers*physical_limits.threads_per_warp,allocated_warps)}[physical_limits.reg_alloc_granularity]
-                max_reg_per_mp = {'warp': _int_floor(physical_limits.num_32b_reg_per_mp/_int_ceiling(registers*physical_limits.threads_per_warp, physical_limits.reg_alloc_unit_size), physical_limits.warp_alloc_granularity),
-                                  'block':physical_limits.num_32b_reg_per_mp}[physical_limits.reg_alloc_granularity]
-                limits.append((_int_floor(max_reg_per_mp/allocated_regs), 'registers'))
-
-        if shared_mem>0:
-            allocated_shared_mem = _int_ceiling(shared_mem, physical_limits.shared_mem_alloc_unit_size)
-            max_shared_mem_per_mp = physical_limits.shared_mem_per_mp
-            limits.append((_int_floor(max_shared_mem_per_mp/allocated_shared_mem), 'shared memory'))
-
-        self.limit, self.limited_by = min(limits)
-        self.warps_per_mp = self.limit*allocated_warps
-        self.occupancy = 100*self.warps_per_mp/physical_limits.warps_per_mp
-
+        # init_nvidia is a bound method: passing self explicitly (as the original
+        # patch did) would raise a TypeError on the first call
+        self.init_nvidia(dev, threads, shared_mem, registers)
+
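
A worked instance of the arithmetic in init_nvidia(), as an editor's illustration (not code from the patch), assuming a compute-capability 2.x device, i.e. per PhysicalLimits above: 32 threads/warp, 48 warps/MP, 8 blocks/MP, 48 KB shared memory/MP, 128-byte shared-memory allocation units:

    # Editor's sketch: occupancy limited by warps vs. shared memory.
    threads, shared_mem = 256, 12288

    allocated_warps = max(1, (threads + 31) // 32)           # 8 warps per block
    warp_limit = min(8, 48 // allocated_warps)               # 6 blocks/MP from warps
    smem_limit = 49152 // ((shared_mem + 127) // 128 * 128)  # 4 blocks/MP from lmem
    limit = min(warp_limit, smem_limit)                      # shared memory binds first
    occupancy = 100 * limit * allocated_warps // 48          # 66%

Profiles whose occupancy falls below 15% are rejected outright by skip() and benchmark() below.
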
 def skip(template, statement, device):
-    statements = StatementsTuple(statement)
-    registers_usage = template.registers_usage(statements)/4
-    lmem_usage = template.lmem_usage(statements)
-    local_size = template.parameters.local_size_0*template.parameters.local_size_1
-    occupancy_record = OccupancyRecord(device, local_size, lmem_usage, registers_usage)
-    if template.check(statement) or occupancy_record.occupancy < 15:
+    statements = StatementsTuple(statement)
+    registers_usage = template.registers_usage(statements)/4
+    lmem_usage = template.lmem_usage(statements)
+    local_size = template.parameters.local_size_0*template.parameters.local_size_1
+    occupancy_record = OccupancyRecord(device, local_size, lmem_usage, registers_usage)
+    if template.check(statement) or occupancy_record.occupancy < 15:
         return True
-    return False
-
+    return False
+
 def benchmark(template, statement, device):
-    statements = StatementsTuple(statement)
-    registers_usage = template.registers_usage(statements)/4
-    lmem_usage = template.lmem_usage(statements)
-    local_size = template.parameters.local_size_0*template.parameters.local_size_1
-    occupancy_record = OccupancyRecord(device, local_size, lmem_usage, registers_usage)
-    if occupancy_record.occupancy < 15 :
+    statements = StatementsTuple(statement)
+    registers_usage = template.registers_usage(statements)/4
+    lmem_usage = template.lmem_usage(statements)
+    local_size = template.parameters.local_size_0*template.parameters.local_size_1
+    occupancy_record = OccupancyRecord(device, local_size, lmem_usage, registers_usage)
+    if occupancy_record.occupancy < 15:
         raise ValueError("Template has too low occupancy")
-    else:
+    else:
         #~ try:
         template.execute(statement, True)
         statement.result.context.finish_all_queues()
         N = 0
         current_time = 0
         while current_time < 1e-2:
-            time_before = time.time()
-            template.execute(statement,False)
-            statement.result.context.finish_all_queues()
-            current_time += time.time() - time_before
-            N+=1
+            time_before = time.time()
+            template.execute(statement, False)
+            statement.result.context.finish_all_queues()
+            current_time += time.time() - time_before
+            N += 1
         return current_time/N
         #~ except:
-        #~ raise ValueError("Invalid template")
+        #~     raise ValueError("Invalid template")
diff --git a/autotune/python/utils.py b/autotune/python/utils.py
index ae573a974..f8871eda9 100644
--- a/autotune/python/utils.py
+++ b/autotune/python/utils.py
@@ -5,18 +5,18 @@ all_devices = [d for platform in cl.get_platforms() for d in platform.get_devices()]
 
 DEVICE_TYPE_PREFIX = { cl.device_type.GPU: 'gpu',
                        cl.device_type.CPU: 'cpu',
-                       cl.device_type.ACCELERATOR: 'accelerator' }
+                       cl.device_type.ACCELERATOR: 'accelerator' }
-
+
 DEVICE_TYPE_CL_NAME = { cl.device_type.GPU: 'CL_DEVICE_TYPE_GPU',
                         cl.device_type.CPU: 'CL_DEVICE_TYPE_CPU',
-                        cl.device_type.ACCELERATOR: 'CL_DEVICE_TYPE_ACCELERATOR' }
+                        cl.device_type.ACCELERATOR: 'CL_DEVICE_TYPE_ACCELERATOR' }
-
+
 VENDOR_PREFIX = { vcl.opencl.VendorId.beignet_id: 'beignet',
                   vcl.opencl.VendorId.nvidia_id: 'nvidia',
                   vcl.opencl.VendorId.amd_id: 'amd',
-                  vcl.opencl.VendorId.intel_id: 'intel' }
+                  vcl.opencl.VendorId.intel_id: 'intel' }
 
 DEVICES_PRESETS = {'all': all_devices,
@@ -26,8 +26,8 @@ DEVICES_PRESETS = {'all': all_devices,
 }
-
+
 def sanitize_string(string, keep_chars = ['_']):
-    string = string.replace(' ', '_').lower()
-    string = "".join(c for c in string if c.isalnum() or c in keep_chars).rstrip()
-    return string
+    string = string.replace(' ', '_').lower()
+    string = "".join(c for c in string if c.isalnum() or c in keep_chars).rstrip()
+    return string
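
benchmark() amortizes timer resolution by re-running the kernel until at least 10 ms of wall time has accumulated, then reporting the mean per run. The same strategy in isolation, as an editor's sketch (time_execution and run are hypothetical helpers, not part of the patch):

    # Editor's sketch of the timing loop in benchmark().
    import time

    def time_execution(run, min_time=1e-2):
        run()                      # warm-up; the first call also triggers compilation
        n, total = 0, 0.0
        while total < min_time:
            t0 = time.time()
            run()
            total += time.time() - t0
            n += 1
        return total / n           # mean seconds per execution
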
diff --git a/autotune/python/vclio.py b/autotune/python/vclio.py
index d1cddca9b..088582e57 100644
--- a/autotune/python/vclio.py
+++ b/autotune/python/vclio.py
@@ -3,114 +3,114 @@ import os
 import utils
 
 def append_include(data, path):
-    include_name = '#include "' + path +'"\n'
-    already_included = data.find(include_name)
-    if already_included == -1:
-        insert_index = data.index('\n', data.index('#define')) + 1
-        return data[:insert_index] + '\n' + include_name + data[insert_index:]
-    return data
-
+    include_name = '#include "' + path + '"\n'
+    already_included = data.find(include_name)
+    if already_included == -1:
+        insert_index = data.index('\n', data.index('#define')) + 1
+        return data[:insert_index] + '\n' + include_name + data[insert_index:]
+    return data
+
 def generate_viennacl_headers(viennacl_root, device, datatype, operation, additional_parameters, parameters):
-    builtin_database_dir = os.path.join(viennacl_root, "device_specific", "builtin_database")
-    if not os.path.isdir(builtin_database_dir):
-        raise EnvironmentError('ViennaCL root path is incorrect. Cannot access ' + builtin_database_dir + '!\n'
-                               'Your version of ViennaCL may be too old and/or corrupted.')
-
-    function_name_dict = { vcl.float32: 'add_4B',
-                           vcl.float64: 'add_8B' }
-
-    additional_parameters_dict = {'N': "char_to_type<'N'>",
-                                  'T': "char_to_type<'T'>"}
-
-    #Create the device-specific headers
-    cpp_device_name = utils.sanitize_string(device.name)
-    function_name = function_name_dict[datatype]
-    operation = operation.replace('-','_')
-
-    cpp_class_name = operation + '_template'
-    header_name = cpp_device_name + ".hpp"
-    function_declaration = 'inline void ' + function_name + '(' + ', '.join(['database_type<' + cpp_class_name + '::parameters_type> & db'] + \
-                           [additional_parameters_dict[x] for x in additional_parameters]) + ')'
-
-    device_type_prefix = utils.DEVICE_TYPE_PREFIX[device.type]
-    vendor_prefix = utils.VENDOR_PREFIX[device.vendor_id]
-    architecture_family = vcl.opencl.architecture_family(device.vendor_id, device.name)
-
-    header_hierarchy = ["devices", device_type_prefix, vendor_prefix, architecture_family]
-    header_directory = os.path.join(builtin_database_dir, *header_hierarchy)
-    header_path = os.path.join(header_directory, header_name)
-
-    if not os.path.exists(header_directory):
-        os.makedirs(header_directory)
-
-    if os.path.exists(header_path):
-        with open (header_path, "r") as myfile:
-            data=myfile.read()
-    else:
-        data = ''
-
-    if not data:
-        ifndef_suffix = ('_'.join(header_hierarchy) + '_hpp_').upper()
-        data = ('#ifndef VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_' + ifndef_suffix + '\n'
-                '#define VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_' + ifndef_suffix + '\n'
-                '\n'
-                '#include "viennacl/device_specific/forwards.h"\n'
-                '#include "viennacl/device_specific/builtin_database/common.hpp"\n'
-                '\n'
-                'namespace viennacl{\n'
-                'namespace device_specific{\n'
-                'namespace builtin_database{\n'
-                'namespace devices{\n'
-                'namespace ' + device_type_prefix + '{\n'
-                'namespace ' + vendor_prefix + '{\n'
-                'namespace ' + architecture_family + '{\n'
-                'namespace ' + cpp_device_name + '{\n'
-                '\n'
-                '}\n'
-                '}\n'
-                '}\n'
-                '}\n'
-                '}\n'
-                '}\n'
-                '}\n'
-                '}\n'
-                '#endif\n'
-                '')
-
-    data = append_include(data, 'viennacl/device_specific/templates/' + cpp_class_name + '.hpp')
-
-    add_to_database_arguments = [vendor_prefix + '_id', utils.DEVICE_TYPE_CL_NAME[device.type], 'ocl::'+architecture_family,
-                                 '"' + device.name + '"', cpp_class_name + '::parameters' + str(parameters)]
-    core = ' db.' + function_name + '(' + ', '.join(add_to_database_arguments) + ');'
-
-    already_declared = data.find(function_declaration)
-    if already_declared==-1:
-        substr = 'namespace ' + cpp_device_name + '{\n'
-        insert_index = data.index(substr) + len(substr)
-        data = data[:insert_index] + '\n' + function_declaration + '\n{\n' + core + '\n}\n' + data[insert_index:]
-    else:
-        i1 = data.find('{', already_declared)
-        if data[i1-1]=='\n':
-            i1 = i1 - 1
-        i2 = data.find('}', already_declared) + 1
-        data = data[:i1] + '\n{\n' + core + '\n}' + data[i2:]
-
-    #Write the header file
-    with open(header_path, "w+") as myfile:
-        myfile.write(data)
-
-    #Updates the global ViennaCL headers
-    with open(os.path.join(builtin_database_dir, operation + '.hpp'), 'r+') as operation_header:
-        data = operation_header.read()
-        data = append_include(data, os.path.relpath(header_path, os.path.join(viennacl_root, os.pardir)))
-
-        scope_name = '_'.join(('init', operation) + additional_parameters)
-        scope = data.index(scope_name)
-        function_call = ' ' + '::'.join(header_hierarchy + [cpp_device_name, function_name]) + '(' + ', '.join(['result'] + [additional_parameters_dict[k] + '()' for k in additional_parameters]) + ')'
-        if function_call not in data:
-            insert_index = data.rindex('\n', 0, data.index('return result', scope))
-            data = data[:insert_index] + function_call + ';\n' + data[insert_index:]
-
-        operation_header.seek(0)
-        operation_header.truncate()
-        operation_header.write(data)
+    builtin_database_dir = os.path.join(viennacl_root, "device_specific", "builtin_database")
+    if not os.path.isdir(builtin_database_dir):
+        raise EnvironmentError('ViennaCL root path is incorrect. Cannot access ' + builtin_database_dir + '!\n'
+                               'Your version of ViennaCL may be too old and/or corrupted.')
+
+    function_name_dict = { vcl.float32: 'add_4B',
+                           vcl.float64: 'add_8B' }
+
+    additional_parameters_dict = {'N': "char_to_type<'N'>",
+                                  'T': "char_to_type<'T'>"}
+
+    #Create the device-specific headers
+    cpp_device_name = utils.sanitize_string(device.name)
+    function_name = function_name_dict[datatype]
+    operation = operation.replace('-', '_')
+
+    cpp_class_name = operation + '_template'
+    header_name = cpp_device_name + ".hpp"
+    function_declaration = 'inline void ' + function_name + '(' + ', '.join(['database_type<' + cpp_class_name + '::parameters_type> & db'] + \
+                           [additional_parameters_dict[x] for x in additional_parameters]) + ')'
+
+    device_type_prefix = utils.DEVICE_TYPE_PREFIX[device.type]
+    vendor_prefix = utils.VENDOR_PREFIX[device.vendor_id]
+    architecture_family = vcl.opencl.architecture_family(device.vendor_id, device.name)
+
+    header_hierarchy = ["devices", device_type_prefix, vendor_prefix, architecture_family]
+    header_directory = os.path.join(builtin_database_dir, *header_hierarchy)
+    header_path = os.path.join(header_directory, header_name)
+
+    if not os.path.exists(header_directory):
+        os.makedirs(header_directory)
+
+    if os.path.exists(header_path):
+        with open(header_path, "r") as myfile:
+            data = myfile.read()
+    else:
+        data = ''
+
+    if not data:
+        ifndef_suffix = ('_'.join(header_hierarchy) + '_hpp_').upper()
+        data = ('#ifndef VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_' + ifndef_suffix + '\n'
+                '#define VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_' + ifndef_suffix + '\n'
+                '\n'
+                '#include "viennacl/device_specific/forwards.h"\n'
+                '#include "viennacl/device_specific/builtin_database/common.hpp"\n'
+                '\n'
+                'namespace viennacl{\n'
+                'namespace device_specific{\n'
+                'namespace builtin_database{\n'
+                'namespace devices{\n'
+                'namespace ' + device_type_prefix + '{\n'
+                'namespace ' + vendor_prefix + '{\n'
+                'namespace ' + architecture_family + '{\n'
+                'namespace ' + cpp_device_name + '{\n'
+                '\n'
+                '}\n'
+                '}\n'
+                '}\n'
+                '}\n'
+                '}\n'
+                '}\n'
+                '}\n'
+                '}\n'
+                '#endif\n'
+                '')
+
+    data = append_include(data, 'viennacl/device_specific/templates/' + cpp_class_name + '.hpp')
+
+    add_to_database_arguments = [vendor_prefix + '_id', utils.DEVICE_TYPE_CL_NAME[device.type], 'ocl::' + architecture_family,
+                                 '"' + device.name + '"', cpp_class_name + '::parameters' + str(parameters)]
+    core = ' db.' + function_name + '(' + ', '.join(add_to_database_arguments) + ');'
+
+    already_declared = data.find(function_declaration)
+    if already_declared==-1:
+        substr = 'namespace ' + cpp_device_name + '{\n'
+        insert_index = data.index(substr) + len(substr)
+        data = data[:insert_index] + '\n' + function_declaration + '\n{\n' + core + '\n}\n' + data[insert_index:]
+    else:
+        i1 = data.find('{', already_declared)
+        if data[i1-1]=='\n':
+            i1 = i1 - 1
+        i2 = data.find('}', already_declared) + 1
+        data = data[:i1] + '\n{\n' + core + '\n}' + data[i2:]
+
+    #Write the header file
+    with open(header_path, "w+") as myfile:
+        myfile.write(data)
+
+    #Updates the global ViennaCL headers
+    with open(os.path.join(builtin_database_dir, operation + '.hpp'), 'r+') as operation_header:
+        data = operation_header.read()
+        data = append_include(data, os.path.relpath(header_path, os.path.join(viennacl_root, os.pardir)))
+
+        scope_name = '_'.join(('init', operation) + additional_parameters)
+        scope = data.index(scope_name)
+        function_call = ' ' + '::'.join(header_hierarchy + [cpp_device_name, function_name]) + '(' + ', '.join(['result'] + [additional_parameters_dict[k] + '()' for k in additional_parameters]) + ')'
+        if function_call not in data:
+            insert_index = data.rindex('\n', 0, data.index('return result', scope))
+            data = data[:insert_index] + function_call + ';\n' + data[insert_index:]
+
+        operation_header.seek(0)
+        operation_header.truncate()
+        operation_header.write(data)
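
To make the string splicing in generate_viennacl_headers() concrete, here is what the include guard and the namespace-qualified call look like for a hypothetical NVIDIA Fermi GPU; all of the values below are illustrative, not taken from a real device query:

    # Editor's sketch, not part of the patch.
    header_hierarchy = ['devices', 'gpu', 'nvidia', 'fermi']
    cpp_device_name = 'geforce_gtx_470'   # utils.sanitize_string('GeForce GTX 470')

    ifndef_suffix = ('_'.join(header_hierarchy) + '_hpp_').upper()
    guard = 'VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_' + ifndef_suffix
    qualified = '::'.join(header_hierarchy + [cpp_device_name, 'add_4B'])

    print(guard)      # VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_NVIDIA_FERMI_HPP_
    print(qualified)  # devices::gpu::nvidia::fermi::geforce_gtx_470::add_4B
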