diff --git a/autotune/python/autotune.py b/autotune/python/autotune.py
index 31f270e29..6931384e1 100644
--- a/autotune/python/autotune.py
+++ b/autotune/python/autotune.py
@@ -27,120 +27,120 @@ TYPES = {
 'vector-axpy': {'template':vcl.atidlas.VectorAxpyTemplate, 'parameter-names':['simd-width', 'local-size-0', 'num-groups-0', 'fetch'], 'perf-index':lambda x: 3*x[0]*x[1][0]/x[2]*1e-9, 'perf-measure':'GB/s'},
-
+
 'matrix-axpy': {'template':vcl.atidlas.MatrixAxpyTemplate, 'parameter-names':['simd-width', 'local-size-0', 'local-size-1', 'num-groups-0', 'num-groups-1', 'fetch'], 'perf-index':lambda x: 3*x[0]*x[1][0]*x[1][1]/x[2]*1e-9, 'perf-measure':'GB/s'},
-
+
 'reduction': {'template':vcl.atidlas.ReductionTemplate, 'parameter-names':['simd-width', 'local-size-0', 'num-groups-0', 'fetch'], 'perf-index':lambda x: 2*x[0]*x[1][0]*x[1][1]/x[2]*1e-9, 'perf-measure':'GB/s'},
-
+
 'row-wise-reduction': {'template':vcl.atidlas.RowWiseReductionTemplate, 'parameter-names':['simd-width', 'local-size-0', 'local-size-1', 'num-groups-0', 'fetch'], 'perf-index':lambda x: x[0]*x[1][0]*x[1][1]/x[2]*1e-9, 'perf-measure':'GB/s'},
-
+
 'matrix-product': {'template':vcl.atidlas.MatrixProductTemplate, 'parameter-names':['simd-width', 'local-size-0', 'kL', 'local-size-1', 'mS', 'kS', 'nS', 'A-fetch-policy', 'B-fetch-policy', 'local-fetch-size-0', 'local-fetch-size-1'], 'perf-index': lambda x: 2*x[1][0]*x[1][1]*x[1][2]/x[2]*1e-9, 'perf-measure': 'GFLOP/s'} }
-
-def do_tuning(config_fname, spec_fname, viennacl_root):
-    config = ConfigObj(config_fname, configspec=spec_fname)
-    map_to_list = lambda T: list(map(T[0], T[1] if isinstance(T[1], list) else [T[1]]))
-    for operation in ['vector-axpy', 'matrix-axpy', 'row-wise-reduction', 'matrix-product']:
-        if operation in config:
-            p = config[operation]
-            confdevices = p['devices']
-            devices = utils.DEVICES_PRESETS[confdevices] if confdevices in utils.DEVICES_PRESETS else [utils.all_devices[int(i)] for i in confdevices]
-            precisions = map_to_list((str, p['precision']))
-            datatypes = [DATATYPES[k] for k in precisions]
-            #Iterate through the datatypes and the devices
-            for datatype, device in itertools.product(datatypes, devices):
-                ctx = cl.Context([device])
-                ctx = vcl.backend.Context(ctx)
-                device = ctx.current_device
-                #Check data-type
-                if datatype is vcl.float64 and not device.double_fp_config:
-                    sys.stderr.write('Warning : The device ' + device.name + ' does not support double precision! Skipping ...')
-                    continue
-                #Helper
-                def execute(statement, other_params, sizes, fname = os.devnull):
-                    print('-----')
-                    print(' '.join(map(str, ("Now tuning:", datatype.__name__, '-', operation, '-'.join(other_params), '[' + device.name, '(' + device.platform.name + ')] for sizes', sizes))))
-                    with open(fname, "w+") as archive:
-                        return optimize.genetic(statement, ctx, TYPES[operation]['template'], lambda p: TYPES[operation]['template'](p, *other_params),
-                                                TYPES[operation]['parameter-names'], lambda t: TYPES[operation]['perf-index']([datatype().itemsize, sizes, t]), TYPES[operation]['perf-measure'], archive)
-                s = map_to_list((int, p['size']))
-                #Vector AXPY
-                if operation=='vector-axpy':
-                    x = vcl.Vector(s[0], context=ctx, dtype=datatype)
-                    y = vcl.Vector(s[0], context=ctx, dtype=datatype)
-                    execute(vcl.ElementProd(vcl.exp(x + y),vcl.cos(x + y)), ())
-                #Matrix AXPY
-                if operation=='matrix-axpy':
-                    A = vcl.Matrix(s, context=ctx, dtype=datatype)
-                    B = vcl.Matrix(s, context=ctx, dtype=datatype)
-                    execute(A+B, ())
-                #Row-wise reduction
-                if operation=='row-wise-reduction':
-                    layouts = map_to_list((str,p['layout']))
-                    if 'all' in layouts:
-                        layouts = ['N', 'T']
-                    for A_trans in layouts:
-                        A = vcl.Matrix(s if A_trans=='N' else s[::-1], context=ctx, dtype=datatype, layout=vcl.COL_MAJOR)
-                        x = vcl.Vector(s[1] if A_trans=='N' else s[0], context=ctx, dtype=datatype)
-                        LHS = A if A_trans=='N' else A.T
-                        execute(LHS*x, ())
-                #Matrix Product
-                if operation=='matrix-product':
-                    layouts = map_to_list((str,p['layout']))
-                    if 'all' in layouts:
-                        layouts = ['NN', 'NT', 'TN', 'TT']
-                    for layout in layouts:
-                        def execution_handler(sizes, fname, parameters=None):
-                            A_trans = layout[0]
-                            B_trans = layout[1]
-                            A = vcl.Matrix((sizes[0], sizes[1]) if A_trans=='N' else (sizes[1],sizes[0]), context=ctx, dtype=datatype, layout=vcl.COL_MAJOR);
-                            B = vcl.Matrix((sizes[1], sizes[2]) if B_trans=='N' else (sizes[2],sizes[1]), context=ctx, dtype=datatype, layout=vcl.COL_MAJOR);
-                            LHS = A if A_trans=='N' else A.T
-                            RHS = B if B_trans=='N' else B.T
-                            alpha = vcl.HostScalar(1.0, context=ctx, dtype = datatype)
-                            beta = vcl.HostScalar(1.0, context=ctx, dtype = datatype)
-                            C = vcl.Matrix((sizes[0], sizes[2]), context=ctx, dtype = datatype, layout=vcl.COL_MAJOR)
-                            statement = vcl.Statement(vcl.Assign(C,LHS*RHS*alpha + C*beta))
-                            if parameters:
-                                TemplateType = TYPES[operation]['template']
-                                return tools.benchmark(TemplateType(TemplateType.Parameters(*parameters),A_trans,B_trans), statement, device)
-                            else:
-                                execute(statement,(A_trans, B_trans), sizes, fname)
-                        X, Y, profiles = generate_dataset(TYPES[operation]['template'], execution_handler)
-                        train_model(X, Y, profiles)
-
-
+
+def do_tuning(config_fname, spec_fname, viennacl_root):
+    config = ConfigObj(config_fname, configspec=spec_fname)
+    map_to_list = lambda T: list(map(T[0], T[1] if isinstance(T[1], list) else [T[1]]))
+    for operation in ['vector-axpy', 'matrix-axpy', 'row-wise-reduction', 'matrix-product']:
+        if operation in config:
+            p = config[operation]
+            confdevices = p['devices']
+            devices = utils.DEVICES_PRESETS[confdevices] if confdevices in utils.DEVICES_PRESETS else [utils.all_devices[int(i)] for i in confdevices]
+            precisions = map_to_list((str, p['precision']))
+            datatypes = [DATATYPES[k] for k in precisions]
+            #Iterate through the datatypes and the devices
+            for datatype, device in itertools.product(datatypes, devices):
+                ctx = cl.Context([device])
+                ctx = vcl.backend.Context(ctx)
+                device = ctx.current_device
+                #Check data-type
+                if datatype is vcl.float64 and not device.double_fp_config:
+                    sys.stderr.write('Warning : The device ' + device.name + ' does not support double precision! Skipping ...')
+                    continue
+                #Helper
+                def execute(statement, other_params, sizes, fname = os.devnull):
+                    print('-----')
+                    print(' '.join(map(str, ("Now tuning:", datatype.__name__, '-', operation, '-'.join(other_params), '[' + device.name, '(' + device.platform.name + ')] for sizes', sizes))))
+                    with open(fname, "w+") as archive:
+                        return optimize.genetic(statement, ctx, TYPES[operation]['template'], lambda p: TYPES[operation]['template'](p, *other_params),
+                                                TYPES[operation]['parameter-names'], lambda t: TYPES[operation]['perf-index']([datatype().itemsize, sizes, t]), TYPES[operation]['perf-measure'], archive)
+                s = map_to_list((int, p['size']))
+                #Vector AXPY
+                if operation=='vector-axpy':
+                    x = vcl.Vector(s[0], context=ctx, dtype=datatype)
+                    y = vcl.Vector(s[0], context=ctx, dtype=datatype)
+                    execute(vcl.ElementProd(vcl.exp(x + y),vcl.cos(x + y)), ())
+                #Matrix AXPY
+                if operation=='matrix-axpy':
+                    A = vcl.Matrix(s, context=ctx, dtype=datatype)
+                    B = vcl.Matrix(s, context=ctx, dtype=datatype)
+                    execute(A+B, ())
+                #Row-wise reduction
+                if operation=='row-wise-reduction':
+                    layouts = map_to_list((str,p['layout']))
+                    if 'all' in layouts:
+                        layouts = ['N', 'T']
+                    for A_trans in layouts:
+                        A = vcl.Matrix(s if A_trans=='N' else s[::-1], context=ctx, dtype=datatype, layout=vcl.COL_MAJOR)
+                        x = vcl.Vector(s[1] if A_trans=='N' else s[0], context=ctx, dtype=datatype)
+                        LHS = A if A_trans=='N' else A.T
+                        execute(LHS*x, ())
+                #Matrix Product
+                if operation=='matrix-product':
+                    layouts = map_to_list((str,p['layout']))
+                    if 'all' in layouts:
+                        layouts = ['NN', 'NT', 'TN', 'TT']
+                    for layout in layouts:
+                        def execution_handler(sizes, fname, parameters=None):
+                            A_trans = layout[0]
+                            B_trans = layout[1]
+                            A = vcl.Matrix((sizes[0], sizes[1]) if A_trans=='N' else (sizes[1],sizes[0]), context=ctx, dtype=datatype, layout=vcl.COL_MAJOR);
+                            B = vcl.Matrix((sizes[1], sizes[2]) if B_trans=='N' else (sizes[2],sizes[1]), context=ctx, dtype=datatype, layout=vcl.COL_MAJOR);
+                            LHS = A if A_trans=='N' else A.T
+                            RHS = B if B_trans=='N' else B.T
+                            alpha = vcl.HostScalar(1.0, context=ctx, dtype = datatype)
+                            beta = vcl.HostScalar(1.0, context=ctx, dtype = datatype)
+                            C = vcl.Matrix((sizes[0], sizes[2]), context=ctx, dtype = datatype, layout=vcl.COL_MAJOR)
+                            statement = vcl.Statement(vcl.Assign(C,LHS*RHS*alpha + C*beta))
+                            if parameters:
+                                TemplateType = TYPES[operation]['template']
+                                return tools.benchmark(TemplateType(TemplateType.Parameters(*parameters),A_trans,B_trans), statement, device)
+                            else:
+                                execute(statement,(A_trans, B_trans), sizes, fname)
+                        X, Y, profiles = generate_dataset(TYPES[operation]['template'], execution_handler)
+                        train_model(X, Y, profiles)
+
+
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser();
-    subparsers = parser.add_subparsers(dest='action')
-    print_devices_parser = subparsers.add_parser('list-devices', help='list the devices available')
-    tune_parser = subparsers.add_parser('tune', help='tune using a specific configuration file')
-    tune_parser.add_argument("--config", default="config.ini", required=False, type=str)
-    tune_parser.add_argument("--viennacl-root", default='', required=False, type=str)
-    args = parser.parse_args()
-
-    if(args.action=='list-devices'):
-        print("----------------")
-        print("Devices available:")
-        print("----------------")
-        devices = [d for platform in cl.get_platforms() for d in platform.get_devices()]
-        for (i, d) in enumerate(devices):
-            print('Device', i, ':', utils.DEVICE_TYPE_PREFIX[d.type].upper() + ':', d.name, 'on', d.platform.name)
-        print("----------------")
-    else:
-        print("------")
-        print("Auto-tuning")
-        print("------")
-        do_tuning(args.config, 'config_spec.ini', args.viennacl_root)
+    parser = argparse.ArgumentParser();
+    subparsers = parser.add_subparsers(dest='action')
+    print_devices_parser = subparsers.add_parser('list-devices', help='list the devices available')
+    tune_parser = subparsers.add_parser('tune', help='tune using a specific configuration file')
+    tune_parser.add_argument("--config", default="config.ini", required=False, type=str)
+    tune_parser.add_argument("--viennacl-root", default='', required=False, type=str)
+    args = parser.parse_args()
+
+    if(args.action=='list-devices'):
+        print("----------------")
+        print("Devices available:")
+        print("----------------")
+        devices = [d for platform in cl.get_platforms() for d in platform.get_devices()]
+        for (i, d) in enumerate(devices):
+            print('Device', i, ':', utils.DEVICE_TYPE_PREFIX[d.type].upper() + ':', d.name, 'on', d.platform.name)
+        print("----------------")
+    else:
+        print("------")
+        print("Auto-tuning")
+        print("------")
+        do_tuning(args.config, 'config_spec.ini', args.viennacl_root)
diff --git a/autotune/python/dataset.py b/autotune/python/dataset.py
index c4d6da4f5..c636be5e9 100644
--- a/autotune/python/dataset.py
+++ b/autotune/python/dataset.py
@@ -7,95 +7,95 @@ from sklearn.neighbors.kde import KernelDensity;
 from pyviennacl.atidlas import FetchingPolicy
 
 def decode(y):
-    fetch = [FetchingPolicy.FETCH_FROM_LOCAL, FetchingPolicy.FETCH_FROM_GLOBAL_CONTIGUOUS, FetchingPolicy.FETCH_FROM_GLOBAL_STRIDED]
-    y[7] = fetch[y[7]]
-    y[8] = fetch[y[8]]
-    return y
-
+    fetch = [FetchingPolicy.FETCH_FROM_LOCAL, FetchingPolicy.FETCH_FROM_GLOBAL_CONTIGUOUS, FetchingPolicy.FETCH_FROM_GLOBAL_STRIDED]
+    y[7] = fetch[y[7]]
+    y[8] = fetch[y[8]]
+    return y
+
 def generate_dataset(TemplateType, execution_handler):
-    I = 0
-    step = 64;
-    max_size = 4000;
-
-    #Retrieves the existing data
-    print "Retrieving data..."
-    path = "./data"
-    files = os.listdir(path)
-    X = np.empty((len(files),3))
-    t = np.empty(len(files))
-    profiles = []
-    nonemptyfiles = []
-    for i,fname in enumerate(files):
-        if os.path.getsize(os.path.join(path,fname))>0:
-            nonemptyfiles.append(fname)
-    files = nonemptyfiles
-
-    for i,fname in enumerate(files):
-        MNK = re.search(r"([0-9]+)-([0-9]+)-([0-9]+).csv", fname)
-        fl = open(os.path.join(path,fname),"rb")
-        A = np.loadtxt(fl,delimiter=',')
-        x = np.array([MNK.group(1), MNK.group(2), MNK.group(3)]).astype(float)
-        y = tuple(A[np.argmin(A[:,0]),1:])
-        if y not in profiles:
-            profiles.append(y)
-        idx = profiles.index(y)
-        X[i,:] = x
-        t[i] = idx
+    I = 0
+    step = 64;
+    max_size = 4000;
-    #Generates new data
-    print "Generating new data..."
-    kdes = [KernelDensity(kernel='gaussian', bandwidth=2*step).fit(X[t==i,:]) for i in range(int(max(t))+1)] if files else [];
-    X.resize((len(files)+I, 3), refcheck=False);
-    t.resize(len(files)+I, refcheck=False);
-
-    max_square = max_size/step
-    for i in range(I):
-        n_per_label = np.bincount(t[0:i+1].astype(int));
-        Xtuples = [tuple(x) for x in X];
-        r = random.random();
-        while(True):
-            if(len(kdes)==0 or r<=1.0/len(kdes)):
-                x = np.array([step*random.randint(1,40), step*random.randint(1,40), step*random.randint(1,40)]);
-            else:
-                probs = (1.0/n_per_label)
-                distr = np.random.choice(range(n_per_label.size), p = probs/np.sum(probs))
-                x = kdes[distr].sample()[0]
-                x = np.maximum(np.ones(x.shape),(x - step/2).astype(int)/step + 1)*step
-            if tuple(x) not in Xtuples:
-                break;
-        x = x.astype(int)
-        fname = os.path.join(path, `x[0]` +"-"+ `x[1]` +"-"+ `x[2]` +".csv")
-        #Execute auto-tuning procedure
-        execution_handler(x, fname)
-        #Load csv into matrix
-        fl = open(fname,"rb");
-        A = np.loadtxt(fl,delimiter=',');
-        #Update the kernel density estimators
-        y = tuple(A[np.argmin(A[:,0]),1:]);
-        if y not in profiles:
-            profiles.append(y);
-            kdes.append(KernelDensity(kernel='gaussian', bandwidth=2*step));
-        idx = profiles.index(y);
-        #Update data
-        X[len(files)+i,:] = x;
-        t[len(files)+i] = idx;
-        #Update density estimator p(M,N,K | t=idx)
-        kdes[idx].fit(X[t[0:len(files)+i+1]==idx,:]);
+    #Retrieves the existing data
+    print "Retrieving data..."
+    path = "./data"
+    files = os.listdir(path)
+    X = np.empty((len(files),3))
+    t = np.empty(len(files))
+    profiles = []
+    nonemptyfiles = []
+    for i,fname in enumerate(files):
+        if os.path.getsize(os.path.join(path,fname))>0:
+            nonemptyfiles.append(fname)
+    files = nonemptyfiles
-
-    print "Exporting data...";
-    #Shuffle the list of file
-    files = os.listdir(path)
-    X = np.empty((len(files),3))
-    Y = np.zeros((len(files), len(profiles)))
-    for i,fname in enumerate(files):
-        MNK = re.search(r"([0-9]+)-([0-9]+)-([0-9]+).csv", fname)
-        X[i,:] = map(float,[MNK.group(k) for k in range(1,4)])
-        fl = open(os.path.join(path,fname),"rb");
-        A = np.loadtxt(fl,delimiter=',')
-        for j,y in enumerate(profiles):
-            idx = np.where(np.all(A[:,1:]==y,axis=1))[0]
-            T = A[idx[0], 0] if idx.size else execution_handler(map(int,X[i,:]), '', decode(map(int, y)))
-            Y[i,j] = 2*1e-9*X[i,0]*X[i,1]*X[i,2]/T
-
-    return X, Y, profiles
+    for i,fname in enumerate(files):
+        MNK = re.search(r"([0-9]+)-([0-9]+)-([0-9]+).csv", fname)
+        fl = open(os.path.join(path,fname),"rb")
+        A = np.loadtxt(fl,delimiter=',')
+        x = np.array([MNK.group(1), MNK.group(2), MNK.group(3)]).astype(float)
+        y = tuple(A[np.argmin(A[:,0]),1:])
+        if y not in profiles:
+            profiles.append(y)
+        idx = profiles.index(y)
+        X[i,:] = x
+        t[i] = idx
+
+    #Generates new data
+    print "Generating new data..."
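Note: the generation loop continued below grows the training set by drawing candidate (M, N, K) sizes either uniformly on the 64-multiple grid or from per-profile Gaussian kernel-density estimates, preferring rarely-winning profiles and snapping each draw back onto the grid. A minimal self-contained sketch of that sampling scheme, with illustrative data and the modern sklearn import path; none of this is part of the patch:

    import numpy as np
    from sklearn.neighbors import KernelDensity

    step = 64
    rng = np.random.RandomState(0)

    # X holds observed (M, N, K) sizes; t holds the index of the winning profile per row
    X = np.array([[256, 256, 256], [1024, 512, 2048], [512, 512, 512]], dtype=float)
    t = np.array([0, 1, 0])

    # One Gaussian KDE per profile label, bandwidth 2*step as in the patch
    kdes = [KernelDensity(kernel='gaussian', bandwidth=2 * step).fit(X[t == i])
            for i in range(int(t.max()) + 1)]

    n_per_label = np.bincount(t)
    if rng.rand() <= 1.0 / len(kdes):
        # Uniform draw on the 64-multiple grid, mirroring step*random.randint(1,40)
        x = step * rng.randint(1, 41, size=3)
    else:
        # Pick a profile with probability proportional to 1/count, then sample
        # near sizes where that profile already wins
        probs = 1.0 / n_per_label
        label = rng.choice(len(kdes), p=probs / probs.sum())
        x = kdes[label].sample(random_state=rng)[0]
        # Snap the continuous KDE sample back onto the step grid, staying >= step
        x = np.maximum(1, (x - step / 2).astype(int) // step + 1) * step
    print(x.astype(int))  # the next (M, N, K) candidate to auto-tune

Weighting the label choice by 1/count keeps the dataset exploring sizes where unusual profiles win, instead of oversampling the dominant profile.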
+ kdes = [KernelDensity(kernel='gaussian', bandwidth=2*step).fit(X[t==i,:]) for i in range(int(max(t))+1)] if files else []; + X.resize((len(files)+I, 3), refcheck=False); + t.resize(len(files)+I, refcheck=False); + + max_square = max_size/step + for i in range(I): + n_per_label = np.bincount(t[0:i+1].astype(int)); + Xtuples = [tuple(x) for x in X]; + r = random.random(); + while(True): + if(len(kdes)==0 or r<=1.0/len(kdes)): + x = np.array([step*random.randint(1,40), step*random.randint(1,40), step*random.randint(1,40)]); + else: + probs = (1.0/n_per_label) + distr = np.random.choice(range(n_per_label.size), p = probs/np.sum(probs)) + x = kdes[distr].sample()[0] + x = np.maximum(np.ones(x.shape),(x - step/2).astype(int)/step + 1)*step + if tuple(x) not in Xtuples: + break; + x = x.astype(int) + fname = os.path.join(path, `x[0]` +"-"+ `x[1]` +"-"+ `x[2]` +".csv") + #Execute auto-tuning procedure + execution_handler(x, fname) + #Load csv into matrix + fl = open(fname,"rb"); + A = np.loadtxt(fl,delimiter=','); + #Update the kernel density estimators + y = tuple(A[np.argmin(A[:,0]),1:]); + if y not in profiles: + profiles.append(y); + kdes.append(KernelDensity(kernel='gaussian', bandwidth=2*step)); + idx = profiles.index(y); + #Update data + X[len(files)+i,:] = x; + t[len(files)+i] = idx; + #Update density estimator p(M,N,K | t=idx) + kdes[idx].fit(X[t[0:len(files)+i+1]==idx,:]); + + + print "Exporting data..."; + #Shuffle the list of file + files = os.listdir(path) + X = np.empty((len(files),3)) + Y = np.zeros((len(files), len(profiles))) + for i,fname in enumerate(files): + MNK = re.search(r"([0-9]+)-([0-9]+)-([0-9]+).csv", fname) + X[i,:] = map(float,[MNK.group(k) for k in range(1,4)]) + fl = open(os.path.join(path,fname),"rb"); + A = np.loadtxt(fl,delimiter=',') + for j,y in enumerate(profiles): + idx = np.where(np.all(A[:,1:]==y,axis=1))[0] + T = A[idx[0], 0] if idx.size else execution_handler(map(int,X[i,:]), '', decode(map(int, y))) + Y[i,j] = 2*1e-9*X[i,0]*X[i,1]*X[i,2]/T + + return X, Y, profiles diff --git a/autotune/python/external/configobj.py b/autotune/python/external/configobj.py index 9476b0b28..a1074a507 100644 --- a/autotune/python/external/configobj.py +++ b/autotune/python/external/configobj.py @@ -139,28 +139,28 @@ class UnknownType(Exception): class Builder(object): - + def build(self, o): if m is None: raise UnknownType(o.__class__.__name__) return m(o) - + def build_List(self, o): return list(map(self.build, o.getChildren())) - + def build_Const(self, o): return o.value - + def build_Dict(self, o): d = {} i = iter(map(self.build, o.getChildren())) for el in i: d[el] = next(i) return d - + def build_Tuple(self, o): return tuple(self.build_List(o)) - + def build_Name(self, o): if o.name == 'None': return None @@ -168,10 +168,10 @@ class Builder(object): return True if o.name == 'False': return False - + # An undefined Name raise UnknownType('Undefined Name') - + def build_Add(self, o): real, imag = list(map(self.build_Const, o.getChildren())) try: @@ -181,14 +181,14 @@ class Builder(object): if not isinstance(imag, complex) or imag.real != 0.0: raise UnknownType('Add') return real+imag - + def build_Getattr(self, o): parent = self.build(o.expr) return getattr(parent, o.attrname) - + def build_UnarySub(self, o): return -self.build_Const(o.getChildren()[0]) - + def build_UnaryAdd(self, o): return self.build_Const(o.getChildren()[0]) @@ -199,7 +199,7 @@ _builder = Builder() def unrepr(s): if not s: return s - + # this is supposed to be safe import ast return 
ast.literal_eval(s) @@ -304,7 +304,7 @@ class InterpolationEngine(object): # short-cut if not self._cookie in value: return value - + def recursive_interpolate(key, value, section, backtrail): """The function that does the actual work. @@ -404,7 +404,7 @@ class InterpolationEngine(object): (e.g., if we interpolated "$$" and returned "$"). """ raise NotImplementedError() - + class ConfigParserInterpolation(InterpolationEngine): @@ -453,27 +453,27 @@ interpolation_engines = { def __newobj__(cls, *args): # Hack for pickle - return cls.__new__(cls, *args) + return cls.__new__(cls, *args) class Section(dict): """ A dictionary-like object that represents a section in a config file. - + It does string interpolation if the 'interpolation' attribute of the 'main' object is set to True. - + Interpolation is tried first from this object, then from the 'DEFAULT' section of this object, next from the parent and its 'DEFAULT' section, and so on until the main object is reached. - + A Section will behave like an ordered dictionary - following the order of the ``scalars`` and ``sections`` attributes. You can use this to change the order of members. - + Iteration follows the order: scalars, then sections. """ - + def __setstate__(self, state): dict.update(self, state[0]) self.__dict__.update(state[1]) @@ -481,8 +481,8 @@ class Section(dict): def __reduce__(self): state = (dict(self), self.__dict__) return (__newobj__, (self.__class__,), state) - - + + def __init__(self, parent, depth, main, indict=None, name=None): """ * parent is the section above @@ -507,8 +507,8 @@ class Section(dict): # (rather than just passing to ``dict.__init__``) for entry, value in indict.items(): self[entry] = value - - + + def _initialise(self): # the sequence of scalar values in this Section self.scalars = [] @@ -552,7 +552,7 @@ class Section(dict): def __getitem__(self, key): """Fetch the item and do string interpolation.""" val = dict.__getitem__(self, key) - if self.main.interpolation: + if self.main.interpolation: if isinstance(val, six.string_types): return self._interpolate(key, val) if isinstance(val, list): @@ -569,20 +569,20 @@ class Section(dict): def __setitem__(self, key, value, unrepr=False): """ Correctly set a value. - + Making dictionary values Section instances. (We have to special case 'Section' instances - which are also dicts) - + Keys must be strings. Values need only be strings (or lists of strings) if ``main.stringify`` is set. - + ``unrepr`` must be set when setting a value to a dictionary, without creating a new sub-section. """ if not isinstance(key, six.string_types): raise ValueError('The key "%s" is not a string.' % key) - + # add the comment if key not in self.comments: self.comments[key] = [] @@ -683,7 +683,7 @@ class Section(dict): """ A version of clear that also affects scalars/sections Also clears comments and configspec. - + Leaves other attributes alone : depth/main/parent are not affected """ @@ -757,10 +757,10 @@ class Section(dict): def dict(self): """ Return a deepcopy of self as a dictionary. - + All members that are ``Section`` instances are recursively turned to ordinary dictionaries - by calling their ``dict`` method. - + >>> n = a.dict() >>> n == a 1 @@ -785,7 +785,7 @@ class Section(dict): def merge(self, indict): """ A recursive update - useful for merging config files. - + >>> a = '''[section1] ... option1 = True ... 
[[subsection]] @@ -805,17 +805,17 @@ class Section(dict): if (key in self and isinstance(self[key], dict) and isinstance(val, dict)): self[key].merge(val) - else: + else: self[key] = val def rename(self, oldkey, newkey): """ Change a keyname to another, without changing position in sequence. - + Implemented so that transformations can be made on keys, as well as on values. (used by encode and decode) - + Also renames comments. """ if oldkey in self.scalars: @@ -843,30 +843,30 @@ class Section(dict): call_on_sections=False, **keywargs): """ Walk every member and call a function on the keyword and value. - + Return a dictionary of the return values - + If the function raises an exception, raise the errror unless ``raise_errors=False``, in which case set the return value to ``False``. - + Any unrecognised keyword arguments you pass to walk, will be pased on to the function you pass in. - + Note: if ``call_on_sections`` is ``True`` then - on encountering a subsection, *first* the function is called for the *whole* subsection, and then recurses into it's members. This means your function must be able to handle strings, dictionaries and lists. This allows you to change the key of subsections as well as for ordinary members. The return value when called on the whole subsection has to be discarded. - + See the encode and decode methods for examples, including functions. - + .. admonition:: caution - + You can use ``walk`` to transform the names of members of a section but you mustn't add or delete members. - + >>> config = '''[XXXXsection] ... XXXXkey = XXXXvalue'''.splitlines() >>> cfg = ConfigObj(config) @@ -929,17 +929,17 @@ class Section(dict): Accepts a key as input. The corresponding value must be a string or the objects (``True`` or 1) or (``False`` or 0). We allow 0 and 1 to retain compatibility with Python 2.2. - - If the string is one of ``True``, ``On``, ``Yes``, or ``1`` it returns + + If the string is one of ``True``, ``On``, ``Yes``, or ``1`` it returns ``True``. - - If the string is one of ``False``, ``Off``, ``No``, or ``0`` it returns + + If the string is one of ``False``, ``Off``, ``No``, or ``0`` it returns ``False``. - + ``as_bool`` is not case sensitive. - + Any other input will raise a ``ValueError``. - + >>> a = ConfigObj() >>> a['a'] = 'fish' >>> a.as_bool('a') @@ -971,10 +971,10 @@ class Section(dict): def as_int(self, key): """ A convenience method which coerces the specified value to an integer. - + If the value is an invalid literal for ``int``, a ``ValueError`` will be raised. - + >>> a = ConfigObj() >>> a['a'] = 'fish' >>> a.as_int('a') @@ -994,10 +994,10 @@ class Section(dict): def as_float(self, key): """ A convenience method which coerces the specified value to a float. - + If the value is an invalid literal for ``float``, a ``ValueError`` will be raised. - + >>> a = ConfigObj() >>> a['a'] = 'fish' >>> a.as_float('a') #doctest: +IGNORE_EXCEPTION_DETAIL @@ -1011,13 +1011,13 @@ class Section(dict): 3.2... """ return float(self[key]) - - + + def as_list(self, key): """ A convenience method which fetches the specified value, guaranteeing that it is a list. - + >>> a = ConfigObj() >>> a['a'] = 1 >>> a.as_list('a') @@ -1033,15 +1033,15 @@ class Section(dict): if isinstance(result, (tuple, list)): return list(result) return [result] - + def restore_default(self, key): """ Restore (and return) default value for the specified key. - + This method will only work for a ConfigObj that was created with a configspec and has been validated. 
- + If there is no default value for this key, ``KeyError`` is raised. """ default = self.default_values[key] @@ -1050,20 +1050,20 @@ class Section(dict): self.defaults.append(key) return default - + def restore_defaults(self): """ Recursively restore default values to all members that have them. - + This method will only work for a ConfigObj that was created with a configspec and has been validated. - + It doesn't delete or modify entries without default values. """ for key in self.default_values: self.restore_default(key) - + for section in self.sections: self[section].restore_defaults() @@ -1178,7 +1178,7 @@ class ConfigObj(Section): write_empty_values=False, _inspec=False): """ Parse a config file or create a config file object. - + ``ConfigObj(infile=None, configspec=None, encoding=None, interpolation=True, raise_errors=False, list_values=True, create_empty=False, file_error=False, stringify=True, @@ -1188,9 +1188,9 @@ class ConfigObj(Section): self._inspec = _inspec # init the superclass Section.__init__(self, self, 0, self) - + infile = infile or [] - + _options = {'configspec': configspec, 'encoding': encoding, 'interpolation': interpolation, 'raise_errors': raise_errors, 'list_values': list_values, @@ -1206,7 +1206,7 @@ class ConfigObj(Section): warnings.warn('Passing in an options dictionary to ConfigObj() is ' 'deprecated. Use **options instead.', DeprecationWarning, stacklevel=2) - + # TODO: check the values too. for entry in options: if entry not in OPTION_DEFAULTS: @@ -1217,18 +1217,18 @@ class ConfigObj(Section): keyword_value = _options[entry] if value != keyword_value: options[entry] = keyword_value - + # XXXX this ignores an explicit list_values = True in combination # with _inspec. The user should *never* do that anyway, but still... 
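Note: this vendored module is what autotune.py above drives via ConfigObj(config_fname, configspec=spec_fname) before reading per-operation sections. A minimal sketch of that usage pattern, assuming the companion validate module is available; the section and key names are illustrative stand-ins mirroring what do_tuning() reads, not the project's real config_spec.ini:

    from configobj import ConfigObj
    from validate import Validator

    # Inline stand-ins for config.ini / config_spec.ini (hypothetical content)
    spec = ConfigObj(['[matrix-product]',
                      'devices = force_list',
                      'precision = force_list',
                      'size = int_list',
                      'layout = force_list'], _inspec=True)
    config = ConfigObj(['[matrix-product]',
                        'devices = 0',
                        'precision = single',
                        'size = 1024, 1024, 1024',
                        'layout = all'], configspec=spec)
    if config.validate(Validator()) is not True:  # returns True or a pass/fail dict
        raise SystemExit('configuration does not satisfy the spec')

    p = config['matrix-product']
    print(p['devices'], p['precision'], p['size'], p['layout'])
    # -> ['0'] ['single'] [1024, 1024, 1024] ['all']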
if _inspec: options['list_values'] = False - + self._initialise(options) configspec = options['configspec'] self._original_configspec = configspec self._load(infile, configspec) - - + + def _load(self, infile, configspec): if isinstance(infile, six.string_types): self.filename = infile @@ -1246,10 +1246,10 @@ class ConfigObj(Section): with open(infile, 'w') as h: h.write('') content = [] - + elif isinstance(infile, (list, tuple)): content = list(infile) - + elif isinstance(infile, dict): # initialise self # the Section class handles creating subsections @@ -1262,18 +1262,18 @@ class ConfigObj(Section): this_section[section] = {} set_section(in_section[section], this_section[section]) set_section(infile, self) - + else: for entry in infile: self[entry] = infile[entry] del self._errors - + if configspec is not None: self._handle_configspec(configspec) else: self.configspec = None return - + elif getattr(infile, 'read', MISSING) is not MISSING: # This supports file like objects content = infile.read() or [] @@ -1300,7 +1300,7 @@ class ConfigObj(Section): assert all(isinstance(line, six.string_types) for line in content), repr(content) content = [line.rstrip('\r\n') for line in content] - + self._parse(content) # if we had any errors, now is the time to raise them if self._errors: @@ -1318,17 +1318,17 @@ class ConfigObj(Section): raise error # delete private attributes del self._errors - + if configspec is None: self.configspec = None else: self._handle_configspec(configspec) - - + + def _initialise(self, options=None): if options is None: options = OPTION_DEFAULTS - + # initialise a few variables self.filename = None self._errors = [] @@ -1345,48 +1345,48 @@ class ConfigObj(Section): self.newlines = None self.write_empty_values = options['write_empty_values'] self.unrepr = options['unrepr'] - + self.initial_comment = [] self.final_comment = [] self.configspec = None - + if self._inspec: self.list_values = False - + # Clear section attributes as well Section._initialise(self) - - + + def __repr__(self): def _getval(key): try: return self[key] except MissingInterpolationOption: return dict.__getitem__(self, key) - return ('ConfigObj({%s})' % - ', '.join([('%s: %s' % (repr(key), repr(_getval(key)))) + return ('ConfigObj({%s})' % + ', '.join([('%s: %s' % (repr(key), repr(_getval(key)))) for key in (self.scalars + self.sections)])) - - + + def _handle_bom(self, infile): """ Handle any BOM, and decode if necessary. - + If an encoding is specified, that *must* be used - but the BOM should still be removed (and the BOM attribute set). - + (If the encoding is wrongly specified, then a BOM for an alternative encoding won't be discovered or removed.) - + If an encoding is not specified, UTF8 or UTF16 BOM will be detected and removed. The BOM attribute will be set. UTF16 will be decoded to unicode. - + NOTE: This method must not be called with an empty ``infile``. - + Specifying the *wrong* encoding is likely to cause a ``UnicodeDecodeError``. - + ``infile`` must always be returned as a list of lines, but may be passed in as a single string. 
""" @@ -1397,7 +1397,7 @@ class ConfigObj(Section): # the encoding specified doesn't have one # just decode return self._decode(infile, self.encoding) - + if isinstance(infile, (list, tuple)): line = infile[0] else: @@ -1426,18 +1426,18 @@ class ConfigObj(Section): ##self.BOM = True # Don't need to remove BOM return self._decode(infile, encoding) - + # If we get this far, will *probably* raise a DecodeError # As it doesn't appear to start with a BOM return self._decode(infile, self.encoding) - + # Must be UTF8 BOM = BOM_SET[enc] if not line.startswith(BOM): return self._decode(infile, self.encoding) - + newline = line[len(BOM):] - + # BOM removed if isinstance(infile, (list, tuple)): infile[0] = newline @@ -1445,7 +1445,7 @@ class ConfigObj(Section): infile = newline self.BOM = True return self._decode(infile, self.encoding) - + # No encoding specified - so we need to check for UTF8/UTF16 for BOM, (encoding, final_encoding) in list(BOMS.items()): if not isinstance(line, six.binary_type) or not line.startswith(BOM): @@ -1472,7 +1472,7 @@ class ConfigObj(Section): return self._decode(infile, 'utf-8') # UTF16 - have to decode return self._decode(infile, encoding) - + if six.PY2 and isinstance(line, str): # don't actually do any decoding, since we're on python 2 and @@ -1496,7 +1496,7 @@ class ConfigObj(Section): def _decode(self, infile, encoding): """ Decode infile to unicode. Using the specified encoding. - + if is a string, it also needs converting to a list. """ if isinstance(infile, six.string_types): @@ -1545,14 +1545,14 @@ class ConfigObj(Section): temp_list_values = self.list_values if self.unrepr: self.list_values = False - + comment_list = [] done_start = False this_section = self maxline = len(infile) - 1 cur_index = -1 reset_comment = False - + while cur_index < maxline: if reset_comment: comment_list = [] @@ -1564,13 +1564,13 @@ class ConfigObj(Section): reset_comment = False comment_list.append(line) continue - + if not done_start: # preserve initial comment self.initial_comment = comment_list comment_list = [] done_start = True - + reset_comment = True # first we check if it's a section marker mat = self._sectionmarker.match(line) @@ -1584,7 +1584,7 @@ class ConfigObj(Section): self._handle_error("Cannot compute the section depth at line %s.", NestingError, infile, cur_index) continue - + if cur_depth < this_section.depth: # the new section is dropping back to a previous level try: @@ -1603,13 +1603,13 @@ class ConfigObj(Section): else: self._handle_error("Section too nested at line %s.", NestingError, infile, cur_index) - + sect_name = self._unquote(sect_name) if sect_name in parent: self._handle_error('Duplicate section name at line %s.', DuplicateError, infile, cur_index) continue - + # create the new section this_section = Section( parent, @@ -1710,7 +1710,7 @@ class ConfigObj(Section): """ Given a section and a depth level, walk back through the sections parents to see if the depth level matches a previous section. - + Return a reference to the right section, or raise a SyntaxError. """ @@ -1728,7 +1728,7 @@ class ConfigObj(Section): def _handle_error(self, text, ErrorClass, infile, cur_index): """ Handle an error according to the error settings. - + Either raise the error or store it. The error will have occured at ``cur_index`` """ @@ -1757,19 +1757,19 @@ class ConfigObj(Section): def _quote(self, value, multiline=True): """ Return a safely quoted version of a value. - + Raise a ConfigObjError if the value cannot be safely quoted. 
If multiline is ``True`` (default) then use triple quotes if necessary. - + * Don't quote values that don't need it. * Recursively quote members of a list and return a comma joined list. * Multiline is ``False`` for lists. * Obey list syntax for empty and single member lists. - + If ``list_values=False`` then the value is only quoted if it contains a ``\\n`` (is multiline) or '#'. - + If ``write_empty_values`` is set, and the value is an empty string, it won't be quoted. """ @@ -1777,7 +1777,7 @@ class ConfigObj(Section): # Only if multiline is set, so that it is used for values not # keys, and not values that are part of a list return '' - + if multiline and isinstance(value, (list, tuple)): if not value: return ',' @@ -1795,12 +1795,12 @@ class ConfigObj(Section): if not value: return '""' - + no_lists_no_quotes = not self.list_values and '\n' not in value and '#' not in value need_triple = multiline and ((("'" in value) and ('"' in value)) or ('\n' in value )) hash_triple_quote = multiline and not need_triple and ("'" in value) and ('"' in value) and ('#' in value) check_for_single = (no_lists_no_quotes or not need_triple) and not hash_triple_quote - + if check_for_single: if not self.list_values: # we don't quote if ``list_values=False`` @@ -1818,13 +1818,13 @@ class ConfigObj(Section): else: # if value has '\n' or "'" *and* '"', it will need triple quotes quot = self._get_triple_quote(value) - + if quot == noquot and '#' in value and self.list_values: quot = self._get_single_quote(value) - + return quot % value - - + + def _get_single_quote(self, value): if ("'" in value) and ('"' in value): raise ConfigObjError('Value "%s" cannot be safely quoted.' % value) @@ -1833,15 +1833,15 @@ class ConfigObj(Section): else: quot = dquot return quot - - + + def _get_triple_quote(self, value): if (value.find('"""') != -1) and (value.find("'''") != -1): raise ConfigObjError('Value "%s" cannot be safely quoted.' % value) if value.find('"""') == -1: quot = tdquot else: - quot = tsquot + quot = tsquot return quot @@ -1931,7 +1931,7 @@ class ConfigObj(Section): def _handle_configspec(self, configspec): """Parse the configspec.""" - # FIXME: Should we check that the configspec was created with the + # FIXME: Should we check that the configspec was created with the # correct settings ? (i.e. ``list_values=False``) if not isinstance(configspec, ConfigObj): try: @@ -1945,11 +1945,11 @@ class ConfigObj(Section): raise ConfigspecError('Parsing configspec failed: %s' % e) except IOError as e: raise IOError('Reading configspec failed: %s' % e) - - self.configspec = configspec - - + self.configspec = configspec + + + def _set_configspec(self, section, copy): """ Called by validate. 
Handles setting the configspec on subsections @@ -1961,7 +1961,7 @@ class ConfigObj(Section): for entry in section.sections: if entry not in configspec: section[entry].configspec = many - + for entry in configspec.sections: if entry == '__many__': continue @@ -1972,11 +1972,11 @@ class ConfigObj(Section): # copy comments section.comments[entry] = configspec.comments.get(entry, []) section.inline_comments[entry] = configspec.inline_comments.get(entry, '') - + # Could be a scalar when we expect a section if isinstance(section[entry], Section): section[entry].configspec = configspec[entry] - + def _write_line(self, indent_string, entry, this_entry, comment): """Write an individual line, for the write method""" @@ -2016,9 +2016,9 @@ class ConfigObj(Section): def write(self, outfile=None, section=None): """ Write the current ConfigObj as a file - + tekNico: FIXME: use StringIO instead of real files - + >>> filename = a.filename >>> a.filename = 'test.ini' >>> a.write() @@ -2031,7 +2031,7 @@ class ConfigObj(Section): if self.indent_type is None: # this can be true if initialised from a dictionary self.indent_type = DEFAULT_INDENT_TYPE - + out = [] cs = self._a_to_u('#') csp = self._a_to_u('# ') @@ -2045,7 +2045,7 @@ class ConfigObj(Section): if stripped_line and not stripped_line.startswith(cs): line = csp + line out.append(line) - + indent_string = self.indent_type * section.depth for entry in (section.scalars + section.sections): if entry in section.defaults: @@ -2058,7 +2058,7 @@ class ConfigObj(Section): out.append(indent_string + comment_line) this_entry = section[entry] comment = self._handle_comment(section.inline_comments[entry]) - + if isinstance(this_entry, Section): # a section out.append(self._write_marker( @@ -2073,7 +2073,7 @@ class ConfigObj(Section): entry, this_entry, comment)) - + if section is self: for line in self.final_comment: line = self._decode_element(line) @@ -2082,10 +2082,10 @@ class ConfigObj(Section): line = csp + line out.append(line) self.interpolation = int_val - + if section is not self: return out - + if (self.filename is None) and (outfile is None): # output a list of lines # might need to encode @@ -2099,7 +2099,7 @@ class ConfigObj(Section): out.append('') out[0] = BOM_UTF8 + out[0] return out - + # Turn the list to a string, joined with correct newlines newline = self.newlines or os.linesep if (getattr(outfile, 'mode', None) is not None and outfile.mode == 'w' @@ -2131,34 +2131,34 @@ class ConfigObj(Section): section=None): """ Test the ConfigObj against a configspec. - + It uses the ``validator`` object from *validate.py*. - + To run ``validate`` on the current ConfigObj, call: :: - + test = config.validate(validator) - + (Normally having previously passed in the configspec when the ConfigObj was created - you can dynamically assign a dictionary of checks to the ``configspec`` attribute of a section though). - + It returns ``True`` if everything passes, or a dictionary of pass/fails (True/False). If every member of a subsection passes, it will just have the value ``True``. (It also returns ``False`` if all members fail). - + In addition, it converts the values from strings to their native types if their checks pass (and ``stringify`` is set). - + If ``preserve_errors`` is ``True`` (``False`` is default) then instead of a marking a fail with a ``False``, it will preserve the actual exception object. This can contain info about the reason for failure. For example the ``VdtValueTooSmallError`` indicates that the value supplied was too small. 
If a value (or section) is missing it will still be marked as ``False``. - + You must have the validate module to use ``preserve_errors=True``. - + You can then use the ``flatten_errors`` function to turn your nested results dictionary into a flattened list of failures - useful for displaying meaningful error messages. @@ -2171,7 +2171,7 @@ class ConfigObj(Section): # Which makes importing configobj faster from validate import VdtMissingValue self._vdtMissingValue = VdtMissingValue - + section = self if copy: @@ -2181,23 +2181,23 @@ class ConfigObj(Section): section.BOM = section.configspec.BOM section.newlines = section.configspec.newlines section.indent_type = section.configspec.indent_type - + # # section.default_values.clear() #?? configspec = section.configspec self._set_configspec(section, copy) - + def validate_entry(entry, spec, val, missing, ret_true, ret_false): section.default_values.pop(entry, None) - + try: section.default_values[entry] = validator.get_default_value(configspec[entry]) except (KeyError, AttributeError, validator.baseErrorClass): # No default, bad default or validator has no 'get_default_value' # (e.g. SimpleVal) pass - + try: check = validator.check(spec, val, @@ -2231,16 +2231,16 @@ class ConfigObj(Section): if not copy and missing and entry not in section.defaults: section.defaults.append(entry) return ret_true, ret_false - + # out = {} ret_true = True ret_false = True - + unvalidated = [k for k in section.scalars if k not in configspec] - incorrect_sections = [k for k in configspec.sections if k in section.scalars] + incorrect_sections = [k for k in configspec.sections if k in section.scalars] incorrect_scalars = [k for k in configspec.scalars if k in section.sections] - + for entry in configspec.scalars: if entry in ('__many__', '___many___'): # reserved names @@ -2260,16 +2260,16 @@ class ConfigObj(Section): else: missing = False val = section[entry] - - ret_true, ret_false = validate_entry(entry, configspec[entry], val, + + ret_true, ret_false = validate_entry(entry, configspec[entry], val, missing, ret_true, ret_false) - + many = None if '__many__' in configspec.scalars: many = configspec['__many__'] elif '___many___' in configspec.scalars: many = configspec['___many___'] - + if many is not None: for entry in unvalidated: val = section[entry] @@ -2293,7 +2293,7 @@ class ConfigObj(Section): ret_false = False msg = 'Section %r was provided as a single value' % entry out[entry] = validator.baseErrorClass(msg) - + # Missing sections will have been created as empty ones when the # configspec was read. for entry in section.sections: @@ -2314,7 +2314,7 @@ class ConfigObj(Section): ret_false = False else: ret_true = False - + section.extra_values = unvalidated if preserve_errors and not section._created: # If the section wasn't created (i.e. it wasn't missing) @@ -2343,12 +2343,12 @@ class ConfigObj(Section): self.configspec = None # Just to be sure ;-) self._original_configspec = None - - + + def reload(self): """ Reload a ConfigObj from file. - + This method raises a ``ReloadError`` if the ConfigObj doesn't have a filename attribute pointing to a file. """ @@ -2361,31 +2361,31 @@ class ConfigObj(Section): if entry == 'configspec': continue current_options[entry] = getattr(self, entry) - + configspec = self._original_configspec current_options['configspec'] = configspec - + self.clear() self._initialise(current_options) self._load(filename, configspec) - + class SimpleVal(object): """ A simple validator. 
Can be used to check that all members expected are present. - + To use it, provide a configspec with all your members in (the value given will be ignored). Pass an instance of ``SimpleVal`` to the ``validate`` method of your ``ConfigObj``. ``validate`` will return ``True`` if all members are present, or a dictionary with True/False meaning present/missing. (Whole missing sections will be replaced with ``False``) """ - + def __init__(self): self.baseErrorClass = ConfigObjError - + def check(self, check, member, missing=False): """A dummy check method, always returns the value unchanged.""" if missing: @@ -2397,32 +2397,32 @@ def flatten_errors(cfg, res, levels=None, results=None): """ An example function that will turn a nested dictionary of results (as returned by ``ConfigObj.validate``) into a flat list. - + ``cfg`` is the ConfigObj instance being checked, ``res`` is the results dictionary returned by ``validate``. - + (This is a recursive function, so you shouldn't use the ``levels`` or ``results`` arguments - they are used by the function.) - + Returns a list of keys that failed. Each member of the list is a tuple:: - + ([list of sections...], key, result) - + If ``validate`` was called with ``preserve_errors=False`` (the default) then ``result`` will always be ``False``. *list of sections* is a flattened list of sections that the key was found in. - + If the section was missing (or a section was expected and a scalar provided - or vice-versa) then key will be ``None``. - + If the value (or section) was missing then ``result`` will be ``False``. - + If ``validate`` was called with ``preserve_errors=True`` and a value was present, but failed the check, then ``result`` will be the exception object returned. You can use this as a string that describes the failure. - + For example *The value "3" is of the wrong type*. """ if levels is None: @@ -2457,21 +2457,21 @@ def get_extra_values(conf, _prepend=()): """ Find all the values and sections not in the configspec from a validated ConfigObj. - + ``get_extra_values`` returns a list of tuples where each tuple represents either an extra section, or an extra value. - - The tuples contain two values, a tuple representing the section the value + + The tuples contain two values, a tuple representing the section the value is in and the name of the extra values. For extra values in the top level section the first member will be an empty tuple. For values in the 'foo' section the first member will be ``('foo',)``. For members in the 'bar' subsection of the 'foo' section the first member will be ``('foo', 'bar')``. - + NOTE: If you call ``get_extra_values`` on a ConfigObj instance that hasn't been validated it will return an empty list. 
""" out = [] - + out.extend([(_prepend, name) for name in conf.extra_values]) for name in conf.sections: if name not in conf.extra_values: diff --git a/autotune/python/genetic.py b/autotune/python/genetic.py index d0ebd5750..ae6568fc9 100644 --- a/autotune/python/genetic.py +++ b/autotune/python/genetic.py @@ -13,15 +13,15 @@ from deap import tools as deap_tools from collections import OrderedDict as odict - + def closest_divisor(N, x): - x_low=x_high=max(1,min(round(x),N)) - while N % x_low > 0 and x_low>0: - x_low = x_low - 1 - while N % x_high > 0 and x_high < N: - x_high = x_high + 1 - return x_low if x - x_low < x_high - x else x_high - + x_low=x_high=max(1,min(round(x),N)) + while N % x_low > 0 and x_low>0: + x_low = x_low - 1 + while N % x_high > 0 and x_high < N: + x_high = x_high + 1 + return x_low if x - x_low < x_high - x else x_high + def b_gray_to_bin(A='00000000', endian='big'): assert type(endian) is str assert endian == 'little' or endian == 'big' @@ -30,157 +30,155 @@ def b_gray_to_bin(A='00000000', endian='big'): for i in range(1, len(A)): b += str( int(b[i-1] != A[i]) ) if endian == 'little': b = b[::-1] # Convert back to little endian if necessary return b - + class GeneticOperators(object): - - def __init__(self, device, statement, parameter_names, TemplateType, build_template, out): - self.device = device - self.statement = statement - self.parameter_names = parameter_names - self.TemplateType = TemplateType - self.ParameterType = TemplateType.Parameters - self.build_template = build_template - self.cache = {} - self.indpb = 0.05 - self.out = out - - creator.create("FitnessMin", base.Fitness, weights=(-1.0,)) - creator.create("Individual", list, fitness=creator.FitnessMin) - - self.toolbox = base.Toolbox() - self.toolbox.register("population", self.init) - self.toolbox.register("evaluate", self.evaluate) - self.toolbox.register("mate", deap_tools.cxTwoPoint) - self.toolbox.register("mutate", self.mutate) - self.toolbox.register("select", deap_tools.selNSGA2) - @staticmethod - def decode(s): - FetchingPolicy = vcl.atidlas.FetchingPolicy - fetch = [FetchingPolicy.FETCH_FROM_LOCAL, FetchingPolicy.FETCH_FROM_GLOBAL_CONTIGUOUS, FetchingPolicy.FETCH_FROM_GLOBAL_STRIDED] - fetchA = fetch[s[0]] - fetchB = fetch[s[1]] - bincode = ''.join(s[2:]) - decode_element = lambda x:2**int(b_gray_to_bin(x), 2) - simd = decode_element(bincode[0:3]) - ls0 = decode_element(bincode[2:5]) - ls1 = decode_element(bincode[5:8]) - kL = decode_element(bincode[8:11]) - mS = decode_element(bincode[11:14]) - kS = decode_element(bincode[14:17]) - nS = decode_element(bincode[17:20]) - if fetchA==FetchingPolicy.FETCH_FROM_LOCAL or fetchB==FetchingPolicy.FETCH_FROM_LOCAL: - lf0 = decode_element(bincode[20:23]) - lf1 = ls0*ls1/lf0 - else: - lf0, lf1 = 0, 0 - return [simd, ls0, kL, ls1, mS, kS, nS, fetchA, fetchB, lf0, lf1] - - def init(self, N): - result = [] - fetchcount = [0, 0, 0] - while len(result) < N: - while True: - fetch = random.randint(0,2) - bincode = [fetch, fetch] + [str(random.randint(0,1)) for i in range(23)] - parameters = self.decode(bincode) - template = self.build_template(self.TemplateType.Parameters(*parameters)) - registers_usage = template.registers_usage(vcl.atidlas.StatementsTuple(self.statement))/4 - lmem_usage = template.lmem_usage(vcl.atidlas.StatementsTuple(self.statement)) - local_size = template.parameters.local_size_0*template.parameters.local_size_1 - occupancy_record = tools.OccupancyRecord(self.device, local_size, lmem_usage, registers_usage) - if not 
tools.skip(template, self.statement, self.device): - fetchcount[fetch] = fetchcount[fetch] + 1 - if max(fetchcount) - min(fetchcount) <= 1: - result.append(creator.Individual(bincode)) - break - else: - fetchcount[fetch] = fetchcount[fetch] - 1 - return result + def __init__(self, device, statement, parameter_names, TemplateType, build_template, out): + self.device = device + self.statement = statement + self.parameter_names = parameter_names + self.TemplateType = TemplateType + self.ParameterType = TemplateType.Parameters + self.build_template = build_template + self.cache = {} + self.indpb = 0.05 + self.out = out - def mutate(self, individual): - while True: - new_individual = copy.deepcopy(individual) - for i in range(len(new_individual)): - if i < 2 and random.random() < self.indpb: - while new_individual[i] == individual[i]: - new_individual[i] = random.randint(0, 2) - elif i >= 2 and random.random() < self.indpb: - new_individual[i] = '1' if new_individual[i]=='0' else '0' - parameters = self.decode(new_individual) - template = self.build_template(self.TemplateType.Parameters(*parameters)) - #print tools.skip(template, self.statement, self.device), parameters - if not tools.skip(template, self.statement, self.device): - break - return new_individual, - - def evaluate(self, individual): - if tuple(individual) not in self.cache: - parameters = self.decode(individual) - template = self.build_template(self.TemplateType.Parameters(*parameters)) - try: - tt = tools.benchmark(template, self.statement, self.device) - self.out.write(','.join([str(tt)]+map(str,map(int,parameters)))+'\n') - self.cache[tuple(individual)] = tt - except: - self.cache[tuple(individual)] = 10 - return self.cache[tuple(individual)], - - def optimize(self, maxtime, maxgen, compute_perf, perf_metric): - hof = deap_tools.HallOfFame(1) - # Begin the generational process - gen = 0 - maxtime = time.strptime(maxtime, '%Mm%Ss') - maxtime = maxtime.tm_min*60 + maxtime.tm_sec - start_time = time.time() - - mu = 30 - cxpb = 0.2 - mutpb = 0.7 - - population = self.init(mu) - invalid_ind = [ind for ind in population if not ind.fitness.valid] - fitnesses = self.toolbox.map(self.evaluate, invalid_ind) - for ind, fit in zip(invalid_ind, fitnesses): - ind.fitness.values = fit - hof.update(population) - - while time.time() - start_time < maxtime: - # Vary the population - offspring = [] - for _ in xrange(mu): - op_choice = random.random() - if op_choice < cxpb: # Apply crossover - ind1, ind2 = map(self.toolbox.clone, random.sample(population, 2)) - ind1, ind2 = self.toolbox.mate(ind1, ind2) - del ind1.fitness.values - offspring.append(ind1) - elif op_choice < cxpb + mutpb: # Apply mutation - ind = self.toolbox.clone(random.choice(population)) - ind, = self.toolbox.mutate(ind) - del ind.fitness.values - offspring.append(ind) - else: # Apply reproduction - offspring.append(random.choice(population)) - - #~ for x in offspring: - #~ print self.decode(x) - # Evaluate the individuals with an invalid fitness - invalid_ind = [ind for ind in offspring if not ind.fitness.valid] + creator.create("FitnessMin", base.Fitness, weights=(-1.0,)) + creator.create("Individual", list, fitness=creator.FitnessMin) + + self.toolbox = base.Toolbox() + self.toolbox.register("population", self.init) + self.toolbox.register("evaluate", self.evaluate) + self.toolbox.register("mate", deap_tools.cxTwoPoint) + self.toolbox.register("mutate", self.mutate) + self.toolbox.register("select", deap_tools.selNSGA2) + + @staticmethod + def decode(s): + FetchingPolicy = 
vcl.atidlas.FetchingPolicy + fetch = [FetchingPolicy.FETCH_FROM_LOCAL, FetchingPolicy.FETCH_FROM_GLOBAL_CONTIGUOUS, FetchingPolicy.FETCH_FROM_GLOBAL_STRIDED] + fetchA = fetch[s[0]] + fetchB = fetch[s[1]] + bincode = ''.join(s[2:]) + decode_element = lambda x:2**int(b_gray_to_bin(x), 2) + simd = decode_element(bincode[0:3]) + ls0 = decode_element(bincode[2:5]) + ls1 = decode_element(bincode[5:8]) + kL = decode_element(bincode[8:11]) + mS = decode_element(bincode[11:14]) + kS = decode_element(bincode[14:17]) + nS = decode_element(bincode[17:20]) + if fetchA==FetchingPolicy.FETCH_FROM_LOCAL or fetchB==FetchingPolicy.FETCH_FROM_LOCAL: + lf0 = decode_element(bincode[20:23]) + lf1 = ls0*ls1/lf0 + else: + lf0, lf1 = 0, 0 + return [simd, ls0, kL, ls1, mS, kS, nS, fetchA, fetchB, lf0, lf1] + + def init(self, N): + result = [] + fetchcount = [0, 0, 0] + while len(result) < N: + while True: + fetch = random.randint(0,2) + bincode = [fetch, fetch] + [str(random.randint(0,1)) for i in range(23)] + parameters = self.decode(bincode) + template = self.build_template(self.TemplateType.Parameters(*parameters)) + registers_usage = template.registers_usage(vcl.atidlas.StatementsTuple(self.statement))/4 + lmem_usage = template.lmem_usage(vcl.atidlas.StatementsTuple(self.statement)) + local_size = template.parameters.local_size_0*template.parameters.local_size_1 + occupancy_record = tools.OccupancyRecord(self.device, local_size, lmem_usage, registers_usage) + if not tools.skip(template, self.statement, self.device): + fetchcount[fetch] = fetchcount[fetch] + 1 + if max(fetchcount) - min(fetchcount) <= 1: + result.append(creator.Individual(bincode)) + break + else: + fetchcount[fetch] = fetchcount[fetch] - 1 + return result + + def mutate(self, individual): + while True: + new_individual = copy.deepcopy(individual) + for i in range(len(new_individual)): + if i < 2 and random.random() < self.indpb: + while new_individual[i] == individual[i]: + new_individual[i] = random.randint(0, 2) + elif i >= 2 and random.random() < self.indpb: + new_individual[i] = '1' if new_individual[i]=='0' else '0' + parameters = self.decode(new_individual) + template = self.build_template(self.TemplateType.Parameters(*parameters)) + #print tools.skip(template, self.statement, self.device), parameters + if not tools.skip(template, self.statement, self.device): + break + return new_individual, + + def evaluate(self, individual): + if tuple(individual) not in self.cache: + parameters = self.decode(individual) + template = self.build_template(self.TemplateType.Parameters(*parameters)) + try: + tt = tools.benchmark(template, self.statement, self.device) + self.out.write(','.join([str(tt)]+map(str,map(int,parameters)))+'\n') + self.cache[tuple(individual)] = tt + except: + self.cache[tuple(individual)] = 10 + return self.cache[tuple(individual)], + + def optimize(self, maxtime, maxgen, compute_perf, perf_metric): + hof = deap_tools.HallOfFame(1) + # Begin the generational process + gen = 0 + maxtime = time.strptime(maxtime, '%Mm%Ss') + maxtime = maxtime.tm_min*60 + maxtime.tm_sec + start_time = time.time() + + mu = 30 + cxpb = 0.2 + mutpb = 0.7 + + population = self.init(mu) + invalid_ind = [ind for ind in population if not ind.fitness.valid] fitnesses = self.toolbox.map(self.evaluate, invalid_ind) for ind, fit in zip(invalid_ind, fitnesses): ind.fitness.values = fit - # Update the hall of fame with the generated individuals - hof.update(offspring) - # Select the next generation population - population[:] = self.toolbox.select(population + 
+
+    def optimize(self, maxtime, maxgen, compute_perf, perf_metric):
+        hof = deap_tools.HallOfFame(1)
+        # Begin the generational process
+        gen = 0
+        maxtime = time.strptime(maxtime, '%Mm%Ss')
+        maxtime = maxtime.tm_min*60 + maxtime.tm_sec
+        start_time = time.time()
+
+        mu = 30
+        cxpb = 0.2
+        mutpb = 0.7
+
+        population = self.init(mu)
+        invalid_ind = [ind for ind in population if not ind.fitness.valid]
         fitnesses = self.toolbox.map(self.evaluate, invalid_ind)
         for ind, fit in zip(invalid_ind, fitnesses):
             ind.fitness.values = fit
-            # Update the hall of fame with the generated individuals
-            hof.update(offspring)
-            # Select the next generation population
-            population[:] = self.toolbox.select(population + offspring, mu)
-            #Update
-            gen = gen + 1
-            best_profile = '(%s)'%','.join(map(str,GeneticOperators.decode(hof[0])));
-            best_performance = compute_perf(hof[0].fitness.values[0])
-            sys.stdout.write('Time %d | Best %d %s [ for %s ]\r'%(time.time() - start_time, best_performance, perf_metric, best_profile))
-            sys.stdout.flush()
-        sys.stdout.write('\n')
-        return population
-
-
+        hof.update(population)
+
+        # NB: maxgen is accepted for API compatibility but currently unused;
+        # the loop is bounded by wall-clock time only
+        while time.time() - start_time < maxtime:
+            # Vary the population
+            offspring = []
+            for _ in xrange(mu):
+                op_choice = random.random()
+                if op_choice < cxpb:  # Apply crossover
+                    ind1, ind2 = map(self.toolbox.clone, random.sample(population, 2))
+                    ind1, ind2 = self.toolbox.mate(ind1, ind2)
+                    del ind1.fitness.values
+                    offspring.append(ind1)
+                elif op_choice < cxpb + mutpb:  # Apply mutation
+                    ind = self.toolbox.clone(random.choice(population))
+                    ind, = self.toolbox.mutate(ind)
+                    del ind.fitness.values
+                    offspring.append(ind)
+                else:  # Apply reproduction
+                    offspring.append(random.choice(population))
+
+            #~ for x in offspring:
+            #~     print self.decode(x)
+            # Evaluate the individuals with an invalid fitness
+            invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
+            fitnesses = self.toolbox.map(self.evaluate, invalid_ind)
+            for ind, fit in zip(invalid_ind, fitnesses):
+                ind.fitness.values = fit
+            # Update the hall of fame with the generated individuals
+            hof.update(offspring)
+            # Select the next generation population
+            population[:] = self.toolbox.select(population + offspring, mu)
+            #Update
+            gen = gen + 1
+            best_profile = '(%s)'%','.join(map(str, GeneticOperators.decode(hof[0])))
+            best_performance = compute_perf(hof[0].fitness.values[0])
+            sys.stdout.write('Time %d | Best %d %s [ for %s ]\r'%(time.time() - start_time, best_performance, perf_metric, best_profile))
+            sys.stdout.flush()
+        sys.stdout.write('\n')
+        return population
diff --git a/autotune/python/model.py b/autotune/python/model.py
index c1eb3dbca..782ab4790 100644
--- a/autotune/python/model.py
+++ b/autotune/python/model.py
@@ -4,41 +4,41 @@ import numpy as np
 import scipy as sp
 
 def train_model(X, Y, profiles):
-    #Preprocessing
-    scaler = preprocessing.StandardScaler().fit(X);
-    X = scaler.transform(X);
-    ref = np.argmax(np.bincount(np.argmax(Y, axis=1))) #most common profile
-
-    print Y
-    print np.bincount(np.argmax(Y, axis=1))
-    #Cross-validation data-sets
-    cut = int(0.5*X.shape[0]+1);
-    XTr = X[0:cut, :];
-    YTr = Y[0:cut, :];
-    XTe = X[cut:,:];
-    YTe = Y[cut:,:];
-
-    #Train the model
-    print("Training the model...");
-    clf = linear_model.LinearRegression().fit(XTr,YTr);
-
-    #Evaluate the model
-    GFlops = np.empty(XTe.shape[0]);
-    speedups = np.empty(XTe.shape[0]);
-    optspeedups = np.empty(XTe.shape[0]);
-    for i,x in enumerate(XTe):
-        predictions = clf.predict(x);
-        label = np.argmax(predictions);
-        speedups[i] = YTe[i,label]/YTe[i,ref];
-        optspeedups[i] = np.max(YTe[i,:])/YTe[i,ref];
-        GFlops[i] = YTe[i,ref];
-
-    np.set_printoptions(precision=2);
-    print("-----------------");
-    print("Average testing speedup : %f (Optimal : %f)"%(sp.stats.gmean(speedups), sp.stats.gmean(optspeedups)));
-    print("Average GFLOP/s : %f (Default %f, Optimal %f)"%(np.mean(np.multiply(GFlops,speedups)), np.mean(GFlops), np.mean(np.multiply(GFlops,optspeedups))));
-    print("Minimum speedup is %f wrt %i GFlops"%(np.min(speedups), GFlops[np.argmin(speedups)]));
-    print("Maximum speedup is %f wrt %i GFlops"%(np.max(speedups), GFlops[np.argmax(speedups)]));
-    print("--------");
-
-    print clf
+    #Preprocessing
+    scaler = preprocessing.StandardScaler().fit(X)
+    X = scaler.transform(X)
+    ref = np.argmax(np.bincount(np.argmax(Y, axis=1)))  #most common profile
+
+    print Y
+    print np.bincount(np.argmax(Y, axis=1))
+    #Cross-validation data-sets
+    cut = int(0.5*X.shape[0]+1)
+    XTr = X[0:cut, :]
+    YTr = Y[0:cut, :]
+    XTe = X[cut:, :]
+    YTe = Y[cut:, :]
+
+    #Train the model
+    print("Training the model...")
+    clf = linear_model.LinearRegression().fit(XTr, YTr)
+
+    #Evaluate the model
+    GFlops = np.empty(XTe.shape[0])
+    speedups = np.empty(XTe.shape[0])
+    optspeedups = np.empty(XTe.shape[0])
+    for i, x in enumerate(XTe):
+        predictions = clf.predict(x)
+        label = np.argmax(predictions)
+        speedups[i] = YTe[i,label]/YTe[i,ref]
+        optspeedups[i] = np.max(YTe[i,:])/YTe[i,ref]
+        GFlops[i] = YTe[i,ref]
+
+    np.set_printoptions(precision=2)
+    print("-----------------")
+    print("Average testing speedup : %f (Optimal : %f)"%(sp.stats.gmean(speedups), sp.stats.gmean(optspeedups)))
+    print("Average GFLOP/s : %f (Default %f, Optimal %f)"%(np.mean(np.multiply(GFlops,speedups)), np.mean(GFlops), np.mean(np.multiply(GFlops,optspeedups))))
+    print("Minimum speedup is %f wrt %i GFlops"%(np.min(speedups), GFlops[np.argmin(speedups)]))
+    print("Maximum speedup is %f wrt %i GFlops"%(np.max(speedups), GFlops[np.argmax(speedups)]))
+    print("--------")
+
+    print clf
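
train_model() fits a single linear regressor mapping problem features to the measured performance of every profile, then picks the argmax profile at prediction time. An editor's toy version with made-up numbers (only the shapes match the real pipeline):

    # Editor's sketch, not part of the patch: rows of X are feature vectors
    # (e.g. problem sizes), columns of Y are per-profile performance figures.
    import numpy as np
    from sklearn import preprocessing, linear_model

    X = np.array([[256.], [512.], [1024.], [2048.]])
    Y = np.array([[1.0, 2.0], [1.5, 2.5], [4.0, 3.0], [6.0, 3.5]])

    scaler = preprocessing.StandardScaler().fit(X)
    clf = linear_model.LinearRegression().fit(scaler.transform(X), Y)

    pred = clf.predict(scaler.transform([[1536.]]))   # predicted perf per profile
    print(np.argmax(pred))                            # index of the profile to use
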
diff --git a/autotune/python/optimize.py b/autotune/python/optimize.py
index a2439b28f..710165a42 100644
--- a/autotune/python/optimize.py
+++ b/autotune/python/optimize.py
@@ -21,8 +21,8 @@ from genetic import GeneticOperators
 #~ if operation == 'matrix-axpy': return [simd, pow2_2D, pow2_2D, pow2_2D, pow2_2D, fetch]
 #~ if operation == 'row-wise-reduction': return [simd, pow2_2D, pow2_2D, pow2_1D, fetch]
 #~ if operation == 'matrix-product': return [simd, pow2_2D, pow2_2D, pow2_2D, pow2_2D_unrolled, pow2_2D_unrolled, pow2_2D_unrolled, fetch, fetch, [0] + pow2_2D, [0] + pow2_2D]
-#~ 
-
+#~
+
 #~ def exhaustive(statement, context, TemplateType, build_template, parameter_names, all_parameters, compute_perf, perf_metric, out):
 #~     device = context.devices[0]
 #~     nvalid = 0
@@ -46,8 +46,8 @@ from genetic import GeneticOperators
 #~         pass
 #~     sys.stdout.write('\n')
 #~     sys.stdout.flush()
-#~ 
-
+#~
+
 def genetic(statement, context, TemplateType, build_template, parameter_names, compute_perf, perf_metric, out):
-    GA = GeneticOperators(context.devices[0], statement, parameter_names, TemplateType, build_template, out)
-    GA.optimize(maxtime='2m30s', maxgen=1000, compute_perf=compute_perf, perf_metric=perf_metric)
+    GA = GeneticOperators(context.devices[0], statement, parameter_names, TemplateType, build_template, out)
+    GA.optimize(maxtime='2m30s', maxgen=1000, compute_perf=compute_perf, perf_metric=perf_metric)
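
The '2m30s' budget that genetic() hands to optimize() is parsed with time.strptime, where 'm' and 's' in the format string are literal separators:

    # Editor's note: how the maxtime string becomes a wall-clock budget.
    import time

    t = time.strptime('2m30s', '%Mm%Ss')
    budget = t.tm_min * 60 + t.tm_sec   # 2*60 + 30 = 150 seconds of tuning
    print(budget)                       # 150
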
diff --git a/autotune/python/tools.py b/autotune/python/tools.py
index 14a1d9b5b..3136b4ed8 100644
--- a/autotune/python/tools.py
+++ b/autotune/python/tools.py
@@ -5,134 +5,136 @@ from pyviennacl.atidlas import StatementsTuple
 
 class PhysicalLimits:
     def __init__(self, dev):
-        self.compute_capability = pyopencl.characterize.nv_compute_capability(dev)
-        if self.compute_capability[0]==1:
-            if self.compute_capability[1]<=1:
-                self.warps_per_mp = 24
-                self.threads_per_mp = 768
-                self.num_32b_reg_per_mp = 8192
-                self.reg_alloc_unit_size = 256
-            else:
-                self.warps_per_mp = 32
-                self.threads_per_mp = 1024
-                self.num_32b_reg_per_mp = 16384
-                self.reg_alloc_unit_size = 512
-            self.threads_per_warp = 32
-            self.thread_blocks_per_mp = 8
-            self.reg_alloc_granularity = 'block'
-            self.reg_per_thread = 124
-            self.shared_mem_per_mp = 16384
-            self.shared_mem_alloc_unit_size = 512
-            self.warp_alloc_granularity = 2
-            self.max_thread_block_size = 512
-
-        elif self.compute_capability[0]==2:
-            self.threads_per_warp = 32
-            self.warps_per_mp = 48
-            self.threads_per_mp = 1536
-            self.thread_blocks_per_mp = 8
-            self.num_32b_reg_per_mp = 32768
-            self.reg_alloc_unit_size = 64
-            self.reg_alloc_granularity = 'warp'
-            self.reg_per_thread = 63
-            self.shared_mem_per_mp = 49152
-            self.shared_mem_alloc_unit_size = 128
-            self.warp_alloc_granularity = 2
-            self.max_thread_block_size = 1024
-
-        elif self.compute_capability[0]==3:
-            self.threads_per_warp = 32
-            self.warps_per_mp = 64
-            self.threads_per_mp = 2048
-            self.thread_blocks_per_mp = 16
-            self.num_32b_reg_per_mp = 65536
-            self.reg_alloc_unit_size = 256
-            self.reg_alloc_granularity = 'warp'
-            if(self.compute_capability[1]==5):
-                self.reg_per_thread = 255
-            else:
-                self.reg_per_thread = 63
-            self.shared_mem_per_mp = 49152
-            self.shared_mem_alloc_unit_size = 256
-            self.warp_alloc_granularity = 4
-            self.max_thread_block_size = 1024
-
-        else:
-            raise Exception('Compute capability not supported!')
+        self.compute_capability = pyopencl.characterize.nv_compute_capability(dev)
+        if self.compute_capability[0]==1:
+            if self.compute_capability[1]<=1:
+                self.warps_per_mp = 24
+                self.threads_per_mp = 768
+                self.num_32b_reg_per_mp = 8192
+                self.reg_alloc_unit_size = 256
+            else:
+                self.warps_per_mp = 32
+                self.threads_per_mp = 1024
+                self.num_32b_reg_per_mp = 16384
+                self.reg_alloc_unit_size = 512
+            self.threads_per_warp = 32
+            self.thread_blocks_per_mp = 8
+            self.reg_alloc_granularity = 'block'
+            self.reg_per_thread = 124
+            self.shared_mem_per_mp = 16384
+            self.shared_mem_alloc_unit_size = 512
+            self.warp_alloc_granularity = 2
+            self.max_thread_block_size = 512
+
+        elif self.compute_capability[0]==2:
+            self.threads_per_warp = 32
+            self.warps_per_mp = 48
+            self.threads_per_mp = 1536
+            self.thread_blocks_per_mp = 8
+            self.num_32b_reg_per_mp = 32768
+            self.reg_alloc_unit_size = 64
+            self.reg_alloc_granularity = 'warp'
+            self.reg_per_thread = 63
+            self.shared_mem_per_mp = 49152
+            self.shared_mem_alloc_unit_size = 128
+            self.warp_alloc_granularity = 2
+            self.max_thread_block_size = 1024
+
+        elif self.compute_capability[0]==3:
+            self.threads_per_warp = 32
+            self.warps_per_mp = 64
+            self.threads_per_mp = 2048
+            self.thread_blocks_per_mp = 16
+            self.num_32b_reg_per_mp = 65536
+            self.reg_alloc_unit_size = 256
+            self.reg_alloc_granularity = 'warp'
+            if self.compute_capability[1]==5:
+                self.reg_per_thread = 255
+            else:
+                self.reg_per_thread = 63
+            self.shared_mem_per_mp = 49152
+            self.shared_mem_alloc_unit_size = 256
+            self.warp_alloc_granularity = 4
+            self.max_thread_block_size = 1024
+
+        else:
+            raise Exception('Compute capability not supported!')
 
-def _int_floor(value, multiple_of=1):
-    """Round C{value} down to be a C{multiple_of} something."""
-    # Mimicks the Excel "floor" function (for code stolen from occupancy calculator)
-    from math import floor
-    return int(floor(value/multiple_of))*multiple_of
-
-def _int_ceiling(value, multiple_of=1):
-    """Round C{value} up to be a C{multiple_of} something."""
-    # Mimicks the Excel "floor" function (for code stolen from occupancy calculator)
-    from math import ceil
-    return int(ceil(value/multiple_of))*multiple_of
-
 class OccupancyRecord:
 
+    # The rounding helpers move from module scope into the class; they are made
+    # static here so init_nvidia can reach them through self.
+    @staticmethod
+    def _int_floor(value, multiple_of=1):
+        """Round C{value} down to be a C{multiple_of} something."""
+        # Mimics the Excel "floor" function (for code stolen from occupancy calculator)
+        from math import floor
+        return int(floor(value/multiple_of))*multiple_of
+
+    @staticmethod
+    def _int_ceiling(value, multiple_of=1):
+        """Round C{value} up to be a C{multiple_of} something."""
+        # Mimics the Excel "ceiling" function (for code stolen from occupancy calculator)
+        from math import ceil
+        return int(ceil(value/multiple_of))*multiple_of
+
+    def init_nvidia(self, dev, threads, shared_mem, registers):
+        physical_limits = PhysicalLimits(dev)
+        limits = []
+        allocated_warps = max(1, self._int_ceiling(threads/physical_limits.threads_per_warp))
+        max_warps_per_mp = physical_limits.warps_per_mp
+        limits.append((min(physical_limits.thread_blocks_per_mp, self._int_floor(max_warps_per_mp/allocated_warps)), 'warps'))
+
+        if registers>0:
+            if registers > physical_limits.reg_per_thread:
+                limits.append((0, 'registers'))
+            else:
+                allocated_regs = {'warp': allocated_warps,
+                                  'block': self._int_ceiling(self._int_ceiling(allocated_warps, physical_limits.warp_alloc_granularity)*registers*physical_limits.threads_per_warp, allocated_warps)}[physical_limits.reg_alloc_granularity]
+                max_reg_per_mp = {'warp': self._int_floor(physical_limits.num_32b_reg_per_mp/self._int_ceiling(registers*physical_limits.threads_per_warp, physical_limits.reg_alloc_unit_size), physical_limits.warp_alloc_granularity),
+                                  'block': physical_limits.num_32b_reg_per_mp}[physical_limits.reg_alloc_granularity]
+                limits.append((self._int_floor(max_reg_per_mp/allocated_regs), 'registers'))
+
+        if shared_mem>0:
+            allocated_shared_mem = self._int_ceiling(shared_mem, physical_limits.shared_mem_alloc_unit_size)
+            max_shared_mem_per_mp = physical_limits.shared_mem_per_mp
+            limits.append((self._int_floor(max_shared_mem_per_mp/allocated_shared_mem), 'shared memory'))
+
+        self.limit, self.limited_by = min(limits)
+        self.warps_per_mp = self.limit*allocated_warps
+        self.occupancy = 100*self.warps_per_mp/physical_limits.warps_per_mp
+
     def __init__(self, dev, threads, shared_mem=0, registers=0):
-        physical_limits = PhysicalLimits(dev)
-        limits = [];
-        allocated_warps = max(1,_int_ceiling(threads/physical_limits.threads_per_warp))
-        max_warps_per_mp = physical_limits.warps_per_mp;
-        limits.append((min(physical_limits.thread_blocks_per_mp, _int_floor(max_warps_per_mp/allocated_warps)), 'warps'))
-
-        if registers>0:
-            if registers > physical_limits.reg_per_thread:
-                limits.append((0, 'registers'))
-            else:
-                allocated_regs = {'warp': allocated_warps,
-                                  'block': _int_ceiling(_int_ceiling(allocated_warps, physical_limits.warp_alloc_granularity)*registers*physical_limits.threads_per_warp,allocated_warps)}[physical_limits.reg_alloc_granularity]
-                max_reg_per_mp = {'warp': _int_floor(physical_limits.num_32b_reg_per_mp/_int_ceiling(registers*physical_limits.threads_per_warp, physical_limits.reg_alloc_unit_size), physical_limits.warp_alloc_granularity),
-                                  'block':physical_limits.num_32b_reg_per_mp}[physical_limits.reg_alloc_granularity]
-                limits.append((_int_floor(max_reg_per_mp/allocated_regs), 'registers'))
-
-        if shared_mem>0:
-            allocated_shared_mem = _int_ceiling(shared_mem, physical_limits.shared_mem_alloc_unit_size)
-            max_shared_mem_per_mp = physical_limits.shared_mem_per_mp
-            limits.append((_int_floor(max_shared_mem_per_mp/allocated_shared_mem), 'shared memory'))
-
-        self.limit, self.limited_by = min(limits)
-        self.warps_per_mp = self.limit*allocated_warps
-        self.occupancy = 100*self.warps_per_mp/physical_limits.warps_per_mp
-
+        # init_nvidia is a bound method: passing self explicitly (as the original
+        # patch did) would raise a TypeError on the first call
+        self.init_nvidia(dev, threads, shared_mem, registers)
+
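
A worked instance of the arithmetic in init_nvidia(), as an editor's illustration (not code from the patch), assuming a compute-capability 2.x device, i.e. per PhysicalLimits above: 32 threads/warp, 48 warps/MP, 8 blocks/MP, 48 KB shared memory/MP, 128-byte shared-memory allocation units:

    # Editor's sketch: occupancy limited by warps vs. shared memory.
    threads, shared_mem = 256, 12288

    allocated_warps = max(1, (threads + 31) // 32)           # 8 warps per block
    warp_limit = min(8, 48 // allocated_warps)               # 6 blocks/MP from warps
    smem_limit = 49152 // ((shared_mem + 127) // 128 * 128)  # 4 blocks/MP from lmem
    limit = min(warp_limit, smem_limit)                      # shared memory binds first
    occupancy = 100 * limit * allocated_warps // 48          # 66%

Profiles whose occupancy falls below 15% are rejected outright by skip() and benchmark() below.
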
 def skip(template, statement, device):
-    statements = StatementsTuple(statement)
-    registers_usage = template.registers_usage(statements)/4
-    lmem_usage = template.lmem_usage(statements)
-    local_size = template.parameters.local_size_0*template.parameters.local_size_1
-    occupancy_record = OccupancyRecord(device, local_size, lmem_usage, registers_usage)
-    if template.check(statement) or occupancy_record.occupancy < 15:
+    statements = StatementsTuple(statement)
+    registers_usage = template.registers_usage(statements)/4
+    lmem_usage = template.lmem_usage(statements)
+    local_size = template.parameters.local_size_0*template.parameters.local_size_1
+    occupancy_record = OccupancyRecord(device, local_size, lmem_usage, registers_usage)
+    if template.check(statement) or occupancy_record.occupancy < 15:
         return True
-    return False
-
+    return False
+
 def benchmark(template, statement, device):
-    statements = StatementsTuple(statement)
-    registers_usage = template.registers_usage(statements)/4
-    lmem_usage = template.lmem_usage(statements)
-    local_size = template.parameters.local_size_0*template.parameters.local_size_1
-    occupancy_record = OccupancyRecord(device, local_size, lmem_usage, registers_usage)
-    if occupancy_record.occupancy < 15 :
+    statements = StatementsTuple(statement)
+    registers_usage = template.registers_usage(statements)/4
+    lmem_usage = template.lmem_usage(statements)
+    local_size = template.parameters.local_size_0*template.parameters.local_size_1
+    occupancy_record = OccupancyRecord(device, local_size, lmem_usage, registers_usage)
+    if occupancy_record.occupancy < 15:
         raise ValueError("Template has too low occupancy")
-    else:
+    else:
         #~ try:
         template.execute(statement, True)
         statement.result.context.finish_all_queues()
         N = 0
         current_time = 0
         while current_time < 1e-2:
-            time_before = time.time()
-            template.execute(statement,False)
-            statement.result.context.finish_all_queues()
-            current_time += time.time() - time_before
-            N+=1
+            time_before = time.time()
+            template.execute(statement, False)
+            statement.result.context.finish_all_queues()
+            current_time += time.time() - time_before
+            N += 1
         return current_time/N
         #~ except:
-        #~ raise ValueError("Invalid template")
+        #~     raise ValueError("Invalid template")
diff --git a/autotune/python/utils.py b/autotune/python/utils.py
index ae573a974..f8871eda9 100644
--- a/autotune/python/utils.py
+++ b/autotune/python/utils.py
@@ -5,18 +5,18 @@ all_devices = [d for platform in cl.get_platforms() for d in platform.get_devices()]
 
 DEVICE_TYPE_PREFIX = { cl.device_type.GPU: 'gpu',
                        cl.device_type.CPU: 'cpu',
-                       cl.device_type.ACCELERATOR: 'accelerator' }
+                       cl.device_type.ACCELERATOR: 'accelerator' }
-
+
 DEVICE_TYPE_CL_NAME = { cl.device_type.GPU: 'CL_DEVICE_TYPE_GPU',
                         cl.device_type.CPU: 'CL_DEVICE_TYPE_CPU',
-                        cl.device_type.ACCELERATOR: 'CL_DEVICE_TYPE_ACCELERATOR' }
+                        cl.device_type.ACCELERATOR: 'CL_DEVICE_TYPE_ACCELERATOR' }
-
+
 VENDOR_PREFIX = { vcl.opencl.VendorId.beignet_id: 'beignet',
                   vcl.opencl.VendorId.nvidia_id: 'nvidia',
                   vcl.opencl.VendorId.amd_id: 'amd',
-                  vcl.opencl.VendorId.intel_id: 'intel' }
+                  vcl.opencl.VendorId.intel_id: 'intel' }
 
 DEVICES_PRESETS = {'all': all_devices,
@@ -26,8 +26,8 @@ DEVICES_PRESETS = {'all': all_devices,
 }
-
+
 def sanitize_string(string, keep_chars = ['_']):
-    string = string.replace(' ', '_').lower()
-    string = "".join(c for c in string if c.isalnum() or c in keep_chars).rstrip()
-    return string
+    string = string.replace(' ', '_').lower()
+    string = "".join(c for c in string if c.isalnum() or c in keep_chars).rstrip()
+    return string
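
benchmark() amortizes timer resolution by re-running the kernel until at least 10 ms of wall time has accumulated, then reporting the mean per run. The same strategy in isolation, as an editor's sketch (time_execution and run are hypothetical helpers, not part of the patch):

    # Editor's sketch of the timing loop in benchmark().
    import time

    def time_execution(run, min_time=1e-2):
        run()                      # warm-up; the first call also triggers compilation
        n, total = 0, 0.0
        while total < min_time:
            t0 = time.time()
            run()
            total += time.time() - t0
            n += 1
        return total / n           # mean seconds per execution
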
diff --git a/autotune/python/vclio.py b/autotune/python/vclio.py
index d1cddca9b..088582e57 100644
--- a/autotune/python/vclio.py
+++ b/autotune/python/vclio.py
@@ -3,114 +3,114 @@ import os
 import utils
 
 def append_include(data, path):
-    include_name = '#include "' + path +'"\n'
-    already_included = data.find(include_name)
-    if already_included == -1:
-        insert_index = data.index('\n', data.index('#define')) + 1
-        return data[:insert_index] + '\n' + include_name + data[insert_index:]
-    return data
-
+    include_name = '#include "' + path + '"\n'
+    already_included = data.find(include_name)
+    if already_included == -1:
+        insert_index = data.index('\n', data.index('#define')) + 1
+        return data[:insert_index] + '\n' + include_name + data[insert_index:]
+    return data
+
 def generate_viennacl_headers(viennacl_root, device, datatype, operation, additional_parameters, parameters):
-    builtin_database_dir = os.path.join(viennacl_root, "device_specific", "builtin_database")
-    if not os.path.isdir(builtin_database_dir):
-        raise EnvironmentError('ViennaCL root path is incorrect. Cannot access ' + builtin_database_dir + '!\n'
-                               'Your version of ViennaCL may be too old and/or corrupted.')
-
-    function_name_dict = { vcl.float32: 'add_4B',
-                           vcl.float64: 'add_8B' }
-
-    additional_parameters_dict = {'N': "char_to_type<'N'>",
-                                  'T': "char_to_type<'T'>"}
-
-    #Create the device-specific headers
-    cpp_device_name = utils.sanitize_string(device.name)
-    function_name = function_name_dict[datatype]
-    operation = operation.replace('-','_')
-
-    cpp_class_name = operation + '_template'
-    header_name = cpp_device_name + ".hpp"
-    function_declaration = 'inline void ' + function_name + '(' + ', '.join(['database_type<' + cpp_class_name + '::parameters_type> & db'] + \
-                           [additional_parameters_dict[x] for x in additional_parameters]) + ')'
-
-    device_type_prefix = utils.DEVICE_TYPE_PREFIX[device.type]
-    vendor_prefix = utils.VENDOR_PREFIX[device.vendor_id]
-    architecture_family = vcl.opencl.architecture_family(device.vendor_id, device.name)
-
-    header_hierarchy = ["devices", device_type_prefix, vendor_prefix, architecture_family]
-    header_directory = os.path.join(builtin_database_dir, *header_hierarchy)
-    header_path = os.path.join(header_directory, header_name)
-
-    if not os.path.exists(header_directory):
-        os.makedirs(header_directory)
-
-    if os.path.exists(header_path):
-        with open (header_path, "r") as myfile:
-            data=myfile.read()
-    else:
-        data = ''
-
-    if not data:
-        ifndef_suffix = ('_'.join(header_hierarchy) + '_hpp_').upper()
-        data = ('#ifndef VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_' + ifndef_suffix + '\n'
-                '#define VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_' + ifndef_suffix + '\n'
-                '\n'
-                '#include "viennacl/device_specific/forwards.h"\n'
-                '#include "viennacl/device_specific/builtin_database/common.hpp"\n'
-                '\n'
-                'namespace viennacl{\n'
-                'namespace device_specific{\n'
-                'namespace builtin_database{\n'
-                'namespace devices{\n'
-                'namespace ' + device_type_prefix + '{\n'
-                'namespace ' + vendor_prefix + '{\n'
-                'namespace ' + architecture_family + '{\n'
-                'namespace ' + cpp_device_name + '{\n'
-                '\n'
-                '}\n'
-                '}\n'
-                '}\n'
-                '}\n'
-                '}\n'
-                '}\n'
-                '}\n'
-                '}\n'
-                '#endif\n'
-                '')
-
-    data = append_include(data, 'viennacl/device_specific/templates/' + cpp_class_name + '.hpp')
-
-    add_to_database_arguments = [vendor_prefix + '_id', utils.DEVICE_TYPE_CL_NAME[device.type], 'ocl::'+architecture_family,
-                                 '"' + device.name + '"', cpp_class_name + '::parameters' + str(parameters)]
-    core = ' db.' + function_name + '(' + ', '.join(add_to_database_arguments) + ');'
-
-    already_declared = data.find(function_declaration)
-    if already_declared==-1:
-        substr = 'namespace ' + cpp_device_name + '{\n'
-        insert_index = data.index(substr) + len(substr)
-        data = data[:insert_index] + '\n' + function_declaration + '\n{\n' + core + '\n}\n' + data[insert_index:]
-    else:
-        i1 = data.find('{', already_declared)
-        if data[i1-1]=='\n':
-            i1 = i1 - 1
-        i2 = data.find('}', already_declared) + 1
-        data = data[:i1] + '\n{\n' + core + '\n}' + data[i2:]
-
-    #Write the header file
-    with open(header_path, "w+") as myfile:
-        myfile.write(data)
-
-    #Updates the global ViennaCL headers
-    with open(os.path.join(builtin_database_dir, operation + '.hpp'), 'r+') as operation_header:
-        data = operation_header.read()
-        data = append_include(data, os.path.relpath(header_path, os.path.join(viennacl_root, os.pardir)))
-
-        scope_name = '_'.join(('init', operation) + additional_parameters)
-        scope = data.index(scope_name)
-        function_call = ' ' + '::'.join(header_hierarchy + [cpp_device_name, function_name]) + '(' + ', '.join(['result'] + [additional_parameters_dict[k] + '()' for k in additional_parameters]) + ')'
-        if function_call not in data:
-            insert_index = data.rindex('\n', 0, data.index('return result', scope))
-            data = data[:insert_index] + function_call + ';\n' + data[insert_index:]
-
-        operation_header.seek(0)
-        operation_header.truncate()
-        operation_header.write(data)
+    builtin_database_dir = os.path.join(viennacl_root, "device_specific", "builtin_database")
+    if not os.path.isdir(builtin_database_dir):
+        raise EnvironmentError('ViennaCL root path is incorrect. Cannot access ' + builtin_database_dir + '!\n'
+                               'Your version of ViennaCL may be too old and/or corrupted.')
+
+    function_name_dict = { vcl.float32: 'add_4B',
+                           vcl.float64: 'add_8B' }
+
+    additional_parameters_dict = {'N': "char_to_type<'N'>",
+                                  'T': "char_to_type<'T'>"}
+
+    #Create the device-specific headers
+    cpp_device_name = utils.sanitize_string(device.name)
+    function_name = function_name_dict[datatype]
+    operation = operation.replace('-', '_')
+
+    cpp_class_name = operation + '_template'
+    header_name = cpp_device_name + ".hpp"
+    function_declaration = 'inline void ' + function_name + '(' + ', '.join(['database_type<' + cpp_class_name + '::parameters_type> & db'] + \
+                           [additional_parameters_dict[x] for x in additional_parameters]) + ')'
+
+    device_type_prefix = utils.DEVICE_TYPE_PREFIX[device.type]
+    vendor_prefix = utils.VENDOR_PREFIX[device.vendor_id]
+    architecture_family = vcl.opencl.architecture_family(device.vendor_id, device.name)
+
+    header_hierarchy = ["devices", device_type_prefix, vendor_prefix, architecture_family]
+    header_directory = os.path.join(builtin_database_dir, *header_hierarchy)
+    header_path = os.path.join(header_directory, header_name)
+
+    if not os.path.exists(header_directory):
+        os.makedirs(header_directory)
+
+    if os.path.exists(header_path):
+        with open(header_path, "r") as myfile:
+            data = myfile.read()
+    else:
+        data = ''
+
+    if not data:
+        ifndef_suffix = ('_'.join(header_hierarchy) + '_hpp_').upper()
+        data = ('#ifndef VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_' + ifndef_suffix + '\n'
+                '#define VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_' + ifndef_suffix + '\n'
+                '\n'
+                '#include "viennacl/device_specific/forwards.h"\n'
+                '#include "viennacl/device_specific/builtin_database/common.hpp"\n'
+                '\n'
+                'namespace viennacl{\n'
+                'namespace device_specific{\n'
+                'namespace builtin_database{\n'
+                'namespace devices{\n'
+                'namespace ' + device_type_prefix + '{\n'
+                'namespace ' + vendor_prefix + '{\n'
+                'namespace ' + architecture_family + '{\n'
+                'namespace ' + cpp_device_name + '{\n'
+                '\n'
+                '}\n'
+                '}\n'
+                '}\n'
+                '}\n'
+                '}\n'
+                '}\n'
+                '}\n'
+                '}\n'
+                '#endif\n'
+                '')
+
+    data = append_include(data, 'viennacl/device_specific/templates/' + cpp_class_name + '.hpp')
+
+    add_to_database_arguments = [vendor_prefix + '_id', utils.DEVICE_TYPE_CL_NAME[device.type], 'ocl::' + architecture_family,
+                                 '"' + device.name + '"', cpp_class_name + '::parameters' + str(parameters)]
+    core = ' db.' + function_name + '(' + ', '.join(add_to_database_arguments) + ');'
+
+    already_declared = data.find(function_declaration)
+    if already_declared==-1:
+        substr = 'namespace ' + cpp_device_name + '{\n'
+        insert_index = data.index(substr) + len(substr)
+        data = data[:insert_index] + '\n' + function_declaration + '\n{\n' + core + '\n}\n' + data[insert_index:]
+    else:
+        i1 = data.find('{', already_declared)
+        if data[i1-1]=='\n':
+            i1 = i1 - 1
+        i2 = data.find('}', already_declared) + 1
+        data = data[:i1] + '\n{\n' + core + '\n}' + data[i2:]
+
+    #Write the header file
+    with open(header_path, "w+") as myfile:
+        myfile.write(data)
+
+    #Updates the global ViennaCL headers
+    with open(os.path.join(builtin_database_dir, operation + '.hpp'), 'r+') as operation_header:
+        data = operation_header.read()
+        data = append_include(data, os.path.relpath(header_path, os.path.join(viennacl_root, os.pardir)))
+
+        scope_name = '_'.join(('init', operation) + additional_parameters)
+        scope = data.index(scope_name)
+        function_call = ' ' + '::'.join(header_hierarchy + [cpp_device_name, function_name]) + '(' + ', '.join(['result'] + [additional_parameters_dict[k] + '()' for k in additional_parameters]) + ')'
+        if function_call not in data:
+            insert_index = data.rindex('\n', 0, data.index('return result', scope))
+            data = data[:insert_index] + function_call + ';\n' + data[insert_index:]
+
+        operation_header.seek(0)
+        operation_header.truncate()
+        operation_header.write(data)
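
To make the string splicing in generate_viennacl_headers() concrete, here is what the include guard and the namespace-qualified call look like for a hypothetical NVIDIA Fermi GPU; all of the values below are illustrative, not taken from a real device query:

    # Editor's sketch, not part of the patch.
    header_hierarchy = ['devices', 'gpu', 'nvidia', 'fermi']
    cpp_device_name = 'geforce_gtx_470'   # utils.sanitize_string('GeForce GTX 470')

    ifndef_suffix = ('_'.join(header_hierarchy) + '_hpp_').upper()
    guard = 'VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_' + ifndef_suffix
    qualified = '::'.join(header_hierarchy + [cpp_device_name, 'add_4B'])

    print(guard)      # VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_NVIDIA_FERMI_HPP_
    print(qualified)  # devices::gpu::nvidia::fermi::geforce_gtx_470::add_4B
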