57 lines
1.6 KiB
Python
57 lines
1.6 KiB
Python
import os
|
|
import sys
|
|
import re
|
|
import random
|
|
import numpy as np
|
|
|
|
def sample_profiles(execution_handler, generator):
    """Collect the distinct profiles produced for the sampled inputs.

    Every sample ``x`` yielded by ``generator`` is printed and passed to
    ``execution_handler(x)``.  Each distinct return value (compared with
    ``==``) is recorded once, in order of first appearance.  The profile
    obtained for the sample with the largest Euclidean norm is then moved
    to the front of the list.

    Args:
        execution_handler: Callable taking one sample and returning the
            profile associated with it.
        generator: Iterable of equally sized numeric sequences.

    Returns:
        list: The distinct profiles; the one belonging to the largest-norm
        sample comes first, the others keep their order of first
        appearance.

    Raises:
        ValueError: If ``generator`` yields no samples.
    """
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print("Sampling profiles...")

    samples = []   # every sampled input, in generation order
    labels = []    # index into `profiles` of the profile each sample produced
    profiles = []
    for x in generator:
        print(x)
        y = execution_handler(x)
        if y not in profiles:
            profiles.append(y)
        samples.append(x)
        labels.append(profiles.index(y))

    if not samples:
        raise ValueError("generator yielded no samples")

    # Stack once at the end instead of growing the array on every iteration
    # (each np.vstack call copies everything accumulated so far).
    X = np.vstack([np.empty((0, len(samples[0])))] + samples)

    # Profile of the sample with the largest norm (first one on ties).
    idx = labels[int(np.argmax(np.linalg.norm(X, axis=1)))]
    return [profiles[idx]] + [p for k, p in enumerate(profiles) if k != idx]
|
|
|
|
def sample_dataset(prefix_name, profiles, execution_handler, generator):
    """Evaluate every profile on every sample and save the resulting dataset.

    For each sample ``x`` yielded by ``generator`` and each profile ``y``,
    ``execution_handler(x, os.devnull, y)`` is called and its result stored
    as a float (presumably a timing -- confirm against the handler).  The
    profile columns are then reordered so that the results on the sample
    with the largest Euclidean norm are ascending.  ``X.csv``, ``Y.csv``
    and ``profiles.csv`` are written to ``data/<prefix_name>/`` relative to
    the current working directory.

    Args:
        prefix_name: Name of the sub-directory of ``data`` to write into.
        profiles: Sequence of profiles; must be storable by ``np.savetxt``.
        execution_handler: Callable ``(sample, output_path, profile)``
            returning a scalar measurement.
        generator: Iterable of equally sized numeric sequences.

    Returns:
        tuple: ``(X, Y, profiles)`` where ``X`` has one row per sample,
        ``Y`` has one row per sample and one column per profile, and
        ``profiles`` is the reordered list matching the columns of ``Y``.

    Raises:
        ValueError: If ``generator`` yields no samples.
    """
    P = len(profiles)
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print("Generating the dataset...")

    samples = []
    measurements = []
    for i, x in enumerate(generator):
        new_y = np.zeros(P)
        for j, y in enumerate(profiles):
            new_y[j] = execution_handler(x, os.devnull, y)
        samples.append(x)
        measurements.append(new_y)
        if i % 10 == 0:
            # '\r' rewinds the cursor so the counter is updated in place.
            sys.stdout.write('%d data points generated\r'%i)
            sys.stdout.flush()

    if not samples:
        raise ValueError("generator yielded no samples")

    # Stack once at the end instead of growing the arrays on every iteration
    # (each np.vstack call copies everything accumulated so far).
    X = np.vstack([np.empty((0, len(samples[0])))] + samples)
    Y = np.vstack([np.empty((0, P))] + measurements)

    # Order the profiles by their result on the largest-norm sample.
    idx = np.argsort(Y[np.argmax(np.linalg.norm(X, axis=1)), :])
    Y = Y[:, idx]
    profiles = [profiles[k] for k in idx]

    out_dir = os.path.join("data", prefix_name)
    try:
        os.makedirs(out_dir)
    except OSError:
        # An already existing directory is fine; anything else is an error.
        if not os.path.isdir(out_dir):
            raise
    np.savetxt(os.path.join(out_dir, "X.csv"), X)
    np.savetxt(os.path.join(out_dir, "Y.csv"), Y)
    np.savetxt(os.path.join(out_dir, "profiles.csv"), profiles)

    return X, Y, profiles
|