Assert is a keyword, not a function

Jie Tang
2016-06-16 00:20:22 -07:00
parent 5372d34b37
commit 36d476224e
11 changed files with 101 additions and 101 deletions
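
Background on the change (not part of the commit itself): assert is a statement, so the parentheses in "assert(...)" are only grouping. With a single condition the behaviour is identical, but the call-like form invites "assert(cond, msg)", which asserts a non-empty two-element tuple and therefore can never fail. A minimal illustration, not taken from the changed files (x is a throwaway variable):

# Illustrative only; not from the diff below.
x = []

# Asserting a tuple: (False, "...") is a non-empty tuple, hence truthy, so this
# check silently passes (CPython warns "assertion is always true, perhaps
# remove parentheses?").
assert (len(x) > 0, "x must not be empty")

# The statement form evaluates the condition and reports the message on failure.
assert len(x) > 0, "x must not be empty"   # raises AssertionError: x must not be empty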

View File

@@ -41,7 +41,7 @@ class AlgorithmicEnv(Env):
     def _get_obs(self, pos=None):
         if pos is None:
             pos = self.x
-        assert(isinstance(pos, np.ndarray) and pos.shape[0] == self.inp_dim)
+        assert isinstance(pos, np.ndarray) and pos.shape[0] == self.inp_dim
         if ha(pos) not in self.content:
             self.content[ha(pos)] = self.base
         return self.content[ha(pos)]
@@ -90,7 +90,7 @@ class AlgorithmicEnv(Env):
             x_str = label + x_str
             return x_str
         else:
-            assert(False)
+            assert False

     def _render(self, mode='human', close=False):

View File

@@ -406,7 +406,7 @@ class BipedalWalker(gym.Env):
             1.0 if self.legs[3].ground_contact else 0.0
             ]
         state += [l.fraction for l in self.lidar]
-        assert(len(state)==24)
+        assert len(state)==24

         self.scroll = pos.x - VIEWPORT_W/SCALE/5

View File

@@ -263,7 +263,7 @@ class LunarLander(gym.Env):
             1.0 if self.legs[0].ground_contact else 0.0,
             1.0 if self.legs[1].ground_contact else 0.0
             ]
-        assert(len(state)==8)
+        assert len(state)==8

         reward = 0
         shaping = \

View File

@@ -21,7 +21,7 @@ class OneRoundDeterministicRewardEnv(gym.Env):
         self._reset()

     def _step(self, action):
-        assert(self.action_space.contains(action))
+        assert self.action_space.contains(action)
         if action:
             reward = 1
         else:

View File

@@ -22,7 +22,7 @@ class OneRoundNondeterministicRewardEnv(gym.Env):
         self._reset()

     def _step(self, action):
-        assert(self.action_space.contains(action))
+        assert self.action_space.contains(action)
         if action:
             #your agent should figure out that this option has expected value 2.5
             reward = random.choice([0, 5])

View File

@@ -28,7 +28,7 @@ class TwoRoundDeterministicRewardEnv(gym.Env):
     def _step(self, action):
         rewards = [[0, 3], [1, 2]]

-        assert(self.action_space.contains(action))
+        assert self.action_space.contains(action)

         if self.firstAction is None:
             self.firstAction = action

View File

@@ -38,7 +38,7 @@ class TwoRoundNondeterministicRewardEnv(gym.Env):
             ]
         ]

-        assert(self.action_space.contains(action))
+        assert self.action_space.contains(action)

         if self.firstAction is None:
             self.firstAction = action

View File

@@ -19,29 +19,29 @@ import math
 class ConvergenceControl(gym.Env):
     """Environment where agent learns to tune parameters of training
     DURING the training of the neural network to improve its convergence /
     performance on the validation set.

     Parameters can be tuned after every epoch. Parameters tuned are learning
     rate, learning rate decay, momentum, batch size, L1 / L2 regularization.

     Agent is provided with feedback on validation accuracy, as well as on
     the size of dataset and number of classes, and some coarse description of
     architecture being optimized.

     The most close publication that I am aware of that tries to solve similar
     environment is
     http://research.microsoft.com/pubs/259048/daniel2016stepsizecontrol.pdf
     """

     metadata = {"render.modes": ["human"]}

     def __init__(self, natural=False):
         """
         Initialize environment
         """

         # I use array of len 1 to store constants (otherwise there were some errors)
         self.action_space = spaces.Tuple((
             spaces.Box(-5.0,0.0, 1), # learning rate
@@ -51,13 +51,13 @@ class ConvergenceControl(gym.Env):
             spaces.Box(-6.0,1.0, 1), # l1 reg
             spaces.Box(-6.0,1.0, 1), # l2 reg
         ))

         # observation features, in order: num of instances, num of labels,
         # number of filter in part A / B of neural net, num of neurons in
         # output layer, validation accuracy after training with given
         # parameters
         self.observation_space = spaces.Box(-1e5,1e5, 6) # validation accuracy

         # Start the first game
         self._reset()
@@ -65,94 +65,94 @@ class ConvergenceControl(gym.Env):
""" """
Perform some action in the environment Perform some action in the environment
""" """
assert(self.action_space.contains(action)) assert self.action_space.contains(action)
lr, decay, momentum, batch_size, l1, l2 = action; lr, decay, momentum, batch_size, l1, l2 = action;
# map ranges of inputs # map ranges of inputs
lr = (10.0 ** lr[0]).astype('float32') lr = (10.0 ** lr[0]).astype('float32')
decay = (10.0 ** decay[0]).astype('float32') decay = (10.0 ** decay[0]).astype('float32')
momentum = (10.0 ** momentum[0]).astype('float32') momentum = (10.0 ** momentum[0]).astype('float32')
batch_size = int( 2 ** batch_size[0] ) batch_size = int( 2 ** batch_size[0] )
l1 = (10.0 ** l1[0]).astype('float32') l1 = (10.0 ** l1[0]).astype('float32')
l2 = (10.0 ** l2[0]).astype('float32') l2 = (10.0 ** l2[0]).astype('float32')
""" """
names = ["lr", "decay", "mom", "batch", "l1", "l2"] names = ["lr", "decay", "mom", "batch", "l1", "l2"]
values = [lr, decay, momentum, batch_size, l1, l2] values = [lr, decay, momentum, batch_size, l1, l2]
for n,v in zip(names, values): for n,v in zip(names, values):
print(n,v) print(n,v)
""" """
X,Y,Xv,Yv = self.data X,Y,Xv,Yv = self.data
# set parameters of training step # set parameters of training step
self.sgd.lr.set_value(lr) self.sgd.lr.set_value(lr)
self.sgd.decay.set_value(decay) self.sgd.decay.set_value(decay)
self.sgd.momentum.set_value(momentum) self.sgd.momentum.set_value(momentum)
self.reg.l1.set_value(l1) self.reg.l1.set_value(l1)
self.reg.l2.set_value(l2) self.reg.l2.set_value(l2)
# train model for one epoch_idx # train model for one epoch_idx
H = self.model.fit(X, Y, H = self.model.fit(X, Y,
batch_size=int(batch_size), batch_size=int(batch_size),
nb_epoch=1, nb_epoch=1,
shuffle=True) shuffle=True)
_, acc = self.model.evaluate(Xv,Yv) _, acc = self.model.evaluate(Xv,Yv)
# save best validation # save best validation
if acc > self.best_val: if acc > self.best_val:
self.best_val = acc self.best_val = acc
self.previous_acc = acc; self.previous_acc = acc;
self.epoch_idx = self.epoch_idx + 1 self.epoch_idx = self.epoch_idx + 1
diverged = math.isnan( H.history['loss'][-1] ) diverged = math.isnan( H.history['loss'][-1] )
done = self.epoch_idx == 20 or diverged done = self.epoch_idx == 20 or diverged
if diverged: if diverged:
""" maybe not set to a very large value; if you get something nice, """ maybe not set to a very large value; if you get something nice,
but then diverge, maybe it is not too bad but then diverge, maybe it is not too bad
""" """
reward = -100.0 reward = -100.0
else: else:
reward = self.best_val reward = self.best_val
# as number of labels increases, learning problem becomes # as number of labels increases, learning problem becomes
# more difficult for fixed dataset size. In order to avoid # more difficult for fixed dataset size. In order to avoid
# for the agent to ignore more complex datasets, on which # for the agent to ignore more complex datasets, on which
# accuracy is low and concentrate on simple cases which bring bulk # accuracy is low and concentrate on simple cases which bring bulk
# of reward, I normalize by number of labels in dataset # of reward, I normalize by number of labels in dataset
reward = reward * self.nb_classes reward = reward * self.nb_classes
# formula below encourages higher best validation # formula below encourages higher best validation
reward = reward + reward ** 2 reward = reward + reward ** 2
return self._get_obs(), reward, done, {} return self._get_obs(), reward, done, {}
def _render(self, mode="human", close=False): def _render(self, mode="human", close=False):
if close: if close:
return return
print(">> Step ",self.epoch_idx,"best validation:", self.best_val) print(">> Step ",self.epoch_idx,"best validation:", self.best_val)
def _get_obs(self): def _get_obs(self):
""" """
Observe the environment. Is usually used after the step is taken Observe the environment. Is usually used after the step is taken
""" """
# observation as per observation space # observation as per observation space
return np.array([self.nb_classes, return np.array([self.nb_classes,
self.nb_inst, self.nb_inst,
self.convAsz, self.convAsz,
self.convBsz, self.convBsz,
@@ -160,130 +160,130 @@ class ConvergenceControl(gym.Env):
                          self.previous_acc])

     def data_mix(self):

         # randomly choose dataset
         dataset = random.choice(['mnist', 'cifar10', 'cifar100'])#

         n_labels = 10

         if dataset == "mnist":
             data = mnist.load_data()

         if dataset == "cifar10":
             data = cifar10.load_data()

         if dataset == "cifar100":
             data = cifar100.load_data()
             n_labels = 100

         # Choose dataset size. This affects regularization needed
         r = np.random.rand()

         # not using full dataset to make regularization more important and
         # speed up testing a little bit
         data_size = int( 2000 * (1-r) + 40000 * r )

         # I do not use test data for validation, but last 10000 instances in dataset
         # so that trained models can be compared to results in literature
         (CX, CY), (CXt, CYt) = data

         if dataset == "mnist":
             CX = np.expand_dims(CX, axis=1)

         data = CX[:data_size], CY[:data_size], CX[-10000:], CY[-10000:];

         return data, n_labels

     def _reset(self):

         reg = WeightRegularizer()

         # a hack to make regularization variable
         reg.l1 = K.variable(0.0)
         reg.l2 = K.variable(0.0)

         data, nb_classes = self.data_mix()
         X, Y, Xv, Yv = data

         # input square image dimensions
         img_rows, img_cols = X.shape[-1], X.shape[-1]
         img_channels = X.shape[1]

         # save number of classes and instances
         self.nb_classes = nb_classes
         self.nb_inst = len(X)

         # convert class vectors to binary class matrices
         Y = np_utils.to_categorical(Y, nb_classes)
         Yv = np_utils.to_categorical(Yv, nb_classes)

         # here definition of the model happens
         model = Sequential()

         # double true for icnreased probability of conv layers
         if random.choice([True, True, False]):

             # Choose convolution #1
             self.convAsz = random.choice([32,64,128])

             model.add(Convolution2D(self.convAsz, 3, 3, border_mode='same',
                                     input_shape=(img_channels, img_rows, img_cols),
                                     W_regularizer = reg,
                                     b_regularizer = reg))
             model.add(Activation('relu'))

             model.add(Convolution2D(self.convAsz, 3, 3,
                                     W_regularizer = reg,
                                     b_regularizer = reg))
             model.add(Activation('relu'))

             model.add(MaxPooling2D(pool_size=(2, 2)))
             model.add(Dropout(0.25))

             # Choose convolution size B (if needed)
             self.convBsz = random.choice([0,32,64])

             if self.convBsz > 0:
                 model.add(Convolution2D(self.convBsz, 3, 3, border_mode='same',
                                         W_regularizer = reg,
                                         b_regularizer = reg))
                 model.add(Activation('relu'))

                 model.add(Convolution2D(self.convBsz, 3, 3,
                                         W_regularizer = reg,
                                         b_regularizer = reg))
                 model.add(Activation('relu'))

                 model.add(MaxPooling2D(pool_size=(2, 2)))
                 model.add(Dropout(0.25))

             model.add(Flatten())

         else:
             model.add(Flatten(input_shape=(img_channels, img_rows, img_cols)))
             self.convAsz = 0
             self.convBsz = 0

         # choose fully connected layer size
         self.densesz = random.choice([256,512,762])

         model.add(Dense(self.densesz,
                         W_regularizer = reg,
                         b_regularizer = reg))
         model.add(Activation('relu'))
         model.add(Dropout(0.5))

         model.add(Dense(nb_classes,
                         W_regularizer = reg,
                         b_regularizer = reg))
         model.add(Activation('softmax'))

         # let's train the model using SGD + momentum (how original).
         sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
         model.compile(loss='categorical_crossentropy',
                       optimizer=sgd,
                       metrics=['accuracy'])

         X = X.astype('float32')
         Xv = Xv.astype('float32')
         X /= 255
@@ -292,12 +292,12 @@ class ConvergenceControl(gym.Env):
         self.data = (X,Y,Xv,Yv)
         self.model = model
         self.sgd = sgd

         # initial accuracy values
         self.best_val = 0.0
         self.previous_acc = 0.0

         self.reg = reg
         self.epoch_idx = 0

         return self._get_obs()
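
The docstring above describes the intended use of this environment: an agent picks new hyperparameters after every epoch and is rewarded with (normalized) validation accuracy. A minimal, hedged sketch of such an agent loop follows; it is not part of the commit, it assumes ConvergenceControl has been imported from its gym module and that the Keras/Theano dependencies it needs are installed, and it uses random actions so every length-1 Box component stays within its declared bounds:

# Hedged sketch, not from the diff: drive ConvergenceControl with random actions.
env = ConvergenceControl()    # __init__ calls _reset(): picks a dataset, builds a random CNN
obs = env.reset()             # (nb_classes, nb_inst, convAsz, convBsz, ..., previous_acc)

done = False
while not done:
    # action_space.sample() respects the Box bounds; _step() maps components back,
    # e.g. lr = 10**lr[0], batch_size = int(2**batch_size[0]).
    action = env.action_space.sample()
    obs, reward, done, info = env.step(action)

The loop terminates after 20 epochs or as soon as training diverges, per the done flag computed in _step above.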

View File

@@ -59,7 +59,7 @@ class CNNClassifierTraining(gym.Env):
""" """
Perform some action in the environment Perform some action in the environment
""" """
assert (self.action_space.contains(action)) assert self.action_space.contains(action)
lr, decay, momentum, batch_size, l1, l2, convs, fcs = action lr, decay, momentum, batch_size, l1, l2, convs, fcs = action
@@ -274,4 +274,4 @@ class CNNClassifierTraining(gym.Env):
         if not diverged:
             _, acc = model.evaluate(Xv, Yv)

         return diverged, acc

View File

@@ -89,7 +89,7 @@ class BlackjackEnv(gym.Env):
         return [seed]

     def _step(self, action):
-        assert(self.action_space.contains(action))
+        assert self.action_space.contains(action)
         if action: # hit: add a card to players hand and return
             self.player.append(draw_card(self.np_random))
             if is_bust(self.player):

View File

@@ -36,7 +36,7 @@ class NChainEnv(gym.Env):
         return [seed]

     def _step(self, action):
-        assert(self.action_space.contains(action))
+        assert self.action_space.contains(action)
         if self.np_random.rand() < self.slip:
             action = not action # agent slipped, reverse action taken
         if action: # 'backwards': go back to the beginning, get small reward