diff --git a/gym/envs/algorithmic/algorithmic_env.py b/gym/envs/algorithmic/algorithmic_env.py index 6183fa386..4bbe7ed9d 100644 --- a/gym/envs/algorithmic/algorithmic_env.py +++ b/gym/envs/algorithmic/algorithmic_env.py @@ -41,7 +41,7 @@ class AlgorithmicEnv(Env): def _get_obs(self, pos=None): if pos is None: pos = self.x - assert(isinstance(pos, np.ndarray) and pos.shape[0] == self.inp_dim) + assert isinstance(pos, np.ndarray) and pos.shape[0] == self.inp_dim if ha(pos) not in self.content: self.content[ha(pos)] = self.base return self.content[ha(pos)] @@ -90,7 +90,7 @@ class AlgorithmicEnv(Env): x_str = label + x_str return x_str else: - assert(False) + assert False def _render(self, mode='human', close=False): diff --git a/gym/envs/box2d/bipedal_walker.py b/gym/envs/box2d/bipedal_walker.py index 18428428c..5ef94d13b 100644 --- a/gym/envs/box2d/bipedal_walker.py +++ b/gym/envs/box2d/bipedal_walker.py @@ -406,7 +406,7 @@ class BipedalWalker(gym.Env): 1.0 if self.legs[3].ground_contact else 0.0 ] state += [l.fraction for l in self.lidar] - assert(len(state)==24) + assert len(state)==24 self.scroll = pos.x - VIEWPORT_W/SCALE/5 diff --git a/gym/envs/box2d/lunar_lander.py b/gym/envs/box2d/lunar_lander.py index fd0068976..9fe4e5aec 100644 --- a/gym/envs/box2d/lunar_lander.py +++ b/gym/envs/box2d/lunar_lander.py @@ -263,7 +263,7 @@ class LunarLander(gym.Env): 1.0 if self.legs[0].ground_contact else 0.0, 1.0 if self.legs[1].ground_contact else 0.0 ] - assert(len(state)==8) + assert len(state)==8 reward = 0 shaping = \ diff --git a/gym/envs/debugging/one_round_deterministic_reward.py b/gym/envs/debugging/one_round_deterministic_reward.py index 4393d9e63..6c1afdf50 100644 --- a/gym/envs/debugging/one_round_deterministic_reward.py +++ b/gym/envs/debugging/one_round_deterministic_reward.py @@ -21,7 +21,7 @@ class OneRoundDeterministicRewardEnv(gym.Env): self._reset() def _step(self, action): - assert(self.action_space.contains(action)) + assert self.action_space.contains(action) if action: reward = 1 else: diff --git a/gym/envs/debugging/one_round_nondeterministic_reward.py b/gym/envs/debugging/one_round_nondeterministic_reward.py index c4dc3f68b..0cccbaebf 100644 --- a/gym/envs/debugging/one_round_nondeterministic_reward.py +++ b/gym/envs/debugging/one_round_nondeterministic_reward.py @@ -22,7 +22,7 @@ class OneRoundNondeterministicRewardEnv(gym.Env): self._reset() def _step(self, action): - assert(self.action_space.contains(action)) + assert self.action_space.contains(action) if action: #your agent should figure out that this option has expected value 2.5 reward = random.choice([0, 5]) diff --git a/gym/envs/debugging/two_round_deterministic_reward.py b/gym/envs/debugging/two_round_deterministic_reward.py index 0e7fbd784..3b8e197ed 100644 --- a/gym/envs/debugging/two_round_deterministic_reward.py +++ b/gym/envs/debugging/two_round_deterministic_reward.py @@ -28,7 +28,7 @@ class TwoRoundDeterministicRewardEnv(gym.Env): def _step(self, action): rewards = [[0, 3], [1, 2]] - assert(self.action_space.contains(action)) + assert self.action_space.contains(action) if self.firstAction is None: self.firstAction = action diff --git a/gym/envs/debugging/two_round_nondeterministic_reward.py b/gym/envs/debugging/two_round_nondeterministic_reward.py index 505d044da..da7107713 100644 --- a/gym/envs/debugging/two_round_nondeterministic_reward.py +++ b/gym/envs/debugging/two_round_nondeterministic_reward.py @@ -38,7 +38,7 @@ class TwoRoundNondeterministicRewardEnv(gym.Env): ] ] - assert(self.action_space.contains(action)) + assert self.action_space.contains(action) if self.firstAction is None: self.firstAction = action diff --git a/gym/envs/parameter_tuning/convergence.py b/gym/envs/parameter_tuning/convergence.py index a0158575e..ce0924573 100644 --- a/gym/envs/parameter_tuning/convergence.py +++ b/gym/envs/parameter_tuning/convergence.py @@ -19,29 +19,29 @@ import math class ConvergenceControl(gym.Env): """Environment where agent learns to tune parameters of training DURING the training of the neural network to improve its convergence / - performance on the validation set. - - Parameters can be tuned after every epoch. Parameters tuned are learning + performance on the validation set. + + Parameters can be tuned after every epoch. Parameters tuned are learning rate, learning rate decay, momentum, batch size, L1 / L2 regularization. - + Agent is provided with feedback on validation accuracy, as well as on the size of dataset and number of classes, and some coarse description of architecture being optimized. - - The most close publication that I am aware of that tries to solve similar + + The most close publication that I am aware of that tries to solve similar environment is - - http://research.microsoft.com/pubs/259048/daniel2016stepsizecontrol.pdf - + + http://research.microsoft.com/pubs/259048/daniel2016stepsizecontrol.pdf + """ - + metadata = {"render.modes": ["human"]} - + def __init__(self, natural=False): """ Initialize environment """ - + # I use array of len 1 to store constants (otherwise there were some errors) self.action_space = spaces.Tuple(( spaces.Box(-5.0,0.0, 1), # learning rate @@ -51,13 +51,13 @@ class ConvergenceControl(gym.Env): spaces.Box(-6.0,1.0, 1), # l1 reg spaces.Box(-6.0,1.0, 1), # l2 reg )) - - # observation features, in order: num of instances, num of labels, - # number of filter in part A / B of neural net, num of neurons in + + # observation features, in order: num of instances, num of labels, + # number of filter in part A / B of neural net, num of neurons in # output layer, validation accuracy after training with given - # parameters + # parameters self.observation_space = spaces.Box(-1e5,1e5, 6) # validation accuracy - + # Start the first game self._reset() @@ -65,94 +65,94 @@ class ConvergenceControl(gym.Env): """ Perform some action in the environment """ - assert(self.action_space.contains(action)) - + assert self.action_space.contains(action) + lr, decay, momentum, batch_size, l1, l2 = action; - - - # map ranges of inputs + + + # map ranges of inputs lr = (10.0 ** lr[0]).astype('float32') decay = (10.0 ** decay[0]).astype('float32') momentum = (10.0 ** momentum[0]).astype('float32') - + batch_size = int( 2 ** batch_size[0] ) - + l1 = (10.0 ** l1[0]).astype('float32') l2 = (10.0 ** l2[0]).astype('float32') - + """ names = ["lr", "decay", "mom", "batch", "l1", "l2"] values = [lr, decay, momentum, batch_size, l1, l2] - + for n,v in zip(names, values): print(n,v) """ - + X,Y,Xv,Yv = self.data - + # set parameters of training step - + self.sgd.lr.set_value(lr) self.sgd.decay.set_value(decay) self.sgd.momentum.set_value(momentum) - + self.reg.l1.set_value(l1) self.reg.l2.set_value(l2) - + # train model for one epoch_idx H = self.model.fit(X, Y, batch_size=int(batch_size), nb_epoch=1, shuffle=True) - + _, acc = self.model.evaluate(Xv,Yv) - + # save best validation if acc > self.best_val: self.best_val = acc - + self.previous_acc = acc; - + self.epoch_idx = self.epoch_idx + 1 - + diverged = math.isnan( H.history['loss'][-1] ) done = self.epoch_idx == 20 or diverged - + if diverged: """ maybe not set to a very large value; if you get something nice, - but then diverge, maybe it is not too bad + but then diverge, maybe it is not too bad """ reward = -100.0 else: reward = self.best_val - - # as number of labels increases, learning problem becomes + + # as number of labels increases, learning problem becomes # more difficult for fixed dataset size. In order to avoid # for the agent to ignore more complex datasets, on which - # accuracy is low and concentrate on simple cases which bring bulk + # accuracy is low and concentrate on simple cases which bring bulk # of reward, I normalize by number of labels in dataset - + reward = reward * self.nb_classes - + # formula below encourages higher best validation - + reward = reward + reward ** 2 - + return self._get_obs(), reward, done, {} def _render(self, mode="human", close=False): - + if close: return - + print(">> Step ",self.epoch_idx,"best validation:", self.best_val) def _get_obs(self): """ Observe the environment. Is usually used after the step is taken """ - # observation as per observation space - return np.array([self.nb_classes, + # observation as per observation space + return np.array([self.nb_classes, self.nb_inst, self.convAsz, self.convBsz, @@ -160,130 +160,130 @@ class ConvergenceControl(gym.Env): self.previous_acc]) def data_mix(self): - + # randomly choose dataset dataset = random.choice(['mnist', 'cifar10', 'cifar100'])# - + n_labels = 10 - + if dataset == "mnist": data = mnist.load_data() - + if dataset == "cifar10": data = cifar10.load_data() - + if dataset == "cifar100": data = cifar100.load_data() n_labels = 100 - + # Choose dataset size. This affects regularization needed r = np.random.rand() - - # not using full dataset to make regularization more important and + + # not using full dataset to make regularization more important and # speed up testing a little bit data_size = int( 2000 * (1-r) + 40000 * r ) - - # I do not use test data for validation, but last 10000 instances in dataset + + # I do not use test data for validation, but last 10000 instances in dataset # so that trained models can be compared to results in literature (CX, CY), (CXt, CYt) = data - + if dataset == "mnist": CX = np.expand_dims(CX, axis=1) - + data = CX[:data_size], CY[:data_size], CX[-10000:], CY[-10000:]; - + return data, n_labels def _reset(self): - + reg = WeightRegularizer() - + # a hack to make regularization variable reg.l1 = K.variable(0.0) reg.l2 = K.variable(0.0) - - + + data, nb_classes = self.data_mix() X, Y, Xv, Yv = data - + # input square image dimensions img_rows, img_cols = X.shape[-1], X.shape[-1] img_channels = X.shape[1] # save number of classes and instances self.nb_classes = nb_classes self.nb_inst = len(X) - + # convert class vectors to binary class matrices Y = np_utils.to_categorical(Y, nb_classes) Yv = np_utils.to_categorical(Yv, nb_classes) - + # here definition of the model happens model = Sequential() - + # double true for icnreased probability of conv layers - if random.choice([True, True, False]): - + if random.choice([True, True, False]): + # Choose convolution #1 self.convAsz = random.choice([32,64,128]) - + model.add(Convolution2D(self.convAsz, 3, 3, border_mode='same', input_shape=(img_channels, img_rows, img_cols), W_regularizer = reg, b_regularizer = reg)) model.add(Activation('relu')) - + model.add(Convolution2D(self.convAsz, 3, 3, W_regularizer = reg, b_regularizer = reg)) model.add(Activation('relu')) - + model.add(MaxPooling2D(pool_size=(2, 2))) model.add(Dropout(0.25)) - + # Choose convolution size B (if needed) self.convBsz = random.choice([0,32,64]) - + if self.convBsz > 0: model.add(Convolution2D(self.convBsz, 3, 3, border_mode='same', W_regularizer = reg, b_regularizer = reg)) model.add(Activation('relu')) - + model.add(Convolution2D(self.convBsz, 3, 3, W_regularizer = reg, b_regularizer = reg)) model.add(Activation('relu')) - + model.add(MaxPooling2D(pool_size=(2, 2))) model.add(Dropout(0.25)) - + model.add(Flatten()) - + else: model.add(Flatten(input_shape=(img_channels, img_rows, img_cols))) self.convAsz = 0 self.convBsz = 0 - + # choose fully connected layer size self.densesz = random.choice([256,512,762]) - + model.add(Dense(self.densesz, W_regularizer = reg, b_regularizer = reg)) model.add(Activation('relu')) model.add(Dropout(0.5)) - + model.add(Dense(nb_classes, W_regularizer = reg, b_regularizer = reg)) model.add(Activation('softmax')) - + # let's train the model using SGD + momentum (how original). sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True) model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy']) - + X = X.astype('float32') Xv = Xv.astype('float32') X /= 255 @@ -292,12 +292,12 @@ class ConvergenceControl(gym.Env): self.data = (X,Y,Xv,Yv) self.model = model self.sgd = sgd - + # initial accuracy values self.best_val = 0.0 self.previous_acc = 0.0 - + self.reg = reg self.epoch_idx = 0 - + return self._get_obs() diff --git a/gym/envs/parameter_tuning/train_deep_cnn.py b/gym/envs/parameter_tuning/train_deep_cnn.py index bc90b182b..ec4a3b519 100644 --- a/gym/envs/parameter_tuning/train_deep_cnn.py +++ b/gym/envs/parameter_tuning/train_deep_cnn.py @@ -59,7 +59,7 @@ class CNNClassifierTraining(gym.Env): """ Perform some action in the environment """ - assert (self.action_space.contains(action)) + assert self.action_space.contains(action) lr, decay, momentum, batch_size, l1, l2, convs, fcs = action @@ -274,4 +274,4 @@ class CNNClassifierTraining(gym.Env): if not diverged: _, acc = model.evaluate(Xv, Yv) - return diverged, acc \ No newline at end of file + return diverged, acc diff --git a/gym/envs/toy_text/blackjack.py b/gym/envs/toy_text/blackjack.py index 42b725f30..788fee291 100644 --- a/gym/envs/toy_text/blackjack.py +++ b/gym/envs/toy_text/blackjack.py @@ -89,7 +89,7 @@ class BlackjackEnv(gym.Env): return [seed] def _step(self, action): - assert(self.action_space.contains(action)) + assert self.action_space.contains(action) if action: # hit: add a card to players hand and return self.player.append(draw_card(self.np_random)) if is_bust(self.player): diff --git a/gym/envs/toy_text/nchain.py b/gym/envs/toy_text/nchain.py index 156908b68..d6a72701a 100644 --- a/gym/envs/toy_text/nchain.py +++ b/gym/envs/toy_text/nchain.py @@ -36,7 +36,7 @@ class NChainEnv(gym.Env): return [seed] def _step(self, action): - assert(self.action_space.contains(action)) + assert self.action_space.contains(action) if self.np_random.rand() < self.slip: action = not action # agent slipped, reverse action taken if action: # 'backwards': go back to the beginning, get small reward