mirror of
https://github.com/Farama-Foundation/Gymnasium.git
synced 2025-08-28 17:27:08 +00:00
Assert is a keyword, not a function
This commit is contained in:
@@ -41,7 +41,7 @@ class AlgorithmicEnv(Env):
|
|||||||
def _get_obs(self, pos=None):
|
def _get_obs(self, pos=None):
|
||||||
if pos is None:
|
if pos is None:
|
||||||
pos = self.x
|
pos = self.x
|
||||||
assert(isinstance(pos, np.ndarray) and pos.shape[0] == self.inp_dim)
|
assert isinstance(pos, np.ndarray) and pos.shape[0] == self.inp_dim
|
||||||
if ha(pos) not in self.content:
|
if ha(pos) not in self.content:
|
||||||
self.content[ha(pos)] = self.base
|
self.content[ha(pos)] = self.base
|
||||||
return self.content[ha(pos)]
|
return self.content[ha(pos)]
|
||||||
@@ -90,7 +90,7 @@ class AlgorithmicEnv(Env):
|
|||||||
x_str = label + x_str
|
x_str = label + x_str
|
||||||
return x_str
|
return x_str
|
||||||
else:
|
else:
|
||||||
assert(False)
|
assert False
|
||||||
|
|
||||||
|
|
||||||
def _render(self, mode='human', close=False):
|
def _render(self, mode='human', close=False):
|
||||||
|
@@ -406,7 +406,7 @@ class BipedalWalker(gym.Env):
|
|||||||
1.0 if self.legs[3].ground_contact else 0.0
|
1.0 if self.legs[3].ground_contact else 0.0
|
||||||
]
|
]
|
||||||
state += [l.fraction for l in self.lidar]
|
state += [l.fraction for l in self.lidar]
|
||||||
assert(len(state)==24)
|
assert len(state)==24
|
||||||
|
|
||||||
self.scroll = pos.x - VIEWPORT_W/SCALE/5
|
self.scroll = pos.x - VIEWPORT_W/SCALE/5
|
||||||
|
|
||||||
|
@@ -263,7 +263,7 @@ class LunarLander(gym.Env):
|
|||||||
1.0 if self.legs[0].ground_contact else 0.0,
|
1.0 if self.legs[0].ground_contact else 0.0,
|
||||||
1.0 if self.legs[1].ground_contact else 0.0
|
1.0 if self.legs[1].ground_contact else 0.0
|
||||||
]
|
]
|
||||||
assert(len(state)==8)
|
assert len(state)==8
|
||||||
|
|
||||||
reward = 0
|
reward = 0
|
||||||
shaping = \
|
shaping = \
|
||||||
|
@@ -21,7 +21,7 @@ class OneRoundDeterministicRewardEnv(gym.Env):
|
|||||||
self._reset()
|
self._reset()
|
||||||
|
|
||||||
def _step(self, action):
|
def _step(self, action):
|
||||||
assert(self.action_space.contains(action))
|
assert self.action_space.contains(action)
|
||||||
if action:
|
if action:
|
||||||
reward = 1
|
reward = 1
|
||||||
else:
|
else:
|
||||||
|
@@ -22,7 +22,7 @@ class OneRoundNondeterministicRewardEnv(gym.Env):
|
|||||||
self._reset()
|
self._reset()
|
||||||
|
|
||||||
def _step(self, action):
|
def _step(self, action):
|
||||||
assert(self.action_space.contains(action))
|
assert self.action_space.contains(action)
|
||||||
if action:
|
if action:
|
||||||
#your agent should figure out that this option has expected value 2.5
|
#your agent should figure out that this option has expected value 2.5
|
||||||
reward = random.choice([0, 5])
|
reward = random.choice([0, 5])
|
||||||
|
@@ -28,7 +28,7 @@ class TwoRoundDeterministicRewardEnv(gym.Env):
|
|||||||
def _step(self, action):
|
def _step(self, action):
|
||||||
rewards = [[0, 3], [1, 2]]
|
rewards = [[0, 3], [1, 2]]
|
||||||
|
|
||||||
assert(self.action_space.contains(action))
|
assert self.action_space.contains(action)
|
||||||
|
|
||||||
if self.firstAction is None:
|
if self.firstAction is None:
|
||||||
self.firstAction = action
|
self.firstAction = action
|
||||||
|
@@ -38,7 +38,7 @@ class TwoRoundNondeterministicRewardEnv(gym.Env):
|
|||||||
]
|
]
|
||||||
]
|
]
|
||||||
|
|
||||||
assert(self.action_space.contains(action))
|
assert self.action_space.contains(action)
|
||||||
|
|
||||||
if self.firstAction is None:
|
if self.firstAction is None:
|
||||||
self.firstAction = action
|
self.firstAction = action
|
||||||
|
@@ -19,29 +19,29 @@ import math
|
|||||||
class ConvergenceControl(gym.Env):
|
class ConvergenceControl(gym.Env):
|
||||||
"""Environment where agent learns to tune parameters of training
|
"""Environment where agent learns to tune parameters of training
|
||||||
DURING the training of the neural network to improve its convergence /
|
DURING the training of the neural network to improve its convergence /
|
||||||
performance on the validation set.
|
performance on the validation set.
|
||||||
|
|
||||||
Parameters can be tuned after every epoch. Parameters tuned are learning
|
Parameters can be tuned after every epoch. Parameters tuned are learning
|
||||||
rate, learning rate decay, momentum, batch size, L1 / L2 regularization.
|
rate, learning rate decay, momentum, batch size, L1 / L2 regularization.
|
||||||
|
|
||||||
Agent is provided with feedback on validation accuracy, as well as on
|
Agent is provided with feedback on validation accuracy, as well as on
|
||||||
the size of dataset and number of classes, and some coarse description of
|
the size of dataset and number of classes, and some coarse description of
|
||||||
architecture being optimized.
|
architecture being optimized.
|
||||||
|
|
||||||
The closest publication that I am aware of that tries to solve a similar
|
The closest publication that I am aware of that tries to solve a similar
|
||||||
environment is
|
environment is
|
||||||
|
|
||||||
http://research.microsoft.com/pubs/259048/daniel2016stepsizecontrol.pdf
|
http://research.microsoft.com/pubs/259048/daniel2016stepsizecontrol.pdf
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
metadata = {"render.modes": ["human"]}
|
metadata = {"render.modes": ["human"]}
|
||||||
|
|
||||||
def __init__(self, natural=False):
|
def __init__(self, natural=False):
|
||||||
"""
|
"""
|
||||||
Initialize environment
|
Initialize environment
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# I use array of len 1 to store constants (otherwise there were some errors)
|
# I use array of len 1 to store constants (otherwise there were some errors)
|
||||||
self.action_space = spaces.Tuple((
|
self.action_space = spaces.Tuple((
|
||||||
spaces.Box(-5.0,0.0, 1), # learning rate
|
spaces.Box(-5.0,0.0, 1), # learning rate
|
||||||
@@ -51,13 +51,13 @@ class ConvergenceControl(gym.Env):
|
|||||||
spaces.Box(-6.0,1.0, 1), # l1 reg
|
spaces.Box(-6.0,1.0, 1), # l1 reg
|
||||||
spaces.Box(-6.0,1.0, 1), # l2 reg
|
spaces.Box(-6.0,1.0, 1), # l2 reg
|
||||||
))
|
))
|
||||||
|
|
||||||
# observation features, in order: num of instances, num of labels,
|
# observation features, in order: num of instances, num of labels,
|
||||||
# number of filter in part A / B of neural net, num of neurons in
|
# number of filter in part A / B of neural net, num of neurons in
|
||||||
# output layer, validation accuracy after training with given
|
# output layer, validation accuracy after training with given
|
||||||
# parameters
|
# parameters
|
||||||
self.observation_space = spaces.Box(-1e5,1e5, 6) # validation accuracy
|
self.observation_space = spaces.Box(-1e5,1e5, 6) # validation accuracy
|
||||||
|
|
||||||
# Start the first game
|
# Start the first game
|
||||||
self._reset()
|
self._reset()
|
||||||
|
|
||||||
@@ -65,94 +65,94 @@ class ConvergenceControl(gym.Env):
|
|||||||
"""
|
"""
|
||||||
Perform some action in the environment
|
Perform some action in the environment
|
||||||
"""
|
"""
|
||||||
assert(self.action_space.contains(action))
|
assert self.action_space.contains(action)
|
||||||
|
|
||||||
lr, decay, momentum, batch_size, l1, l2 = action;
|
lr, decay, momentum, batch_size, l1, l2 = action;
|
||||||
|
|
||||||
|
|
||||||
# map ranges of inputs
|
# map ranges of inputs
|
||||||
lr = (10.0 ** lr[0]).astype('float32')
|
lr = (10.0 ** lr[0]).astype('float32')
|
||||||
decay = (10.0 ** decay[0]).astype('float32')
|
decay = (10.0 ** decay[0]).astype('float32')
|
||||||
momentum = (10.0 ** momentum[0]).astype('float32')
|
momentum = (10.0 ** momentum[0]).astype('float32')
|
||||||
|
|
||||||
batch_size = int( 2 ** batch_size[0] )
|
batch_size = int( 2 ** batch_size[0] )
|
||||||
|
|
||||||
l1 = (10.0 ** l1[0]).astype('float32')
|
l1 = (10.0 ** l1[0]).astype('float32')
|
||||||
l2 = (10.0 ** l2[0]).astype('float32')
|
l2 = (10.0 ** l2[0]).astype('float32')
|
||||||
|
|
||||||
"""
|
"""
|
||||||
names = ["lr", "decay", "mom", "batch", "l1", "l2"]
|
names = ["lr", "decay", "mom", "batch", "l1", "l2"]
|
||||||
values = [lr, decay, momentum, batch_size, l1, l2]
|
values = [lr, decay, momentum, batch_size, l1, l2]
|
||||||
|
|
||||||
for n,v in zip(names, values):
|
for n,v in zip(names, values):
|
||||||
print(n,v)
|
print(n,v)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
X,Y,Xv,Yv = self.data
|
X,Y,Xv,Yv = self.data
|
||||||
|
|
||||||
# set parameters of training step
|
# set parameters of training step
|
||||||
|
|
||||||
self.sgd.lr.set_value(lr)
|
self.sgd.lr.set_value(lr)
|
||||||
self.sgd.decay.set_value(decay)
|
self.sgd.decay.set_value(decay)
|
||||||
self.sgd.momentum.set_value(momentum)
|
self.sgd.momentum.set_value(momentum)
|
||||||
|
|
||||||
self.reg.l1.set_value(l1)
|
self.reg.l1.set_value(l1)
|
||||||
self.reg.l2.set_value(l2)
|
self.reg.l2.set_value(l2)
|
||||||
|
|
||||||
# train model for one epoch_idx
|
# train model for one epoch_idx
|
||||||
H = self.model.fit(X, Y,
|
H = self.model.fit(X, Y,
|
||||||
batch_size=int(batch_size),
|
batch_size=int(batch_size),
|
||||||
nb_epoch=1,
|
nb_epoch=1,
|
||||||
shuffle=True)
|
shuffle=True)
|
||||||
|
|
||||||
_, acc = self.model.evaluate(Xv,Yv)
|
_, acc = self.model.evaluate(Xv,Yv)
|
||||||
|
|
||||||
# save best validation
|
# save best validation
|
||||||
if acc > self.best_val:
|
if acc > self.best_val:
|
||||||
self.best_val = acc
|
self.best_val = acc
|
||||||
|
|
||||||
self.previous_acc = acc;
|
self.previous_acc = acc;
|
||||||
|
|
||||||
self.epoch_idx = self.epoch_idx + 1
|
self.epoch_idx = self.epoch_idx + 1
|
||||||
|
|
||||||
diverged = math.isnan( H.history['loss'][-1] )
|
diverged = math.isnan( H.history['loss'][-1] )
|
||||||
done = self.epoch_idx == 20 or diverged
|
done = self.epoch_idx == 20 or diverged
|
||||||
|
|
||||||
if diverged:
|
if diverged:
|
||||||
""" maybe not set to a very large value; if you get something nice,
|
""" maybe not set to a very large value; if you get something nice,
|
||||||
but then diverge, maybe it is not too bad
|
but then diverge, maybe it is not too bad
|
||||||
"""
|
"""
|
||||||
reward = -100.0
|
reward = -100.0
|
||||||
else:
|
else:
|
||||||
reward = self.best_val
|
reward = self.best_val
|
||||||
|
|
||||||
# as number of labels increases, learning problem becomes
|
# as number of labels increases, learning problem becomes
|
||||||
# more difficult for fixed dataset size. In order to avoid
|
# more difficult for fixed dataset size. In order to avoid
|
||||||
# for the agent to ignore more complex datasets, on which
|
# for the agent to ignore more complex datasets, on which
|
||||||
# accuracy is low and concentrate on simple cases which bring bulk
|
# accuracy is low and concentrate on simple cases which bring bulk
|
||||||
# of reward, I normalize by number of labels in dataset
|
# of reward, I normalize by number of labels in dataset
|
||||||
|
|
||||||
reward = reward * self.nb_classes
|
reward = reward * self.nb_classes
|
||||||
|
|
||||||
# formula below encourages higher best validation
|
# formula below encourages higher best validation
|
||||||
|
|
||||||
reward = reward + reward ** 2
|
reward = reward + reward ** 2
|
||||||
|
|
||||||
return self._get_obs(), reward, done, {}
|
return self._get_obs(), reward, done, {}
|
||||||
|
|
||||||
def _render(self, mode="human", close=False):
|
def _render(self, mode="human", close=False):
|
||||||
|
|
||||||
if close:
|
if close:
|
||||||
return
|
return
|
||||||
|
|
||||||
print(">> Step ",self.epoch_idx,"best validation:", self.best_val)
|
print(">> Step ",self.epoch_idx,"best validation:", self.best_val)
|
||||||
|
|
||||||
def _get_obs(self):
|
def _get_obs(self):
|
||||||
"""
|
"""
|
||||||
Observe the environment. Is usually used after the step is taken
|
Observe the environment. Is usually used after the step is taken
|
||||||
"""
|
"""
|
||||||
# observation as per observation space
|
# observation as per observation space
|
||||||
return np.array([self.nb_classes,
|
return np.array([self.nb_classes,
|
||||||
self.nb_inst,
|
self.nb_inst,
|
||||||
self.convAsz,
|
self.convAsz,
|
||||||
self.convBsz,
|
self.convBsz,
|
||||||
@@ -160,130 +160,130 @@ class ConvergenceControl(gym.Env):
|
|||||||
self.previous_acc])
|
self.previous_acc])
|
||||||
|
|
||||||
def data_mix(self):
|
def data_mix(self):
|
||||||
|
|
||||||
# randomly choose dataset
|
# randomly choose dataset
|
||||||
dataset = random.choice(['mnist', 'cifar10', 'cifar100'])#
|
dataset = random.choice(['mnist', 'cifar10', 'cifar100'])#
|
||||||
|
|
||||||
n_labels = 10
|
n_labels = 10
|
||||||
|
|
||||||
if dataset == "mnist":
|
if dataset == "mnist":
|
||||||
data = mnist.load_data()
|
data = mnist.load_data()
|
||||||
|
|
||||||
if dataset == "cifar10":
|
if dataset == "cifar10":
|
||||||
data = cifar10.load_data()
|
data = cifar10.load_data()
|
||||||
|
|
||||||
if dataset == "cifar100":
|
if dataset == "cifar100":
|
||||||
data = cifar100.load_data()
|
data = cifar100.load_data()
|
||||||
n_labels = 100
|
n_labels = 100
|
||||||
|
|
||||||
# Choose dataset size. This affects regularization needed
|
# Choose dataset size. This affects regularization needed
|
||||||
r = np.random.rand()
|
r = np.random.rand()
|
||||||
|
|
||||||
# not using full dataset to make regularization more important and
|
# not using full dataset to make regularization more important and
|
||||||
# speed up testing a little bit
|
# speed up testing a little bit
|
||||||
data_size = int( 2000 * (1-r) + 40000 * r )
|
data_size = int( 2000 * (1-r) + 40000 * r )
|
||||||
|
|
||||||
# I do not use test data for validation, but last 10000 instances in dataset
|
# I do not use test data for validation, but last 10000 instances in dataset
|
||||||
# so that trained models can be compared to results in literature
|
# so that trained models can be compared to results in literature
|
||||||
(CX, CY), (CXt, CYt) = data
|
(CX, CY), (CXt, CYt) = data
|
||||||
|
|
||||||
if dataset == "mnist":
|
if dataset == "mnist":
|
||||||
CX = np.expand_dims(CX, axis=1)
|
CX = np.expand_dims(CX, axis=1)
|
||||||
|
|
||||||
data = CX[:data_size], CY[:data_size], CX[-10000:], CY[-10000:];
|
data = CX[:data_size], CY[:data_size], CX[-10000:], CY[-10000:];
|
||||||
|
|
||||||
return data, n_labels
|
return data, n_labels
|
||||||
|
|
||||||
def _reset(self):
|
def _reset(self):
|
||||||
|
|
||||||
reg = WeightRegularizer()
|
reg = WeightRegularizer()
|
||||||
|
|
||||||
# a hack to make regularization variable
|
# a hack to make regularization variable
|
||||||
reg.l1 = K.variable(0.0)
|
reg.l1 = K.variable(0.0)
|
||||||
reg.l2 = K.variable(0.0)
|
reg.l2 = K.variable(0.0)
|
||||||
|
|
||||||
|
|
||||||
data, nb_classes = self.data_mix()
|
data, nb_classes = self.data_mix()
|
||||||
X, Y, Xv, Yv = data
|
X, Y, Xv, Yv = data
|
||||||
|
|
||||||
# input square image dimensions
|
# input square image dimensions
|
||||||
img_rows, img_cols = X.shape[-1], X.shape[-1]
|
img_rows, img_cols = X.shape[-1], X.shape[-1]
|
||||||
img_channels = X.shape[1]
|
img_channels = X.shape[1]
|
||||||
# save number of classes and instances
|
# save number of classes and instances
|
||||||
self.nb_classes = nb_classes
|
self.nb_classes = nb_classes
|
||||||
self.nb_inst = len(X)
|
self.nb_inst = len(X)
|
||||||
|
|
||||||
# convert class vectors to binary class matrices
|
# convert class vectors to binary class matrices
|
||||||
Y = np_utils.to_categorical(Y, nb_classes)
|
Y = np_utils.to_categorical(Y, nb_classes)
|
||||||
Yv = np_utils.to_categorical(Yv, nb_classes)
|
Yv = np_utils.to_categorical(Yv, nb_classes)
|
||||||
|
|
||||||
# here definition of the model happens
|
# here definition of the model happens
|
||||||
model = Sequential()
|
model = Sequential()
|
||||||
|
|
||||||
# double true for increased probability of conv layers
|
# double true for increased probability of conv layers
|
||||||
if random.choice([True, True, False]):
|
if random.choice([True, True, False]):
|
||||||
|
|
||||||
# Choose convolution #1
|
# Choose convolution #1
|
||||||
self.convAsz = random.choice([32,64,128])
|
self.convAsz = random.choice([32,64,128])
|
||||||
|
|
||||||
model.add(Convolution2D(self.convAsz, 3, 3, border_mode='same',
|
model.add(Convolution2D(self.convAsz, 3, 3, border_mode='same',
|
||||||
input_shape=(img_channels, img_rows, img_cols),
|
input_shape=(img_channels, img_rows, img_cols),
|
||||||
W_regularizer = reg,
|
W_regularizer = reg,
|
||||||
b_regularizer = reg))
|
b_regularizer = reg))
|
||||||
model.add(Activation('relu'))
|
model.add(Activation('relu'))
|
||||||
|
|
||||||
model.add(Convolution2D(self.convAsz, 3, 3,
|
model.add(Convolution2D(self.convAsz, 3, 3,
|
||||||
W_regularizer = reg,
|
W_regularizer = reg,
|
||||||
b_regularizer = reg))
|
b_regularizer = reg))
|
||||||
model.add(Activation('relu'))
|
model.add(Activation('relu'))
|
||||||
|
|
||||||
model.add(MaxPooling2D(pool_size=(2, 2)))
|
model.add(MaxPooling2D(pool_size=(2, 2)))
|
||||||
model.add(Dropout(0.25))
|
model.add(Dropout(0.25))
|
||||||
|
|
||||||
# Choose convolution size B (if needed)
|
# Choose convolution size B (if needed)
|
||||||
self.convBsz = random.choice([0,32,64])
|
self.convBsz = random.choice([0,32,64])
|
||||||
|
|
||||||
if self.convBsz > 0:
|
if self.convBsz > 0:
|
||||||
model.add(Convolution2D(self.convBsz, 3, 3, border_mode='same',
|
model.add(Convolution2D(self.convBsz, 3, 3, border_mode='same',
|
||||||
W_regularizer = reg,
|
W_regularizer = reg,
|
||||||
b_regularizer = reg))
|
b_regularizer = reg))
|
||||||
model.add(Activation('relu'))
|
model.add(Activation('relu'))
|
||||||
|
|
||||||
model.add(Convolution2D(self.convBsz, 3, 3,
|
model.add(Convolution2D(self.convBsz, 3, 3,
|
||||||
W_regularizer = reg,
|
W_regularizer = reg,
|
||||||
b_regularizer = reg))
|
b_regularizer = reg))
|
||||||
model.add(Activation('relu'))
|
model.add(Activation('relu'))
|
||||||
|
|
||||||
model.add(MaxPooling2D(pool_size=(2, 2)))
|
model.add(MaxPooling2D(pool_size=(2, 2)))
|
||||||
model.add(Dropout(0.25))
|
model.add(Dropout(0.25))
|
||||||
|
|
||||||
model.add(Flatten())
|
model.add(Flatten())
|
||||||
|
|
||||||
else:
|
else:
|
||||||
model.add(Flatten(input_shape=(img_channels, img_rows, img_cols)))
|
model.add(Flatten(input_shape=(img_channels, img_rows, img_cols)))
|
||||||
self.convAsz = 0
|
self.convAsz = 0
|
||||||
self.convBsz = 0
|
self.convBsz = 0
|
||||||
|
|
||||||
# choose fully connected layer size
|
# choose fully connected layer size
|
||||||
self.densesz = random.choice([256,512,762])
|
self.densesz = random.choice([256,512,762])
|
||||||
|
|
||||||
model.add(Dense(self.densesz,
|
model.add(Dense(self.densesz,
|
||||||
W_regularizer = reg,
|
W_regularizer = reg,
|
||||||
b_regularizer = reg))
|
b_regularizer = reg))
|
||||||
model.add(Activation('relu'))
|
model.add(Activation('relu'))
|
||||||
model.add(Dropout(0.5))
|
model.add(Dropout(0.5))
|
||||||
|
|
||||||
model.add(Dense(nb_classes,
|
model.add(Dense(nb_classes,
|
||||||
W_regularizer = reg,
|
W_regularizer = reg,
|
||||||
b_regularizer = reg))
|
b_regularizer = reg))
|
||||||
model.add(Activation('softmax'))
|
model.add(Activation('softmax'))
|
||||||
|
|
||||||
# let's train the model using SGD + momentum (how original).
|
# let's train the model using SGD + momentum (how original).
|
||||||
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
|
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
|
||||||
model.compile(loss='categorical_crossentropy',
|
model.compile(loss='categorical_crossentropy',
|
||||||
optimizer=sgd,
|
optimizer=sgd,
|
||||||
metrics=['accuracy'])
|
metrics=['accuracy'])
|
||||||
|
|
||||||
X = X.astype('float32')
|
X = X.astype('float32')
|
||||||
Xv = Xv.astype('float32')
|
Xv = Xv.astype('float32')
|
||||||
X /= 255
|
X /= 255
|
||||||
@@ -292,12 +292,12 @@ class ConvergenceControl(gym.Env):
|
|||||||
self.data = (X,Y,Xv,Yv)
|
self.data = (X,Y,Xv,Yv)
|
||||||
self.model = model
|
self.model = model
|
||||||
self.sgd = sgd
|
self.sgd = sgd
|
||||||
|
|
||||||
# initial accuracy values
|
# initial accuracy values
|
||||||
self.best_val = 0.0
|
self.best_val = 0.0
|
||||||
self.previous_acc = 0.0
|
self.previous_acc = 0.0
|
||||||
|
|
||||||
self.reg = reg
|
self.reg = reg
|
||||||
self.epoch_idx = 0
|
self.epoch_idx = 0
|
||||||
|
|
||||||
return self._get_obs()
|
return self._get_obs()
|
||||||
|
@@ -59,7 +59,7 @@ class CNNClassifierTraining(gym.Env):
|
|||||||
"""
|
"""
|
||||||
Perform some action in the environment
|
Perform some action in the environment
|
||||||
"""
|
"""
|
||||||
assert (self.action_space.contains(action))
|
assert self.action_space.contains(action)
|
||||||
|
|
||||||
lr, decay, momentum, batch_size, l1, l2, convs, fcs = action
|
lr, decay, momentum, batch_size, l1, l2, convs, fcs = action
|
||||||
|
|
||||||
@@ -274,4 +274,4 @@ class CNNClassifierTraining(gym.Env):
|
|||||||
if not diverged:
|
if not diverged:
|
||||||
_, acc = model.evaluate(Xv, Yv)
|
_, acc = model.evaluate(Xv, Yv)
|
||||||
|
|
||||||
return diverged, acc
|
return diverged, acc
|
||||||
|
@@ -89,7 +89,7 @@ class BlackjackEnv(gym.Env):
|
|||||||
return [seed]
|
return [seed]
|
||||||
|
|
||||||
def _step(self, action):
|
def _step(self, action):
|
||||||
assert(self.action_space.contains(action))
|
assert self.action_space.contains(action)
|
||||||
if action: # hit: add a card to player's hand and return
|
if action: # hit: add a card to player's hand and return
|
||||||
self.player.append(draw_card(self.np_random))
|
self.player.append(draw_card(self.np_random))
|
||||||
if is_bust(self.player):
|
if is_bust(self.player):
|
||||||
|
@@ -36,7 +36,7 @@ class NChainEnv(gym.Env):
|
|||||||
return [seed]
|
return [seed]
|
||||||
|
|
||||||
def _step(self, action):
|
def _step(self, action):
|
||||||
assert(self.action_space.contains(action))
|
assert self.action_space.contains(action)
|
||||||
if self.np_random.rand() < self.slip:
|
if self.np_random.rand() < self.slip:
|
||||||
action = not action # agent slipped, reverse action taken
|
action = not action # agent slipped, reverse action taken
|
||||||
if action: # 'backwards': go back to the beginning, get small reward
|
if action: # 'backwards': go back to the beginning, get small reward
|
||||||
|
Reference in New Issue
Block a user