Compare commits
49 Commits
peterz_tfl ... peterz_ben
| Author | SHA1 | Date |
|---|---|---|
| | ea68f3b7e6 | |
| | ca721a4be6 | |
| | 72f3572a10 | |
| | b9cd941471 | |
| | 0899b71ede | |
| | cc8c9541fb | |
| | cb32522394 | |
| | 1e40ec22be | |
| | 701a36cdfa | |
| | 5a7f9847d8 | |
| | b63134e5c5 | |
| | db314cdeda | |
| | b08c083d91 | |
| | bfbbe66d9e | |
| | 1c5c6563b7 | |
| | 1fa8c58da5 | |
| | f6d1115ead | |
| | f6d5a47bed | |
| | c2df27bee4 | |
| | 974c15756e | |
| | ad43fd9a35 | |
| | 72c357c638 | |
| | e00e5ca016 | |
| | 705797f2f0 | |
| | fcd84aa831 | |
| | 390b51597a | |
| | 95104a3592 | |
| | 3528f7b992 | |
| | 151e48009e | |
| | 92f33335e9 | |
| | af729cff15 | |
| | 10f815fe1d | |
| | 8c4adac898 | |
| | 2a93ea8782 | |
| | 9c48f9fad5 | |
| | 348cbb4b71 | |
| | a1602ab15f | |
| | e63e69bb14 | |
| | 385e7e5c0d | |
| | d112a2e49f | |
| | e662dd6409 | |
| | efc6bffce3 | |
| | 872181d4c3 | |
| | 628ddecf6a | |
| | 83a4a4be65 | |
| | 7edac38c73 | |
| | a6dca44115 | |
| | 622915c473 | |
| | a1d3c18ec0 | |
@@ -1 +1 @@
ppo2
@@ -139,3 +139,4 @@ To cite this repository in publications:
     journal = {GitHub repository},
     howpublished = {\url{https://github.com/openai/baselines}},
 }
+
@@ -156,7 +156,7 @@ class FrameStack(gym.Wrapper):
         self.k = k
         self.frames = deque([], maxlen=k)
         shp = env.observation_space.shape
-        self.observation_space = spaces.Box(low=0, high=255, shape=(shp[0], shp[1], shp[2] * k), dtype=np.uint8)
+        self.observation_space = spaces.Box(low=0, high=255, shape=(shp[0], shp[1], shp[2] * k), dtype=env.observation_space.dtype)

     def reset(self):
         ob = self.env.reset()
@@ -176,6 +176,7 @@ class FrameStack(gym.Wrapper):
 class ScaledFloatFrame(gym.ObservationWrapper):
     def __init__(self, env):
         gym.ObservationWrapper.__init__(self, env)
+        self.observation_space = gym.spaces.Box(low=0, high=1, shape=env.observation_space.shape, dtype=np.float32)

     def observation(self, observation):
         # careful! This undoes the memory optimization, use
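As a sanity check on the two wrapper changes above, here is a small self-contained sketch of the `ScaledFloatFrame` behaviour, assuming the pre-0.26 `gym` API; `ToyImageEnv` is a made-up stand-in for an Atari env, not something from this repo. It shows why the wrapper advertises a float32 `[0, 1]` Box and why the comment above warns about undoing the uint8 memory optimization.

```python
import numpy as np
import gym
from gym import spaces


class ToyImageEnv(gym.Env):
    """Illustrative stand-in for an Atari env: emits 84x84x1 uint8 frames."""
    def __init__(self):
        self.observation_space = spaces.Box(low=0, high=255, shape=(84, 84, 1), dtype=np.uint8)
        self.action_space = spaces.Discrete(2)

    def reset(self):
        return self.observation_space.sample()

    def step(self, action):
        return self.observation_space.sample(), 0.0, False, {}


class ScaledFloatFrame(gym.ObservationWrapper):
    """Same idea as the hunk above: advertise a float32 [0, 1] Box so that
    downstream code sees a space consistent with the scaled observations."""
    def __init__(self, env):
        gym.ObservationWrapper.__init__(self, env)
        self.observation_space = gym.spaces.Box(low=0, high=1, shape=env.observation_space.shape, dtype=np.float32)

    def observation(self, observation):
        # Converting every frame to float32 costs 4x the memory of uint8,
        # which is exactly the optimization the original comment warns about.
        return np.array(observation).astype(np.float32) / 255.0


env = ScaledFloatFrame(ToyImageEnv())
obs = env.reset()
print(env.observation_space.dtype, obs.dtype, float(obs.max()))  # float32 float32 <= 1.0
```

The `FrameStack` change is complementary: by taking `dtype=env.observation_space.dtype` it no longer hard-codes `np.uint8`, so the stacked space reports the right dtype when it sits above a wrapper like this one.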
@@ -92,48 +92,6 @@ def lstm(nlstm=128, layer_norm=False):

     return network_fn

-
-def tflstm_static(nlstm=128, layer_norm=False):
-    def network_fn(X, nenv=1):
-        nbatch = X.shape[0]
-        nsteps = nbatch // nenv
-
-        h = tf.layers.flatten(X)
-        rnn_cell = tf.nn.rnn_cell.BasicLSTMCell(nlstm, state_is_tuple=False, forget_bias=0.0)
-
-        S = tf.placeholder(tf.float32, rnn_cell.zero_state(nenv, dtype=tf.float32).shape) #states
-        M = tf.placeholder(tf.float32, [nbatch]) #mask (done t-1)
-
-        xs = batch_to_seq(h, nenv, nsteps)
-
-        h5, snew = tf.nn.static_rnn(rnn_cell, xs, initial_state=S)
-
-        h = seq_to_batch(h5)
-
-        initial_state = np.zeros(S.shape.as_list(), dtype=float)
-
-        return h, {'S':S, 'M':M, 'state':snew, 'initial_state':initial_state}
-
-    return network_fn
-
-
-def tflstm(nlstm=128):
-    def network_fn(X, nenv=1):
-        nbatch = X.shape[0]
-        nsteps = nbatch // nenv
-
-        h = tf.layers.flatten(X)
-        rnn_cell = tf.nn.rnn_cell.BasicLSTMCell(nlstm, state_is_tuple=False, forget_bias=0.0)
-
-        S = tf.placeholder(tf.float32, rnn_cell.zero_state(nenv, dtype=tf.float32).shape) #states
-        M = tf.placeholder(tf.float32, [nbatch]) #mask (done t-1)
-        initial_state = np.zeros(S.shape)
-
-        h = tf.reshape(h, (-1, nsteps, h.shape[-1]))
-        h, snew = tf.nn.dynamic_rnn(rnn_cell, h, initial_state=S)
-
-        h = tf.reshape(h, (-1, h.shape[-1]))
-        return h, {'S':S, 'M':M, 'state':snew, 'initial_state':initial_state}
-
-    return network_fn
-

 def cnn_lstm(nlstm=128, layer_norm=False, **conv_kwargs):
     def network_fn(X, nenv=1):
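For reference, the two builders removed above differ only in how they feed the LSTM cell: `tflstm_static` turns the flat batch into a Python list of per-step tensors (`batch_to_seq` plus `tf.nn.static_rnn`), while `tflstm` reshapes it to `(nenv, nsteps, features)` for `tf.nn.dynamic_rnn`. Both rely on the batch being laid out env-major, which the `tf.reshape(h, (-1, nsteps, h.shape[-1]))` line makes explicit. A NumPy-only sketch of that layout (sizes and names here are illustrative, not from the repo):

```python
import numpy as np

# Illustrative sizes: 2 envs, 3 steps, 4 features.
nenv, nsteps, nfeat = 2, 3, 4

# Flat batch as the removed builders expect it: row i*nsteps + t holds
# env i at step t, so reshaping to (nenv, nsteps, nfeat) is a pure view change.
flat = np.arange(nenv * nsteps * nfeat, dtype=np.float32).reshape(nenv * nsteps, nfeat)

# dynamic_rnn path in the removed tflstm(): one (nenv, nsteps, nfeat) tensor.
seq_major = flat.reshape(nenv, nsteps, nfeat)

# static_rnn path in the removed tflstm_static(): a Python list of nsteps
# tensors of shape (nenv, nfeat), which is what batch_to_seq produces.
step_list = [seq_major[:, t, :] for t in range(nsteps)]

assert np.allclose(np.stack(step_list, axis=1), seq_major)
# Flattening back (seq_to_batch) recovers the original batch ordering.
assert np.allclose(seq_major.reshape(nenv * nsteps, nfeat), flat)
print([x.shape for x in step_list], seq_major.shape)
```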
@@ -180,7 +138,7 @@ def conv_only(convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)], **conv_kwargs):
     '''

     def network_fn(X):
-        out = X
+        out = tf.cast(X, tf.float32) / 255.
         with tf.variable_scope("convnet"):
             for num_outputs, kernel_size, stride in convs:
                 out = layers.convolution2d(out,
@@ -211,10 +169,6 @@ def get_network_builder(name):
         return mlp
     elif name == 'lstm':
         return lstm
-    elif name == 'tflstm_static':
-        return tflstm_static
-    elif name == 'tflstm':
-        return tflstm
     elif name == 'cnn_lstm':
         return cnn_lstm
     elif name == 'cnn_lnlstm':
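A side note on the `get_network_builder` hunk: with the two `tflstm` branches gone, the function is back to a plain name-to-builder mapping, which could just as well be a dict lookup. This is only a sketch of that alternative (the stand-in builders below are placeholders), not how the file is actually written:

```python
def mlp(**network_kwargs):
    raise NotImplementedError  # placeholder for the real builder

def lstm(**network_kwargs):
    raise NotImplementedError  # placeholder for the real builder

# name -> builder table; registering a new network is one line instead of
# another elif branch, and unknown names still fail loudly.
_BUILDERS = {
    'mlp': mlp,
    'lstm': lstm,
}

def get_network_builder(name):
    try:
        return _BUILDERS[name]
    except KeyError:
        raise ValueError('Unknown network type: {}'.format(name))
```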
@@ -6,8 +6,7 @@ from baselines.run import get_learn_function

 common_kwargs = dict(
     seed=0,
-    total_timesteps=20000,
-    nlstm=64
+    total_timesteps=50000,
 )

 learn_kwargs = {
@@ -20,7 +19,7 @@ learn_kwargs = {


 alg_list = learn_kwargs.keys()
-rnn_list = ['lstm', 'tflstm', 'tflstm_static']
+rnn_list = ['lstm']

 @pytest.mark.slow
 @pytest.mark.parametrize("alg", alg_list)
@@ -42,11 +41,11 @@ def test_fixed_sequence(alg, rnn):
         **kwargs
     )

-    simple_test(env_fn, learn, 0.3)
+    simple_test(env_fn, learn, 0.7)


 if __name__ == '__main__':
-    test_fixed_sequence('ppo2', 'tflstm')
+    test_fixed_sequence('ppo2', 'lstm')
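The stacked `parametrize` decorators above run every `(alg, rnn)` combination, so trimming `rnn_list` from three entries to one shrinks the test matrix accordingly. A tiny self-contained illustration of that mechanism (the lists below are made up, not the repo's):

```python
import pytest

alg_list = ['a2c', 'ppo2']   # illustrative values only
rnn_list = ['lstm']          # one recurrent variant after this change

@pytest.mark.parametrize("alg", alg_list)
@pytest.mark.parametrize("rnn", rnn_list)
def test_combination(alg, rnn):
    # pytest generates one test per (alg, rnn) pair: 2 x 1 = 2 cases here,
    # versus 2 x 3 when rnn_list still contained tflstm and tflstm_static.
    assert isinstance(alg, str) and isinstance(rnn, str)
```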
@@ -2,7 +2,6 @@ import tensorflow as tf
 import numpy as np
-from gym.spaces import np_random
 from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
 from baselines.bench.monitor import Monitor

 N_TRIALS = 10000
 N_EPISODES = 100
@@ -11,7 +10,7 @@ def simple_test(env_fn, learn_fn, min_reward_fraction, n_trials=N_TRIALS):
     np.random.seed(0)
-    np_random.seed(0)

-    env = DummyVecEnv([lambda: Monitor(env_fn(), None, allow_early_resets=True)])
+    env = DummyVecEnv([env_fn])


     with tf.Graph().as_default(), tf.Session(config=tf.ConfigProto(allow_soft_placement=True)).as_default():
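For completeness, a minimal usage sketch of the composition the removed line above builds: `Monitor` records per-episode reward and length (a `filename` of `None` keeps the stats in memory only), and `DummyVecEnv` turns a list of zero-argument env constructors into a single batched env. `CartPole-v0` is an illustrative stand-in for the test's own `env_fn`, and this assumes the same baselines modules imported in the first hunk.

```python
import gym
import numpy as np
from baselines.bench.monitor import Monitor
from baselines.common.vec_env.dummy_vec_env import DummyVecEnv

env_fn = lambda: gym.make('CartPole-v0')  # stand-in for the test's env_fn

# Same shape as the removed line: each callable builds one monitored env.
venv = DummyVecEnv([lambda: Monitor(env_fn(), None, allow_early_resets=True)])

obs = venv.reset()
actions = np.array([venv.action_space.sample()])  # one action per env
obs, rews, dones, infos = venv.step(actions)
print(obs.shape, rews.shape, dones.shape)  # (1, 4) (1,) (1,)
```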