Compare commits
49 Commits
peterz_tfl ... peterz_ben
| Author | SHA1 | Date |
|---|---|---|
| | ea68f3b7e6 | |
| | ca721a4be6 | |
| | 72f3572a10 | |
| | b9cd941471 | |
| | 0899b71ede | |
| | cc8c9541fb | |
| | cb32522394 | |
| | 1e40ec22be | |
| | 701a36cdfa | |
| | 5a7f9847d8 | |
| | b63134e5c5 | |
| | db314cdeda | |
| | b08c083d91 | |
| | bfbbe66d9e | |
| | 1c5c6563b7 | |
| | 1fa8c58da5 | |
| | f6d1115ead | |
| | f6d5a47bed | |
| | c2df27bee4 | |
| | 974c15756e | |
| | ad43fd9a35 | |
| | 72c357c638 | |
| | e00e5ca016 | |
| | 705797f2f0 | |
| | fcd84aa831 | |
| | 390b51597a | |
| | 95104a3592 | |
| | 3528f7b992 | |
| | 151e48009e | |
| | 92f33335e9 | |
| | af729cff15 | |
| | 10f815fe1d | |
| | 8c4adac898 | |
| | 2a93ea8782 | |
| | 9c48f9fad5 | |
| | 348cbb4b71 | |
| | a1602ab15f | |
| | e63e69bb14 | |
| | 385e7e5c0d | |
| | d112a2e49f | |
| | e662dd6409 | |
| | efc6bffce3 | |
| | 872181d4c3 | |
| | 628ddecf6a | |
| | 83a4a4be65 | |
| | 7edac38c73 | |
| | a6dca44115 | |
| | 622915c473 | |
| | a1d3c18ec0 | |
@@ -1 +1 @@
ppo2
@@ -139,3 +139,4 @@ To cite this repository in publications:
     journal = {GitHub repository},
     howpublished = {\url{https://github.com/openai/baselines}},
 }
+
@@ -156,7 +156,7 @@ class FrameStack(gym.Wrapper):
         self.k = k
         self.frames = deque([], maxlen=k)
         shp = env.observation_space.shape
-        self.observation_space = spaces.Box(low=0, high=255, shape=(shp[0], shp[1], shp[2] * k), dtype=np.uint8)
+        self.observation_space = spaces.Box(low=0, high=255, shape=(shp[0], shp[1], shp[2] * k), dtype=env.observation_space.dtype)

     def reset(self):
         ob = self.env.reset()
@@ -176,6 +176,7 @@ class FrameStack(gym.Wrapper):
 class ScaledFloatFrame(gym.ObservationWrapper):
     def __init__(self, env):
         gym.ObservationWrapper.__init__(self, env)
+        self.observation_space = gym.spaces.Box(low=0, high=1, shape=env.observation_space.shape, dtype=np.float32)

     def observation(self, observation):
         # careful! This undoes the memory optimization, use
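As a sanity check on the two wrapper changes above, here is a small self-contained sketch of the `ScaledFloatFrame` behaviour, assuming the pre-0.26 `gym` API; `ToyImageEnv` is a made-up stand-in for an Atari env, not something from this repo. It shows why the wrapper advertises a float32 `[0, 1]` Box and why the comment above warns about undoing the uint8 memory optimization.

```python
import numpy as np
import gym
from gym import spaces


class ToyImageEnv(gym.Env):
    """Illustrative stand-in for an Atari env: emits 84x84x1 uint8 frames."""
    def __init__(self):
        self.observation_space = spaces.Box(low=0, high=255, shape=(84, 84, 1), dtype=np.uint8)
        self.action_space = spaces.Discrete(2)

    def reset(self):
        return self.observation_space.sample()

    def step(self, action):
        return self.observation_space.sample(), 0.0, False, {}


class ScaledFloatFrame(gym.ObservationWrapper):
    """Same idea as the hunk above: advertise a float32 [0, 1] Box so that
    downstream code sees a space consistent with the scaled observations."""
    def __init__(self, env):
        gym.ObservationWrapper.__init__(self, env)
        self.observation_space = gym.spaces.Box(low=0, high=1, shape=env.observation_space.shape, dtype=np.float32)

    def observation(self, observation):
        # Converting every frame to float32 costs 4x the memory of uint8,
        # which is exactly the optimization the original comment warns about.
        return np.array(observation).astype(np.float32) / 255.0


env = ScaledFloatFrame(ToyImageEnv())
obs = env.reset()
print(env.observation_space.dtype, obs.dtype, float(obs.max()))  # float32 float32 <= 1.0
```

The `FrameStack` change is complementary: by taking `dtype=env.observation_space.dtype` it no longer hard-codes `np.uint8`, so the stacked space reports the right dtype when it sits above a wrapper like this one.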
@@ -92,48 +92,6 @@ def lstm(nlstm=128, layer_norm=False):

     return network_fn

-
-def tflstm_static(nlstm=128, layer_norm=False):
-    def network_fn(X, nenv=1):
-        nbatch = X.shape[0]
-        nsteps = nbatch // nenv
-
-        h = tf.layers.flatten(X)
-        rnn_cell = tf.nn.rnn_cell.BasicLSTMCell(nlstm, state_is_tuple=False, forget_bias=0.0)
-
-        S = tf.placeholder(tf.float32, rnn_cell.zero_state(nenv, dtype=tf.float32).shape) #states
-        M = tf.placeholder(tf.float32, [nbatch]) #mask (done t-1)
-
-        xs = batch_to_seq(h, nenv, nsteps)
-
-        h5, snew = tf.nn.static_rnn(rnn_cell, xs, initial_state=S)
-
-        h = seq_to_batch(h5)
-
-        initial_state = np.zeros(S.shape.as_list(), dtype=float)
-
-        return h, {'S':S, 'M':M, 'state':snew, 'initial_state':initial_state}
-
-    return network_fn
-
-
-def tflstm(nlstm=128):
-    def network_fn(X, nenv=1):
-        nbatch = X.shape[0]
-        nsteps = nbatch // nenv
-
-        h = tf.layers.flatten(X)
-        rnn_cell = tf.nn.rnn_cell.BasicLSTMCell(nlstm, state_is_tuple=False, forget_bias=0.0)
-
-        S = tf.placeholder(tf.float32, rnn_cell.zero_state(nenv, dtype=tf.float32).shape) #states
-        M = tf.placeholder(tf.float32, [nbatch]) #mask (done t-1)
-        initial_state = np.zeros(S.shape)
-
-        h = tf.reshape(h, (-1, nsteps, h.shape[-1]))
-        h, snew = tf.nn.dynamic_rnn(rnn_cell, h, initial_state=S)
-
-        h = tf.reshape(h, (-1, h.shape[-1]))
-        return h, {'S':S, 'M':M, 'state':snew, 'initial_state':initial_state}
-
-    return network_fn
-

 def cnn_lstm(nlstm=128, layer_norm=False, **conv_kwargs):
     def network_fn(X, nenv=1):
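For reference, the two builders removed above differ only in how they feed the LSTM cell: `tflstm_static` turns the flat batch into a Python list of per-step tensors (`batch_to_seq` plus `tf.nn.static_rnn`), while `tflstm` reshapes it to `(nenv, nsteps, features)` for `tf.nn.dynamic_rnn`. Both rely on the batch being laid out env-major, which the `tf.reshape(h, (-1, nsteps, h.shape[-1]))` line makes explicit. A NumPy-only sketch of that layout (sizes and names here are illustrative, not from the repo):

```python
import numpy as np

# Illustrative sizes: 2 envs, 3 steps, 4 features.
nenv, nsteps, nfeat = 2, 3, 4

# Flat batch as the removed builders expect it: row i*nsteps + t holds
# env i at step t, so reshaping to (nenv, nsteps, nfeat) is a pure view change.
flat = np.arange(nenv * nsteps * nfeat, dtype=np.float32).reshape(nenv * nsteps, nfeat)

# dynamic_rnn path in the removed tflstm(): one (nenv, nsteps, nfeat) tensor.
seq_major = flat.reshape(nenv, nsteps, nfeat)

# static_rnn path in the removed tflstm_static(): a Python list of nsteps
# tensors of shape (nenv, nfeat), which is what batch_to_seq produces.
step_list = [seq_major[:, t, :] for t in range(nsteps)]

assert np.allclose(np.stack(step_list, axis=1), seq_major)
# Flattening back (seq_to_batch) recovers the original batch ordering.
assert np.allclose(seq_major.reshape(nenv * nsteps, nfeat), flat)
print([x.shape for x in step_list], seq_major.shape)
```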
@@ -180,7 +138,7 @@ def conv_only(convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)], **conv_kwargs):
     '''

     def network_fn(X):
-        out = X
+        out = tf.cast(X, tf.float32) / 255.
         with tf.variable_scope("convnet"):
             for num_outputs, kernel_size, stride in convs:
                 out = layers.convolution2d(out,
@@ -211,10 +169,6 @@ def get_network_builder(name):
         return mlp
     elif name == 'lstm':
         return lstm
-    elif name == 'tflstm_static':
-        return tflstm_static
-    elif name == 'tflstm':
-        return tflstm
     elif name == 'cnn_lstm':
         return cnn_lstm
     elif name == 'cnn_lnlstm':
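A side note on the `get_network_builder` hunk: with the two `tflstm` branches gone, the function is back to a plain name-to-builder mapping, which could just as well be a dict lookup. This is only a sketch of that alternative (the stand-in builders below are placeholders), not how the file is actually written:

```python
def mlp(**network_kwargs):
    raise NotImplementedError  # placeholder for the real builder

def lstm(**network_kwargs):
    raise NotImplementedError  # placeholder for the real builder

# name -> builder table; registering a new network is one line instead of
# another elif branch, and unknown names still fail loudly.
_BUILDERS = {
    'mlp': mlp,
    'lstm': lstm,
}

def get_network_builder(name):
    try:
        return _BUILDERS[name]
    except KeyError:
        raise ValueError('Unknown network type: {}'.format(name))
```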
@@ -6,8 +6,7 @@ from baselines.run import get_learn_function

 common_kwargs = dict(
     seed=0,
-    total_timesteps=20000,
-    nlstm=64
+    total_timesteps=50000,
 )

 learn_kwargs = {
@@ -20,7 +19,7 @@ learn_kwargs = {


 alg_list = learn_kwargs.keys()
-rnn_list = ['lstm', 'tflstm', 'tflstm_static']
+rnn_list = ['lstm']

 @pytest.mark.slow
 @pytest.mark.parametrize("alg", alg_list)
@@ -42,11 +41,11 @@ def test_fixed_sequence(alg, rnn):
         **kwargs
     )

-    simple_test(env_fn, learn, 0.3)
+    simple_test(env_fn, learn, 0.7)


 if __name__ == '__main__':
-    test_fixed_sequence('ppo2', 'tflstm')
+    test_fixed_sequence('ppo2', 'lstm')
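The stacked `parametrize` decorators above run every `(alg, rnn)` combination, so trimming `rnn_list` from three entries to one shrinks the test matrix accordingly. A tiny self-contained illustration of that mechanism (the lists below are made up, not the repo's):

```python
import pytest

alg_list = ['a2c', 'ppo2']   # illustrative values only
rnn_list = ['lstm']          # one recurrent variant after this change

@pytest.mark.parametrize("alg", alg_list)
@pytest.mark.parametrize("rnn", rnn_list)
def test_combination(alg, rnn):
    # pytest generates one test per (alg, rnn) pair: 2 x 1 = 2 cases here,
    # versus 2 x 3 when rnn_list still contained tflstm and tflstm_static.
    assert isinstance(alg, str) and isinstance(rnn, str)
```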
@@ -2,7 +2,6 @@ import tensorflow as tf
 import numpy as np
-from gym.spaces import np_random
 from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
 from baselines.bench.monitor import Monitor

 N_TRIALS = 10000
 N_EPISODES = 100
@@ -11,7 +10,7 @@ def simple_test(env_fn, learn_fn, min_reward_fraction, n_trials=N_TRIALS):
     np.random.seed(0)
-    np_random.seed(0)

-    env = DummyVecEnv([lambda: Monitor(env_fn(), None, allow_early_resets=True)])
+    env = DummyVecEnv([env_fn])


     with tf.Graph().as_default(), tf.Session(config=tf.ConfigProto(allow_soft_placement=True)).as_default():
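For completeness, a minimal usage sketch of the composition the removed line above builds: `Monitor` records per-episode reward and length (a `filename` of `None` keeps the stats in memory only), and `DummyVecEnv` turns a list of zero-argument env constructors into a single batched env. `CartPole-v0` is an illustrative stand-in for the test's own `env_fn`, and this assumes the same baselines modules imported in the first hunk.

```python
import gym
import numpy as np
from baselines.bench.monitor import Monitor
from baselines.common.vec_env.dummy_vec_env import DummyVecEnv

env_fn = lambda: gym.make('CartPole-v0')  # stand-in for the test's env_fn

# Same shape as the removed line: each callable builds one monitored env.
venv = DummyVecEnv([lambda: Monitor(env_fn(), None, allow_early_resets=True)])

obs = venv.reset()
actions = np.array([venv.action_space.sample()])  # one action per env
obs, rews, dones, infos = venv.step(actions)
print(obs.shape, rews.shape, dones.shape)  # (1, 4) (1,) (1,)
```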