Compare commits

..

42 Commits

Author SHA1 Message Date
Peter Zhokhov
841da92f4d add code coverage report 2018-08-13 10:44:49 -07:00
Peter Zhokhov
624231827c merged benchmarks branch 2018-08-13 09:28:10 -07:00
Peter Zhokhov
1e40ec22be dummy commit to RUN BENCHMARKS 2018-08-08 10:45:18 -07:00
Peter Zhokhov
701a36cdfa added a note in README about TfRunningMeanStd and serialization of VecNormalize 2018-08-08 10:44:58 -07:00
Peter Zhokhov
5a7f9847d8 flake8 complaints 2018-08-03 13:59:58 -07:00
Peter Zhokhov
b63134e5c5 added acer runner (missing import) 2018-08-03 13:31:37 -07:00
Peter Zhokhov
db314cdeda Merge branch 'peterz_profile_vec_normalize' into peterz_migrate_rlalgs 2018-08-03 11:47:36 -07:00
Peter Zhokhov
b08c083d91 use VecNormalize with regular RunningMeanStd 2018-08-03 11:44:12 -07:00
Peter Zhokhov
bfbbe66d9e profiling wip 2018-08-02 11:23:12 -07:00
Peter Zhokhov
1c5c6563b7 reverted VecNormalize to use RunningMeanStd (no tf) 2018-08-02 10:55:09 -07:00
Peter Zhokhov
1fa8c58da5 reverted VecNormalize to use RunningMeanStd (no tf) 2018-08-02 10:54:07 -07:00
Peter Zhokhov
f6d1115ead reverted running_mean_std to use property decorators for mean, var, count 2018-08-02 10:32:22 -07:00
Peter Zhokhov
f6d5a47bed use ncpu=1 for mujoco sessions - gives a bit of a performance speedup 2018-08-02 10:24:21 -07:00
Peter Zhokhov
c2df27bee4 non-tf normalization benchmark RUN BENCHMARKS 2018-08-02 09:41:41 -07:00
Peter Zhokhov
974c15756e changed default ppo2 lr schedule to linear RUN BENCHMARKS 2018-08-01 16:24:44 -07:00
Peter Zhokhov
ad43fd9a35 add defaults 2018-08-01 16:15:59 -07:00
Peter Zhokhov
72c357c638 hardcode names of retro environments 2018-08-01 15:18:59 -07:00
Peter Zhokhov
e00e5ca016 run ppo_mpi benchmarks only RUN BENCHMARKS 2018-08-01 14:56:08 -07:00
Peter Zhokhov
705797f2f0 Merge branch 'peterz_migrate_rlalgs' into peterz_benchmarks 2018-08-01 14:46:40 -07:00
Peter Zhokhov
fcd84aa831 make_atari_env compatible with mpi 2018-08-01 14:46:18 -07:00
Peter Zhokhov
390b51597a benchmarks on ppo2 only RUN BENCHMARKS 2018-08-01 11:01:50 -07:00
Peter Zhokhov
95104a3592 Merge branch 'peterz_migrate_rlalgs' into peterz_benchmarks 2018-08-01 10:50:29 -07:00
Peter Zhokhov
3528f7b992 save all variables to make sure we save the vec_normalize normalization 2018-08-01 10:12:19 -07:00
Peter Zhokhov
151e48009e flake8 complaints 2018-07-31 16:25:12 -07:00
Peter Zhokhov
92f33335e9 dummy commit to RUN BENCHMARKS 2018-07-31 15:53:18 -07:00
Peter Zhokhov
af729cff15 dummy commit to RUN BENCHMARKS 2018-07-31 15:37:00 -07:00
Peter Zhokhov
10f815fe1d fixed import in vec_normalize 2018-07-31 15:19:43 -07:00
Peter Zhokhov
8c4adac898 running_mean_std uses tensorflow variables 2018-07-31 14:45:55 -07:00
Peter Zhokhov
2a93ea8782 serialize variables as a dict, not as a list 2018-07-31 11:13:31 -07:00
Peter Zhokhov
9c48f9fad5 very dummy commit to RUN BENCHMARKS 2018-07-31 10:23:43 -07:00
Peter Zhokhov
348cbb4b71 dummy commit to RUN BENCHMARKS 2018-07-31 09:42:23 -07:00
Peter Zhokhov
a1602ab15f dummy commit to RUN BENCHMARKS 2018-07-30 17:51:16 -07:00
Peter Zhokhov
e63e69bb14 dummy commit to RUN BENCHMARKS 2018-07-30 17:39:22 -07:00
Peter Zhokhov
385e7e5c0d dummy commit to RUN BENCHMARKS 2018-07-30 17:21:05 -07:00
Peter Zhokhov
d112a2e49f added approximate humanoid reward with ppo2 into the README for reference 2018-07-30 16:58:31 -07:00
Peter Zhokhov
e662dd6409 run.py can run algos from both baselines and rl_algs 2018-07-30 16:09:48 -07:00
Peter Zhokhov
efc6bffce3 replaced atari_arg_parser with common_arg_parser 2018-07-30 15:58:56 -07:00
Peter Zhokhov
872181d4c3 re-exported rl_algs - fixed problems with serialization test and test_cartpole 2018-07-30 15:49:48 -07:00
Peter Zhokhov
628ddecf6a re-exported rl_algs 2018-07-30 12:15:46 -07:00
peter
83a4a4be65 run slow tests 2018-07-26 14:39:25 -07:00
peter
7edac38c73 more stuff from rl-algs 2018-07-26 14:26:57 -07:00
peter
a6dca44115 exported rl-algs 2018-07-26 14:02:04 -07:00
8 changed files with 13 additions and 11 deletions

View File

@@ -1 +1 @@
ppo2

1
.gitignore vendored
View File

@@ -5,6 +5,7 @@
.pytest_cache
.DS_Store
.idea
.coverage
# Setuptools distribution and build folders.
/dist/

View File

@@ -139,3 +139,4 @@ To cite this repository in publications:
journal = {GitHub repository},
howpublished = {\url{https://github.com/openai/baselines}},
}

View File

@@ -156,7 +156,7 @@ class FrameStack(gym.Wrapper):
self.k = k
self.frames = deque([], maxlen=k)
shp = env.observation_space.shape
-self.observation_space = spaces.Box(low=0, high=255, shape=(shp[0], shp[1], shp[2] * k), dtype=np.uint8)
+self.observation_space = spaces.Box(low=0, high=255, shape=(shp[0], shp[1], shp[2] * k), dtype=env.observation_space.dtype)
def reset(self):
ob = self.env.reset()
@@ -176,6 +176,7 @@ class FrameStack(gym.Wrapper):
class ScaledFloatFrame(gym.ObservationWrapper):
def __init__(self, env):
gym.ObservationWrapper.__init__(self, env)
self.observation_space = gym.spaces.Box(low=0, high=1, shape=env.observation_space.shape, dtype=np.float32)
def observation(self, observation):
# careful! This undoes the memory optimization, use

View File

@@ -138,7 +138,7 @@ def conv_only(convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)], **conv_kwargs):
'''
def network_fn(X):
-out = X
+out = tf.cast(X, tf.float32) / 255.
with tf.variable_scope("convnet"):
for num_outputs, kernel_size, stride in convs:
out = layers.convolution2d(out,

View File

@@ -6,8 +6,7 @@ from baselines.run import get_learn_function
common_kwargs = dict(
seed=0,
total_timesteps=20000,
-nlstm=64
+total_timesteps=50000,
)
learn_kwargs = {
@@ -20,7 +19,7 @@ learn_kwargs = {
alg_list = learn_kwargs.keys()
-rnn_list = ['lstm', 'tflstm', 'tflstm_static']
+rnn_list = ['lstm']
@pytest.mark.slow
@pytest.mark.parametrize("alg", alg_list)
@@ -42,11 +41,11 @@ def test_fixed_sequence(alg, rnn):
**kwargs
)
-simple_test(env_fn, learn, 0.3)
+simple_test(env_fn, learn, 0.7)
if __name__ == '__main__':
-test_fixed_sequence('ppo2', 'tflstm')
+test_fixed_sequence('ppo2', 'lstm')

View File

@@ -2,7 +2,6 @@ import tensorflow as tf
import numpy as np
from gym.spaces import np_random
from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
from baselines.bench.monitor import Monitor
N_TRIALS = 10000
N_EPISODES = 100
@@ -11,7 +10,7 @@ def simple_test(env_fn, learn_fn, min_reward_fraction, n_trials=N_TRIALS):
np.random.seed(0)
np_random.seed(0)
-env = DummyVecEnv([lambda: Monitor(env_fn(), None, allow_early_resets=True)])
+env = DummyVecEnv([env_fn])
with tf.Graph().as_default(), tf.Session(config=tf.ConfigProto(allow_soft_placement=True)).as_default():

View File

@@ -25,7 +25,8 @@ setup(name='baselines',
extras_require={
'test': [
'filelock',
-'pytest'
+'pytest',
+'pytest-cov',
]
},
description='OpenAI baselines: high quality implementations of reinforcement learning algorithms',