Compare commits
5 Commits
peterz_tfl ... peterz_ubu

Author | SHA1 | Date
---|---|---
 | 11176eedce |
 | b222dd0610 |
 | 1870685071 |
 | 8c2aea2add |
 | 366f486e34 |
@@ -1 +1 @@
ppo2
@@ -1,4 +1,4 @@
-FROM ubuntu:16.04
+FROM ubuntu:18.04
 
 RUN apt-get -y update && apt-get -y install git wget python-dev python3-dev libopenmpi-dev python-pip zlib1g-dev cmake python-opencv
 ENV CODE_DIR /root/code
README.md (18 changes)
@@ -112,10 +112,6 @@ This should get to the mean reward per episode about 5k. To load and visualize t
-*NOTE:* At the moment, Mujoco training uses the VecNormalize wrapper for the environment, which is not saved correctly; as a result, loading models trained on Mujoco will not work well if the environment is recreated. If necessary, you can work around that by replacing RunningMeanStd with TfRunningMeanStd in [baselines/common/vec_env/vec_normalize.py](baselines/common/vec_env/vec_normalize.py#L12). This way, the mean and std of the environment-normalizing wrapper will be saved in tensorflow variables and included in the model file; however, training is slower that way, so it is not included by default.
-
-
-
 ## Subpackages
 
 - [A2C](baselines/a2c)
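The removed note names a concrete workaround, sketched below for reference. It assumes TfRunningMeanStd lives in baselines/common/running_mean_std and accepts shape and scope keyword arguments, and it omits the actual normalization logic; this is an illustration, not the repository's code.

```python
# Sketch of the workaround from the removed NOTE: back VecNormalize's running
# statistics with TF variables so they are saved into the model file.
# TfRunningMeanStd's import path and constructor signature are assumptions.
from baselines.common.vec_env import VecEnvWrapper
from baselines.common.running_mean_std import TfRunningMeanStd

class VecNormalize(VecEnvWrapper):
    def __init__(self, venv, ob=True, ret=True):
        VecEnvWrapper.__init__(self, venv)
        # TF-variable-backed statistics are checkpointed with the model;
        # updates run through the session, which is why training is slower.
        self.ob_rms = TfRunningMeanStd(shape=self.observation_space.shape, scope='ob_rms') if ob else None
        self.ret_rms = TfRunningMeanStd(shape=(), scope='ret_rms') if ret else None

    # pass-throughs so the wrapper is instantiable; the observation/reward
    # normalization itself is omitted from this sketch
    def reset(self):
        return self.venv.reset()

    def step_wait(self):
        return self.venv.step_wait()
```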
@@ -125,10 +121,19 @@ This should get to the mean reward per episode about 5k. To load and visualize t
 - [DQN](baselines/deepq)
 - [GAIL](baselines/gail)
 - [HER](baselines/her)
-- [PPO1](baselines/ppo1) (Multi-CPU using MPI)
-- [PPO2](baselines/ppo2) (Optimized for GPU)
+- [PPO1](baselines/ppo1) (obsolete version, left here temporarily)
+- [PPO2](baselines/ppo2)
 - [TRPO](baselines/trpo_mpi)
 
+## Benchmarks
+Results of benchmarks on Mujoco (1M timesteps) and Atari (10M timesteps) are available
+[here for Mujoco](https://htmlpreview.github.com/?https://github.com/openai/baselines/blob/master/benchmarks_mujoco1M.htm)
+and
+[here for Atari](https://htmlpreview.github.com/?https://github.com/openai/baselines/blob/master/benchmarks_atari10M.htm)
+respectively. Note that these results may not be from the latest version of the code; the particular commit hash with which the results were obtained is specified on the benchmarks page.
+
 To cite this repository in publications:
 
     @misc{baselines,
@@ -139,3 +144,4 @@ To cite this repository in publications:
       journal = {GitHub repository},
       howpublished = {\url{https://github.com/openai/baselines}},
     }
+
@@ -156,7 +156,7 @@ class FrameStack(gym.Wrapper):
         self.k = k
         self.frames = deque([], maxlen=k)
         shp = env.observation_space.shape
-        self.observation_space = spaces.Box(low=0, high=255, shape=(shp[0], shp[1], shp[2] * k), dtype=np.uint8)
+        self.observation_space = spaces.Box(low=0, high=255, shape=(shp[0], shp[1], shp[2] * k), dtype=env.observation_space.dtype)
 
     def reset(self):
         ob = self.env.reset()
@@ -176,6 +176,7 @@ class FrameStack(gym.Wrapper):
 class ScaledFloatFrame(gym.ObservationWrapper):
     def __init__(self, env):
         gym.ObservationWrapper.__init__(self, env)
+        self.observation_space = gym.spaces.Box(low=0, high=1, shape=env.observation_space.shape, dtype=np.float32)
 
     def observation(self, observation):
         # careful! This undoes the memory optimization, use
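Taken together, the two hunks above make observation dtypes compose across wrappers. A small sketch of the interaction, assuming gym with the Atari extras is installed (the environment id is a stand-in):

```python
import numpy as np
import gym
from baselines.common.atari_wrappers import ScaledFloatFrame, FrameStack

# ScaledFloatFrame converts observations to float32 in [0, 1]; FrameStack
# previously hard-coded np.uint8 for its stacked observation space, which
# misreported the dtype in this composition.
env = ScaledFloatFrame(gym.make('PongNoFrameskip-v4'))
env = FrameStack(env, 4)  # stacks 4 frames along the last axis
assert env.observation_space.dtype == np.float32
```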
@@ -138,7 +138,7 @@ def conv_only(convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)], **conv_kwargs):
     '''
 
     def network_fn(X):
-        out = X
+        out = tf.cast(X, tf.float32) / 255.
         with tf.variable_scope("convnet"):
             for num_outputs, kernel_size, stride in convs:
                 out = layers.convolution2d(out,
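The one-line change moves pixel normalization into the graph. A standalone sketch of the same cast, using the TF 1.x API seen throughout the diff (the placeholder shape is an assumption):

```python
import tensorflow as tf

# Raw Atari observations are uint8 in [0, 255]; cast and rescale them once,
# inside the graph, instead of feeding unnormalized pixels to the convnet.
X = tf.placeholder(tf.uint8, shape=[None, 84, 84, 4])  # assumed frame shape
out = tf.cast(X, tf.float32) / 255.
```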
@@ -6,8 +6,7 @@ from baselines.run import get_learn_function
 
 common_kwargs = dict(
     seed=0,
-    total_timesteps=20000,
-    nlstm=64
+    total_timesteps=50000,
 )
 
 learn_kwargs = {
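These kwargs are forwarded to the learner looked up through get_learn_function, which this file already imports; nlstm is algorithm-specific, which is presumably why it leaves the shared dict. A usage sketch (the algorithm name is a stand-in, and the commented call elides required arguments such as the env):

```python
from baselines.run import get_learn_function

learn = get_learn_function('ppo2')  # returns that algorithm's learn()
# model = learn(env=env, seed=0, total_timesteps=50000)  # shared kwargs forwarded
```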
@@ -20,7 +19,7 @@ learn_kwargs = {
 
 
 alg_list = learn_kwargs.keys()
-rnn_list = ['lstm', 'tflstm', 'tflstm_static']
+rnn_list = ['lstm']
 
 @pytest.mark.slow
 @pytest.mark.parametrize("alg", alg_list)
@@ -42,11 +41,11 @@ def test_fixed_sequence(alg, rnn):
         **kwargs
     )
 
-    simple_test(env_fn, learn, 0.3)
+    simple_test(env_fn, learn, 0.7)
 
 
 if __name__ == '__main__':
-    test_fixed_sequence('ppo2', 'tflstm')
+    test_fixed_sequence('ppo2', 'lstm')
@@ -2,7 +2,6 @@ import tensorflow as tf
 import numpy as np
 from gym.spaces import np_random
 from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
-from baselines.bench.monitor import Monitor
 
 N_TRIALS = 10000
 N_EPISODES = 100
@@ -11,7 +10,7 @@ def simple_test(env_fn, learn_fn, min_reward_fraction, n_trials=N_TRIALS):
     np.random.seed(0)
     np_random.seed(0)
 
-    env = DummyVecEnv([lambda: Monitor(env_fn(), None, allow_early_resets=True)])
+    env = DummyVecEnv([env_fn])
 
     with tf.Graph().as_default(), tf.Session(config=tf.ConfigProto(allow_soft_placement=True)).as_default():
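With the Monitor wrapper dropped, the test envs are handed to DummyVecEnv bare. A sketch of the contract (the CartPole factory stands in for the test's own env_fn):

```python
import gym
from baselines.common.vec_env.dummy_vec_env import DummyVecEnv

# DummyVecEnv takes a list of zero-argument env factories and steps them
# sequentially in-process; observations come back batched over num_envs.
env_fn = lambda: gym.make('CartPole-v0')
env = DummyVecEnv([env_fn])
obs = env.reset()  # shape: (1,) + observation_space.shape
```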
@@ -32,7 +32,7 @@ In particular notice that once `deepq.learn` finishes training it returns `act`
 
 - [baselines/deepq/experiments/custom_cartpole.py](experiments/custom_cartpole.py) - Cartpole training with more fine-grained control over the internals of the DQN algorithm.
-- [baselines/deepq/experiments/atari/train.py](experiments/atari/train.py) - more robust setup for training at scale.
+- [baselines/deepq/experiments/run_atari.py](experiments/run_atari.py) - more robust setup for training at scale.
 
 ##### Download a pretrained Atari agent
benchmarks_atari10M.htm (new file, 12351 lines)
File diff suppressed because it is too large

benchmarks_mujoco1M.htm (new file, 5640 lines)
File diff suppressed because it is too large