From 2614f0f65a7764c7e65ccb9d19f86f45903d4bed Mon Sep 17 00:00:00 2001 From: Peter Zhokhov Date: Thu, 16 Aug 2018 12:18:06 -0700 Subject: [PATCH] update per-algorithm READMEs to reflect new way of running algorithms --- baselines/a2c/README.md | 2 +- baselines/acer/README.md | 2 +- baselines/acktr/README.md | 2 +- baselines/deepq/deepq.py | 2 +- baselines/ppo2/README.md | 5 +++-- baselines/trpo_mpi/README.md | 4 ++-- 6 files changed, 9 insertions(+), 8 deletions(-) diff --git a/baselines/a2c/README.md b/baselines/a2c/README.md index 2df6eb2..957a338 100644 --- a/baselines/a2c/README.md +++ b/baselines/a2c/README.md @@ -2,4 +2,4 @@ - Original paper: https://arxiv.org/abs/1602.01783 - Baselines blog post: https://blog.openai.com/baselines-acktr-a2c/ -- `python -m baselines.a2c.run_atari` runs the algorithm for 40M frames = 10M timesteps on an Atari game. See help (`-h`) for more options. \ No newline at end of file +- `python -m baselines.run --alg=a2c --env=PongNoFrameskip-v4` runs the algorithm for 40M frames = 10M timesteps on Atari Pong. See help (`-h`) for more options. diff --git a/baselines/acer/README.md b/baselines/acer/README.md index 7a53d75..33e24ff 100644 --- a/baselines/acer/README.md +++ b/baselines/acer/README.md @@ -1,4 +1,4 @@ # ACER - Original paper: https://arxiv.org/abs/1611.01224 -- `python -m baselines.acer.run_atari` runs the algorithm for 40M frames = 10M timesteps on an Atari game. See help (`-h`) for more options. \ No newline at end of file +- `python -m baselines.run --alg=acer --env=PongNoFrameskip-v4` runs the algorithm for 40M frames = 10M timesteps on Atari Pong. See help (`-h`) for more options. 
diff --git a/baselines/acktr/README.md b/baselines/acktr/README.md index e8a806d..0458c5a 100644 --- a/baselines/acktr/README.md +++ b/baselines/acktr/README.md @@ -2,4 +2,4 @@ - Original paper: https://arxiv.org/abs/1708.05144 - Baselines blog post: https://blog.openai.com/baselines-acktr-a2c/ -- `python -m baselines.acktr.run_atari` runs the algorithm for 40M frames = 10M timesteps on an Atari game. See help (`-h`) for more options. \ No newline at end of file +- `python -m baselines.run --alg=acktr --env=PongNoFrameskip-v4` runs the algorithm for 40M frames = 10M timesteps on Atari Pong. See help (`-h`) for more options. diff --git a/baselines/deepq/deepq.py b/baselines/deepq/deepq.py index 7d44acf..ff6b449 100644 --- a/baselines/deepq/deepq.py +++ b/baselines/deepq/deepq.py @@ -27,7 +27,7 @@ class ActWrapper(object): self.initial_state = None @staticmethod - def load_act(self, path): + def load_act(path): with open(path, "rb") as f: model_data, act_params = cloudpickle.load(f) act = deepq.build_act(**act_params) diff --git a/baselines/ppo2/README.md b/baselines/ppo2/README.md index 4c262ad..fd2c139 100644 --- a/baselines/ppo2/README.md +++ b/baselines/ppo2/README.md @@ -2,5 +2,6 @@ - Original paper: https://arxiv.org/abs/1707.06347 - Baselines blog post: https://blog.openai.com/openai-baselines-ppo/ -- `python -m baselines.ppo2.run_atari` runs the algorithm for 40M frames = 10M timesteps on an Atari game. See help (`-h`) for more options. -- `python -m baselines.ppo2.run_mujoco` runs the algorithm for 1M frames on a Mujoco environment. + +- `python -m baselines.run --alg=ppo2 --env=PongNoFrameskip-v4` runs the algorithm for 40M frames = 10M timesteps on Atari Pong. See help (`-h`) for more options. +- `python -m baselines.run --alg=ppo2 --env=Ant-v2 --num_timesteps=1e6` runs the algorithm for 1M frames on a Mujoco Ant environment. 
diff --git a/baselines/trpo_mpi/README.md b/baselines/trpo_mpi/README.md index b3d9b9d..75cf841 100644 --- a/baselines/trpo_mpi/README.md +++ b/baselines/trpo_mpi/README.md @@ -2,5 +2,5 @@ - Original paper: https://arxiv.org/abs/1502.05477 - Baselines blog post https://blog.openai.com/openai-baselines-ppo/ -- `mpirun -np 16 python -m baselines.trpo_mpi.run_atari` runs the algorithm for 40M frames = 10M timesteps on an Atari game. See help (`-h`) for more options. -- `python -m baselines.trpo_mpi.run_mujoco` runs the algorithm for 1M timesteps on a Mujoco environment. +- `mpirun -np 16 python -m baselines.run --alg=trpo_mpi --env=PongNoFrameskip-v4` runs the algorithm for 40M frames = 10M timesteps on Atari Pong. See help (`-h`) for more options. +- `python -m baselines.run --alg=trpo_mpi --env=Ant-v2 --num_timesteps=1e6` runs the algorithm for 1M timesteps on a Mujoco Ant environment.