From 2614f0f65a7764c7e65ccb9d19f86f45903d4bed Mon Sep 17 00:00:00 2001
From: Peter Zhokhov <peterz@openai.com>
Date: Thu, 16 Aug 2018 12:18:06 -0700
Subject: [PATCH] update per-algorithm READMEs to reflect new way of running
 algorithms

---
 baselines/a2c/README.md      | 2 +-
 baselines/acer/README.md     | 2 +-
 baselines/acktr/README.md    | 2 +-
 baselines/deepq/deepq.py     | 2 +-
 baselines/ppo2/README.md     | 5 +++--
 baselines/trpo_mpi/README.md | 4 ++--
 6 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/baselines/a2c/README.md b/baselines/a2c/README.md
index 2df6eb2..957a338 100644
--- a/baselines/a2c/README.md
+++ b/baselines/a2c/README.md
@@ -2,4 +2,4 @@
 
 - Original paper: https://arxiv.org/abs/1602.01783
 - Baselines blog post: https://blog.openai.com/baselines-acktr-a2c/
-- `python -m baselines.a2c.run_atari` runs the algorithm for 40M frames = 10M timesteps on an Atari game. See help (`-h`) for more options.
\ No newline at end of file
+- `python -m baselines.run --alg=a2c --env=PongNoFrameskip-v4` runs the algorithm for 40M frames = 10M timesteps on Atari Pong. See help (`-h`) for more options.
diff --git a/baselines/acer/README.md b/baselines/acer/README.md
index 7a53d75..33e24ff 100644
--- a/baselines/acer/README.md
+++ b/baselines/acer/README.md
@@ -1,4 +1,4 @@
 # ACER
 
 - Original paper: https://arxiv.org/abs/1611.01224
-- `python -m baselines.acer.run_atari` runs the algorithm for 40M frames = 10M timesteps on an Atari game. See help (`-h`) for more options.
\ No newline at end of file
+- `python -m baselines.run --alg=acer --env=PongNoFrameskip-v4` runs the algorithm for 40M frames = 10M timesteps on Atari Pong. See help (`-h`) for more options.
diff --git a/baselines/acktr/README.md b/baselines/acktr/README.md
index e8a806d..0458c5a 100644
--- a/baselines/acktr/README.md
+++ b/baselines/acktr/README.md
@@ -2,4 +2,4 @@
 
 - Original paper: https://arxiv.org/abs/1708.05144
 - Baselines blog post: https://blog.openai.com/baselines-acktr-a2c/
-- `python -m baselines.acktr.run_atari` runs the algorithm for 40M frames = 10M timesteps on an Atari game. See help (`-h`) for more options.
\ No newline at end of file
+- `python -m baselines.run --alg=acktr --env=PongNoFrameskip-v4` runs the algorithm for 40M frames = 10M timesteps on Atari Pong. See help (`-h`) for more options.
diff --git a/baselines/deepq/deepq.py b/baselines/deepq/deepq.py
index 7d44acf..ff6b449 100644
--- a/baselines/deepq/deepq.py
+++ b/baselines/deepq/deepq.py
@@ -27,7 +27,7 @@ class ActWrapper(object):
         self.initial_state = None
 
     @staticmethod
-    def load_act(self, path):
+    def load_act(path):
         with open(path, "rb") as f:
             model_data, act_params = cloudpickle.load(f)
         act = deepq.build_act(**act_params)
diff --git a/baselines/ppo2/README.md b/baselines/ppo2/README.md
index 4c262ad..fd2c139 100644
--- a/baselines/ppo2/README.md
+++ b/baselines/ppo2/README.md
@@ -2,5 +2,6 @@
 
 - Original paper: https://arxiv.org/abs/1707.06347
 - Baselines blog post: https://blog.openai.com/openai-baselines-ppo/
-- `python -m baselines.ppo2.run_atari` runs the algorithm for 40M frames = 10M timesteps on an Atari game. See help (`-h`) for more options.
-- `python -m baselines.ppo2.run_mujoco` runs the algorithm for 1M frames on a Mujoco environment.
+
+- `python -m baselines.run --alg=ppo2 --env=PongNoFrameskip-v4` runs the algorithm for 40M frames = 10M timesteps on Atari Pong. See help (`-h`) for more options.
+- `python -m baselines.run --alg=ppo2 --env=Ant-v2 --num_timesteps=1e6` runs the algorithm for 1M timesteps on a MuJoCo Ant environment.
diff --git a/baselines/trpo_mpi/README.md b/baselines/trpo_mpi/README.md
index b3d9b9d..75cf841 100644
--- a/baselines/trpo_mpi/README.md
+++ b/baselines/trpo_mpi/README.md
@@ -2,5 +2,5 @@
 
 - Original paper: https://arxiv.org/abs/1502.05477
 - Baselines blog post https://blog.openai.com/openai-baselines-ppo/
-- `mpirun -np 16 python -m baselines.trpo_mpi.run_atari` runs the algorithm for 40M frames = 10M timesteps on an Atari game. See help (`-h`) for more options.
-- `python -m baselines.trpo_mpi.run_mujoco` runs the algorithm for 1M timesteps on a Mujoco environment.
+- `mpirun -np 16 python -m baselines.run --alg=trpo_mpi --env=PongNoFrameskip-v4` runs the algorithm for 40M frames = 10M timesteps on Atari Pong. See help (`-h`) for more options.
+- `python -m baselines.run --alg=trpo_mpi --env=Ant-v2 --num_timesteps=1e6` runs the algorithm for 1M timesteps on a MuJoCo Ant environment.