diff --git a/baselines/a2c/runner.py b/baselines/a2c/runner.py
index 8d0c6ec..0181472 100644
--- a/baselines/a2c/runner.py
+++ b/baselines/a2c/runner.py
@@ -37,9 +37,6 @@ class Runner(AbstractEnvRunner):
obs, rewards, dones, _ = self.env.step(actions)
self.states = states
self.dones = dones
- for n, done in enumerate(dones):
- if done:
- self.obs[n] = self.obs[n]*0
self.obs = obs
mb_rewards.append(rewards)
mb_dones.append(self.dones)
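Note: the deleted loop was a dead store. Each zeroed `self.obs[n]` was discarded one line later by the unconditional `self.obs = obs` rebind, so the zeroing never reached training data. A minimal sketch of the pattern, using plain Python lists for illustration:

```python
# Minimal sketch of the dead store removed above (plain lists, illustrative only):
obs = [1.0, 2.0]                   # fresh observations returned by env.step
runner_obs = [9.0, 9.0]            # runner's stale observations
runner_obs[0] = runner_obs[0] * 0  # zero a stale entry on episode end...
runner_obs = obs                   # ...then rebind to the fresh batch regardless
print(runner_obs)                  # [1.0, 2.0] -- the zeroing had no effect
```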
diff --git a/baselines/acer/acer.py b/baselines/acer/acer.py
index 0ae0330..df4e0bf 100644
--- a/baselines/acer/acer.py
+++ b/baselines/acer/acer.py
@@ -75,8 +75,8 @@ class Model(object):
train_ob_placeholder = tf.placeholder(dtype=ob_space.dtype, shape=(nenvs*(nsteps+1),) + ob_space.shape)
with tf.variable_scope('acer_model', reuse=tf.AUTO_REUSE):
- step_model = policy(observ_placeholder=step_ob_placeholder, sess=sess)
- train_model = policy(observ_placeholder=train_ob_placeholder, sess=sess)
+ step_model = policy(nbatch=nenvs, nsteps=1, observ_placeholder=step_ob_placeholder, sess=sess)
+ train_model = policy(nbatch=nbatch, nsteps=nsteps, observ_placeholder=train_ob_placeholder, sess=sess)
params = find_trainable_variables("acer_model")
@@ -94,7 +94,7 @@ class Model(object):
return v
with tf.variable_scope("acer_model", custom_getter=custom_getter, reuse=True):
- polyak_model = policy(observ_placeholder=train_ob_placeholder, sess=sess)
+ polyak_model = policy(nbatch=nbatch, nsteps=nsteps, observ_placeholder=train_ob_placeholder, sess=sess)
# Notation: (var) = batch variable, (var)s = sequence variable, (var)_i = variable indexed by action at step i
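The policy builder now receives `nbatch` and `nsteps` explicitly, so the step model (one step per environment) and the train model can size their batched tensors and any recurrent state correctly. A simplified, hypothetical stand-in for the builder, just to show how the two call sites differ (names and the state shape are illustrative, not the actual baselines API):

```python
import numpy as np

# Hypothetical stand-in for the policy builder (illustrative names and shapes):
def make_policy(nbatch, nsteps, ob_shape):
    nenvs = nbatch // nsteps  # recurrent policies keep one state vector per env
    initial_state = np.zeros((nenvs, 128), dtype=np.float32)  # e.g. an LSTM state
    print(f"policy: nbatch={nbatch}, nsteps={nsteps}, nenvs={nenvs}, obs={ob_shape}")
    return initial_state

nenvs, nsteps = 4, 20
make_policy(nbatch=nenvs, nsteps=1, ob_shape=(84, 84, 4))                # step model
make_policy(nbatch=nenvs * nsteps, nsteps=nsteps, ob_shape=(84, 84, 4))  # train model
```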
diff --git a/baselines/common/cmd_util.py b/baselines/common/cmd_util.py
index 650911e..24b5b90 100644
--- a/baselines/common/cmd_util.py
+++ b/baselines/common/cmd_util.py
@@ -145,6 +145,7 @@ def common_arg_parser():
parser.add_argument('--save_video_interval', help='Save video every x steps (0 = disabled)', default=0, type=int)
parser.add_argument('--save_video_length', help='Length of recorded video. Default: 200', default=200, type=int)
parser.add_argument('--play', default=False, action='store_true')
+ parser.add_argument('--extra_import', help='Extra module to import to access external environments', type=str, default=None)
return parser
def robotics_arg_parser():
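`--extra_import` names a module that is imported purely for its side effects, typically to register external environments with gym before the run starts. A hypothetical module that could be passed to the flag (all names illustrative):

```python
# my_envs/__init__.py -- hypothetical module usable via --extra_import=my_envs.
# Importing it registers a custom environment with gym as a side effect.
from gym.envs.registration import register

register(
    id='MyGrid-v0',                      # illustrative environment id
    entry_point='my_envs.grid:GridEnv',  # module path also determines env_type ('grid')
    max_episode_steps=200,
)
```

It would then be used as, for example, `python -m baselines.run --alg=ppo2 --env=MyGrid-v0 --extra_import=my_envs`.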
diff --git a/baselines/gail/result/gail-result.md b/baselines/gail/result/gail-result.md
index bda213c..8ecc9ed 100644
--- a/baselines/gail/result/gail-result.md
+++ b/baselines/gail/result/gail-result.md
@@ -24,7 +24,7 @@ Hopper-v1, Walker2d-v1, HalfCheetah-v1, Humanoid-v1, HumanoidStandup-v1. Every i
For details (e.g., adversarial loss, discriminator accuracy, etc.) about GAIL training, please see [here](https://drive.google.com/drive/folders/1nnU8dqAV9i37-_5_vWIspyFUJFQLCsDD?usp=sharing)
-### Determinstic Polciy (Set std=0)
+### Deterministic Policy (Set std=0)
| | Un-normalized | Normalized |
|---|---|---|
| Hopper-v1 | | |
diff --git a/baselines/run.py b/baselines/run.py
index a493071..cc63c11 100644
--- a/baselines/run.py
+++ b/baselines/run.py
@@ -120,6 +120,11 @@ def build_env(args):
def get_env_type(env_id):
+ # Re-parse the gym registry, since we could have new envs since last time.
+ for env in gym.envs.registry.all():
+ env_type = env._entry_point.split(':')[0].split('.')[-1]
+ _game_envs[env_type].add(env.id) # This is a set so add is idempotent
+
if env_id in _game_envs.keys():
env_type = env_id
env_id = [g for g in _game_envs[env_type]][0]
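The re-parse derives each spec's `env_type` from the last module component of its entry point. An isolated illustration of that string manipulation (the entry point string is a made-up example):

```python
# Standalone illustration of the env_type derivation above (hypothetical entry point):
entry_point = 'my_envs.grid:GridEnv'
env_type = entry_point.split(':')[0].split('.')[-1]
print(env_type)  # 'grid' -- last module component before the class name
```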
@@ -189,6 +194,9 @@ def main(args):
args, unknown_args = arg_parser.parse_known_args(args)
extra_args = parse_cmdline_kwargs(unknown_args)
+ if args.extra_import is not None:
+ import_module(args.extra_import)
+
if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
rank = 0
logger.configure()
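The guarded import delegates to `importlib.import_module`, which loads the named module at runtime so its registration side effects run before the environment is built. A self-contained equivalent (the module name here is a stand-in):

```python
# Runnable equivalent of the guarded import above (module name is a stand-in):
from importlib import import_module

extra_import = 'json'            # stand-in for args.extra_import
if extra_import is not None:
    import_module(extra_import)  # imported purely for its side effects
```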
diff --git a/setup.py b/setup.py
index 2e5e36a..130cdb5 100644
--- a/setup.py
+++ b/setup.py
@@ -53,11 +53,11 @@ setup(name='baselines',
# ensure there is some tensorflow build with version above 1.4
import pkg_resources
tf_pkg = None
-for tf_pkg_name in ['tensorflow', 'tensorflow-gpu']:
+for tf_pkg_name in ['tensorflow', 'tensorflow-gpu', 'tf-nightly', 'tf-nightly-gpu']:
try:
tf_pkg = pkg_resources.get_distribution(tf_pkg_name)
except pkg_resources.DistributionNotFound:
pass
assert tf_pkg is not None, 'TensorFlow needed, of version above 1.4'
-from distutils.version import StrictVersion
-assert StrictVersion(re.sub(r'-?rc\d+$', '', tf_pkg.version)) >= StrictVersion('1.4.0')
+from distutils.version import LooseVersion
+assert LooseVersion(re.sub(r'-?rc\d+$', '', tf_pkg.version)) >= LooseVersion('1.4.0')
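`StrictVersion` cannot parse the version strings that nightly builds ship, so the check would crash before it could compare anything; `LooseVersion` parses them and still orders them correctly against `1.4.0`. A quick demonstration (the nightly version string is illustrative):

```python
from distutils.version import LooseVersion, StrictVersion

nightly = '1.13.0.dev20190125'   # illustrative tf-nightly version string
try:
    StrictVersion(nightly)
except ValueError as err:
    print('StrictVersion rejects it:', err)

print(LooseVersion(nightly) >= LooseVersion('1.4.0'))  # True
```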