diff --git a/baselines/a2c/runner.py b/baselines/a2c/runner.py index 8d0c6ec..0181472 100644 --- a/baselines/a2c/runner.py +++ b/baselines/a2c/runner.py @@ -37,9 +37,6 @@ class Runner(AbstractEnvRunner): obs, rewards, dones, _ = self.env.step(actions) self.states = states self.dones = dones - for n, done in enumerate(dones): - if done: - self.obs[n] = self.obs[n]*0 self.obs = obs mb_rewards.append(rewards) mb_dones.append(self.dones) diff --git a/baselines/acer/acer.py b/baselines/acer/acer.py index 0ae0330..df4e0bf 100644 --- a/baselines/acer/acer.py +++ b/baselines/acer/acer.py @@ -75,8 +75,8 @@ class Model(object): train_ob_placeholder = tf.placeholder(dtype=ob_space.dtype, shape=(nenvs*(nsteps+1),) + ob_space.shape) with tf.variable_scope('acer_model', reuse=tf.AUTO_REUSE): - step_model = policy(observ_placeholder=step_ob_placeholder, sess=sess) - train_model = policy(observ_placeholder=train_ob_placeholder, sess=sess) + step_model = policy(nbatch=nenvs, nsteps=1, observ_placeholder=step_ob_placeholder, sess=sess) + train_model = policy(nbatch=nbatch, nsteps=nsteps, observ_placeholder=train_ob_placeholder, sess=sess) params = find_trainable_variables("acer_model") @@ -94,7 +94,7 @@ class Model(object): return v with tf.variable_scope("acer_model", custom_getter=custom_getter, reuse=True): - polyak_model = policy(observ_placeholder=train_ob_placeholder, sess=sess) + polyak_model = policy(nbatch=nbatch, nsteps=nsteps, observ_placeholder=train_ob_placeholder, sess=sess) # Notation: (var) = batch variable, (var)s = seqeuence variable, (var)_i = variable index by action at step i diff --git a/baselines/common/cmd_util.py b/baselines/common/cmd_util.py index 650911e..24b5b90 100644 --- a/baselines/common/cmd_util.py +++ b/baselines/common/cmd_util.py @@ -145,6 +145,7 @@ def common_arg_parser(): parser.add_argument('--save_video_interval', help='Save video every x steps (0 = disabled)', default=0, type=int) parser.add_argument('--save_video_length', help='Length of 
recorded video. Default: 200', default=200, type=int) parser.add_argument('--play', default=False, action='store_true') + parser.add_argument('--extra_import', help='Extra module to import to access external environments', type=str, default=None) return parser def robotics_arg_parser(): diff --git a/baselines/gail/result/gail-result.md b/baselines/gail/result/gail-result.md index bda213c..8ecc9ed 100644 --- a/baselines/gail/result/gail-result.md +++ b/baselines/gail/result/gail-result.md @@ -24,7 +24,7 @@ Hopper-v1, Walker2d-v1, HalfCheetah-v1, Humanoid-v1, HumanoidStandup-v1. Every i For details (e.g., adversarial loss, discriminator accuracy, etc.) about GAIL training, please see [here](https://drive.google.com/drive/folders/1nnU8dqAV9i37-_5_vWIspyFUJFQLCsDD?usp=sharing) -### Determinstic Polciy (Set std=0) +### Deterministic Policy (Set std=0) | | Un-normalized | Normalized | |---|---|---| | Hopper-v1 | | | diff --git a/baselines/run.py b/baselines/run.py index a493071..cc63c11 100644 --- a/baselines/run.py +++ b/baselines/run.py @@ -120,6 +120,11 @@ def build_env(args): def get_env_type(env_id): + # Re-parse the gym registry, since we could have new envs since last time. 
+ for env in gym.envs.registry.all(): + env_type = env._entry_point.split(':')[0].split('.')[-1] + _game_envs[env_type].add(env.id) # This is a set so add is idempotent + if env_id in _game_envs.keys(): env_type = env_id env_id = [g for g in _game_envs[env_type]][0] @@ -189,6 +194,9 @@ def main(args): args, unknown_args = arg_parser.parse_known_args(args) extra_args = parse_cmdline_kwargs(unknown_args) + if args.extra_import is not None: + import_module(args.extra_import) + if MPI is None or MPI.COMM_WORLD.Get_rank() == 0: rank = 0 logger.configure() diff --git a/setup.py b/setup.py index 2e5e36a..130cdb5 100644 --- a/setup.py +++ b/setup.py @@ -53,11 +53,11 @@ setup(name='baselines', # ensure there is some tensorflow build with version above 1.4 import pkg_resources tf_pkg = None -for tf_pkg_name in ['tensorflow', 'tensorflow-gpu']: +for tf_pkg_name in ['tensorflow', 'tensorflow-gpu', 'tf-nightly', 'tf-nightly-gpu']: try: tf_pkg = pkg_resources.get_distribution(tf_pkg_name) except pkg_resources.DistributionNotFound: pass assert tf_pkg is not None, 'TensorFlow needed, of version above 1.4' -from distutils.version import StrictVersion -assert StrictVersion(re.sub(r'-?rc\d+$', '', tf_pkg.version)) >= StrictVersion('1.4.0') +from distutils.version import LooseVersion +assert LooseVersion(re.sub(r'-?rc\d+$', '', tf_pkg.version)) >= LooseVersion('1.4.0')