misc changes to vecenvs and run.py for benchmarks (#236)

* misc changes to vecenvs and run.py for benchmarks * dont seed global gen * update more references to assert_venvs_equal
2019-02-06 17:06:11 -08:00
parent 0dcaafd717
commit ecf5394226
6 changed files with 45 additions and 41 deletions
--- a/baselines/common/cmd_util.py
+++ b/baselines/common/cmd_util.py
@@ -136,6 +136,7 @@ def common_arg_parser():
    """
    parser = arg_parser()
    parser.add_argument('--env', help='environment ID', type=str, default='Reacher-v2')
+    parser.add_argument('--env_type', help='type of environment, used when the environment type cannot be automatically determined', type=str)
    parser.add_argument('--seed', help='RNG seed', type=int, default=None)
    parser.add_argument('--alg', help='Algorithm', type=str, default='ppo2')
    parser.add_argument('--num_timesteps', type=float, default=1e6),
--- a/baselines/common/vec_env/shmem_vec_env.py
+++ b/baselines/common/vec_env/shmem_vec_env.py
@@ -16,19 +16,18 @@ _NP_TO_CT = {np.float32: ctypes.c_float,
             np.uint8: ctypes.c_char,
             np.bool: ctypes.c_bool}

-ctx = mp.get_context('spawn')
-

 class ShmemVecEnv(VecEnv):
    """
    Optimized version of SubprocVecEnv that uses shared variables to communicate observations.
    """

-    def __init__(self, env_fns, spaces=None):
+    def __init__(self, env_fns, spaces=None, context='spawn'):
        """
        If you don't specify observation_space, we'll have to create a dummy
        environment to get it.
        """
+        ctx = mp.get_context(context)
        if spaces:
            observation_space, action_space = spaces
        else:
--- a/baselines/common/vec_env/subproc_vec_env.py
+++ b/baselines/common/vec_env/subproc_vec_env.py
@@ -3,7 +3,6 @@ import multiprocessing as mp
 import numpy as np
 from .vec_env import VecEnv, CloudpickleWrapper, clear_mpi_env_vars

-ctx = mp.get_context('spawn')

 def worker(remote, parent_remote, env_fn_wrapper):
    parent_remote.close()
@@ -39,7 +38,7 @@ class SubprocVecEnv(VecEnv):
    VecEnv that runs multiple environments in parallel in subproceses and communicates with them via pipes.
    Recommended to use when num_envs > 1 and step() can be a bottleneck.
    """
-    def __init__(self, env_fns, spaces=None):
+    def __init__(self, env_fns, spaces=None, context='spawn'):
        """
        Arguments:

@@ -48,6 +47,7 @@ class SubprocVecEnv(VecEnv):
        self.waiting = False
        self.closed = False
        nenvs = len(env_fns)
+        ctx = mp.get_context(context)
        self.remotes, self.work_remotes = zip(*[ctx.Pipe() for _ in range(nenvs)])
        self.ps = [ctx.Process(target=worker, args=(work_remote, remote, CloudpickleWrapper(env_fn)))
                   for (work_remote, remote, env_fn) in zip(self.work_remotes, self.remotes, env_fns)]
--- a/baselines/common/vec_env/test_vec_env.py
+++ b/baselines/common/vec_env/test_vec_env.py
@@ -11,37 +11,37 @@ from .subproc_vec_env import SubprocVecEnv
 from baselines.common.tests.test_with_mpi import with_mpi


-def assert_envs_equal(env1, env2, num_steps):
+def assert_venvs_equal(venv1, venv2, num_steps):
    """
    Compare two environments over num_steps steps and make sure
    that the observations produced by each are the same when given
    the same actions.
    """
-    assert env1.num_envs == env2.num_envs
-    assert env1.action_space.shape == env2.action_space.shape
-    assert env1.action_space.dtype == env2.action_space.dtype
-    joint_shape = (env1.num_envs,) + env1.action_space.shape
+    assert venv1.num_envs == venv2.num_envs
+    assert venv1.observation_space.shape == venv2.observation_space.shape
+    assert venv1.observation_space.dtype == venv2.observation_space.dtype
+    assert venv1.action_space.shape == venv2.action_space.shape
+    assert venv1.action_space.dtype == venv2.action_space.dtype

    try:
-        obs1, obs2 = env1.reset(), env2.reset()
+        obs1, obs2 = venv1.reset(), venv2.reset()
        assert np.array(obs1).shape == np.array(obs2).shape
-        assert np.array(obs1).shape == joint_shape
+        assert np.array(obs1).shape == (venv1.num_envs,) + venv1.observation_space.shape
        assert np.allclose(obs1, obs2)
-        np.random.seed(1337)
+        venv1.action_space.seed(1337)
        for _ in range(num_steps):
-            actions = np.array(np.random.randint(0, 0x100, size=joint_shape),
-                               dtype=env1.action_space.dtype)
-            for env in [env1, env2]:
-                env.step_async(actions)
-            outs1 = env1.step_wait()
-            outs2 = env2.step_wait()
+            actions = np.array([venv1.action_space.sample() for _ in range(venv1.num_envs)])
+            for venv in [venv1, venv2]:
+                venv.step_async(actions)
+            outs1 = venv1.step_wait()
+            outs2 = venv2.step_wait()
            for out1, out2 in zip(outs1[:3], outs2[:3]):
                assert np.array(out1).shape == np.array(out2).shape
                assert np.allclose(out1, out2)
            assert list(outs1[3]) == list(outs2[3])
    finally:
-        env1.close()
-        env2.close()
+        venv1.close()
+        venv2.close()


@pytest.mark.parametrize('klass', (ShmemVecEnv, SubprocVecEnv))
@@ -64,7 +64,7 @@ def test_vec_env(klass, dtype):  # pylint: disable=R0914
    fns = [make_fn(i) for i in range(num_envs)]
    env1 = DummyVecEnv(fns)
    env2 = klass(fns)
-    assert_envs_equal(env1, env2, num_steps=num_steps)
+    assert_venvs_equal(env1, env2, num_steps=num_steps)


 class SimpleEnv(gym.Env):
--- a/baselines/common/vec_env/vec_env.py
+++ b/baselines/common/vec_env/vec_env.py
@@ -217,4 +217,4 @@ def clear_mpi_env_vars():
    try:
        yield
    finally:
-        os.environ.update(removed_environment)
+        os.environ.update(removed_environment)
--- a/baselines/run.py
+++ b/baselines/run.py
@@ -6,15 +6,13 @@ from collections import defaultdict
 import tensorflow as tf
 import numpy as np

+from baselines.common.vec_env import VecFrameStack, VecNormalize
 from baselines.common.vec_env.vec_video_recorder import VecVideoRecorder
-from baselines.common.vec_env.vec_frame_stack import VecFrameStack
 from baselines.common.cmd_util import common_arg_parser, parse_unknown_args, make_vec_env, make_env
 from baselines.common.tf_util import get_session
 from baselines import logger
 from importlib import import_module

-from baselines.common.vec_env.vec_normalize import VecNormalize
-
 try:
    from mpi4py import MPI
 except ImportError:
@@ -52,7 +50,7 @@ _game_envs['retro'] = {


 def train(args, extra_args):
-    env_type, env_id = get_env_type(args.env)
+    env_type, env_id = get_env_type(args)
    print('env_type: {}'.format(env_type))

    total_timesteps = int(args.num_timesteps)
@@ -91,7 +89,7 @@ def build_env(args):
    alg = args.alg
    seed = args.seed

-    env_type, env_id = get_env_type(args.env)
+    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
@@ -104,22 +102,27 @@ def build_env(args):
            env = VecFrameStack(env, frame_stack_size)

    else:
-       config = tf.ConfigProto(allow_soft_placement=True,
+        config = tf.ConfigProto(allow_soft_placement=True,
                               intra_op_parallelism_threads=1,
                               inter_op_parallelism_threads=1)
-       config.gpu_options.allow_growth = True
-       get_session(config=config)
+        config.gpu_options.allow_growth = True
+        get_session(config=config)

-       flatten_dict_observations = alg not in {'her'}
-       env = make_vec_env(env_id, env_type, args.num_env or 1, seed, reward_scale=args.reward_scale, flatten_dict_observations=flatten_dict_observations)
+        flatten_dict_observations = alg not in {'her'}
+        env = make_vec_env(env_id, env_type, args.num_env or 1, seed, reward_scale=args.reward_scale, flatten_dict_observations=flatten_dict_observations)

-       if env_type == 'mujoco':
-           env = VecNormalize(env)
+        if env_type == 'mujoco':
+            env = VecNormalize(env)

    return env


-def get_env_type(env_id):
+def get_env_type(args):
+    env_id = args.env
+
+    if args.env_type is not None:
+        return args.env_type, env_id
+
    # Re-parse the gym registry, since we could have new envs since last time.
    for env in gym.envs.registry.all():
        env_type = env._entry_point.split(':')[0].split('.')[-1]
@@ -205,7 +208,6 @@ def main(args):
        rank = MPI.COMM_WORLD.Get_rank()

    model, env = train(args, extra_args)
-    env.close()

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
@@ -213,26 +215,28 @@ def main(args):

    if args.play:
        logger.log("Running trained model")
-        env = build_env(args)
        obs = env.reset()

        state = model.initial_state if hasattr(model, 'initial_state') else None
        dones = np.zeros((1,))

+        episode_rew = 0
        while True:
            if state is not None:
                actions, _, state, _ = model.step(obs,S=state, M=dones)
            else:
                actions, _, _, _ = model.step(obs)

-            obs, _, done, _ = env.step(actions)
+            obs, rew, done, _ = env.step(actions)
+            episode_rew += rew[0]
            env.render()
            done = done.any() if isinstance(done, np.ndarray) else done
-
            if done:
+                print(f'episode_rew={episode_rew}')
+                episode_rew = 0
                obs = env.reset()

-        env.close()
+    env.close()

    return model