test fixes

fix tests - add matplotlib to setup_requires, put mpi4py import in try-except
merge master
2019-05-03 16:36:03 -07:00 · 2019-05-03 16:29:10 -07:00 · 2019-05-03 15:57:31 -07:00 · 2019-05-03 15:56:04 -07:00 · 2019-05-03 15:54:27 -07:00 · 2019-05-03 15:54:26 -07:00
23 changed files with 162 additions and 165 deletions
--- a/README.md
+++ b/README.md
@@ -39,9 +39,6 @@ To activate a virtualenv:
 More thorough tutorial on virtualenvs and options can be found [here](https://virtualenv.pypa.io/en/stable/) 


-## Tensorflow versions
-The master branch supports Tensorflow from version 1.4 to 1.14. For Tensorflow 2.0 support, please use tf-2 branch.
-
 ## Installation
 - Clone the repo and cd into it:
    ```bash
@@ -101,8 +98,6 @@ python -m baselines.run --alg=deepq --env=PongNoFrameskip-v4 --num_timesteps=1e6
 ```

 ## Saving, loading and visualizing models
-
-### Saving and loading the model
 The algorithms serialization API is not properly unified yet; however, there is a simple method to save / restore trained models. 
 The `--save_path` and `--load_path` command-line options save the tensorflow state to a given path after training and load it from a given path before training, respectively. 
 Let's imagine you'd like to train ppo2 on Atari Pong, save the model and then later visualize what it has learnt.
@@ -116,17 +111,8 @@ python -m baselines.run --alg=ppo2 --env=PongNoFrameskip-v4 --num_timesteps=0 --

 *NOTE:* Mujoco environments require normalization to work properly, so we wrap them with VecNormalize wrapper. Currently, to ensure the models are saved with normalization (so that trained models can be restored and run without further training) the normalization coefficients are saved as tensorflow variables. This can decrease the performance somewhat, so if you require high-throughput steps with Mujoco and do not need saving/restoring the models, it may make sense to use numpy normalization instead. To do that, set `use_tf=False` in [baselines/run.py](baselines/run.py#L116). 

-### Logging and vizualizing learning curves and other training metrics
-By default, all summary data, including progress, standard output, is saved to a unique directory in a temp folder, specified by a call to Python's [tempfile.gettempdir()](https://docs.python.org/3/library/tempfile.html#tempfile.gettempdir).
-The directory can be changed with the `--log_path` command-line option.
-```bash
-python -m baselines.run --alg=ppo2 --env=PongNoFrameskip-v4 --num_timesteps=2e7 --save_path=~/models/pong_20M_ppo2 --log_path=~/logs/Pong/
-```
-*NOTE:* Please be aware that the logger will overwrite files of the same name in an existing directory, thus it's recommended that folder names be given a unique timestamp to prevent overwritten logs.
-
-Another way the temp directory can be changed is through the use of the `$OPENAI_LOGDIR` environment variable.
-
-For examples on how to load and display the training data, see [here](docs/viz/viz.ipynb).
+## Loading and visualizing learning curves and other training metrics
+See [here](docs/viz/viz.ipynb) for instructions on how to load and display the training data. 

 ## Subpackages

--- a/baselines/bench/init.py
+++ b/baselines/bench/init.py
@@ -1,3 +1,2 @@
-# flake8: noqa F403
 from baselines.bench.benchmarks import *
 from baselines.bench.monitor import *
--- a/baselines/bench/benchmarks.py
+++ b/baselines/bench/benchmarks.py
@@ -1,4 +1,5 @@
 import re
+import os.path as osp
 import os
 SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))

--- a/baselines/bench/monitor.py
+++ b/baselines/bench/monitor.py
@@ -1,11 +1,13 @@
 __all__ = ['Monitor', 'get_monitor_files', 'load_results']

+import gym
 from gym.core import Wrapper
 import time
 from glob import glob
 import csv
 import os.path as osp
 import json
+import numpy as np

 class Monitor(Wrapper):
    EXT = "monitor.csv"
@@ -160,3 +162,35 @@ def load_results(dir):
    df['t'] -= min(header['t_start'] for header in headers)
    df.headers = headers # HACK to preserve backwards compatibility
    return df
+
+def test_monitor():
+    """Smoke-test Monitor: step CartPole 1000 times and check the file it writes."""
+    # Imported locally: this module does not import os, uuid or pandas at top level.
+    import os
+    import uuid
+    import pandas
+
+    env = gym.make("CartPole-v1")
+    env.seed(0)
+    mon_file = "/tmp/baselines-test-%s.monitor.csv" % uuid.uuid4()
+    menv = Monitor(env, mon_file)
+    menv.reset()
+    for _ in range(1000):
+        _, _, done, _ = menv.step(0)
+        if done:
+            menv.reset()
+
+    try:
+        with open(mon_file, 'rt') as f:
+            # The first line is a '#'-prefixed JSON metadata header.
+            firstline = f.readline()
+            assert firstline.startswith('#')
+            metadata = json.loads(firstline[1:])
+            assert metadata['env_id'] == "CartPole-v1"
+            assert set(metadata.keys()) == {'env_id', 'gym_version', 't_start'},  "Incorrect keys in monitor metadata"
+
+            # The rest of the file is parsed as CSV; its columns must be l, t and r.
+            last_logline = pandas.read_csv(f, index_col=None)
+            assert set(last_logline.keys()) == {'l', 't', 'r'}, "Incorrect keys in monitor logline"
+    finally:
+        # Remove the temporary file even when an assertion above fails.
+        os.remove(mon_file)
--- a/baselines/bench/test_monitor.py
+++ b/baselines/bench/test_monitor.py
@@ -1,31 +0,0 @@
-from .monitor import Monitor
-import gym
-import json
-
-def test_monitor():
-    import pandas
-    import os
-    import uuid
-
-    env = gym.make("CartPole-v1")
-    env.seed(0)
-    mon_file = "/tmp/baselines-test-%s.monitor.csv" % uuid.uuid4()
-    menv = Monitor(env, mon_file)
-    menv.reset()
-    for _ in range(1000):
-        _, _, done, _ = menv.step(0)
-        if done:
-            menv.reset()
-
-    f = open(mon_file, 'rt')
-
-    firstline = f.readline()
-    assert firstline.startswith('#')
-    metadata = json.loads(firstline[1:])
-    assert metadata['env_id'] == "CartPole-v1"
-    assert set(metadata.keys()) == {'env_id', 't_start'},  "Incorrect keys in monitor metadata"
-
-    last_logline = pandas.read_csv(f, index_col=None)
-    assert set(last_logline.keys()) == {'l', 't', 'r'}, "Incorrect keys in monitor logline"
-    f.close()
-    os.remove(mon_file)
--- a/baselines/common/atari_wrappers.py
+++ b/baselines/common/atari_wrappers.py
@@ -254,13 +254,6 @@ class LazyFrames(object):
        return len(self._force())

    def __getitem__(self, i):
-        return self._force()[i]
-
-    def count(self):
-        frames = self._force()
-        return frames.shape[frames.ndim - 1]
-
-    def frame(self, i):
        return self._force()[..., i]

 def make_atari(env_id, max_episode_steps=None):
--- a/baselines/common/cmd_util.py
+++ b/baselines/common/cmd_util.py
@@ -170,7 +170,6 @@ def common_arg_parser():
    parser.add_argument('--save_path', help='Path to save trained model to', default=None, type=str)
    parser.add_argument('--save_video_interval', help='Save video every x steps (0 = disabled)', default=0, type=int)
    parser.add_argument('--save_video_length', help='Length of recorded video. Default: 200', default=200, type=int)
-    parser.add_argument('--log_path', help='Directory to save learning curve data.', default=None, type=str)
    parser.add_argument('--play', default=False, action='store_true')
    return parser

@@ -187,7 +186,7 @@ def robotics_arg_parser():

 def parse_unknown_args(args):
    """
-    Parse arguments not consumed by arg parser into a dictionary
+    Parse arguments not consumed by arg parser into a dictionary
    """
    retval = {}
    preceded_by_key = False
--- a/baselines/common/mpi_adam_optimizer.py
+++ b/baselines/common/mpi_adam_optimizer.py
@@ -65,7 +65,7 @@ def check_synced(localval, comm=None):
    vals = comm.gather(localval)
    if comm.rank == 0:
        assert all(val==vals[0] for val in vals[1:]),\
-            'MpiAdamOptimizer detected that different workers have different weights: {}'.format(vals)
+            f'MpiAdamOptimizer detected that different workers have different weights: {vals}'

@with_mpi(timeout=5)
 def test_nonfreeze():
--- a/baselines/common/mpi_moments.py
+++ b/baselines/common/mpi_moments.py
@@ -12,9 +12,8 @@ def mpi_mean(x, axis=0, comm=None, keepdims=False):
    localsum = np.zeros(n+1, x.dtype)
    localsum[:n] = xsum.ravel()
    localsum[n] = x.shape[axis]
-    # globalsum = np.zeros_like(localsum)
-    # comm.Allreduce(localsum, globalsum, op=MPI.SUM)
-    globalsum = comm.allreduce(localsum, op=MPI.SUM)
+    globalsum = np.zeros_like(localsum)
+    comm.Allreduce(localsum, globalsum, op=MPI.SUM)
    return globalsum[:n].reshape(xsum.shape) / globalsum[n], globalsum[n]

 def mpi_moments(x, axis=0, comm=None, keepdims=False):
--- a/baselines/common/vec_env/shmem_vec_env.py
+++ b/baselines/common/vec_env/shmem_vec_env.py
@@ -70,11 +70,9 @@ class ShmemVecEnv(VecEnv):
        assert len(actions) == len(self.parent_pipes)
        for pipe, act in zip(self.parent_pipes, actions):
            pipe.send(('step', act))
-        self.waiting_step = True

    def step_wait(self):
        outs = [pipe.recv() for pipe in self.parent_pipes]
-        self.waiting_step = False
        obs, rews, dones, infos = zip(*outs)
        return self._decode_obses(obs), np.array(rews), np.array(dones), infos

--- a/baselines/common/vec_env/subproc_vec_env.py
+++ b/baselines/common/vec_env/subproc_vec_env.py
@@ -4,36 +4,33 @@ import numpy as np
 from .vec_env import VecEnv, CloudpickleWrapper, clear_mpi_env_vars


-def worker(remote, parent_remote, env_fn_wrappers):
-    def step_env(env, action):
-        ob, reward, done, info = env.step(action)
-        if done:
-            ob = env.reset()
-        return ob, reward, done, info
-
+def worker(remote, parent_remote, env_fn_wrapper):
    parent_remote.close()
-    envs = [env_fn_wrapper() for env_fn_wrapper in env_fn_wrappers.x]
+    env = env_fn_wrapper.x()
    try:
        while True:
            cmd, data = remote.recv()
            if cmd == 'step':
-                remote.send([step_env(env, action) for env, action in zip(envs, data)])
+                ob, reward, done, info = env.step(data)
+                if done:
+                    ob = env.reset()
+                remote.send((ob, reward, done, info))
            elif cmd == 'reset':
-                remote.send([env.reset() for env in envs])
+                ob = env.reset()
+                remote.send(ob)
            elif cmd == 'render':
-                remote.send([env.render(mode='rgb_array') for env in envs])
+                remote.send(env.render(mode='rgb_array'))
            elif cmd == 'close':
                remote.close()
                break
            elif cmd == 'get_spaces_spec':
-                remote.send((envs[0].observation_space, envs[0].action_space, envs[0].spec))
+                remote.send((env.observation_space, env.action_space, env.spec))
            else:
                raise NotImplementedError
    except KeyboardInterrupt:
        print('SubprocVecEnv worker: got KeyboardInterrupt')
    finally:
-        for env in envs:
-            env.close()
+        env.close()


 class SubprocVecEnv(VecEnv):
@@ -41,23 +38,17 @@ class SubprocVecEnv(VecEnv):
    VecEnv that runs multiple environments in parallel in subproceses and communicates with them via pipes.
    Recommended to use when num_envs > 1 and step() can be a bottleneck.
    """
-    def __init__(self, env_fns, spaces=None, context='spawn', in_series=1):
+    def __init__(self, env_fns, spaces=None, context='spawn'):
        """
        Arguments:

        env_fns: iterable of callables -  functions that create environments to run in subprocesses. Need to be cloud-pickleable
-        in_series: number of environments to run in series in a single process
-        (e.g. when len(env_fns) == 12 and in_series == 3, it will run 4 processes, each running 3 envs in series)
        """
        self.waiting = False
        self.closed = False
-        self.in_series = in_series
        nenvs = len(env_fns)
-        assert nenvs % in_series == 0, "Number of envs must be divisible by number of envs to run in series"
-        self.nremotes = nenvs // in_series
-        env_fns = np.array_split(env_fns, self.nremotes)
        ctx = mp.get_context(context)
-        self.remotes, self.work_remotes = zip(*[ctx.Pipe() for _ in range(self.nremotes)])
+        self.remotes, self.work_remotes = zip(*[ctx.Pipe() for _ in range(nenvs)])
        self.ps = [ctx.Process(target=worker, args=(work_remote, remote, CloudpickleWrapper(env_fn)))
                   for (work_remote, remote, env_fn) in zip(self.work_remotes, self.remotes, env_fns)]
        for p in self.ps:
@@ -70,11 +61,10 @@ class SubprocVecEnv(VecEnv):
        self.remotes[0].send(('get_spaces_spec', None))
        observation_space, action_space, self.spec = self.remotes[0].recv()
        self.viewer = None
-        VecEnv.__init__(self, nenvs, observation_space, action_space)
+        VecEnv.__init__(self, len(env_fns), observation_space, action_space)

    def step_async(self, actions):
        self._assert_not_closed()
-        actions = np.array_split(actions, self.nremotes)
        for remote, action in zip(self.remotes, actions):
            remote.send(('step', action))
        self.waiting = True
@@ -82,7 +72,6 @@ class SubprocVecEnv(VecEnv):
    def step_wait(self):
        self._assert_not_closed()
        results = [remote.recv() for remote in self.remotes]
-        results = _flatten_list(results)
        self.waiting = False
        obs, rews, dones, infos = zip(*results)
        return _flatten_obs(obs), np.stack(rews), np.stack(dones), infos
@@ -91,9 +80,7 @@ class SubprocVecEnv(VecEnv):
        self._assert_not_closed()
        for remote in self.remotes:
            remote.send(('reset', None))
-        obs = [remote.recv() for remote in self.remotes]
-        obs = _flatten_list(obs)
-        return _flatten_obs(obs)
+        return _flatten_obs([remote.recv() for remote in self.remotes])

    def close_extras(self):
        self.closed = True
@@ -110,7 +97,6 @@ class SubprocVecEnv(VecEnv):
        for pipe in self.remotes:
            pipe.send(('render', None))
        imgs = [pipe.recv() for pipe in self.remotes]
-        imgs = _flatten_list(imgs)
        return imgs

    def _assert_not_closed(self):
@@ -129,10 +115,3 @@ def _flatten_obs(obs):
        return {k: np.stack([o[k] for o in obs]) for k in keys}
    else:
        return np.stack(obs)
-
-def _flatten_list(l):
-    assert isinstance(l, (list, tuple))
-    assert len(l) > 0
-    assert all([len(l_) > 0 for l_ in l])
-
-    return [l__ for l_ in l for l__ in l_]
--- a/baselines/common/vec_env/test_vec_env.py
+++ b/baselines/common/vec_env/test_vec_env.py
@@ -67,50 +67,6 @@ def test_vec_env(klass, dtype):  # pylint: disable=R0914
    assert_venvs_equal(env1, env2, num_steps=num_steps)


-@pytest.mark.parametrize('dtype', ('uint8', 'float32'))
-@pytest.mark.parametrize('num_envs_in_series', (3, 4, 6))
-def test_sync_sampling(dtype, num_envs_in_series):
-    """
-    Test that a SubprocVecEnv running with envs in series
-    outputs the same as DummyVecEnv.
-    """
-    num_envs = 12
-    num_steps = 100
-    shape = (3, 8)
-
-    def make_fn(seed):
-        """
-        Get an environment constructor with a seed.
-        """
-        return lambda: SimpleEnv(seed, shape, dtype)
-    fns = [make_fn(i) for i in range(num_envs)]
-    env1 = DummyVecEnv(fns)
-    env2 = SubprocVecEnv(fns, in_series=num_envs_in_series)
-    assert_venvs_equal(env1, env2, num_steps=num_steps)
-
-
-@pytest.mark.parametrize('dtype', ('uint8', 'float32'))
-@pytest.mark.parametrize('num_envs_in_series', (3, 4, 6))
-def test_sync_sampling_sanity(dtype, num_envs_in_series):
-    """
-    Test that a SubprocVecEnv running with envs in series
-    outputs the same as SubprocVecEnv without running in series.
-    """
-    num_envs = 12
-    num_steps = 100
-    shape = (3, 8)
-
-    def make_fn(seed):
-        """
-        Get an environment constructor with a seed.
-        """
-        return lambda: SimpleEnv(seed, shape, dtype)
-    fns = [make_fn(i) for i in range(num_envs)]
-    env1 = SubprocVecEnv(fns)
-    env2 = SubprocVecEnv(fns, in_series=num_envs_in_series)
-    assert_venvs_equal(env1, env2, num_steps=num_steps)
-
-
 class SimpleEnv(gym.Env):
    """
    An environment with a pre-determined observation space
--- a/baselines/common/vec_env/util.py
+++ b/baselines/common/vec_env/util.py
@@ -38,9 +38,6 @@ def obs_space_info(obs_space):
    if isinstance(obs_space, gym.spaces.Dict):
        assert isinstance(obs_space.spaces, OrderedDict)
        subspaces = obs_space.spaces
-    elif isinstance(obs_space, gym.spaces.Tuple):
-        assert isinstance(obs_space.spaces, tuple)
-        subspaces = {i: obs_space.spaces[i] for i in range(len(obs_space.spaces))}
    else:
        subspaces = {None: obs_space}
    keys = []
--- a/baselines/ddpg/ddpg_learner.py
+++ b/baselines/ddpg/ddpg_learner.py
@@ -378,6 +378,11 @@ class DDPG(object):
            self.param_noise_stddev: self.param_noise.current_stddev,
        })

+        if MPI is not None:
+            mean_distance = MPI.COMM_WORLD.allreduce(distance, op=MPI.SUM) / MPI.COMM_WORLD.Get_size()
+        else:
+            mean_distance = distance
+
        if MPI is not None:
            mean_distance = MPI.COMM_WORLD.allreduce(distance, op=MPI.SUM) / MPI.COMM_WORLD.Get_size()
        else:
--- a/baselines/deepq/build_graph.py
+++ b/baselines/deepq/build_graph.py
@@ -13,7 +13,7 @@ The functions in this file can are used to create the following functions:
    stochastic: bool
        if set to False all the actions are always deterministic (default False)
    update_eps_ph: float
-        update epsilon a new value, if negative no update happens
+        update epsilon to a new value; if negative, no update happens
        (default: no update)

    Returns
--- a/baselines/deepq/deepq.py
+++ b/baselines/deepq/deepq.py
@@ -142,8 +142,9 @@ def learn(env,
        final value of random action probability
    train_freq: int
        update the model every `train_freq` steps.
+        set to None to disable printing
    batch_size: int
-        size of a batch sampled from replay buffer for training
+        size of a batch sampled from replay buffer for training
    print_freq: int
        how often to print out training progress
        set to None to disable printing
--- a/baselines/deepq/models.py
+++ b/baselines/deepq/models.py
@@ -2,6 +2,113 @@ import tensorflow as tf
 import tensorflow.contrib.layers as layers


+def _mlp(hiddens, input_, num_actions, scope, reuse=False, layer_norm=False):
+    """Build a fully connected Q-network under `scope` and return its output tensor.
+
+    Each entry of `hiddens` adds a linear layer, an optional layer
+    normalization and a ReLU; a final linear layer produces `num_actions`
+    outputs.
+    """
+    with tf.variable_scope(scope, reuse=reuse):
+        out = input_
+        for hidden in hiddens:
+            out = layers.fully_connected(out, num_outputs=hidden, activation_fn=None)
+            if layer_norm:
+                out = layers.layer_norm(out, center=True, scale=True)
+            out = tf.nn.relu(out)
+        q_out = layers.fully_connected(out, num_outputs=num_actions, activation_fn=None)
+        return q_out
+
+
+def mlp(hiddens=(), layer_norm=False):
+    """This model takes as input an observation and returns values of all actions.
+
+    Parameters
+    ----------
+    hiddens: [int]
+        list of sizes of hidden layers
+    layer_norm: bool
+        if true applies layer normalization for every layer
+        as described in https://arxiv.org/abs/1607.06450
+
+    Returns
+    -------
+    q_func: function
+        q_function for DQN algorithm.
+    """
+    # The default is an immutable tuple so the returned closure never shares a mutable list.
+    return lambda *args, **kwargs: _mlp(hiddens, *args, layer_norm=layer_norm, **kwargs)
+
+
+def _cnn_to_mlp(convs, hiddens, dueling, input_, num_actions, scope, reuse=False, layer_norm=False):
+    """Build a convolutional Q-network under `scope` and return its output tensor.
+
+    `convs` is applied as a stack of ReLU convolutions whose flattened output
+    feeds an "action_value" MLP.  With `dueling`, a parallel "state_value" MLP
+    is built and added to the mean-centred action scores.
+    """
+    with tf.variable_scope(scope, reuse=reuse):
+        out = input_
+        with tf.variable_scope("convnet"):
+            for num_outputs, kernel_size, stride in convs:
+                out = layers.convolution2d(out,
+                                           num_outputs=num_outputs,
+                                           kernel_size=kernel_size,
+                                           stride=stride,
+                                           activation_fn=tf.nn.relu)
+        conv_out = layers.flatten(out)
+        with tf.variable_scope("action_value"):
+            action_out = conv_out
+            for hidden in hiddens:
+                action_out = layers.fully_connected(action_out, num_outputs=hidden, activation_fn=None)
+                if layer_norm:
+                    action_out = layers.layer_norm(action_out, center=True, scale=True)
+                action_out = tf.nn.relu(action_out)
+            action_scores = layers.fully_connected(action_out, num_outputs=num_actions, activation_fn=None)
+
+        if dueling:
+            with tf.variable_scope("state_value"):
+                state_out = conv_out
+                for hidden in hiddens:
+                    state_out = layers.fully_connected(state_out, num_outputs=hidden, activation_fn=None)
+                    if layer_norm:
+                        state_out = layers.layer_norm(state_out, center=True, scale=True)
+                    state_out = tf.nn.relu(state_out)
+                state_score = layers.fully_connected(state_out, num_outputs=1, activation_fn=None)
+            # Centre the action scores by their mean over axis 1 before adding the state value.
+            action_scores_mean = tf.reduce_mean(action_scores, 1)
+            action_scores_centered = action_scores - tf.expand_dims(action_scores_mean, 1)
+            q_out = state_score + action_scores_centered
+        else:
+            q_out = action_scores
+        return q_out
+
+
+def cnn_to_mlp(convs, hiddens, dueling=False, layer_norm=False):
+    """This model takes as input an observation and returns values of all actions.
+
+    Parameters
+    ----------
+    convs: [(int, int, int)]
+        list of convolutional layers in form of
+        (num_outputs, kernel_size, stride)
+    hiddens: [int]
+        list of sizes of hidden layers
+    dueling: bool
+        if true double the output MLP to compute a baseline
+        for action scores
+    layer_norm: bool
+        if true applies layer normalization for every layer
+        as described in https://arxiv.org/abs/1607.06450
+
+    Returns
+    -------
+    q_func: function
+        q_function for DQN algorithm.
+    """
+    return lambda *args, **kwargs: _cnn_to_mlp(convs, hiddens, dueling, *args, layer_norm=layer_norm, **kwargs)
+
+
 def build_q_func(network, hiddens=[256], dueling=True, layer_norm=False, **network_kwargs):
    if isinstance(network, str):
        from baselines.common.models import get_network_builder
--- a/baselines/gail/dataset/mujoco_dset.py
+++ b/baselines/gail/dataset/mujoco_dset.py
@@ -77,7 +77,7 @@ class Mujoco_Dset(object):
        self.log_info()

    def log_info(self):
-        logger.log("Total trajectories: %d" % self.num_traj)
+        logger.log("Total trajectories: %d" % self.num_traj)
        logger.log("Total transitions: %d" % self.num_transition)
        logger.log("Average returns: %f" % self.avg_ret)
        logger.log("Std for returns: %f" % self.std_ret)
--- a/baselines/her/rollout.py
+++ b/baselines/her/rollout.py
@@ -15,7 +15,8 @@ class RolloutWorker:
        """Rollout worker generates experience by interacting with one or many environments.

        Args:
-            venv: vectorized gym environments.
+            make_env (function): a factory function that creates a new instance of the environment
+                when called
            policy (object): the policy that is used to act
            dims (dict of ints): the dimensions for observations (o), goals (g), and actions (u)
            logger (object): the logger that is used by the rollout worker
--- a/baselines/logger.py
+++ b/baselines/logger.py
@@ -379,8 +379,7 @@ def configure(dir=None, format_strs=None, comm=None, log_suffix=''):
        dir = osp.join(tempfile.gettempdir(),
            datetime.datetime.now().strftime("openai-%Y-%m-%d-%H-%M-%S-%f"))
    assert isinstance(dir, str)
-    dir = os.path.expanduser(dir)
-    os.makedirs(os.path.expanduser(dir), exist_ok=True)
+    os.makedirs(dir, exist_ok=True)

    rank = get_rank_without_mpi_import()
    if rank > 0:
@@ -395,8 +394,7 @@ def configure(dir=None, format_strs=None, comm=None, log_suffix=''):
    output_formats = [make_output_format(f, dir, log_suffix) for f in format_strs]

    Logger.CURRENT = Logger(dir=dir, output_formats=output_formats, comm=comm)
-    if output_formats:
-        log('Logging to %s'%dir)
+    log('Logging to %s'%dir)

 def _configure_default_logger():
    configure()
--- a/baselines/run.py
+++ b/baselines/run.py
@@ -32,7 +32,7 @@ except ImportError:
 _game_envs = defaultdict(set)
 for env in gym.envs.registry.all():
    # TODO: solve this with regexes
-    env_type = env.entry_point.split(':')[0].split('.')[-1]
+    env_type = env._entry_point.split(':')[0].split('.')[-1]
    _game_envs[env_type].add(env.id)

 # reading benchmark names directly from retro requires
@@ -126,7 +126,7 @@ def get_env_type(args):

    # Re-parse the gym registry, since we could have new envs since last time.
    for env in gym.envs.registry.all():
-        env_type = env.entry_point.split(':')[0].split('.')[-1]
+        env_type = env._entry_point.split(':')[0].split('.')[-1]
        _game_envs[env_type].add(env.id)  # This is a set so add is idempotent

    if env_id in _game_envs.keys():
@@ -192,12 +192,6 @@ def parse_cmdline_kwargs(args):
    return {k: parse(v) for k,v in parse_unknown_args(args).items()}


-def configure_logger(log_path, **kwargs):
-    if log_path is not None:
-        logger.configure(log_path)
-    else:
-        logger.configure(**kwargs)
-

 def main(args):
    # configure logger, disable logging in child MPI processes (with rank > 0)
@@ -208,10 +202,10 @@ def main(args):

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
-        configure_logger(args.log_path)
+        logger.configure()
    else:
+        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()
-        configure_logger(args.log_path, format_strs=[])

    model, env = train(args, extra_args)

--- a/setup.cfg
+++ b/setup.cfg
@@ -4,3 +4,4 @@ exclude =
    .git,
    __pycache__,
    baselines/ppo1,
+    baselines/bench,
--- a/setup.py
+++ b/setup.py
@@ -44,7 +44,7 @@ setup(name='baselines',
      author='OpenAI',
      url='https://github.com/openai/baselines',
      author_email='gym@openai.com',
-      version='0.1.6')
+      version='0.1.5')


 # ensure there is some tensorflow build with version above 1.4