export: Fix deepq param noise refactoring, remove atari experiments and azure dependency

2018-02-14 21:42:22 -08:00
parent 98257ef8c9
commit edb52c22a5
15 changed files with 85 additions and 713 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,8 @@
 *.swp
 *.pyc
+*.pkl
 *.py~
+.pytest_cache
 .DS_Store
 .idea

@@ -33,3 +35,4 @@ src

 MUJOCO_LOG.TXT

+
--- a/baselines/acer/acer_simple.py
+++ b/baselines/acer/acer_simple.py
@@ -243,7 +243,7 @@ class Runner(object):
            mb_mus.append(mus)
            mb_dones.append(self.dones)
            obs, rewards, dones, _ = self.env.step(actions)
-            # states information for statefull models like LSTM
+            # states information for stateful predictors like LSTM
            self.states = states
            self.dones = dones
            self.update_obs(obs, dones)
@@ -260,7 +260,7 @@ class Runner(object):

        mb_dones = np.asarray(mb_dones, dtype=np.bool).swapaxes(1, 0)

-        mb_masks = mb_dones # Used for statefull models like LSTM's to mask state when done
+        mb_masks = mb_dones # Used for stateful predictors like LSTMs to mask state when done
        mb_dones = mb_dones[:, 1:] # Used for calculating returns. The dones array is now aligned with rewards

        # shapes are now [nenv, nsteps, []]
--- a/baselines/acktr/kfac.py
+++ b/baselines/acktr/kfac.py
@@ -134,7 +134,7 @@ class KfacOptimizer():
        # check associated weights and bias for homogeneous coordinate representation
        # and check redundent factors
        # TO-DO: there may be a bug to detect associate bias and weights for
-        # forking layer, e.g. in inception models.
+        # forking layer, e.g. in inception predictors.
        for param in varlist:
            factorTensors[param]['assnWeights'] = None
            factorTensors[param]['assnBias'] = None
--- a/baselines/bench/benchmarks.py
+++ b/baselines/bench/benchmarks.py
@@ -76,9 +76,9 @@ register_benchmark({
 # MuJoCo

 _mujocosmall = [
-    'InvertedDoublePendulum-v1', 'InvertedPendulum-v1',
-    'HalfCheetah-v1', 'Hopper-v1', 'Walker2d-v1',
-    'Reacher-v1', 'Swimmer-v1']
+    'InvertedDoublePendulum-v2', 'InvertedPendulum-v2',
+    'HalfCheetah-v2', 'Hopper-v2', 'Walker2d-v2',
+    'Reacher-v2', 'Swimmer-v2']
 register_benchmark({
    'name': 'Mujoco1M',
    'description': 'Some small 2D MuJoCo tasks, run for 1M timesteps',
--- a/baselines/common/atari_wrappers.py
+++ b/baselines/common/atari_wrappers.py
@@ -193,13 +193,26 @@ class LazyFrames(object):

        You'd not believe how complex the previous solution was."""
        self._frames = frames
+        self._out = None
+
+    def _force(self):
+        if self._out is None:
+            self._out = np.concatenate(self._frames, axis=2)
+            self._frames = None
+        return self._out

    def __array__(self, dtype=None):
-        out = np.concatenate(self._frames, axis=2)
+        out = self._force()
        if dtype is not None:
            out = out.astype(dtype)
        return out

+    def __len__(self):
+        return len(self._force())
+
+    def __getitem__(self, i):
+        return self._force()[i]
+
 def make_atari(env_id):
    env = gym.make(env_id)
    assert 'NoFrameskip' in env.spec.id
--- a/baselines/common/azure_utils.py
+++ b/baselines/common/azure_utils.py
@@ -1,154 +0,0 @@
-import os
-import tempfile
-import zipfile
-
-from azure.common import AzureMissingResourceHttpError
-try:
-    from azure.storage.blob import BlobService
-except ImportError:
-    from azure.storage.blob import BlockBlobService as BlobService
-from shutil import unpack_archive
-from threading import Event
-
-# TODOS: use Azure snapshots instead of hacky backups
-
-def fixed_list_blobs(service, *args, **kwargs):
-    """By defualt list_containers only returns a subset of results.
-
-    This function attempts to fix this.
-    """
-    res = []
-    next_marker = None
-    while next_marker is None or len(next_marker) > 0:
-        kwargs['marker'] = next_marker
-        gen = service.list_blobs(*args, **kwargs)
-        for b in gen:
-            res.append(b.name)
-        next_marker = gen.next_marker
-    return res
-
-
-def make_archive(source_path, dest_path):
-    if source_path.endswith(os.path.sep):
-        source_path = source_path.rstrip(os.path.sep)
-    prefix_path = os.path.dirname(source_path)
-    with zipfile.ZipFile(dest_path, "w", compression=zipfile.ZIP_STORED) as zf:
-        if os.path.isdir(source_path):
-            for dirname, _subdirs, files in os.walk(source_path):
-                zf.write(dirname, os.path.relpath(dirname, prefix_path))
-                for filename in files:
-                    filepath = os.path.join(dirname, filename)
-                    zf.write(filepath, os.path.relpath(filepath, prefix_path))
-        else:
-            zf.write(source_path, os.path.relpath(source_path, prefix_path))
-
-
-class Container(object):
-    services = {}
-
-    def __init__(self, account_name, account_key, container_name, maybe_create=False):
-        self._account_name = account_name
-        self._container_name = container_name
-        if account_name not in Container.services:
-            Container.services[account_name] = BlobService(account_name, account_key)
-        self._service = Container.services[account_name]
-        if maybe_create:
-            self._service.create_container(self._container_name, fail_on_exist=False)
-
-    def put(self, source_path, blob_name, callback=None):
-        """Upload a file or directory from `source_path` to azure blob `blob_name`.
-
-        Upload progress can be traced by an optional callback.
-        """
-        upload_done = Event()
-
-        def progress_callback(current, total):
-            if callback:
-                callback(current, total)
-            if current >= total:
-                upload_done.set()
-
-        # Attempt to make backup if an existing version is already available
-        try:
-            x_ms_copy_source = "https://{}.blob.core.windows.net/{}/{}".format(
-                self._account_name,
-                self._container_name,
-                blob_name
-            )
-            self._service.copy_blob(
-                container_name=self._container_name,
-                blob_name=blob_name + ".backup",
-                x_ms_copy_source=x_ms_copy_source
-            )
-        except AzureMissingResourceHttpError:
-            pass
-
-        with tempfile.TemporaryDirectory() as td:
-            arcpath = os.path.join(td, "archive.zip")
-            make_archive(source_path, arcpath)
-            self._service.put_block_blob_from_path(
-                container_name=self._container_name,
-                blob_name=blob_name,
-                file_path=arcpath,
-                max_connections=4,
-                progress_callback=progress_callback,
-                max_retries=10)
-            upload_done.wait()
-
-    def get(self, dest_path, blob_name, callback=None):
-        """Download a file or directory to `dest_path` to azure blob `blob_name`.
-
-        Warning! If directory is downloaded the `dest_path` is the parent directory.
-
-        Upload progress can be traced by an optional callback.
-        """
-        download_done = Event()
-
-        def progress_callback(current, total):
-            if callback:
-                callback(current, total)
-            if current >= total:
-                download_done.set()
-
-        with tempfile.TemporaryDirectory() as td:
-            arcpath = os.path.join(td, "archive.zip")
-            for backup_blob_name in [blob_name, blob_name + '.backup']:
-                try:
-                    properties = self._service.get_blob_properties(
-                        blob_name=backup_blob_name,
-                        container_name=self._container_name
-                    )
-                    if hasattr(properties, 'properties'):
-                        # Annoyingly, Azure has changed the API and this now returns a blob
-                        # instead of it's properties with up-to-date azure package.
-                        blob_size = properties.properties.content_length
-                    else:
-                        blob_size = properties['content-length']
-                    if int(blob_size) > 0:
-                        self._service.get_blob_to_path(
-                            container_name=self._container_name,
-                            blob_name=backup_blob_name,
-                            file_path=arcpath,
-                            max_connections=4,
-                            progress_callback=progress_callback)
-                        unpack_archive(arcpath, dest_path)
-                        download_done.wait()
-                        return True
-                except AzureMissingResourceHttpError:
-                    pass
-        return False
-
-    def list(self, prefix=None):
-        """List all blobs in the container."""
-        return fixed_list_blobs(self._service, self._container_name, prefix=prefix)
-
-    def exists(self, blob_name):
-        """Returns true if `blob_name` exists in container."""
-        try:
-            self._service.get_blob_properties(
-                blob_name=blob_name,
-                container_name=self._container_name
-            )
-            return True
-        except AzureMissingResourceHttpError:
-            return False
--- a/baselines/common/tests/test_tf_util.py
+++ b/baselines/common/tests/test_tf_util.py
@@ -8,32 +8,32 @@ from baselines.common.tf_util import (


 def test_function():
-    tf.reset_default_graph()
-    x = tf.placeholder(tf.int32, (), name="x")
-    y = tf.placeholder(tf.int32, (), name="y")
-    z = 3 * x + 2 * y
-    lin = function([x, y], z, givens={y: 0})
+    with tf.Graph().as_default():
+        x = tf.placeholder(tf.int32, (), name="x")
+        y = tf.placeholder(tf.int32, (), name="y")
+        z = 3 * x + 2 * y
+        lin = function([x, y], z, givens={y: 0})

-    with single_threaded_session():
-        initialize()
+        with single_threaded_session():
+            initialize()

-        assert lin(2) == 6
-        assert lin(2, 2) == 10
+            assert lin(2) == 6
+            assert lin(2, 2) == 10


 def test_multikwargs():
-    tf.reset_default_graph()
-    x = tf.placeholder(tf.int32, (), name="x")
-    with tf.variable_scope("other"):
-        x2 = tf.placeholder(tf.int32, (), name="x")
-    z = 3 * x + 2 * x2
+    with tf.Graph().as_default():
+        x = tf.placeholder(tf.int32, (), name="x")
+        with tf.variable_scope("other"):
+            x2 = tf.placeholder(tf.int32, (), name="x")
+        z = 3 * x + 2 * x2

-    lin = function([x, x2], z, givens={x2: 0})
-    with single_threaded_session():
-        initialize()
-        assert lin(2) == 6
-        assert lin(2, 2) == 10
-        expt_caught = False
+        lin = function([x, x2], z, givens={x2: 0})
+        with single_threaded_session():
+            initialize()
+            assert lin(2) == 6
+            assert lin(2, 2) == 10
+            expt_caught = False


 if __name__ == '__main__':
--- a/baselines/deepq/build_graph.py
+++ b/baselines/deepq/build_graph.py
@@ -97,6 +97,37 @@ import tensorflow as tf
 import baselines.common.tf_util as U


+def scope_vars(scope, trainable_only=False):
+    """
+    Get variables inside a scope
+    The scope can be specified as a string
+    Parameters
+    ----------
+    scope: str or VariableScope
+        scope in which the variables reside.
+    trainable_only: bool
+        whether or not to return only the variables that were marked as trainable.
+    Returns
+    -------
+    vars: [tf.Variable]
+        list of variables in `scope`.
+    """
+    return tf.get_collection(
+        tf.GraphKeys.TRAINABLE_VARIABLES if trainable_only else tf.GraphKeys.GLOBAL_VARIABLES,
+        scope=scope if isinstance(scope, str) else scope.name
+    )
+
+
+def scope_name():
+    """Returns the name of current scope as a string, e.g. deepq/q_func"""
+    return tf.get_variable_scope().name
+
+
+def absolute_scope_name(relative_scope_name):
+    """Prepends the current variable scope name and a slash to `relative_scope_name`"""
+    return scope_name() + "/" + relative_scope_name
+
+
 def default_param_noise_filter(var):
    if var not in tf.trainable_variables():
        # We never perturb non-trainable vars.
@@ -225,8 +256,8 @@ def build_act_with_param_noise(make_obs_ph, q_func, num_actions, scope="deepq",
        # https://stackoverflow.com/questions/37063952/confused-by-the-behavior-of-tf-cond for
        # a more detailed discussion.
        def perturb_vars(original_scope, perturbed_scope):
-            all_vars = U.scope_vars(U.absolute_scope_name(original_scope))
-            all_perturbed_vars = U.scope_vars(U.absolute_scope_name(perturbed_scope))
+            all_vars = scope_vars(absolute_scope_name(original_scope))
+            all_perturbed_vars = scope_vars(absolute_scope_name(perturbed_scope))
            assert len(all_vars) == len(all_perturbed_vars)
            perturb_ops = []
            for var, perturbed_var in zip(all_vars, all_perturbed_vars):
@@ -274,10 +305,12 @@ def build_act_with_param_noise(make_obs_ph, q_func, num_actions, scope="deepq",
            tf.cond(update_param_noise_scale_ph, lambda: update_scale(), lambda: tf.Variable(0., trainable=False)),
            update_param_noise_threshold_expr,
        ]
-        act = U.function(inputs=[observations_ph, stochastic_ph, update_eps_ph, reset_ph, update_param_noise_threshold_ph, update_param_noise_scale_ph],
+        _act = U.function(inputs=[observations_ph, stochastic_ph, update_eps_ph, reset_ph, update_param_noise_threshold_ph, update_param_noise_scale_ph],
                         outputs=output_actions,
                         givens={update_eps_ph: -1.0, stochastic_ph: True, reset_ph: False, update_param_noise_threshold_ph: False, update_param_noise_scale_ph: False},
                         updates=updates)
+        def act(ob, reset, update_param_noise_threshold, update_param_noise_scale, stochastic=True, update_eps=-1):
+            return _act(ob, stochastic, update_eps, reset, update_param_noise_threshold, update_param_noise_scale)
        return act


--- a/baselines/deepq/experiments/atari/__init__.py
+++ b/baselines/deepq/experiments/atari/__init__.py
--- a/baselines/deepq/experiments/atari/download_model.py
+++ b/baselines/deepq/experiments/atari/download_model.py
@@ -1,51 +0,0 @@
-import argparse
-import progressbar
-
-from baselines.common.azure_utils import Container
-
-
-def parse_args():
-    parser = argparse.ArgumentParser("Download a pretrained model from Azure.")
-    # Environment
-    parser.add_argument("--model-dir", type=str, default=None,
-                        help="save model in this directory this directory. ")
-    parser.add_argument("--account-name", type=str, default="openaisciszymon",
-                        help="account name for Azure Blob Storage")
-    parser.add_argument("--account-key", type=str, default=None,
-                        help="account key for Azure Blob Storage")
-    parser.add_argument("--container", type=str, default="dqn-blogpost",
-                        help="container name and blob name separated by colon serparated by colon")
-    parser.add_argument("--blob", type=str, default=None, help="blob with the model")
-    return parser.parse_args()
-
-
-def main():
-    args = parse_args()
-    c = Container(account_name=args.account_name,
-                  account_key=args.account_key,
-                  container_name=args.container)
-
-    if args.blob is None:
-        print("Listing available models:")
-        print()
-        for blob in sorted(c.list(prefix="model-")):
-            print(blob)
-    else:
-        print("Downloading {} to {}...".format(args.blob, args.model_dir))
-        bar = None
-
-        def callback(current, total):
-            nonlocal bar
-            if bar is None:
-                bar = progressbar.ProgressBar(max_value=total)
-            bar.update(current)
-
-        assert c.exists(args.blob), "model {} does not exist".format(args.blob)
-
-        assert args.model_dir is not None
-
-        c.get(args.model_dir, args.blob, callback=callback)
-
-
-if __name__ == '__main__':
-    main()
--- a/baselines/deepq/experiments/atari/enjoy.py
+++ b/baselines/deepq/experiments/atari/enjoy.py
@@ -1,71 +0,0 @@
-import argparse
-import gym
-import os
-import numpy as np
-
-from gym.wrappers.monitoring.video_recorder import VideoRecorder
-
-import baselines.common.tf_util as U
-
-from baselines import deepq
-from baselines.common.misc_util import (
-    boolean_flag,
-)
-from baselines import bench
-from baselines.common.atari_wrappers_deprecated import wrap_dqn
-from baselines.deepq.experiments.atari.model import model, dueling_model
-from baselines.deepq.utils import Uint8Input, load_state
-
-
-def parse_args():
-    parser = argparse.ArgumentParser("Run an already learned DQN model.")
-    # Environment
-    parser.add_argument("--env", type=str, required=True, help="name of the game")
-    parser.add_argument("--model-dir", type=str, default=None, help="load model from this directory. ")
-    parser.add_argument("--video", type=str, default=None, help="Path to mp4 file where the video of first episode will be recorded.")
-    boolean_flag(parser, "stochastic", default=True, help="whether or not to use stochastic actions according to models eps value")
-    boolean_flag(parser, "dueling", default=False, help="whether or not to use dueling model")
-
-    return parser.parse_args()
-
-
-def make_env(game_name):
-    env = gym.make(game_name + "NoFrameskip-v4")
-    env = bench.Monitor(env, None)
-    env = wrap_dqn(env)
-    return env
-
-
-def play(env, act, stochastic, video_path):
-    num_episodes = 0
-    video_recorder = None
-    video_recorder = VideoRecorder(
-        env, video_path, enabled=video_path is not None)
-    obs = env.reset()
-    while True:
-        env.unwrapped.render()
-        video_recorder.capture_frame()
-        action = act(np.array(obs)[None], stochastic=stochastic)[0]
-        obs, rew, done, info = env.step(action)
-        if done:
-            obs = env.reset()
-        if len(info["rewards"]) > num_episodes:
-            if len(info["rewards"]) == 1 and video_recorder.enabled:
-                # save video of first episode
-                print("Saved video.")
-                video_recorder.close()
-                video_recorder.enabled = False
-            print(info["rewards"][-1])
-            num_episodes = len(info["rewards"])
-
-
-if __name__ == '__main__':
-    with U.make_session(4) as sess:
-        args = parse_args()
-        env = make_env(args.env)
-        act = deepq.build_act(
-            make_obs_ph=lambda name: Uint8Input(env.observation_space.shape, name=name),
-            q_func=dueling_model if args.dueling else model,
-            num_actions=env.action_space.n)
-        load_state(os.path.join(args.model_dir, "saved"))
-        play(env, act, args.stochastic, args.video)
--- a/baselines/deepq/experiments/atari/model.py
+++ b/baselines/deepq/experiments/atari/model.py
@@ -1,44 +0,0 @@
-import tensorflow as tf
-import tensorflow.contrib.layers as layers
-
-
-def model(img_in, num_actions, scope, reuse=False):
-    """As described in https://storage.googleapis.com/deepmind-data/assets/papers/DeepMindNature14236Paper.pdf"""
-    with tf.variable_scope(scope, reuse=reuse):
-        out = img_in
-        with tf.variable_scope("convnet"):
-            # original architecture
-            out = layers.convolution2d(out, num_outputs=32, kernel_size=8, stride=4, activation_fn=tf.nn.relu)
-            out = layers.convolution2d(out, num_outputs=64, kernel_size=4, stride=2, activation_fn=tf.nn.relu)
-            out = layers.convolution2d(out, num_outputs=64, kernel_size=3, stride=1, activation_fn=tf.nn.relu)
-        conv_out = layers.flatten(out)
-
-        with tf.variable_scope("action_value"):
-            value_out = layers.fully_connected(conv_out, num_outputs=512, activation_fn=None)
-            value_out = tf.nn.relu(value_out)
-            value_out = layers.fully_connected(value_out, num_outputs=num_actions, activation_fn=None)
-        return value_out
-
-
-def dueling_model(img_in, num_actions, scope, reuse=False):
-    """As described in https://arxiv.org/abs/1511.06581"""
-    with tf.variable_scope(scope, reuse=reuse):
-        out = img_in
-        with tf.variable_scope("convnet"):
-            # original architecture
-            out = layers.convolution2d(out, num_outputs=32, kernel_size=8, stride=4, activation_fn=tf.nn.relu)
-            out = layers.convolution2d(out, num_outputs=64, kernel_size=4, stride=2, activation_fn=tf.nn.relu)
-            out = layers.convolution2d(out, num_outputs=64, kernel_size=3, stride=1, activation_fn=tf.nn.relu)
-        conv_out = layers.flatten(out)
-
-        with tf.variable_scope("state_value"):
-            state_hidden = layers.fully_connected(conv_out, num_outputs=512, activation_fn=None)
-            state_hidden = tf.nn.relu(state_hidden)
-            state_score = layers.fully_connected(state_hidden, num_outputs=1, activation_fn=None)
-        with tf.variable_scope("action_value"):
-            actions_hidden = layers.fully_connected(conv_out, num_outputs=512, activation_fn=None)
-            actions_hidden = tf.nn.relu(actions_hidden)
-            action_scores = layers.fully_connected(actions_hidden, num_outputs=num_actions, activation_fn=None)
-            action_scores_mean = tf.reduce_mean(action_scores, 1)
-            action_scores = action_scores - tf.expand_dims(action_scores_mean, 1)
-        return state_score + action_scores
--- a/baselines/deepq/experiments/atari/train.py
+++ b/baselines/deepq/experiments/atari/train.py
@@ -1,274 +0,0 @@
-import argparse
-import gym
-import numpy as np
-import os
-import tensorflow as tf
-import tempfile
-import time
-import json
-
-import baselines.common.tf_util as U
-
-from baselines import logger
-from baselines import deepq
-from baselines.deepq.replay_buffer import ReplayBuffer, PrioritizedReplayBuffer
-from baselines.common.misc_util import (
-    boolean_flag,
-    pickle_load,
-    pretty_eta,
-    relatively_safe_pickle_dump,
-    set_global_seeds,
-    RunningAvg,
-)
-from baselines.common.schedules import LinearSchedule, PiecewiseSchedule
-from baselines import bench
-from baselines.common.atari_wrappers_deprecated import wrap_dqn
-from baselines.common.azure_utils import Container
-from .model import model, dueling_model
-from baselines.deepq.utils import Uint8Input, load_state, save_state
-
-
-def parse_args():
-    parser = argparse.ArgumentParser("DQN experiments for Atari games")
-    # Environment
-    parser.add_argument("--env", type=str, default="Pong", help="name of the game")
-    parser.add_argument("--seed", type=int, default=42, help="which seed to use")
-    # Core DQN parameters
-    parser.add_argument("--replay-buffer-size", type=int, default=int(1e6), help="replay buffer size")
-    parser.add_argument("--lr", type=float, default=1e-4, help="learning rate for Adam optimizer")
-    parser.add_argument("--num-steps", type=int, default=int(2e8), help="total number of steps to run the environment for")
-    parser.add_argument("--batch-size", type=int, default=32, help="number of transitions to optimize at the same time")
-    parser.add_argument("--learning-freq", type=int, default=4, help="number of iterations between every optimization step")
-    parser.add_argument("--target-update-freq", type=int, default=40000, help="number of iterations between every target network update")
-    parser.add_argument("--param-noise-update-freq", type=int, default=50, help="number of iterations between every re-scaling of the parameter noise")
-    parser.add_argument("--param-noise-reset-freq", type=int, default=10000, help="maximum number of steps to take per episode before re-perturbing the exploration policy")
-    # Bells and whistles
-    boolean_flag(parser, "double-q", default=True, help="whether or not to use double q learning")
-    boolean_flag(parser, "dueling", default=False, help="whether or not to use dueling model")
-    boolean_flag(parser, "prioritized", default=False, help="whether or not to use prioritized replay buffer")
-    boolean_flag(parser, "param-noise", default=False, help="whether or not to use parameter space noise for exploration")
-    boolean_flag(parser, "layer-norm", default=False, help="whether or not to use layer norm (should be True if param_noise is used)")
-    boolean_flag(parser, "gym-monitor", default=False, help="whether or not to use a OpenAI Gym monitor (results in slower training due to video recording)")
-    parser.add_argument("--prioritized-alpha", type=float, default=0.6, help="alpha parameter for prioritized replay buffer")
-    parser.add_argument("--prioritized-beta0", type=float, default=0.4, help="initial value of beta parameters for prioritized replay")
-    parser.add_argument("--prioritized-eps", type=float, default=1e-6, help="eps parameter for prioritized replay buffer")
-    # Checkpointing
-    parser.add_argument("--save-dir", type=str, default=None, help="directory in which training state and model should be saved.")
-    parser.add_argument("--save-azure-container", type=str, default=None,
-                        help="It present data will saved/loaded from Azure. Should be in format ACCOUNT_NAME:ACCOUNT_KEY:CONTAINER")
-    parser.add_argument("--save-freq", type=int, default=1e6, help="save model once every time this many iterations are completed")
-    boolean_flag(parser, "load-on-start", default=True, help="if true and model was previously saved then training will be resumed")
-    return parser.parse_args()
-
-
-def make_env(game_name):
-    env = gym.make(game_name + "NoFrameskip-v4")
-    monitored_env = bench.Monitor(env, logger.get_dir())  # puts rewards and number of steps in info, before environment is wrapped
-    env = wrap_dqn(monitored_env)  # applies a bunch of modification to simplify the observation space (downsample, make b/w)
-    return env, monitored_env
-
-
-def maybe_save_model(savedir, container, state):
-    """This function checkpoints the model and state of the training algorithm."""
-    if savedir is None:
-        return
-    start_time = time.time()
-    model_dir = "model-{}".format(state["num_iters"])
-    save_state(os.path.join(savedir, model_dir, "saved"))
-    if container is not None:
-        container.put(os.path.join(savedir, model_dir), model_dir)
-    relatively_safe_pickle_dump(state, os.path.join(savedir, 'training_state.pkl.zip'), compression=True)
-    if container is not None:
-        container.put(os.path.join(savedir, 'training_state.pkl.zip'), 'training_state.pkl.zip')
-    relatively_safe_pickle_dump(state["monitor_state"], os.path.join(savedir, 'monitor_state.pkl'))
-    if container is not None:
-        container.put(os.path.join(savedir, 'monitor_state.pkl'), 'monitor_state.pkl')
-    logger.log("Saved model in {} seconds\n".format(time.time() - start_time))
-
-
-def maybe_load_model(savedir, container):
-    """Load model if present at the specified path."""
-    if savedir is None:
-        return
-
-    state_path = os.path.join(os.path.join(savedir, 'training_state.pkl.zip'))
-    if container is not None:
-        logger.log("Attempting to download model from Azure")
-        found_model = container.get(savedir, 'training_state.pkl.zip')
-    else:
-        found_model = os.path.exists(state_path)
-    if found_model:
-        state = pickle_load(state_path, compression=True)
-        model_dir = "model-{}".format(state["num_iters"])
-        if container is not None:
-            container.get(savedir, model_dir)
-        load_state(os.path.join(savedir, model_dir, "saved"))
-        logger.log("Loaded models checkpoint at {} iterations".format(state["num_iters"]))
-        return state
-
-
-if __name__ == '__main__':
-    args = parse_args()
-
-    # Parse savedir and azure container.
-    savedir = args.save_dir
-    if savedir is None:
-        savedir = os.getenv('OPENAI_LOGDIR', None)
-    if args.save_azure_container is not None:
-        account_name, account_key, container_name = args.save_azure_container.split(":")
-        container = Container(account_name=account_name,
-                              account_key=account_key,
-                              container_name=container_name,
-                              maybe_create=True)
-        if savedir is None:
-            # Careful! This will not get cleaned up. Docker spoils the developers.
-            savedir = tempfile.TemporaryDirectory().name
-    else:
-        container = None
-    # Create and seed the env.
-    env, monitored_env = make_env(args.env)
-    if args.seed > 0:
-        set_global_seeds(args.seed)
-        env.unwrapped.seed(args.seed)
-
-    if args.gym_monitor and savedir:
-        env = gym.wrappers.Monitor(env, os.path.join(savedir, 'gym_monitor'), force=True)
-
-    if savedir:
-        with open(os.path.join(savedir, 'args.json'), 'w') as f:
-            json.dump(vars(args), f)
-
-    with U.make_session(4) as sess:
-        # Create training graph and replay buffer
-        def model_wrapper(img_in, num_actions, scope, **kwargs):
-            actual_model = dueling_model if args.dueling else model
-            return actual_model(img_in, num_actions, scope, layer_norm=args.layer_norm, **kwargs)
-        act, train, update_target, debug = deepq.build_train(
-            make_obs_ph=lambda name: Uint8Input(env.observation_space.shape, name=name),
-            q_func=model_wrapper,
-            num_actions=env.action_space.n,
-            optimizer=tf.train.AdamOptimizer(learning_rate=args.lr, epsilon=1e-4),
-            gamma=0.99,
-            grad_norm_clipping=10,
-            double_q=args.double_q,
-            param_noise=args.param_noise
-        )
-
-        approximate_num_iters = args.num_steps / 4
-        exploration = PiecewiseSchedule([
-            (0, 1.0),
-            (approximate_num_iters / 50, 0.1),
-            (approximate_num_iters / 5, 0.01)
-        ], outside_value=0.01)
-
-        if args.prioritized:
-            replay_buffer = PrioritizedReplayBuffer(args.replay_buffer_size, args.prioritized_alpha)
-            beta_schedule = LinearSchedule(approximate_num_iters, initial_p=args.prioritized_beta0, final_p=1.0)
-        else:
-            replay_buffer = ReplayBuffer(args.replay_buffer_size)
-
-        U.initialize()
-        update_target()
-        num_iters = 0
-
-        # Load the model
-        state = maybe_load_model(savedir, container)
-        if state is not None:
-            num_iters, replay_buffer = state["num_iters"], state["replay_buffer"],
-            monitored_env.set_state(state["monitor_state"])
-
-        start_time, start_steps = None, None
-        steps_per_iter = RunningAvg(0.999)
-        iteration_time_est = RunningAvg(0.999)
-        obs = env.reset()
-        num_iters_since_reset = 0
-        reset = True
-
-        # Main trianing loop
-        while True:
-            num_iters += 1
-            num_iters_since_reset += 1
-
-            # Take action and store transition in the replay buffer.
-            kwargs = {}
-            if not args.param_noise:
-                update_eps = exploration.value(num_iters)
-                update_param_noise_threshold = 0.
-            else:
-                if args.param_noise_reset_freq > 0 and num_iters_since_reset > args.param_noise_reset_freq:
-                    # Reset param noise policy since we have exceeded the maximum number of steps without a reset.
-                    reset = True
-
-                update_eps = 0.01  # ensures that we cannot get stuck completely
-                # Compute the threshold such that the KL divergence between perturbed and non-perturbed
-                # policy is comparable to eps-greedy exploration with eps = exploration.value(t).
-                # See Appendix C.1 in Parameter Space Noise for Exploration, Plappert et al., 2017
-                # for detailed explanation.
-                update_param_noise_threshold = -np.log(1. - exploration.value(num_iters) + exploration.value(num_iters) / float(env.action_space.n))
-                kwargs['reset'] = reset
-                kwargs['update_param_noise_threshold'] = update_param_noise_threshold
-                kwargs['update_param_noise_scale'] = (num_iters % args.param_noise_update_freq == 0)
-
-            action = act(np.array(obs)[None], update_eps=update_eps, **kwargs)[0]
-            reset = False
-            new_obs, rew, done, info = env.step(action)
-            replay_buffer.add(obs, action, rew, new_obs, float(done))
-            obs = new_obs
-            if done:
-                num_iters_since_reset = 0
-                obs = env.reset()
-                reset = True
-
-            if (num_iters > max(5 * args.batch_size, args.replay_buffer_size // 20) and
-                    num_iters % args.learning_freq == 0):
-                # Sample a bunch of transitions from replay buffer
-                if args.prioritized:
-                    experience = replay_buffer.sample(args.batch_size, beta=beta_schedule.value(num_iters))
-                    (obses_t, actions, rewards, obses_tp1, dones, weights, batch_idxes) = experience
-                else:
-                    obses_t, actions, rewards, obses_tp1, dones = replay_buffer.sample(args.batch_size)
-                    weights = np.ones_like(rewards)
-                # Minimize the error in Bellman's equation and compute TD-error
-                td_errors = train(obses_t, actions, rewards, obses_tp1, dones, weights)
-                # Update the priorities in the replay buffer
-                if args.prioritized:
-                    new_priorities = np.abs(td_errors) + args.prioritized_eps
-                    replay_buffer.update_priorities(batch_idxes, new_priorities)
-            # Update target network.
-            if num_iters % args.target_update_freq == 0:
-                update_target()
-
-            if start_time is not None:
-                steps_per_iter.update(info['steps'] - start_steps)
-                iteration_time_est.update(time.time() - start_time)
-            start_time, start_steps = time.time(), info["steps"]
-
-            # Save the model and training state.
-            if num_iters > 0 and (num_iters % args.save_freq == 0 or info["steps"] > args.num_steps):
-                maybe_save_model(savedir, container, {
-                    'replay_buffer': replay_buffer,
-                    'num_iters': num_iters,
-                    'monitor_state': monitored_env.get_state(),
-                })
-
-            if info["steps"] > args.num_steps:
-                break
-
-            if done:
-                steps_left = args.num_steps - info["steps"]
-                completion = np.round(info["steps"] / args.num_steps, 1)
-
-                logger.record_tabular("% completion", completion)
-                logger.record_tabular("steps", info["steps"])
-                logger.record_tabular("iters", num_iters)
-                logger.record_tabular("episodes", len(info["rewards"]))
-                logger.record_tabular("reward (100 epi mean)", np.mean(info["rewards"][-100:]))
-                logger.record_tabular("exploration", exploration.value(num_iters))
-                if args.prioritized:
-                    logger.record_tabular("max priority", replay_buffer._max_priority)
-                fps_estimate = (float(steps_per_iter) / (float(iteration_time_est) + 1e-6)
-                                if steps_per_iter._value is not None else "calculating...")
-                logger.dump_tabular()
-                logger.log()
-                logger.log("ETA: " + pretty_eta(int(steps_left / fps_estimate)))
-                logger.log()
--- a/baselines/deepq/experiments/atari/wang2015_eval.py
+++ b/baselines/deepq/experiments/atari/wang2015_eval.py
@@ -1,82 +0,0 @@
-import argparse
-import gym
-import numpy as np
-import os
-
-import baselines.common.tf_util as U
-
-from baselines import deepq, bench
-from baselines.common.misc_util import get_wrapper_by_name, boolean_flag, set_global_seeds
-from baselines.common.atari_wrappers_deprecated import wrap_dqn
-from baselines.deepq.experiments.atari.model import model, dueling_model
-from baselines.deepq.utils import Uint8Input, load_state
-
-
-def make_env(game_name):
-    env = gym.make(game_name + "NoFrameskip-v4")
-    env_monitored = bench.Monitor(env, None)
-    env = wrap_dqn(env_monitored)
-    return env_monitored, env
-
-
-def parse_args():
-    parser = argparse.ArgumentParser("Evaluate an already learned DQN model.")
-    # Environment
-    parser.add_argument("--env", type=str, required=True, help="name of the game")
-    parser.add_argument("--model-dir", type=str, default=None, help="load model from this directory. ")
-    boolean_flag(parser, "stochastic", default=True, help="whether or not to use stochastic actions according to models eps value")
-    boolean_flag(parser, "dueling", default=False, help="whether or not to use dueling model")
-
-    return parser.parse_args()
-
-
-def wang2015_eval(game_name, act, stochastic):
-    print("==================== wang2015 evaluation ====================")
-    episode_rewards = []
-
-    for num_noops in range(1, 31):
-        env_monitored, eval_env = make_env(game_name)
-        eval_env.unwrapped.seed(1)
-
-        get_wrapper_by_name(eval_env, "NoopResetEnv").override_num_noops = num_noops
-
-        eval_episode_steps = 0
-        done = True
-        while True:
-            if done:
-                obs = eval_env.reset()
-            eval_episode_steps += 1
-            action = act(np.array(obs)[None], stochastic=stochastic)[0]
-
-            obs, _reward, done, info = eval_env.step(action)
-            if done:
-                obs = eval_env.reset()
-            if len(info["rewards"]) > 0:
-                episode_rewards.append(info["rewards"][0])
-                break
-            if info["steps"] > 108000:  # 5 minutes of gameplay
-                episode_rewards.append(sum(env_monitored.rewards))
-                break
-        print("Num steps in episode {} was {} yielding {} reward".format(
-              num_noops, eval_episode_steps, episode_rewards[-1]), flush=True)
-    print("Evaluation results: " + str(np.mean(episode_rewards)))
-    print("=============================================================")
-    return np.mean(episode_rewards)
-
-
-def main():
-    set_global_seeds(1)
-    args = parse_args()
-    with U.make_session(4):  # noqa
-        _, env = make_env(args.env)
-        act = deepq.build_act(
-            make_obs_ph=lambda name: Uint8Input(env.observation_space.shape, name=name),
-            q_func=dueling_model if args.dueling else model,
-            num_actions=env.action_space.n)
-
-        load_state(os.path.join(args.model_dir, "saved"))
-        wang2015_eval(args.env, act, stochastic=args.stochastic)
-
-
-if __name__ == '__main__':
-    main()
--- a/setup.py
+++ b/setup.py
@@ -2,8 +2,8 @@ from setuptools import setup, find_packages
 import sys

 if sys.version_info.major != 3:
-    print("This Python is only compatible with Python 3, but you are running "
-          "Python {}. The installation will likely fail.".format(sys.version_info.major))
+    print('This Python is only compatible with Python 3, but you are running '
+          'Python {}. The installation will likely fail.'.format(sys.version_info.major))


 setup(name='baselines',
@@ -16,13 +16,12 @@ setup(name='baselines',
          'joblib',
          'zmq',
          'dill',
-          'azure==1.0.3',
          'progressbar2',
          'mpi4py',
          'cloudpickle',
      ],
-      description="OpenAI baselines: high quality implementations of reinforcement learning algorithms",
-      author="OpenAI",
+      description='OpenAI baselines: high quality implementations of reinforcement learning algorithms',
+      author='OpenAI',
      url='https://github.com/openai/baselines',
-      author_email="gym@openai.com",
-      version="0.1.4")
+      author_email='gym@openai.com',
+      version='0.1.4')