Blacken the codebase (#2265)

Christian Clauss
2021-07-29 02:26:34 +02:00
committed by GitHub
parent 2f3133461c
commit bb81e141ea
162 changed files with 5503 additions and 3338 deletions


@@ -8,10 +8,10 @@ jobs:
- uses: actions/setup-python@v2
- run: pip install bandit black codespell flake8 isort mypy pytest pyupgrade safety
- run: bandit --recursive --skip B101,B108,B301,B403,B404,B603 .
- run: black --check . || true
- run: black --check .
- run: codespell --ignore-words-list=nd,reacher,thist,ths -w
- run: flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
- run: flake8 . --count --exit-zero --max-complexity=10 --max-line-length=88 --show-source --statistics
- run: flake8 --ignore=E203,E402,E712,E722,E731,E741,F401,F403,F405,F524,F841,W503
--max-complexity=30 --max-line-length=456 --show-source --statistics
- run: isort --check-only --profile black . || true
- run: pip install -e .[nomujoco]
- run: mypy --install-types --non-interactive . || true


@@ -3,9 +3,15 @@ import argparse
import gym
parser = argparse.ArgumentParser(description='Renders a Gym environment for quick inspection.')
parser.add_argument('env_id', type=str, help='the ID of the environment to be rendered (e.g. HalfCheetah-v2')
parser.add_argument('--step', type=int, default=1)
parser = argparse.ArgumentParser(
description="Renders a Gym environment for quick inspection."
)
parser.add_argument(
"env_id",
type=str,
help="the ID of the environment to be rendered (e.g. HalfCheetah-v2",
)
parser.add_argument("--step", type=int, default=1)
args = parser.parse_args()
env = gym.make(args.env_id)


@@ -1,19 +1,23 @@
# Support code for cem.py
class BinaryActionLinearPolicy(object):
def __init__(self, theta):
self.w = theta[:-1]
self.b = theta[-1]
def act(self, ob):
y = ob.dot(self.w) + self.b
a = int(y < 0)
return a
class ContinuousActionLinearPolicy(object):
def __init__(self, theta, n_in, n_out):
assert len(theta) == (n_in + 1) * n_out
self.W = theta[0 : n_in * n_out].reshape(n_in, n_out)
self.b = theta[n_in * n_out : None].reshape(1, n_out)
def act(self, ob):
a = ob.dot(self.W) + self.b
return a
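These policies pack the weights and the bias into a single flat parameter vector theta. A minimal usage sketch (illustrative only, not part of this diff, and assuming it runs alongside _policies.py):

import numpy as np
from _policies import BinaryActionLinearPolicy

# theta = [w_0, ..., w_{n-1}, b]; the last entry is the bias.
theta = np.array([0.5, -0.25, 1.0])
policy = BinaryActionLinearPolicy(theta)

ob = np.array([1.0, 2.0])
print(policy.act(ob))  # int(ob.dot(w) + b < 0) -> prints 0 here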


@@ -1,11 +1,16 @@
import argparse
import json
import os
import pickle
import sys
from os import path
import numpy as np
import gym
from gym import wrappers, logger
import numpy as np
import pickle
import json, sys, os
from os import path
from _policies import BinaryActionLinearPolicy # Different file so it can be unpickled
import argparse
from _policies import BinaryActionLinearPolicy # Different file so it can be unpickled
def cem(f, th_mean, batch_size, n_iter, elite_frac, initial_std=1.0):
"""
@@ -26,17 +31,23 @@ def cem(f, th_mean, batch_size, n_iter, elite_frac, initial_std=1.0):
'ys_mean': mean value of function over current population
'theta_mean': mean value of the parameter vector over current population
"""
n_elite = int(np.round(batch_size*elite_frac))
n_elite = int(np.round(batch_size * elite_frac))
th_std = np.ones_like(th_mean) * initial_std
for _ in range(n_iter):
ths = np.array([th_mean + dth for dth in th_std[None,:]*np.random.randn(batch_size, th_mean.size)])
ths = np.array(
[
th_mean + dth
for dth in th_std[None, :] * np.random.randn(batch_size, th_mean.size)
]
)
ys = np.array([f(th) for th in ths])
elite_inds = ys.argsort()[::-1][:n_elite]
elite_ths = ths[elite_inds]
th_mean = elite_ths.mean(axis=0)
th_std = elite_ths.std(axis=0)
yield {'ys' : ys, 'theta_mean' : th_mean, 'y_mean' : ys.mean()}
yield {"ys": ys, "theta_mean": th_mean, "y_mean": ys.mean()}
def do_rollout(agent, env, num_steps, render=False):
total_rew = 0
@@ -45,16 +56,19 @@ def do_rollout(agent, env, num_steps, render=False):
a = agent.act(ob)
(ob, reward, done, _info) = env.step(a)
total_rew += reward
if render and t%3==0: env.render()
if done: break
return total_rew, t+1
if render and t % 3 == 0:
env.render()
if done:
break
return total_rew, t + 1
if __name__ == '__main__':
if __name__ == "__main__":
logger.set_level(logger.INFO)
parser = argparse.ArgumentParser()
parser.add_argument('--display', action='store_true')
parser.add_argument('target', nargs="?", default="CartPole-v0")
parser.add_argument("--display", action="store_true")
parser.add_argument("target", nargs="?", default="CartPole-v0")
args = parser.parse_args()
env = gym.make(args.target)
@@ -66,17 +80,19 @@ if __name__ == '__main__':
# You provide the directory to write to (can be an existing
# directory, but can't contain previous monitor results. You can
# also dump to a tempdir if you'd like: tempfile.mkdtemp().
outdir = '/tmp/cem-agent-results'
outdir = "/tmp/cem-agent-results"
env = wrappers.Monitor(env, outdir, force=True)
# Prepare snapshotting
# ----------------------------------------
def writefile(fname, s):
with open(path.join(outdir, fname), 'w') as fh: fh.write(s)
with open(path.join(outdir, fname), "w") as fh:
fh.write(s)
info = {}
info['params'] = params
info['argv'] = sys.argv
info['env_id'] = env.spec.id
info["params"] = params
info["argv"] = sys.argv
info["env_id"] = env.spec.id
# ------------------------------------------
def noisy_evaluation(theta):
@@ -86,14 +102,16 @@ if __name__ == '__main__':
# Train the agent, and snapshot each stage
for (i, iterdata) in enumerate(
cem(noisy_evaluation, np.zeros(env.observation_space.shape[0]+1), **params)):
print('Iteration %2i. Episode mean reward: %7.3f'%(i, iterdata['y_mean']))
agent = BinaryActionLinearPolicy(iterdata['theta_mean'])
if args.display: do_rollout(agent, env, 200, render=True)
writefile('agent-%.4i.pkl'%i, str(pickle.dumps(agent, -1)))
cem(noisy_evaluation, np.zeros(env.observation_space.shape[0] + 1), **params)
):
print("Iteration %2i. Episode mean reward: %7.3f" % (i, iterdata["y_mean"]))
agent = BinaryActionLinearPolicy(iterdata["theta_mean"])
if args.display:
do_rollout(agent, env, 200, render=True)
writefile("agent-%.4i.pkl" % i, str(pickle.dumps(agent, -1)))
# Write out the env at the end so we store the parameters of this
# environment.
writefile('info.json', json.dumps(info))
writefile("info.json", json.dumps(info))
env.close()
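The cem generator above has no Gym dependency and can be exercised on a toy objective. An illustrative sketch (not part of this diff), with cem defined as in this file:

import numpy as np

# Maximize f(theta) = -||theta - 3||^2; the optimum is theta = [3, 3].
def f(theta):
    return -np.sum((theta - 3.0) ** 2)

for i, data in enumerate(
    cem(f, th_mean=np.zeros(2), batch_size=50, n_iter=10, elite_frac=0.2)
):
    print(i, data["y_mean"], data["theta_mean"])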


@@ -1,5 +1,8 @@
#!/usr/bin/env python
import sys, gym, time
import sys
import time
import gym
#
# Test yourself as a learning agent! Pass environment name as a command-line argument, for example:
@@ -7,37 +10,45 @@ import sys, gym, time
# python keyboard_agent.py SpaceInvadersNoFrameskip-v4
#
env = gym.make('LunarLander-v2' if len(sys.argv)<2 else sys.argv[1])
env = gym.make("LunarLander-v2" if len(sys.argv) < 2 else sys.argv[1])
if not hasattr(env.action_space, 'n'):
raise Exception('Keyboard agent only supports discrete action spaces')
if not hasattr(env.action_space, "n"):
raise Exception("Keyboard agent only supports discrete action spaces")
ACTIONS = env.action_space.n
SKIP_CONTROL = 0 # Use previous control decision SKIP_CONTROL times, that's how you
# can test what skip is still usable.
SKIP_CONTROL = 0 # Use previous control decision SKIP_CONTROL times, that's how you
# can test what skip is still usable.
human_agent_action = 0
human_wants_restart = False
human_sets_pause = False
def key_press(key, mod):
global human_agent_action, human_wants_restart, human_sets_pause
if key==0xff0d: human_wants_restart = True
if key==32: human_sets_pause = not human_sets_pause
a = int( key - ord('0') )
if a <= 0 or a >= ACTIONS: return
if key == 0xFF0D:
human_wants_restart = True
if key == 32:
human_sets_pause = not human_sets_pause
a = int(key - ord("0"))
if a <= 0 or a >= ACTIONS:
return
human_agent_action = a
def key_release(key, mod):
global human_agent_action
a = int( key - ord('0') )
if a <= 0 or a >= ACTIONS: return
a = int(key - ord("0"))
if a <= 0 or a >= ACTIONS:
return
if human_agent_action == a:
human_agent_action = 0
env.render()
env.unwrapped.viewer.window.on_key_press = key_press
env.unwrapped.viewer.window.on_key_release = key_release
def rollout(env):
global human_agent_action, human_wants_restart, human_sets_pause
human_wants_restart = False
@@ -47,7 +58,7 @@ def rollout(env):
total_timesteps = 0
while 1:
if not skip:
#print("taking action {}".format(human_agent_action))
# print("taking action {}".format(human_agent_action))
a = human_agent_action
total_timesteps += 1
skip = SKIP_CONTROL
@@ -59,20 +70,24 @@ def rollout(env):
print("reward %0.3f" % r)
total_reward += r
window_still_open = env.render()
if window_still_open==False: return False
if done: break
if human_wants_restart: break
if window_still_open == False:
return False
if done:
break
if human_wants_restart:
break
while human_sets_pause:
env.render()
time.sleep(0.1)
time.sleep(0.1)
print("timesteps %i reward %0.2f" % (total_timesteps, total_reward))
print("ACTIONS={}".format(ACTIONS))
print("Press keys 1 2 3 ... to take actions 1 2 3 ...")
print("No keys pressed is taking action 0")
while 1:
window_still_open = rollout(env)
if window_still_open==False: break
if window_still_open == False:
break


@@ -4,17 +4,22 @@ import sys
import gym
from gym import wrappers, logger
class RandomAgent(object):
"""The world's simplest agent!"""
def __init__(self, action_space):
self.action_space = action_space
def act(self, observation, reward, done):
return self.action_space.sample()
if __name__ == '__main__':
if __name__ == "__main__":
parser = argparse.ArgumentParser(description=None)
parser.add_argument('env_id', nargs='?', default='CartPole-v0', help='Select the environment to run')
parser.add_argument(
"env_id", nargs="?", default="CartPole-v0", help="Select the environment to run"
)
args = parser.parse_args()
# You can set the level to logger.DEBUG or logger.WARN if you
@@ -27,7 +32,7 @@ if __name__ == '__main__':
# directory, including one with existing data -- all monitor files
# will be namespaced). You can also dump to a tempdir if you'd
# like: tempfile.mkdtemp().
outdir = '/tmp/random-agent-results'
outdir = "/tmp/random-agent-results"
env = wrappers.Monitor(env, directory=outdir, force=True)
env.seed(0)
agent = RandomAgent(env.action_space)


@@ -6,7 +6,14 @@ import warnings
from gym import error
from gym.version import VERSION as __version__
from gym.core import Env, GoalEnv, Wrapper, ObservationWrapper, ActionWrapper, RewardWrapper
from gym.core import (
Env,
GoalEnv,
Wrapper,
ObservationWrapper,
ActionWrapper,
RewardWrapper,
)
from gym.spaces import Space
from gym.envs import make, spec, register
from gym import logger


@@ -28,9 +28,10 @@ class Env(object):
The methods are accessed publicly as "step", "reset", etc...
"""
# Set this in SOME subclasses
metadata = {'render.modes': []}
reward_range = (-float('inf'), float('inf'))
metadata = {"render.modes": []}
reward_range = (-float("inf"), float("inf"))
spec = None
# Set these in ALL subclasses
@@ -70,7 +71,7 @@ class Env(object):
"""
raise NotImplementedError
def render(self, mode='human'):
def render(self, mode="human"):
"""Renders the environment.
The set of supported modes varies per environment. (And some
@@ -145,16 +146,16 @@ class Env(object):
def __str__(self):
if self.spec is None:
return '<{} instance>'.format(type(self).__name__)
return "<{} instance>".format(type(self).__name__)
else:
return '<{}<{}>>'.format(type(self).__name__, self.spec.id)
return "<{}<{}>>".format(type(self).__name__, self.spec.id)
def __enter__(self):
"""Support with-statement for the environment. """
"""Support with-statement for the environment."""
return self
def __exit__(self, *args):
"""Support with-statement for the environment. """
"""Support with-statement for the environment."""
self.close()
# propagate exception
return False
@@ -172,10 +173,16 @@ class GoalEnv(Env):
def reset(self):
# Enforce that each GoalEnv uses a Goal-compatible observation space.
if not isinstance(self.observation_space, gym.spaces.Dict):
raise error.Error('GoalEnv requires an observation space of type gym.spaces.Dict')
for key in ['observation', 'achieved_goal', 'desired_goal']:
raise error.Error(
"GoalEnv requires an observation space of type gym.spaces.Dict"
)
for key in ["observation", "achieved_goal", "desired_goal"]:
if key not in self.observation_space.spaces:
raise error.Error('GoalEnv requires the "{}" key to be part of the observation dictionary.'.format(key))
raise error.Error(
'GoalEnv requires the "{}" key to be part of the observation dictionary.'.format(
key
)
)
def compute_reward(self, achieved_goal, desired_goal, info):
"""Compute the step reward. This externalizes the reward function and makes
@@ -210,6 +217,7 @@ class Wrapper(Env):
Don't forget to call ``super().__init__(env)`` if the subclass overrides :meth:`__init__`.
"""
def __init__(self, env):
self.env = env
self.action_space = self.env.action_space
@@ -218,8 +226,10 @@ class Wrapper(Env):
self.metadata = self.env.metadata
def __getattr__(self, name):
if name.startswith('_'):
raise AttributeError("attempted to get missing private attribute '{}'".format(name))
if name.startswith("_"):
raise AttributeError(
"attempted to get missing private attribute '{}'".format(name)
)
return getattr(self.env, name)
@property
@@ -236,7 +246,7 @@ class Wrapper(Env):
def reset(self, **kwargs):
return self.env.reset(**kwargs)
def render(self, mode='human', **kwargs):
def render(self, mode="human", **kwargs):
return self.env.render(mode, **kwargs)
def close(self):
@@ -249,7 +259,7 @@ class Wrapper(Env):
return self.env.compute_reward(achieved_goal, desired_goal, info)
def __str__(self):
return '<{}{}>'.format(type(self).__name__, self.env)
return "<{}{}>".format(type(self).__name__, self.env)
def __repr__(self):
return str(self)
@@ -295,4 +305,4 @@ class ActionWrapper(Wrapper):
raise NotImplementedError
def reverse_action(self, action):
raise NotImplementedError
raise NotImplementedError
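For orientation, a minimal Env subclass in the style shown above (illustrative only, not part of this commit; the class name is made up):

import numpy as np
import gym
from gym import spaces

class ConstantEnv(gym.Env):
    # Follows the metadata / render(mode="human") conventions above.
    metadata = {"render.modes": ["human"]}

    def __init__(self):
        self.observation_space = spaces.Box(-1.0, 1.0, shape=(1,), dtype=np.float32)
        self.action_space = spaces.Discrete(2)

    def reset(self):
        return np.zeros(1, dtype=np.float32)

    def step(self, action):
        obs = np.zeros(1, dtype=np.float32)
        return obs, 0.0, True, {}  # observation, reward, done, info

    def render(self, mode="human"):
        print("<ConstantEnv instance>")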

File diff suppressed because it is too large.


@@ -42,7 +42,7 @@ from io import StringIO
class AlgorithmicEnv(Env):
metadata = {'render.modes': ['human', 'ansi']}
metadata = {"render.modes": ["human", "ansi"]}
# Only 'promote' the length of generated input strings if the worst of the
# last n episodes was no more than this far from the maximum reward
MIN_REWARD_SHORTFALL_FOR_PROMOTION = -1.0
@@ -64,10 +64,10 @@ class AlgorithmicEnv(Env):
# earn and we got 8, we'd append -2
self.reward_shortfalls = []
if chars:
self.charmap = [chr(ord('A')+i) for i in range(base)]
self.charmap = [chr(ord("A") + i) for i in range(base)]
else:
self.charmap = [str(i) for i in range(base)]
self.charmap.append(' ')
self.charmap.append(" ")
self.min_length = starting_min_length
# Three sub-actions:
# 1. Move read head left or right (or up/down)
@@ -111,17 +111,18 @@ class AlgorithmicEnv(Env):
"""Return a string representation of the input tape/grid."""
raise NotImplementedError
def render(self, mode='human'):
outfile = StringIO() if mode == 'ansi' else sys.stdout
def render(self, mode="human"):
outfile = StringIO() if mode == "ansi" else sys.stdout
inp = "Total length of input instance: %d, step: %d\n" % (
self.input_width, self.time
self.input_width,
self.time,
)
outfile.write(inp)
y, action = self.write_head_position, self.last_action
if action is not None:
inp_act, out_act, pred = action
outfile.write("=" * (len(inp) - 1) + "\n")
y_str = "Output Tape : "
y_str = "Output Tape : "
target_str = "Targets : "
if action is not None:
pred_str = self.charmap[pred]
@@ -132,7 +133,7 @@ class AlgorithmicEnv(Env):
y_str += self._get_str_target(i)
elif i == (y - 1):
if action is not None and out_act == 1:
color = 'green' if pred == self.target[i] else 'red'
color = "green" if pred == self.target[i] else "red"
y_str += colorize(pred_str, color, highlight=True)
else:
y_str += self._get_str_target(i)
@@ -146,12 +147,15 @@ class AlgorithmicEnv(Env):
move = self.MOVEMENTS[inp_act]
outfile.write("Action : Tuple(move over input: %s,\n" % move)
out_act = out_act == 1
outfile.write(" write to the output tape: %s,\n" % out_act)
outfile.write(
" write to the output tape: %s,\n"
% out_act
)
outfile.write(" prediction: %s)\n" % pred_str)
else:
outfile.write("\n" * 5)
if mode != 'human':
if mode != "human":
with closing(outfile):
return outfile.getvalue()
@@ -175,7 +179,8 @@ class AlgorithmicEnv(Env):
"It looks like you're calling step() even though this "
"environment has already returned done=True. You should "
"always call reset() once you receive done=True. Any "
"further steps are undefined behaviour.")
"further steps are undefined behaviour."
)
correct = False
if correct:
reward = 1.0
@@ -209,10 +214,12 @@ class AlgorithmicEnv(Env):
# This is before the first episode/call to reset(). Nothing to do.
return
self.reward_shortfalls.append(self.episode_total_reward - len(self.target))
self.reward_shortfalls = self.reward_shortfalls[-self.last:]
if len(self.reward_shortfalls) == self.last and \
min(self.reward_shortfalls) >= self.MIN_REWARD_SHORTFALL_FOR_PROMOTION and \
self.min_length < 30:
self.reward_shortfalls = self.reward_shortfalls[-self.last :]
if (
len(self.reward_shortfalls) == self.last
and min(self.reward_shortfalls) >= self.MIN_REWARD_SHORTFALL_FOR_PROMOTION
and self.min_length < 30
):
self.min_length += 1
self.reward_shortfalls = []
@@ -241,12 +248,13 @@ class AlgorithmicEnv(Env):
class TapeAlgorithmicEnv(AlgorithmicEnv):
"""An algorithmic env with a 1-d input tape."""
MOVEMENTS = ['left', 'right']
MOVEMENTS = ["left", "right"]
READ_HEAD_START = 0
def _move(self, movement):
named = self.MOVEMENTS[movement]
self.read_head_position += 1 if named == 'right' else -1
self.read_head_position += 1 if named == "right" else -1
def _get_obs(self, pos=None):
if pos is None:
@@ -269,7 +277,7 @@ class TapeAlgorithmicEnv(AlgorithmicEnv):
for i in range(-2, self.input_width + 2):
if i == x:
x_str += colorize(
self._get_str_obs(np.array([i])), 'green', highlight=True
self._get_str_obs(np.array([i])), "green", highlight=True
)
else:
x_str += self._get_str_obs(np.array([i]))
@@ -279,7 +287,8 @@ class TapeAlgorithmicEnv(AlgorithmicEnv):
class GridAlgorithmicEnv(AlgorithmicEnv):
"""An algorithmic env with a 2-d input grid."""
MOVEMENTS = ['left', 'right', 'up', 'down']
MOVEMENTS = ["left", "right", "up", "down"]
READ_HEAD_START = (0, 0)
def __init__(self, rows, *args, **kwargs):
@@ -289,13 +298,13 @@ class GridAlgorithmicEnv(AlgorithmicEnv):
def _move(self, movement):
named = self.MOVEMENTS[movement]
x, y = self.read_head_position
if named == 'left':
if named == "left":
x -= 1
elif named == 'right':
elif named == "right":
x += 1
elif named == 'up':
elif named == "up":
y -= 1
elif named == 'down':
elif named == "down":
y += 1
else:
raise ValueError("Unrecognized direction: {}".format(named))
@@ -322,13 +331,13 @@ class GridAlgorithmicEnv(AlgorithmicEnv):
x = self.read_head_position
label = "Observation Grid : "
x_str = ""
for j in range(-1, self.rows+1):
for j in range(-1, self.rows + 1):
if j != -1:
x_str += " " * len(label)
for i in range(-2, self.input_width + 2):
if i == x[0] and j == x[1]:
x_str += colorize(
self._get_str_obs((i, j)), 'green', highlight=True
self._get_str_obs((i, j)), "green", highlight=True
)
else:
x_str += self._get_str_obs((i, j))


@@ -21,6 +21,4 @@ class DuplicatedInputEnv(algorithmic_env.TapeAlgorithmicEnv):
return res
def target_from_input_data(self, input_data):
return [
input_data[i] for i in range(0, len(input_data), self.duplication)
]
return [input_data[i] for i in range(0, len(input_data), self.duplication)]


@@ -6,7 +6,7 @@ from gym.envs.algorithmic import algorithmic_env
class RepeatCopyEnv(algorithmic_env.TapeAlgorithmicEnv):
MIN_REWARD_SHORTFALL_FOR_PROMOTION = -.1
MIN_REWARD_SHORTFALL_FOR_PROMOTION = -0.1
def __init__(self, base=5):
super(RepeatCopyEnv, self).__init__(base=base, chars=True)


@@ -6,12 +6,10 @@ from gym.envs.algorithmic import algorithmic_env
class ReverseEnv(algorithmic_env.TapeAlgorithmicEnv):
MIN_REWARD_SHORTFALL_FOR_PROMOTION = -.1
MIN_REWARD_SHORTFALL_FOR_PROMOTION = -0.1
def __init__(self, base=2):
super(ReverseEnv, self).__init__(
base=base, chars=True, starting_min_length=1
)
super(ReverseEnv, self).__init__(base=base, chars=True, starting_min_length=1)
self.last = 50
def target_from_input_data(self, input_str):


@@ -3,9 +3,7 @@ from gym.envs.algorithmic import algorithmic_env
class ReversedAdditionEnv(algorithmic_env.GridAlgorithmicEnv):
def __init__(self, rows=2, base=3):
super(ReversedAdditionEnv, self).__init__(
rows=rows, base=base, chars=False
)
super(ReversedAdditionEnv, self).__init__(rows=rows, base=base, chars=False)
def target_from_input_data(self, input_strings):
curry = 0
@@ -28,4 +26,4 @@ class ReversedAdditionEnv(algorithmic_env.GridAlgorithmicEnv):
# Addition3-v0 unsolvable, since agents aren't even given enough time
# steps to look at all the digits. (The solutions on the scoreboard
# seem to only work by save-scumming.)
return self.input_width*2 + 4
return self.input_width * 2 + 4


@@ -11,12 +11,10 @@ ALL_ENVS = [
alg.reversed_addition.ReversedAdditionEnv,
]
ALL_TAPE_ENVS = [
env for env in ALL_ENVS
if issubclass(env, alg.algorithmic_env.TapeAlgorithmicEnv)
env for env in ALL_ENVS if issubclass(env, alg.algorithmic_env.TapeAlgorithmicEnv)
]
ALL_GRID_ENVS = [
env for env in ALL_ENVS
if issubclass(env, alg.algorithmic_env.GridAlgorithmicEnv)
env for env in ALL_ENVS if issubclass(env, alg.algorithmic_env.GridAlgorithmicEnv)
]
@@ -30,9 +28,10 @@ def imprint(env, input_arr):
class TestAlgorithmicEnvInteractions(unittest.TestCase):
"""Test some generic behaviour not specific to any particular algorithmic
environment. Movement, allocation of rewards, etc."""
CANNED_INPUT = [0, 1]
ENV_KLS = alg.copy_.CopyEnv
LEFT, RIGHT = ENV_KLS._movement_idx('left'), ENV_KLS._movement_idx('right')
LEFT, RIGHT = ENV_KLS._movement_idx("left"), ENV_KLS._movement_idx("right")
def setUp(self):
self.env = self.ENV_KLS(base=2, chars=True)
@@ -66,11 +65,11 @@ class TestAlgorithmicEnvInteractions(unittest.TestCase):
obs, reward, done, _ = self.env.step([self.RIGHT, 1, 1])
self.assertTrue(done)
self.env.reset()
if i < self.env.last-1:
self.assertEqual(len(self.env.reward_shortfalls), i+1)
if i < self.env.last - 1:
self.assertEqual(len(self.env.reward_shortfalls), i + 1)
else:
# Should have leveled up on the last iteration
self.assertEqual(self.env.min_length, min_length+1)
self.assertEqual(self.env.min_length, min_length + 1)
self.assertEqual(len(self.env.reward_shortfalls), 0)
def test_walk_off_the_end(self):
@@ -95,7 +94,7 @@ class TestAlgorithmicEnvInteractions(unittest.TestCase):
env = alg.reversed_addition.ReversedAdditionEnv(rows=2, base=6)
N, S, E, W = [
env._movement_idx(named_dir)
for named_dir in ['up', 'down', 'right', 'left']
for named_dir in ["up", "down", "right", "left"]
]
# Corresponds to a grid that looks like...
# 0 1 2
@@ -105,7 +104,15 @@ class TestAlgorithmicEnvInteractions(unittest.TestCase):
obs = env.reset()
self.assertEqual(obs, 0)
navigation = [
(S, 3), (N, 0), (E, 1), (S, 4), (S, 6), (E, 6), (N, 5), (N, 2), (W, 1)
(S, 3),
(N, 0),
(E, 1),
(S, 4),
(S, 6),
(E, 6),
(N, 5),
(N, 2),
(W, 1),
]
for (movement, expected_obs) in navigation:
obs, reward, done, _ = env.step([movement, 0, 0])
@@ -138,22 +145,23 @@ class TestAlgorithmicEnvInteractions(unittest.TestCase):
def test_rendering(self):
env = self.env
env.reset()
self.assertEqual(env._get_str_obs(), 'A')
self.assertEqual(env._get_str_obs(1), 'B')
self.assertEqual(env._get_str_obs(-1), ' ')
self.assertEqual(env._get_str_obs(2), ' ')
self.assertEqual(env._get_str_target(0), 'A')
self.assertEqual(env._get_str_target(1), 'B')
self.assertEqual(env._get_str_obs(), "A")
self.assertEqual(env._get_str_obs(1), "B")
self.assertEqual(env._get_str_obs(-1), " ")
self.assertEqual(env._get_str_obs(2), " ")
self.assertEqual(env._get_str_target(0), "A")
self.assertEqual(env._get_str_target(1), "B")
# Test numerical alphabet rendering
env = self.ENV_KLS(base=3, chars=False)
imprint(env, self.CANNED_INPUT)
env.reset()
self.assertEqual(env._get_str_obs(), '0')
self.assertEqual(env._get_str_obs(1), '1')
self.assertEqual(env._get_str_obs(), "0")
self.assertEqual(env._get_str_obs(1), "1")
class TestTargets(unittest.TestCase):
"""Test the rules mapping input strings/grids to target outputs."""
def test_reverse_target(self):
input_expected = [
([0], [0]),
@@ -175,9 +183,7 @@ class TestTargets(unittest.TestCase):
([[2, 1], [1, 1], [1, 1], [1, 0]], [0, 0, 0, 2]),
]
for (input_grid, expected_target) in input_expected:
self.assertEqual(
env.target_from_input_data(input_grid), expected_target
)
self.assertEqual(env.target_from_input_data(input_grid), expected_target)
def test_reversed_addition_3rows(self):
env = alg.reversed_addition.ReversedAdditionEnv(base=3, rows=3)
@@ -186,9 +192,7 @@ class TestTargets(unittest.TestCase):
([[1, 1, 2], [0, 1, 1]], [1, 0, 1]),
]
for (input_grid, expected_target) in input_expected:
self.assertEqual(
env.target_from_input_data(input_grid), expected_target
)
self.assertEqual(env.target_from_input_data(input_grid), expected_target)
def test_copy_target(self):
env = alg.copy_.CopyEnv()
@@ -196,9 +200,7 @@ class TestTargets(unittest.TestCase):
def test_duplicated_input_target(self):
env = alg.duplicated_input.DuplicatedInputEnv(duplication=2)
self.assertEqual(
env.target_from_input_data([0, 0, 0, 0, 1, 1]), [0, 0, 1]
)
self.assertEqual(env.target_from_input_data([0, 0, 0, 0, 1, 1]), [0, 0, 1])
def test_repeat_copy_target(self):
env = alg.repeat_copy.RepeatCopyEnv()
@@ -208,8 +210,8 @@ class TestTargets(unittest.TestCase):
class TestInputGeneration(unittest.TestCase):
"""Test random input generation.
"""
"""Test random input generation."""
def test_tape_inputs(self):
for env_kls in ALL_TAPE_ENVS:
env = env_kls()
@@ -217,9 +219,7 @@ class TestInputGeneration(unittest.TestCase):
input_tape = env.generate_input_data(size)
self.assertTrue(
all(0 <= x <= env.base for x in input_tape),
"Invalid input tape from env {}: {}".format(
env_kls, input_tape
)
"Invalid input tape from env {}: {}".format(env_kls, input_tape),
)
# DuplicatedInput needs to generate inputs with even length,
# so it may be short one
@@ -260,5 +260,5 @@ class TestInputGeneration(unittest.TestCase):
self.assertEqual(input_tape[1], input_tape[2])
if __name__ == '__main__':
if __name__ == "__main__":
unittest.main()
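The algorithmic environments above take a three-part action: move the read head, an optional write flag, and a predicted symbol. An illustrative sketch (not part of this diff):

from gym.envs.algorithmic import copy_

env = copy_.CopyEnv(base=5, chars=True)
obs = env.reset()

right = env._movement_idx("right")  # index of the "right" movement
action = [right, 0, 0]              # move right, write nothing, dummy prediction
obs, reward, done, info = env.step(action)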


@@ -9,8 +9,9 @@ try:
import atari_py
except ImportError as e:
raise error.DependencyNotInstalled(
"{}. (HINT: you can install Atari dependencies by running "
"'pip install gym[atari]'.)".format(e))
"{}. (HINT: you can install Atari dependencies by running "
"'pip install gym[atari]'.)".format(e)
)
def to_ram(ale):
@@ -21,30 +22,32 @@ def to_ram(ale):
class AtariEnv(gym.Env, utils.EzPickle):
metadata = {'render.modes': ['human', 'rgb_array']}
metadata = {"render.modes": ["human", "rgb_array"]}
def __init__(
self,
game='pong',
mode=None,
difficulty=None,
obs_type='ram',
frameskip=(2, 5),
repeat_action_probability=0.,
full_action_space=False):
self,
game="pong",
mode=None,
difficulty=None,
obs_type="ram",
frameskip=(2, 5),
repeat_action_probability=0.0,
full_action_space=False,
):
"""Frameskip should be either a tuple (indicating a random range to
choose from, with the top value exclude), or an int."""
utils.EzPickle.__init__(
self,
game,
mode,
difficulty,
obs_type,
frameskip,
repeat_action_probability,
full_action_space)
assert obs_type in ('ram', 'image')
self,
game,
mode,
difficulty,
obs_type,
frameskip,
repeat_action_probability,
full_action_space,
)
assert obs_type in ("ram", "image")
self.game = game
self.game_path = atari_py.get_game_path(game)
@@ -52,7 +55,7 @@ class AtariEnv(gym.Env, utils.EzPickle):
self.game_difficulty = difficulty
if not os.path.exists(self.game_path):
msg = 'You asked for game %s but path %s does not exist'
msg = "You asked for game %s but path %s does not exist"
raise IOError(msg % (game, self.game_path))
self._obs_type = obs_type
self.frameskip = frameskip
@@ -61,41 +64,51 @@ class AtariEnv(gym.Env, utils.EzPickle):
# Tune (or disable) ALE's action repeat:
# https://github.com/openai/gym/issues/349
assert isinstance(repeat_action_probability, (float, int)), \
"Invalid repeat_action_probability: {!r}".format(repeat_action_probability)
assert isinstance(
repeat_action_probability, (float, int)
), "Invalid repeat_action_probability: {!r}".format(repeat_action_probability)
self.ale.setFloat(
'repeat_action_probability'.encode('utf-8'),
repeat_action_probability)
"repeat_action_probability".encode("utf-8"), repeat_action_probability
)
self.seed()
self._action_set = (self.ale.getLegalActionSet() if full_action_space
else self.ale.getMinimalActionSet())
self._action_set = (
self.ale.getLegalActionSet()
if full_action_space
else self.ale.getMinimalActionSet()
)
self.action_space = spaces.Discrete(len(self._action_set))
(screen_width, screen_height) = self.ale.getScreenDims()
if self._obs_type == 'ram':
self.observation_space = spaces.Box(low=0, high=255, dtype=np.uint8, shape=(128,))
elif self._obs_type == 'image':
self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3), dtype=np.uint8)
if self._obs_type == "ram":
self.observation_space = spaces.Box(
low=0, high=255, dtype=np.uint8, shape=(128,)
)
elif self._obs_type == "image":
self.observation_space = spaces.Box(
low=0, high=255, shape=(screen_height, screen_width, 3), dtype=np.uint8
)
else:
raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))
raise error.Error(
"Unrecognized observation type: {}".format(self._obs_type)
)
def seed(self, seed=None):
self.np_random, seed1 = seeding.np_random(seed)
# Derive a random seed. This gets passed as a uint, but gets
# checked as an int elsewhere, so we need to keep it below
# 2**31.
seed2 = seeding.hash_seed(seed1 + 1) % 2**31
seed2 = seeding.hash_seed(seed1 + 1) % 2 ** 31
# Empirically, we need to seed before loading the ROM.
self.ale.setInt(b'random_seed', seed2)
self.ale.setInt(b"random_seed", seed2)
self.ale.loadROM(self.game_path)
if self.game_mode is not None:
modes = self.ale.getAvailableModes()
assert self.game_mode in modes, (
"Invalid game mode \"{}\" for game {}.\nAvailable modes are: {}"
'Invalid game mode "{}" for game {}.\nAvailable modes are: {}'
).format(self.game_mode, self.game, modes)
self.ale.setMode(self.game_mode)
@@ -103,7 +116,7 @@ class AtariEnv(gym.Env, utils.EzPickle):
difficulties = self.ale.getAvailableDifficulties()
assert self.game_difficulty in difficulties, (
"Invalid game difficulty \"{}\" for game {}.\nAvailable difficulties are: {}"
'Invalid game difficulty "{}" for game {}.\nAvailable difficulties are: {}'
).format(self.game_difficulty, self.game, difficulties)
self.ale.setDifficulty(self.game_difficulty)
@@ -134,9 +147,9 @@ class AtariEnv(gym.Env, utils.EzPickle):
return len(self._action_set)
def _get_obs(self):
if self._obs_type == 'ram':
if self._obs_type == "ram":
return self._get_ram()
elif self._obs_type == 'image':
elif self._obs_type == "image":
img = self._get_image()
return img
@@ -145,12 +158,13 @@ class AtariEnv(gym.Env, utils.EzPickle):
self.ale.reset_game()
return self._get_obs()
def render(self, mode='human'):
def render(self, mode="human"):
img = self._get_image()
if mode == 'rgb_array':
if mode == "rgb_array":
return img
elif mode == 'human':
elif mode == "human":
from gym.envs.classic_control import rendering
if self.viewer is None:
self.viewer = rendering.SimpleImageViewer()
self.viewer.imshow(img)
@@ -166,11 +180,11 @@ class AtariEnv(gym.Env, utils.EzPickle):
def get_keys_to_action(self):
KEYWORD_TO_KEY = {
'UP': ord('w'),
'DOWN': ord('s'),
'LEFT': ord('a'),
'RIGHT': ord('d'),
'FIRE': ord(' '),
"UP": ord("w"),
"DOWN": ord("s"),
"LEFT": ord("a"),
"RIGHT": ord("d"),
"FIRE": ord(" "),
}
keys_to_action = {}


@@ -3,7 +3,14 @@ import math
import numpy as np
import Box2D
from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener)
from Box2D.b2 import (
edgeShape,
circleShape,
fixtureDef,
polygonShape,
revoluteJointDef,
contactListener,
)
import gym
from gym import spaces
@@ -37,75 +44,79 @@ from gym.utils import colorize, seeding, EzPickle
#
# Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
FPS = 50
SCALE = 30.0 # affects how fast-paced the game is, forces should be adjusted as well
FPS = 50
SCALE = 30.0 # affects how fast-paced the game is, forces should be adjusted as well
MOTORS_TORQUE = 80
SPEED_HIP = 4
SPEED_KNEE = 6
LIDAR_RANGE = 160/SCALE
SPEED_HIP = 4
SPEED_KNEE = 6
LIDAR_RANGE = 160 / SCALE
INITIAL_RANDOM = 5
HULL_POLY =[
(-30,+9), (+6,+9), (+34,+1),
(+34,-8), (-30,-8)
]
LEG_DOWN = -8/SCALE
LEG_W, LEG_H = 8/SCALE, 34/SCALE
HULL_POLY = [(-30, +9), (+6, +9), (+34, +1), (+34, -8), (-30, -8)]
LEG_DOWN = -8 / SCALE
LEG_W, LEG_H = 8 / SCALE, 34 / SCALE
VIEWPORT_W = 600
VIEWPORT_H = 400
TERRAIN_STEP = 14/SCALE
TERRAIN_LENGTH = 200 # in steps
TERRAIN_HEIGHT = VIEWPORT_H/SCALE/4
TERRAIN_GRASS = 10 # low long are grass spots, in steps
TERRAIN_STARTPAD = 20 # in steps
TERRAIN_STEP = 14 / SCALE
TERRAIN_LENGTH = 200 # in steps
TERRAIN_HEIGHT = VIEWPORT_H / SCALE / 4
TERRAIN_GRASS = 10 # low long are grass spots, in steps
TERRAIN_STARTPAD = 20 # in steps
FRICTION = 2.5
HULL_FD = fixtureDef(
shape=polygonShape(vertices=[ (x/SCALE,y/SCALE) for x,y in HULL_POLY ]),
density=5.0,
friction=0.1,
categoryBits=0x0020,
maskBits=0x001, # collide only with ground
restitution=0.0) # 0.99 bouncy
shape=polygonShape(vertices=[(x / SCALE, y / SCALE) for x, y in HULL_POLY]),
density=5.0,
friction=0.1,
categoryBits=0x0020,
maskBits=0x001, # collide only with ground
restitution=0.0,
) # 0.99 bouncy
LEG_FD = fixtureDef(
shape=polygonShape(box=(LEG_W/2, LEG_H/2)),
density=1.0,
restitution=0.0,
categoryBits=0x0020,
maskBits=0x001)
shape=polygonShape(box=(LEG_W / 2, LEG_H / 2)),
density=1.0,
restitution=0.0,
categoryBits=0x0020,
maskBits=0x001,
)
LOWER_FD = fixtureDef(
shape=polygonShape(box=(0.8*LEG_W/2, LEG_H/2)),
density=1.0,
restitution=0.0,
categoryBits=0x0020,
maskBits=0x001)
shape=polygonShape(box=(0.8 * LEG_W / 2, LEG_H / 2)),
density=1.0,
restitution=0.0,
categoryBits=0x0020,
maskBits=0x001,
)
class ContactDetector(contactListener):
def __init__(self, env):
contactListener.__init__(self)
self.env = env
def BeginContact(self, contact):
if self.env.hull==contact.fixtureA.body or self.env.hull==contact.fixtureB.body:
if (
self.env.hull == contact.fixtureA.body
or self.env.hull == contact.fixtureB.body
):
self.env.game_over = True
for leg in [self.env.legs[1], self.env.legs[3]]:
if leg in [contact.fixtureA.body, contact.fixtureB.body]:
leg.ground_contact = True
def EndContact(self, contact):
for leg in [self.env.legs[1], self.env.legs[3]]:
if leg in [contact.fixtureA.body, contact.fixtureB.body]:
leg.ground_contact = False
class BipedalWalker(gym.Env, EzPickle):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second' : FPS
}
metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": FPS}
hardcore = False
@@ -121,25 +132,23 @@ class BipedalWalker(gym.Env, EzPickle):
self.prev_shaping = None
self.fd_polygon = fixtureDef(
shape = polygonShape(vertices=
[(0, 0),
(1, 0),
(1, -1),
(0, -1)]),
friction = FRICTION)
shape=polygonShape(vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]),
friction=FRICTION,
)
self.fd_edge = fixtureDef(
shape = edgeShape(vertices=
[(0, 0),
(1, 1)]),
friction = FRICTION,
categoryBits=0x0001,
)
shape=edgeShape(vertices=[(0, 0), (1, 1)]),
friction=FRICTION,
categoryBits=0x0001,
)
self.reset()
high = np.array([np.inf] * 24).astype(np.float32)
self.action_space = spaces.Box(np.array([-1, -1, -1, -1]).astype(np.float32), np.array([1, 1, 1, 1]).astype(np.float32))
self.action_space = spaces.Box(
np.array([-1, -1, -1, -1]).astype(np.float32),
np.array([1, 1, 1, 1]).astype(np.float32),
)
self.observation_space = spaces.Box(-high, high)
def seed(self, seed=None):
@@ -147,7 +156,8 @@ class BipedalWalker(gym.Env, EzPickle):
return [seed]
def _destroy(self):
if not self.terrain: return
if not self.terrain:
return
self.world.contactListener = None
for t in self.terrain:
self.world.DestroyBody(t)
@@ -161,94 +171,105 @@ class BipedalWalker(gym.Env, EzPickle):
def _generate_terrain(self, hardcore):
GRASS, STUMP, STAIRS, PIT, _STATES_ = range(5)
state = GRASS
state = GRASS
velocity = 0.0
y = TERRAIN_HEIGHT
counter = TERRAIN_STARTPAD
oneshot = False
self.terrain = []
y = TERRAIN_HEIGHT
counter = TERRAIN_STARTPAD
oneshot = False
self.terrain = []
self.terrain_x = []
self.terrain_y = []
for i in range(TERRAIN_LENGTH):
x = i*TERRAIN_STEP
x = i * TERRAIN_STEP
self.terrain_x.append(x)
if state==GRASS and not oneshot:
velocity = 0.8*velocity + 0.01*np.sign(TERRAIN_HEIGHT - y)
if i > TERRAIN_STARTPAD: velocity += self.np_random.uniform(-1, 1)/SCALE #1
if state == GRASS and not oneshot:
velocity = 0.8 * velocity + 0.01 * np.sign(TERRAIN_HEIGHT - y)
if i > TERRAIN_STARTPAD:
velocity += self.np_random.uniform(-1, 1) / SCALE # 1
y += velocity
elif state==PIT and oneshot:
elif state == PIT and oneshot:
counter = self.np_random.randint(3, 5)
poly = [
(x, y),
(x+TERRAIN_STEP, y),
(x+TERRAIN_STEP, y-4*TERRAIN_STEP),
(x, y-4*TERRAIN_STEP),
]
self.fd_polygon.shape.vertices=poly
t = self.world.CreateStaticBody(
fixtures = self.fd_polygon)
t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
(x, y),
(x + TERRAIN_STEP, y),
(x + TERRAIN_STEP, y - 4 * TERRAIN_STEP),
(x, y - 4 * TERRAIN_STEP),
]
self.fd_polygon.shape.vertices = poly
t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
t.color1, t.color2 = (1, 1, 1), (0.6, 0.6, 0.6)
self.terrain.append(t)
self.fd_polygon.shape.vertices=[(p[0]+TERRAIN_STEP*counter,p[1]) for p in poly]
t = self.world.CreateStaticBody(
fixtures = self.fd_polygon)
t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
self.fd_polygon.shape.vertices = [
(p[0] + TERRAIN_STEP * counter, p[1]) for p in poly
]
t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
t.color1, t.color2 = (1, 1, 1), (0.6, 0.6, 0.6)
self.terrain.append(t)
counter += 2
original_y = y
elif state==PIT and not oneshot:
elif state == PIT and not oneshot:
y = original_y
if counter > 1:
y -= 4*TERRAIN_STEP
y -= 4 * TERRAIN_STEP
elif state==STUMP and oneshot:
elif state == STUMP and oneshot:
counter = self.np_random.randint(1, 3)
poly = [
(x, y),
(x+counter*TERRAIN_STEP, y),
(x+counter*TERRAIN_STEP, y+counter*TERRAIN_STEP),
(x, y+counter*TERRAIN_STEP),
]
self.fd_polygon.shape.vertices=poly
t = self.world.CreateStaticBody(
fixtures = self.fd_polygon)
t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
(x, y),
(x + counter * TERRAIN_STEP, y),
(x + counter * TERRAIN_STEP, y + counter * TERRAIN_STEP),
(x, y + counter * TERRAIN_STEP),
]
self.fd_polygon.shape.vertices = poly
t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
t.color1, t.color2 = (1, 1, 1), (0.6, 0.6, 0.6)
self.terrain.append(t)
elif state==STAIRS and oneshot:
elif state == STAIRS and oneshot:
stair_height = +1 if self.np_random.rand() > 0.5 else -1
stair_width = self.np_random.randint(4, 5)
stair_steps = self.np_random.randint(3, 5)
original_y = y
for s in range(stair_steps):
poly = [
(x+( s*stair_width)*TERRAIN_STEP, y+( s*stair_height)*TERRAIN_STEP),
(x+((1+s)*stair_width)*TERRAIN_STEP, y+( s*stair_height)*TERRAIN_STEP),
(x+((1+s)*stair_width)*TERRAIN_STEP, y+(-1+s*stair_height)*TERRAIN_STEP),
(x+( s*stair_width)*TERRAIN_STEP, y+(-1+s*stair_height)*TERRAIN_STEP),
]
self.fd_polygon.shape.vertices=poly
t = self.world.CreateStaticBody(
fixtures = self.fd_polygon)
t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
(
x + (s * stair_width) * TERRAIN_STEP,
y + (s * stair_height) * TERRAIN_STEP,
),
(
x + ((1 + s) * stair_width) * TERRAIN_STEP,
y + (s * stair_height) * TERRAIN_STEP,
),
(
x + ((1 + s) * stair_width) * TERRAIN_STEP,
y + (-1 + s * stair_height) * TERRAIN_STEP,
),
(
x + (s * stair_width) * TERRAIN_STEP,
y + (-1 + s * stair_height) * TERRAIN_STEP,
),
]
self.fd_polygon.shape.vertices = poly
t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
t.color1, t.color2 = (1, 1, 1), (0.6, 0.6, 0.6)
self.terrain.append(t)
counter = stair_steps*stair_width
counter = stair_steps * stair_width
elif state==STAIRS and not oneshot:
s = stair_steps*stair_width - counter - stair_height
n = s/stair_width
y = original_y + (n*stair_height)*TERRAIN_STEP
elif state == STAIRS and not oneshot:
s = stair_steps * stair_width - counter - stair_height
n = s / stair_width
y = original_y + (n * stair_height) * TERRAIN_STEP
oneshot = False
self.terrain_y.append(y)
counter -= 1
if counter==0:
counter = self.np_random.randint(TERRAIN_GRASS/2, TERRAIN_GRASS)
if state==GRASS and hardcore:
if counter == 0:
counter = self.np_random.randint(TERRAIN_GRASS / 2, TERRAIN_GRASS)
if state == GRASS and hardcore:
state = self.np_random.randint(1, _STATES_)
oneshot = True
else:
@@ -256,36 +277,42 @@ class BipedalWalker(gym.Env, EzPickle):
oneshot = True
self.terrain_poly = []
for i in range(TERRAIN_LENGTH-1):
for i in range(TERRAIN_LENGTH - 1):
poly = [
(self.terrain_x[i], self.terrain_y[i]),
(self.terrain_x[i+1], self.terrain_y[i+1])
]
self.fd_edge.shape.vertices=poly
t = self.world.CreateStaticBody(
fixtures = self.fd_edge)
color = (0.3, 1.0 if i%2==0 else 0.8, 0.3)
(self.terrain_x[i], self.terrain_y[i]),
(self.terrain_x[i + 1], self.terrain_y[i + 1]),
]
self.fd_edge.shape.vertices = poly
t = self.world.CreateStaticBody(fixtures=self.fd_edge)
color = (0.3, 1.0 if i % 2 == 0 else 0.8, 0.3)
t.color1 = color
t.color2 = color
self.terrain.append(t)
color = (0.4, 0.6, 0.3)
poly += [ (poly[1][0], 0), (poly[0][0], 0) ]
self.terrain_poly.append( (poly, color) )
poly += [(poly[1][0], 0), (poly[0][0], 0)]
self.terrain_poly.append((poly, color))
self.terrain.reverse()
def _generate_clouds(self):
# Sorry for the clouds, couldn't resist
self.cloud_poly = []
for i in range(TERRAIN_LENGTH//20):
x = self.np_random.uniform(0, TERRAIN_LENGTH)*TERRAIN_STEP
y = VIEWPORT_H/SCALE*3/4
self.cloud_poly = []
for i in range(TERRAIN_LENGTH // 20):
x = self.np_random.uniform(0, TERRAIN_LENGTH) * TERRAIN_STEP
y = VIEWPORT_H / SCALE * 3 / 4
poly = [
(x+15*TERRAIN_STEP*math.sin(3.14*2*a/5)+self.np_random.uniform(0,5*TERRAIN_STEP),
y+ 5*TERRAIN_STEP*math.cos(3.14*2*a/5)+self.np_random.uniform(0,5*TERRAIN_STEP) )
for a in range(5) ]
x1 = min( [p[0] for p in poly] )
x2 = max( [p[0] for p in poly] )
self.cloud_poly.append( (poly,x1,x2) )
(
x
+ 15 * TERRAIN_STEP * math.sin(3.14 * 2 * a / 5)
+ self.np_random.uniform(0, 5 * TERRAIN_STEP),
y
+ 5 * TERRAIN_STEP * math.cos(3.14 * 2 * a / 5)
+ self.np_random.uniform(0, 5 * TERRAIN_STEP),
)
for a in range(5)
]
x1 = min([p[0] for p in poly])
x2 = max([p[0] for p in poly])
self.cloud_poly.append((poly, x1, x2))
def reset(self):
self._destroy()
@@ -296,66 +323,67 @@ class BipedalWalker(gym.Env, EzPickle):
self.scroll = 0.0
self.lidar_render = 0
W = VIEWPORT_W/SCALE
H = VIEWPORT_H/SCALE
W = VIEWPORT_W / SCALE
H = VIEWPORT_H / SCALE
self._generate_terrain(self.hardcore)
self._generate_clouds()
init_x = TERRAIN_STEP*TERRAIN_STARTPAD/2
init_y = TERRAIN_HEIGHT+2*LEG_H
init_x = TERRAIN_STEP * TERRAIN_STARTPAD / 2
init_y = TERRAIN_HEIGHT + 2 * LEG_H
self.hull = self.world.CreateDynamicBody(
position = (init_x, init_y),
fixtures = HULL_FD
)
self.hull.color1 = (0.5,0.4,0.9)
self.hull.color2 = (0.3,0.3,0.5)
self.hull.ApplyForceToCenter((self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), 0), True)
position=(init_x, init_y), fixtures=HULL_FD
)
self.hull.color1 = (0.5, 0.4, 0.9)
self.hull.color2 = (0.3, 0.3, 0.5)
self.hull.ApplyForceToCenter(
(self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), 0), True
)
self.legs = []
self.joints = []
for i in [-1,+1]:
for i in [-1, +1]:
leg = self.world.CreateDynamicBody(
position = (init_x, init_y - LEG_H/2 - LEG_DOWN),
angle = (i*0.05),
fixtures = LEG_FD
)
leg.color1 = (0.6-i/10., 0.3-i/10., 0.5-i/10.)
leg.color2 = (0.4-i/10., 0.2-i/10., 0.3-i/10.)
position=(init_x, init_y - LEG_H / 2 - LEG_DOWN),
angle=(i * 0.05),
fixtures=LEG_FD,
)
leg.color1 = (0.6 - i / 10.0, 0.3 - i / 10.0, 0.5 - i / 10.0)
leg.color2 = (0.4 - i / 10.0, 0.2 - i / 10.0, 0.3 - i / 10.0)
rjd = revoluteJointDef(
bodyA=self.hull,
bodyB=leg,
localAnchorA=(0, LEG_DOWN),
localAnchorB=(0, LEG_H/2),
localAnchorB=(0, LEG_H / 2),
enableMotor=True,
enableLimit=True,
maxMotorTorque=MOTORS_TORQUE,
motorSpeed = i,
lowerAngle = -0.8,
upperAngle = 1.1,
)
motorSpeed=i,
lowerAngle=-0.8,
upperAngle=1.1,
)
self.legs.append(leg)
self.joints.append(self.world.CreateJoint(rjd))
lower = self.world.CreateDynamicBody(
position = (init_x, init_y - LEG_H*3/2 - LEG_DOWN),
angle = (i*0.05),
fixtures = LOWER_FD
)
lower.color1 = (0.6-i/10., 0.3-i/10., 0.5-i/10.)
lower.color2 = (0.4-i/10., 0.2-i/10., 0.3-i/10.)
position=(init_x, init_y - LEG_H * 3 / 2 - LEG_DOWN),
angle=(i * 0.05),
fixtures=LOWER_FD,
)
lower.color1 = (0.6 - i / 10.0, 0.3 - i / 10.0, 0.5 - i / 10.0)
lower.color2 = (0.4 - i / 10.0, 0.2 - i / 10.0, 0.3 - i / 10.0)
rjd = revoluteJointDef(
bodyA=leg,
bodyB=lower,
localAnchorA=(0, -LEG_H/2),
localAnchorB=(0, LEG_H/2),
localAnchorA=(0, -LEG_H / 2),
localAnchorB=(0, LEG_H / 2),
enableMotor=True,
enableLimit=True,
maxMotorTorque=MOTORS_TORQUE,
motorSpeed = 1,
lowerAngle = -1.6,
upperAngle = -0.1,
)
motorSpeed=1,
lowerAngle=-1.6,
upperAngle=-0.1,
)
lower.ground_contact = False
self.legs.append(lower)
self.joints.append(self.world.CreateJoint(rjd))
@@ -369,29 +397,38 @@ class BipedalWalker(gym.Env, EzPickle):
self.p2 = point
self.fraction = fraction
return fraction
self.lidar = [LidarCallback() for _ in range(10)]
return self.step(np.array([0,0,0,0]))[0]
return self.step(np.array([0, 0, 0, 0]))[0]
def step(self, action):
#self.hull.ApplyForceToCenter((0, 20), True) -- Uncomment this to receive a bit of stability help
# self.hull.ApplyForceToCenter((0, 20), True) -- Uncomment this to receive a bit of stability help
control_speed = False # Should be easier as well
if control_speed:
self.joints[0].motorSpeed = float(SPEED_HIP * np.clip(action[0], -1, 1))
self.joints[0].motorSpeed = float(SPEED_HIP * np.clip(action[0], -1, 1))
self.joints[1].motorSpeed = float(SPEED_KNEE * np.clip(action[1], -1, 1))
self.joints[2].motorSpeed = float(SPEED_HIP * np.clip(action[2], -1, 1))
self.joints[2].motorSpeed = float(SPEED_HIP * np.clip(action[2], -1, 1))
self.joints[3].motorSpeed = float(SPEED_KNEE * np.clip(action[3], -1, 1))
else:
self.joints[0].motorSpeed = float(SPEED_HIP * np.sign(action[0]))
self.joints[0].maxMotorTorque = float(MOTORS_TORQUE * np.clip(np.abs(action[0]), 0, 1))
self.joints[1].motorSpeed = float(SPEED_KNEE * np.sign(action[1]))
self.joints[1].maxMotorTorque = float(MOTORS_TORQUE * np.clip(np.abs(action[1]), 0, 1))
self.joints[2].motorSpeed = float(SPEED_HIP * np.sign(action[2]))
self.joints[2].maxMotorTorque = float(MOTORS_TORQUE * np.clip(np.abs(action[2]), 0, 1))
self.joints[3].motorSpeed = float(SPEED_KNEE * np.sign(action[3]))
self.joints[3].maxMotorTorque = float(MOTORS_TORQUE * np.clip(np.abs(action[3]), 0, 1))
self.joints[0].motorSpeed = float(SPEED_HIP * np.sign(action[0]))
self.joints[0].maxMotorTorque = float(
MOTORS_TORQUE * np.clip(np.abs(action[0]), 0, 1)
)
self.joints[1].motorSpeed = float(SPEED_KNEE * np.sign(action[1]))
self.joints[1].maxMotorTorque = float(
MOTORS_TORQUE * np.clip(np.abs(action[1]), 0, 1)
)
self.joints[2].motorSpeed = float(SPEED_HIP * np.sign(action[2]))
self.joints[2].maxMotorTorque = float(
MOTORS_TORQUE * np.clip(np.abs(action[2]), 0, 1)
)
self.joints[3].motorSpeed = float(SPEED_KNEE * np.sign(action[3]))
self.joints[3].maxMotorTorque = float(
MOTORS_TORQUE * np.clip(np.abs(action[3]), 0, 1)
)
self.world.Step(1.0/FPS, 6*30, 2*30)
self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
pos = self.hull.position
vel = self.hull.linearVelocity
@@ -400,16 +437,19 @@ class BipedalWalker(gym.Env, EzPickle):
self.lidar[i].fraction = 1.0
self.lidar[i].p1 = pos
self.lidar[i].p2 = (
pos[0] + math.sin(1.5*i/10.0)*LIDAR_RANGE,
pos[1] - math.cos(1.5*i/10.0)*LIDAR_RANGE)
pos[0] + math.sin(1.5 * i / 10.0) * LIDAR_RANGE,
pos[1] - math.cos(1.5 * i / 10.0) * LIDAR_RANGE,
)
self.world.RayCast(self.lidar[i], self.lidar[i].p1, self.lidar[i].p2)
state = [
self.hull.angle, # Normal angles up to 0.5 here, but sure more is possible.
2.0*self.hull.angularVelocity/FPS,
0.3*vel.x*(VIEWPORT_W/SCALE)/FPS, # Normalized to get -1..1 range
0.3*vel.y*(VIEWPORT_H/SCALE)/FPS,
self.joints[0].angle, # This will give 1.1 on high up, but it's still OK (and there should be spikes on hiting the ground, that's normal too)
self.hull.angle, # Normal angles up to 0.5 here, but sure more is possible.
2.0 * self.hull.angularVelocity / FPS,
0.3 * vel.x * (VIEWPORT_W / SCALE) / FPS, # Normalized to get -1..1 range
0.3 * vel.y * (VIEWPORT_H / SCALE) / FPS,
self.joints[
0
].angle, # This will give 1.1 on high up, but it's still OK (and there should be spikes on hiting the ground, that's normal too)
self.joints[0].speed / SPEED_HIP,
self.joints[1].angle + 1.0,
self.joints[1].speed / SPEED_KNEE,
@@ -418,15 +458,19 @@ class BipedalWalker(gym.Env, EzPickle):
self.joints[2].speed / SPEED_HIP,
self.joints[3].angle + 1.0,
self.joints[3].speed / SPEED_KNEE,
1.0 if self.legs[3].ground_contact else 0.0
]
1.0 if self.legs[3].ground_contact else 0.0,
]
state += [l.fraction for l in self.lidar]
assert len(state)==24
assert len(state) == 24
self.scroll = pos.x - VIEWPORT_W/SCALE/5
self.scroll = pos.x - VIEWPORT_W / SCALE / 5
shaping = 130*pos[0]/SCALE # moving forward is a way to receive reward (normalized to get 300 on completion)
shaping -= 5.0*abs(state[0]) # keep head straight, other than that and falling, any behavior is unpunished
shaping = (
130 * pos[0] / SCALE
) # moving forward is a way to receive reward (normalized to get 300 on completion)
shaping -= 5.0 * abs(
state[0]
) # keep head straight, other than that and falling, any behavior is unpunished
reward = 0
if self.prev_shaping is not None:
@@ -440,77 +484,105 @@ class BipedalWalker(gym.Env, EzPickle):
done = False
if self.game_over or pos[0] < 0:
reward = -100
done = True
if pos[0] > (TERRAIN_LENGTH-TERRAIN_GRASS)*TERRAIN_STEP:
done = True
done = True
if pos[0] > (TERRAIN_LENGTH - TERRAIN_GRASS) * TERRAIN_STEP:
done = True
return np.array(state), reward, done, {}
def render(self, mode='human'):
def render(self, mode="human"):
from gym.envs.classic_control import rendering
if self.viewer is None:
self.viewer = rendering.Viewer(VIEWPORT_W, VIEWPORT_H)
self.viewer.set_bounds(self.scroll, VIEWPORT_W/SCALE + self.scroll, 0, VIEWPORT_H/SCALE)
self.viewer.set_bounds(
self.scroll, VIEWPORT_W / SCALE + self.scroll, 0, VIEWPORT_H / SCALE
)
self.viewer.draw_polygon( [
(self.scroll, 0),
(self.scroll+VIEWPORT_W/SCALE, 0),
(self.scroll+VIEWPORT_W/SCALE, VIEWPORT_H/SCALE),
(self.scroll, VIEWPORT_H/SCALE),
], color=(0.9, 0.9, 1.0) )
for poly,x1,x2 in self.cloud_poly:
if x2 < self.scroll/2: continue
if x1 > self.scroll/2 + VIEWPORT_W/SCALE: continue
self.viewer.draw_polygon( [(p[0]+self.scroll/2, p[1]) for p in poly], color=(1,1,1))
self.viewer.draw_polygon(
[
(self.scroll, 0),
(self.scroll + VIEWPORT_W / SCALE, 0),
(self.scroll + VIEWPORT_W / SCALE, VIEWPORT_H / SCALE),
(self.scroll, VIEWPORT_H / SCALE),
],
color=(0.9, 0.9, 1.0),
)
for poly, x1, x2 in self.cloud_poly:
if x2 < self.scroll / 2:
continue
if x1 > self.scroll / 2 + VIEWPORT_W / SCALE:
continue
self.viewer.draw_polygon(
[(p[0] + self.scroll / 2, p[1]) for p in poly], color=(1, 1, 1)
)
for poly, color in self.terrain_poly:
if poly[1][0] < self.scroll: continue
if poly[0][0] > self.scroll + VIEWPORT_W/SCALE: continue
if poly[1][0] < self.scroll:
continue
if poly[0][0] > self.scroll + VIEWPORT_W / SCALE:
continue
self.viewer.draw_polygon(poly, color=color)
self.lidar_render = (self.lidar_render+1) % 100
self.lidar_render = (self.lidar_render + 1) % 100
i = self.lidar_render
if i < 2*len(self.lidar):
l = self.lidar[i] if i < len(self.lidar) else self.lidar[len(self.lidar)-i-1]
self.viewer.draw_polyline( [l.p1, l.p2], color=(1,0,0), linewidth=1 )
if i < 2 * len(self.lidar):
l = (
self.lidar[i]
if i < len(self.lidar)
else self.lidar[len(self.lidar) - i - 1]
)
self.viewer.draw_polyline([l.p1, l.p2], color=(1, 0, 0), linewidth=1)
for obj in self.drawlist:
for f in obj.fixtures:
trans = f.body.transform
if type(f.shape) is circleShape:
t = rendering.Transform(translation=trans*f.shape.pos)
self.viewer.draw_circle(f.shape.radius, 30, color=obj.color1).add_attr(t)
self.viewer.draw_circle(f.shape.radius, 30, color=obj.color2, filled=False, linewidth=2).add_attr(t)
t = rendering.Transform(translation=trans * f.shape.pos)
self.viewer.draw_circle(
f.shape.radius, 30, color=obj.color1
).add_attr(t)
self.viewer.draw_circle(
f.shape.radius, 30, color=obj.color2, filled=False, linewidth=2
).add_attr(t)
else:
path = [trans*v for v in f.shape.vertices]
path = [trans * v for v in f.shape.vertices]
self.viewer.draw_polygon(path, color=obj.color1)
path.append(path[0])
self.viewer.draw_polyline(path, color=obj.color2, linewidth=2)
flagy1 = TERRAIN_HEIGHT
flagy2 = flagy1 + 50/SCALE
x = TERRAIN_STEP*3
self.viewer.draw_polyline( [(x, flagy1), (x, flagy2)], color=(0,0,0), linewidth=2 )
f = [(x, flagy2), (x, flagy2-10/SCALE), (x+25/SCALE, flagy2-5/SCALE)]
self.viewer.draw_polygon(f, color=(0.9,0.2,0) )
self.viewer.draw_polyline(f + [f[0]], color=(0,0,0), linewidth=2 )
flagy2 = flagy1 + 50 / SCALE
x = TERRAIN_STEP * 3
self.viewer.draw_polyline(
[(x, flagy1), (x, flagy2)], color=(0, 0, 0), linewidth=2
)
f = [
(x, flagy2),
(x, flagy2 - 10 / SCALE),
(x + 25 / SCALE, flagy2 - 5 / SCALE),
]
self.viewer.draw_polygon(f, color=(0.9, 0.2, 0))
self.viewer.draw_polyline(f + [f[0]], color=(0, 0, 0), linewidth=2)
return self.viewer.render(return_rgb_array = mode=='rgb_array')
return self.viewer.render(return_rgb_array=mode == "rgb_array")
def close(self):
if self.viewer is not None:
self.viewer.close()
self.viewer = None
class BipedalWalkerHardcore(BipedalWalker):
hardcore = True
if __name__=="__main__":
if __name__ == "__main__":
# Heurisic: suboptimal, have no notion of balance.
env = BipedalWalker()
env.reset()
steps = 0
total_reward = 0
a = np.array([0.0, 0.0, 0.0, 0.0])
STAY_ON_ONE_LEG, PUT_OTHER_DOWN, PUSH_OFF = 1,2,3
STAY_ON_ONE_LEG, PUT_OTHER_DOWN, PUSH_OFF = 1, 2, 3
SPEED = 0.29 # Will fall forward on higher speed
state = STAY_ON_ONE_LEG
moving_leg = 0
@@ -523,60 +595,66 @@ if __name__=="__main__":
if steps % 20 == 0 or done:
print("\naction " + str(["{:+0.2f}".format(x) for x in a]))
print("step {} total_reward {:+0.2f}".format(steps, total_reward))
print("hull " + str(["{:+0.2f}".format(x) for x in s[0:4] ]))
print("leg0 " + str(["{:+0.2f}".format(x) for x in s[4:9] ]))
print("hull " + str(["{:+0.2f}".format(x) for x in s[0:4]]))
print("leg0 " + str(["{:+0.2f}".format(x) for x in s[4:9]]))
print("leg1 " + str(["{:+0.2f}".format(x) for x in s[9:14]]))
steps += 1
contact0 = s[8]
contact1 = s[13]
moving_s_base = 4 + 5*moving_leg
supporting_s_base = 4 + 5*supporting_leg
moving_s_base = 4 + 5 * moving_leg
supporting_s_base = 4 + 5 * supporting_leg
hip_targ = [None,None] # -0.8 .. +1.1
knee_targ = [None,None] # -0.6 .. +0.9
hip_todo = [0.0, 0.0]
hip_targ = [None, None] # -0.8 .. +1.1
knee_targ = [None, None] # -0.6 .. +0.9
hip_todo = [0.0, 0.0]
knee_todo = [0.0, 0.0]
if state==STAY_ON_ONE_LEG:
hip_targ[moving_leg] = 1.1
if state == STAY_ON_ONE_LEG:
hip_targ[moving_leg] = 1.1
knee_targ[moving_leg] = -0.6
supporting_knee_angle += 0.03
if s[2] > SPEED: supporting_knee_angle += 0.03
supporting_knee_angle = min( supporting_knee_angle, SUPPORT_KNEE_ANGLE )
if s[2] > SPEED:
supporting_knee_angle += 0.03
supporting_knee_angle = min(supporting_knee_angle, SUPPORT_KNEE_ANGLE)
knee_targ[supporting_leg] = supporting_knee_angle
if s[supporting_s_base+0] < 0.10: # supporting leg is behind
if s[supporting_s_base + 0] < 0.10: # supporting leg is behind
state = PUT_OTHER_DOWN
if state==PUT_OTHER_DOWN:
hip_targ[moving_leg] = +0.1
if state == PUT_OTHER_DOWN:
hip_targ[moving_leg] = +0.1
knee_targ[moving_leg] = SUPPORT_KNEE_ANGLE
knee_targ[supporting_leg] = supporting_knee_angle
if s[moving_s_base+4]:
if s[moving_s_base + 4]:
state = PUSH_OFF
supporting_knee_angle = min( s[moving_s_base+2], SUPPORT_KNEE_ANGLE )
if state==PUSH_OFF:
supporting_knee_angle = min(s[moving_s_base + 2], SUPPORT_KNEE_ANGLE)
if state == PUSH_OFF:
knee_targ[moving_leg] = supporting_knee_angle
knee_targ[supporting_leg] = +1.0
if s[supporting_s_base+2] > 0.88 or s[2] > 1.2*SPEED:
if s[supporting_s_base + 2] > 0.88 or s[2] > 1.2 * SPEED:
state = STAY_ON_ONE_LEG
moving_leg = 1 - moving_leg
supporting_leg = 1 - moving_leg
if hip_targ[0]: hip_todo[0] = 0.9*(hip_targ[0] - s[4]) - 0.25*s[5]
if hip_targ[1]: hip_todo[1] = 0.9*(hip_targ[1] - s[9]) - 0.25*s[10]
if knee_targ[0]: knee_todo[0] = 4.0*(knee_targ[0] - s[6]) - 0.25*s[7]
if knee_targ[1]: knee_todo[1] = 4.0*(knee_targ[1] - s[11]) - 0.25*s[12]
if hip_targ[0]:
hip_todo[0] = 0.9 * (hip_targ[0] - s[4]) - 0.25 * s[5]
if hip_targ[1]:
hip_todo[1] = 0.9 * (hip_targ[1] - s[9]) - 0.25 * s[10]
if knee_targ[0]:
knee_todo[0] = 4.0 * (knee_targ[0] - s[6]) - 0.25 * s[7]
if knee_targ[1]:
knee_todo[1] = 4.0 * (knee_targ[1] - s[11]) - 0.25 * s[12]
        hip_todo[0] -= 0.9*(0-s[0]) - 1.5*s[1] # PID to keep head straight
hip_todo[1] -= 0.9*(0-s[0]) - 1.5*s[1]
knee_todo[0] -= 15.0*s[3] # vertical speed, to damp oscillations
knee_todo[1] -= 15.0*s[3]
        hip_todo[0] -= 0.9 * (0 - s[0]) - 1.5 * s[1]  # PID to keep head straight
hip_todo[1] -= 0.9 * (0 - s[0]) - 1.5 * s[1]
knee_todo[0] -= 15.0 * s[3] # vertical speed, to damp oscillations
knee_todo[1] -= 15.0 * s[3]
a[0] = hip_todo[0]
a[1] = knee_todo[0]
a[2] = hip_todo[1]
a[3] = knee_todo[1]
a = np.clip(0.5*a, -1.0, 1.0)
a = np.clip(0.5 * a, -1.0, 1.0)
env.render()
if done: break
if done:
break

View File

@@ -10,26 +10,27 @@ Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
import numpy as np
import math
import Box2D
from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener, shape)
from Box2D.b2 import (
edgeShape,
circleShape,
fixtureDef,
polygonShape,
revoluteJointDef,
contactListener,
shape,
)
SIZE = 0.02
ENGINE_POWER = 100000000*SIZE*SIZE
WHEEL_MOMENT_OF_INERTIA = 4000*SIZE*SIZE
FRICTION_LIMIT = 1000000*SIZE*SIZE # friction ~= mass ~= size^2 (calculated implicitly using density)
ENGINE_POWER = 100000000 * SIZE * SIZE
WHEEL_MOMENT_OF_INERTIA = 4000 * SIZE * SIZE
FRICTION_LIMIT = (
1000000 * SIZE * SIZE
) # friction ~= mass ~= size^2 (calculated implicitly using density)
WHEEL_R = 27
WHEEL_W = 14
WHEELPOS = [
(-55, +80), (+55, +80),
(-55, -82), (+55, -82)
]
HULL_POLY1 = [
(-60, +130), (+60, +130),
(+60, +110), (-60, +110)
]
HULL_POLY2 = [
(-15, +120), (+15, +120),
(+20, +20), (-20, 20)
]
WHEELPOS = [(-55, +80), (+55, +80), (-55, -82), (+55, -82)]
HULL_POLY1 = [(-60, +130), (+60, +130), (+60, +110), (-60, +110)]
HULL_POLY2 = [(-15, +120), (+15, +120), (+20, +20), (-20, 20)]
HULL_POLY3 = [
(+25, +20),
(+50, -10),
@@ -38,13 +39,10 @@ HULL_POLY3 = [
(-20, -90),
(-50, -40),
(-50, -10),
(-25, +20)
]
HULL_POLY4 = [
(-50, -120), (+50, -120),
(+50, -90), (-50, -90)
]
WHEEL_COLOR = (0.0, 0.0, 0.0)
(-25, +20),
]
HULL_POLY4 = [(-50, -120), (+50, -120), (+50, -90), (-50, -90)]
WHEEL_COLOR = (0.0, 0.0, 0.0)
WHEEL_WHITE = (0.3, 0.3, 0.3)
MUD_COLOR = (0.4, 0.4, 0.0)
@@ -56,32 +54,60 @@ class Car:
position=(init_x, init_y),
angle=init_angle,
fixtures=[
fixtureDef(shape=polygonShape(vertices=[(x*SIZE, y*SIZE) for x, y in HULL_POLY1]), density=1.0),
fixtureDef(shape=polygonShape(vertices=[(x*SIZE, y*SIZE) for x, y in HULL_POLY2]), density=1.0),
fixtureDef(shape=polygonShape(vertices=[(x*SIZE, y*SIZE) for x, y in HULL_POLY3]), density=1.0),
fixtureDef(shape=polygonShape(vertices=[(x*SIZE, y*SIZE) for x, y in HULL_POLY4]), density=1.0)
]
)
fixtureDef(
shape=polygonShape(
vertices=[(x * SIZE, y * SIZE) for x, y in HULL_POLY1]
),
density=1.0,
),
fixtureDef(
shape=polygonShape(
vertices=[(x * SIZE, y * SIZE) for x, y in HULL_POLY2]
),
density=1.0,
),
fixtureDef(
shape=polygonShape(
vertices=[(x * SIZE, y * SIZE) for x, y in HULL_POLY3]
),
density=1.0,
),
fixtureDef(
shape=polygonShape(
vertices=[(x * SIZE, y * SIZE) for x, y in HULL_POLY4]
),
density=1.0,
),
],
)
self.hull.color = (0.8, 0.0, 0.0)
self.wheels = []
self.fuel_spent = 0.0
WHEEL_POLY = [
(-WHEEL_W, +WHEEL_R), (+WHEEL_W, +WHEEL_R),
(+WHEEL_W, -WHEEL_R), (-WHEEL_W, -WHEEL_R)
]
(-WHEEL_W, +WHEEL_R),
(+WHEEL_W, +WHEEL_R),
(+WHEEL_W, -WHEEL_R),
(-WHEEL_W, -WHEEL_R),
]
for wx, wy in WHEELPOS:
front_k = 1.0 if wy > 0 else 1.0
w = self.world.CreateDynamicBody(
position=(init_x+wx*SIZE, init_y+wy*SIZE),
position=(init_x + wx * SIZE, init_y + wy * SIZE),
angle=init_angle,
fixtures=fixtureDef(
shape=polygonShape(vertices=[(x*front_k*SIZE,y*front_k*SIZE) for x, y in WHEEL_POLY]),
shape=polygonShape(
vertices=[
(x * front_k * SIZE, y * front_k * SIZE)
for x, y in WHEEL_POLY
]
),
density=0.1,
categoryBits=0x0020,
maskBits=0x001,
restitution=0.0)
)
w.wheel_rad = front_k*WHEEL_R*SIZE
restitution=0.0,
),
)
w.wheel_rad = front_k * WHEEL_R * SIZE
w.color = WHEEL_COLOR
w.gas = 0.0
w.brake = 0.0
@@ -93,15 +119,15 @@ class Car:
rjd = revoluteJointDef(
bodyA=self.hull,
bodyB=w,
localAnchorA=(wx*SIZE, wy*SIZE),
localAnchorB=(0,0),
localAnchorA=(wx * SIZE, wy * SIZE),
localAnchorB=(0, 0),
enableMotor=True,
enableLimit=True,
maxMotorTorque=180*900*SIZE*SIZE,
maxMotorTorque=180 * 900 * SIZE * SIZE,
motorSpeed=0,
lowerAngle=-0.4,
upperAngle=+0.4,
)
)
w.joint = self.world.CreateJoint(rjd)
w.tiles = set()
w.userData = w
@@ -118,7 +144,8 @@ class Car:
gas = np.clip(gas, 0, 1)
for w in self.wheels[2:4]:
diff = gas - w.gas
if diff > 0.1: diff = 0.1 # gradually increase, but stop immediately
if diff > 0.1:
diff = 0.1 # gradually increase, but stop immediately
w.gas += diff
def brake(self, b):
@@ -142,60 +169,75 @@ class Car:
# Steer each wheel
dir = np.sign(w.steer - w.joint.angle)
val = abs(w.steer - w.joint.angle)
w.joint.motorSpeed = dir*min(50.0*val, 3.0)
w.joint.motorSpeed = dir * min(50.0 * val, 3.0)
# Position => friction_limit
grass = True
friction_limit = FRICTION_LIMIT*0.6 # Grass friction if no tile
friction_limit = FRICTION_LIMIT * 0.6 # Grass friction if no tile
for tile in w.tiles:
friction_limit = max(friction_limit, FRICTION_LIMIT*tile.road_friction)
friction_limit = max(
friction_limit, FRICTION_LIMIT * tile.road_friction
)
grass = False
# Force
forw = w.GetWorldVector( (0,1) )
side = w.GetWorldVector( (1,0) )
forw = w.GetWorldVector((0, 1))
side = w.GetWorldVector((1, 0))
v = w.linearVelocity
vf = forw[0]*v[0] + forw[1]*v[1] # forward speed
vs = side[0]*v[0] + side[1]*v[1] # side speed
vf = forw[0] * v[0] + forw[1] * v[1] # forward speed
vs = side[0] * v[0] + side[1] * v[1] # side speed
# WHEEL_MOMENT_OF_INERTIA*np.square(w.omega)/2 = E -- energy
# WHEEL_MOMENT_OF_INERTIA*w.omega * domega/dt = dE/dt = W -- power
# domega = dt*W/WHEEL_MOMENT_OF_INERTIA/w.omega
# add small coef not to divide by zero
w.omega += dt*ENGINE_POWER*w.gas/WHEEL_MOMENT_OF_INERTIA/(abs(w.omega)+5.0)
self.fuel_spent += dt*ENGINE_POWER*w.gas
w.omega += (
dt
* ENGINE_POWER
* w.gas
/ WHEEL_MOMENT_OF_INERTIA
/ (abs(w.omega) + 5.0)
)
self.fuel_spent += dt * ENGINE_POWER * w.gas
if w.brake >= 0.9:
w.omega = 0
elif w.brake > 0:
BRAKE_FORCE = 15 # radians per second
BRAKE_FORCE = 15 # radians per second
dir = -np.sign(w.omega)
val = BRAKE_FORCE*w.brake
if abs(val) > abs(w.omega): val = abs(w.omega) # low speed => same as = 0
w.omega += dir*val
w.phase += w.omega*dt
val = BRAKE_FORCE * w.brake
if abs(val) > abs(w.omega):
val = abs(w.omega) # low speed => same as = 0
w.omega += dir * val
w.phase += w.omega * dt
vr = w.omega*w.wheel_rad # rotating wheel speed
f_force = -vf + vr # force direction is direction of speed difference
vr = w.omega * w.wheel_rad # rotating wheel speed
f_force = -vf + vr # force direction is direction of speed difference
p_force = -vs
# Physically correct is to always apply friction_limit until speed is equal.
# But dt is finite, that will lead to oscillations if difference is already near zero.
# Random coefficient to cut oscillations in few steps (have no effect on friction_limit)
f_force *= 205000*SIZE*SIZE
p_force *= 205000*SIZE*SIZE
f_force *= 205000 * SIZE * SIZE
p_force *= 205000 * SIZE * SIZE
force = np.sqrt(np.square(f_force) + np.square(p_force))
# Skid trace
if abs(force) > 2.0*friction_limit:
if w.skid_particle and w.skid_particle.grass == grass and len(w.skid_particle.poly) < 30:
w.skid_particle.poly.append( (w.position[0], w.position[1]) )
if abs(force) > 2.0 * friction_limit:
if (
w.skid_particle
and w.skid_particle.grass == grass
and len(w.skid_particle.poly) < 30
):
w.skid_particle.poly.append((w.position[0], w.position[1]))
elif w.skid_start is None:
w.skid_start = w.position
else:
w.skid_particle = self._create_particle( w.skid_start, w.position, grass )
w.skid_particle = self._create_particle(
w.skid_start, w.position, grass
)
w.skid_start = None
else:
w.skid_start = None
@@ -208,11 +250,15 @@ class Car:
f_force *= force
p_force *= force
w.omega -= dt*f_force*w.wheel_rad/WHEEL_MOMENT_OF_INERTIA
w.omega -= dt * f_force * w.wheel_rad / WHEEL_MOMENT_OF_INERTIA
w.ApplyForceToCenter( (
p_force*side[0] + f_force*forw[0],
p_force*side[1] + f_force*forw[1]), True )
w.ApplyForceToCenter(
(
p_force * side[0] + f_force * forw[0],
p_force * side[1] + f_force * forw[1],
),
True,
)
def draw(self, viewer, draw_particles=True):
if draw_particles:
@@ -221,27 +267,34 @@ class Car:
for obj in self.drawlist:
for f in obj.fixtures:
trans = f.body.transform
path = [trans*v for v in f.shape.vertices]
path = [trans * v for v in f.shape.vertices]
viewer.draw_polygon(path, color=obj.color)
if "phase" not in obj.__dict__: continue
if "phase" not in obj.__dict__:
continue
a1 = obj.phase
a2 = obj.phase + 1.2 # radians
s1 = math.sin(a1)
s2 = math.sin(a2)
c1 = math.cos(a1)
c2 = math.cos(a2)
if s1 > 0 and s2 > 0: continue
if s1 > 0: c1 = np.sign(c1)
if s2 > 0: c2 = np.sign(c2)
if s1 > 0 and s2 > 0:
continue
if s1 > 0:
c1 = np.sign(c1)
if s2 > 0:
c2 = np.sign(c2)
white_poly = [
(-WHEEL_W*SIZE, +WHEEL_R*c1*SIZE), (+WHEEL_W*SIZE, +WHEEL_R*c1*SIZE),
(+WHEEL_W*SIZE, +WHEEL_R*c2*SIZE), (-WHEEL_W*SIZE, +WHEEL_R*c2*SIZE)
]
viewer.draw_polygon([trans*v for v in white_poly], color=WHEEL_WHITE)
(-WHEEL_W * SIZE, +WHEEL_R * c1 * SIZE),
(+WHEEL_W * SIZE, +WHEEL_R * c1 * SIZE),
(+WHEEL_W * SIZE, +WHEEL_R * c2 * SIZE),
(-WHEEL_W * SIZE, +WHEEL_R * c2 * SIZE),
]
viewer.draw_polygon([trans * v for v in white_poly], color=WHEEL_WHITE)
def _create_particle(self, point1, point2, grass):
class Particle:
pass
p = Particle()
p.color = WHEEL_COLOR if not grass else MUD_COLOR
p.ttl = 1
@@ -258,4 +311,3 @@ class Car:
for w in self.wheels:
self.world.DestroyBody(w)
self.wheels = []
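
A minimal sketch of the engine update that the comment block in Car.step derives (power W = dE/dt with E = I*omega^2/2); the function and argument names are illustrative, the constants mirror the source:

def integrate_engine(omega, gas, dt, engine_power, wheel_moi):
    # W = ENGINE_POWER * gas; I * omega * domega/dt = W
    # => domega = dt * W / (I * omega); the +5.0 keeps the division finite near omega = 0
    return omega + dt * engine_power * gas / wheel_moi / (abs(omega) + 5.0)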

View File

@@ -137,7 +137,8 @@ class CarRacing(gym.Env, EzPickle):
)
self.action_space = spaces.Box(
np.array([-1, 0, 0]).astype(np.float32), np.array([+1, +1, +1]).astype(np.float32)
np.array([-1, 0, 0]).astype(np.float32),
np.array([+1, +1, +1]).astype(np.float32),
) # steer, gas, brake
self.observation_space = spaces.Box(

View File

@@ -26,28 +26,33 @@ Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
"""
import sys, math
import math
import sys
import numpy as np
import Box2D
from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener)
from Box2D.b2 import (
edgeShape,
circleShape,
fixtureDef,
polygonShape,
revoluteJointDef,
contactListener,
)
import gym
from gym import spaces
from gym.utils import seeding, EzPickle
FPS = 50
SCALE = 30.0 # affects how fast-paced the game is, forces should be adjusted as well
SCALE = 30.0 # affects how fast-paced the game is, forces should be adjusted as well
MAIN_ENGINE_POWER = 13.0
SIDE_ENGINE_POWER = 0.6
INITIAL_RANDOM = 1000.0 # Set 1500 to make game harder
INITIAL_RANDOM = 1000.0 # Set 1500 to make game harder
LANDER_POLY =[
(-14, +17), (-17, 0), (-17 ,-10),
(+17, -10), (+17, 0), (+14, +17)
]
LANDER_POLY = [(-14, +17), (-17, 0), (-17, -10), (+17, -10), (+17, 0), (+14, +17)]
LEG_AWAY = 20
LEG_DOWN = 18
LEG_W, LEG_H = 2, 8
@@ -66,7 +71,10 @@ class ContactDetector(contactListener):
self.env = env
def BeginContact(self, contact):
if self.env.lander == contact.fixtureA.body or self.env.lander == contact.fixtureB.body:
if (
self.env.lander == contact.fixtureA.body
or self.env.lander == contact.fixtureB.body
):
self.env.game_over = True
for i in range(2):
if self.env.legs[i] in [contact.fixtureA.body, contact.fixtureB.body]:
@@ -79,10 +87,7 @@ class ContactDetector(contactListener):
class LunarLander(gym.Env, EzPickle):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second' : FPS
}
metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": FPS}
continuous = False
@@ -99,7 +104,9 @@ class LunarLander(gym.Env, EzPickle):
self.prev_reward = None
# useful range is -1 .. +1, but spikes can be higher
self.observation_space = spaces.Box(-np.inf, np.inf, shape=(8,), dtype=np.float32)
self.observation_space = spaces.Box(
-np.inf, np.inf, shape=(8,), dtype=np.float32
)
if self.continuous:
# Action is two floats [main engine, left-right engines].
@@ -117,7 +124,8 @@ class LunarLander(gym.Env, EzPickle):
return [seed]
def _destroy(self):
if not self.moon: return
if not self.moon:
return
self.world.contactListener = None
self._clean_particles(True)
self.world.DestroyBody(self.moon)
@@ -134,68 +142,77 @@ class LunarLander(gym.Env, EzPickle):
self.game_over = False
self.prev_shaping = None
W = VIEWPORT_W/SCALE
H = VIEWPORT_H/SCALE
W = VIEWPORT_W / SCALE
H = VIEWPORT_H / SCALE
# terrain
CHUNKS = 11
height = self.np_random.uniform(0, H/2, size=(CHUNKS+1,))
chunk_x = [W/(CHUNKS-1)*i for i in range(CHUNKS)]
self.helipad_x1 = chunk_x[CHUNKS//2-1]
self.helipad_x2 = chunk_x[CHUNKS//2+1]
self.helipad_y = H/4
height[CHUNKS//2-2] = self.helipad_y
height[CHUNKS//2-1] = self.helipad_y
height[CHUNKS//2+0] = self.helipad_y
height[CHUNKS//2+1] = self.helipad_y
height[CHUNKS//2+2] = self.helipad_y
smooth_y = [0.33*(height[i-1] + height[i+0] + height[i+1]) for i in range(CHUNKS)]
height = self.np_random.uniform(0, H / 2, size=(CHUNKS + 1,))
chunk_x = [W / (CHUNKS - 1) * i for i in range(CHUNKS)]
self.helipad_x1 = chunk_x[CHUNKS // 2 - 1]
self.helipad_x2 = chunk_x[CHUNKS // 2 + 1]
self.helipad_y = H / 4
height[CHUNKS // 2 - 2] = self.helipad_y
height[CHUNKS // 2 - 1] = self.helipad_y
height[CHUNKS // 2 + 0] = self.helipad_y
height[CHUNKS // 2 + 1] = self.helipad_y
height[CHUNKS // 2 + 2] = self.helipad_y
smooth_y = [
0.33 * (height[i - 1] + height[i + 0] + height[i + 1])
for i in range(CHUNKS)
]
self.moon = self.world.CreateStaticBody(shapes=edgeShape(vertices=[(0, 0), (W, 0)]))
self.moon = self.world.CreateStaticBody(
shapes=edgeShape(vertices=[(0, 0), (W, 0)])
)
self.sky_polys = []
for i in range(CHUNKS-1):
for i in range(CHUNKS - 1):
p1 = (chunk_x[i], smooth_y[i])
p2 = (chunk_x[i+1], smooth_y[i+1])
self.moon.CreateEdgeFixture(
vertices=[p1,p2],
density=0,
friction=0.1)
p2 = (chunk_x[i + 1], smooth_y[i + 1])
self.moon.CreateEdgeFixture(vertices=[p1, p2], density=0, friction=0.1)
self.sky_polys.append([p1, p2, (p2[0], H), (p1[0], H)])
self.moon.color1 = (0.0, 0.0, 0.0)
self.moon.color2 = (0.0, 0.0, 0.0)
initial_y = VIEWPORT_H/SCALE
initial_y = VIEWPORT_H / SCALE
self.lander = self.world.CreateDynamicBody(
position=(VIEWPORT_W/SCALE/2, initial_y),
position=(VIEWPORT_W / SCALE / 2, initial_y),
angle=0.0,
fixtures = fixtureDef(
shape=polygonShape(vertices=[(x/SCALE, y/SCALE) for x, y in LANDER_POLY]),
fixtures=fixtureDef(
shape=polygonShape(
vertices=[(x / SCALE, y / SCALE) for x, y in LANDER_POLY]
),
density=5.0,
friction=0.1,
categoryBits=0x0010,
maskBits=0x001, # collide only with ground
restitution=0.0) # 0.99 bouncy
)
maskBits=0x001, # collide only with ground
restitution=0.0,
), # 0.99 bouncy
)
self.lander.color1 = (0.5, 0.4, 0.9)
self.lander.color2 = (0.3, 0.3, 0.5)
self.lander.ApplyForceToCenter( (
self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM),
self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM)
), True)
self.lander.ApplyForceToCenter(
(
self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM),
self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM),
),
True,
)
self.legs = []
for i in [-1, +1]:
leg = self.world.CreateDynamicBody(
position=(VIEWPORT_W/SCALE/2 - i*LEG_AWAY/SCALE, initial_y),
position=(VIEWPORT_W / SCALE / 2 - i * LEG_AWAY / SCALE, initial_y),
angle=(i * 0.05),
fixtures=fixtureDef(
shape=polygonShape(box=(LEG_W/SCALE, LEG_H/SCALE)),
shape=polygonShape(box=(LEG_W / SCALE, LEG_H / SCALE)),
density=1.0,
restitution=0.0,
categoryBits=0x0020,
maskBits=0x001)
)
maskBits=0x001,
),
)
leg.ground_contact = False
leg.color1 = (0.5, 0.4, 0.9)
leg.color2 = (0.3, 0.3, 0.5)
@@ -203,14 +220,16 @@ class LunarLander(gym.Env, EzPickle):
bodyA=self.lander,
bodyB=leg,
localAnchorA=(0, 0),
localAnchorB=(i * LEG_AWAY/SCALE, LEG_DOWN/SCALE),
localAnchorB=(i * LEG_AWAY / SCALE, LEG_DOWN / SCALE),
enableMotor=True,
enableLimit=True,
maxMotorTorque=LEG_SPRING_TORQUE,
motorSpeed=+0.3 * i # low enough not to jump back into the sky
)
motorSpeed=+0.3 * i, # low enough not to jump back into the sky
)
if i == -1:
rjd.lowerAngle = +0.9 - 0.5 # The most esoteric numbers here, angled legs have freedom to travel within
rjd.lowerAngle = (
+0.9 - 0.5
) # The most esoteric numbers here, angled legs have freedom to travel within
rjd.upperAngle = +0.9
else:
rjd.lowerAngle = -0.9
@@ -224,16 +243,17 @@ class LunarLander(gym.Env, EzPickle):
def _create_particle(self, mass, x, y, ttl):
p = self.world.CreateDynamicBody(
position = (x, y),
position=(x, y),
angle=0.0,
fixtures = fixtureDef(
shape=circleShape(radius=2/SCALE, pos=(0, 0)),
fixtures=fixtureDef(
shape=circleShape(radius=2 / SCALE, pos=(0, 0)),
density=mass,
friction=0.1,
categoryBits=0x0100,
maskBits=0x001, # collide only with ground
restitution=0.3)
)
restitution=0.3,
),
)
p.ttl = ttl
self.particles.append(p)
self._clean_particles(False)
@@ -247,86 +267,115 @@ class LunarLander(gym.Env, EzPickle):
if self.continuous:
action = np.clip(action, -1, +1).astype(np.float32)
else:
assert self.action_space.contains(action), "%r (%s) invalid " % (action, type(action))
assert self.action_space.contains(action), "%r (%s) invalid " % (
action,
type(action),
)
# Engines
tip = (math.sin(self.lander.angle), math.cos(self.lander.angle))
tip = (math.sin(self.lander.angle), math.cos(self.lander.angle))
side = (-tip[1], tip[0])
dispersion = [self.np_random.uniform(-1.0, +1.0) / SCALE for _ in range(2)]
m_power = 0.0
if (self.continuous and action[0] > 0.0) or (not self.continuous and action == 2):
if (self.continuous and action[0] > 0.0) or (
not self.continuous and action == 2
):
# Main engine
if self.continuous:
m_power = (np.clip(action[0], 0.0,1.0) + 1.0)*0.5 # 0.5..1.0
m_power = (np.clip(action[0], 0.0, 1.0) + 1.0) * 0.5 # 0.5..1.0
assert m_power >= 0.5 and m_power <= 1.0
else:
m_power = 1.0
ox = (tip[0] * (4/SCALE + 2 * dispersion[0]) +
side[0] * dispersion[1]) # 4 is move a bit downwards, +-2 for randomness
oy = -tip[1] * (4/SCALE + 2 * dispersion[0]) - side[1] * dispersion[1]
ox = (
tip[0] * (4 / SCALE + 2 * dispersion[0]) + side[0] * dispersion[1]
) # 4 is move a bit downwards, +-2 for randomness
oy = -tip[1] * (4 / SCALE + 2 * dispersion[0]) - side[1] * dispersion[1]
impulse_pos = (self.lander.position[0] + ox, self.lander.position[1] + oy)
p = self._create_particle(3.5, # 3.5 is here to make particle speed adequate
impulse_pos[0],
impulse_pos[1],
m_power) # particles are just a decoration
p.ApplyLinearImpulse((ox * MAIN_ENGINE_POWER * m_power, oy * MAIN_ENGINE_POWER * m_power),
impulse_pos,
True)
self.lander.ApplyLinearImpulse((-ox * MAIN_ENGINE_POWER * m_power, -oy * MAIN_ENGINE_POWER * m_power),
impulse_pos,
True)
p = self._create_particle(
3.5, # 3.5 is here to make particle speed adequate
impulse_pos[0],
impulse_pos[1],
m_power,
) # particles are just a decoration
p.ApplyLinearImpulse(
(ox * MAIN_ENGINE_POWER * m_power, oy * MAIN_ENGINE_POWER * m_power),
impulse_pos,
True,
)
self.lander.ApplyLinearImpulse(
(-ox * MAIN_ENGINE_POWER * m_power, -oy * MAIN_ENGINE_POWER * m_power),
impulse_pos,
True,
)
s_power = 0.0
if (self.continuous and np.abs(action[1]) > 0.5) or (not self.continuous and action in [1, 3]):
if (self.continuous and np.abs(action[1]) > 0.5) or (
not self.continuous and action in [1, 3]
):
# Orientation engines
if self.continuous:
direction = np.sign(action[1])
s_power = np.clip(np.abs(action[1]), 0.5, 1.0)
assert s_power >= 0.5 and s_power <= 1.0
else:
direction = action-2
direction = action - 2
s_power = 1.0
ox = tip[0] * dispersion[0] + side[0] * (3 * dispersion[1] + direction * SIDE_ENGINE_AWAY/SCALE)
oy = -tip[1] * dispersion[0] - side[1] * (3 * dispersion[1] + direction * SIDE_ENGINE_AWAY/SCALE)
impulse_pos = (self.lander.position[0] + ox - tip[0] * 17/SCALE,
self.lander.position[1] + oy + tip[1] * SIDE_ENGINE_HEIGHT/SCALE)
ox = tip[0] * dispersion[0] + side[0] * (
3 * dispersion[1] + direction * SIDE_ENGINE_AWAY / SCALE
)
oy = -tip[1] * dispersion[0] - side[1] * (
3 * dispersion[1] + direction * SIDE_ENGINE_AWAY / SCALE
)
impulse_pos = (
self.lander.position[0] + ox - tip[0] * 17 / SCALE,
self.lander.position[1] + oy + tip[1] * SIDE_ENGINE_HEIGHT / SCALE,
)
p = self._create_particle(0.7, impulse_pos[0], impulse_pos[1], s_power)
p.ApplyLinearImpulse((ox * SIDE_ENGINE_POWER * s_power, oy * SIDE_ENGINE_POWER * s_power),
impulse_pos
, True)
self.lander.ApplyLinearImpulse((-ox * SIDE_ENGINE_POWER * s_power, -oy * SIDE_ENGINE_POWER * s_power),
impulse_pos,
True)
p.ApplyLinearImpulse(
(ox * SIDE_ENGINE_POWER * s_power, oy * SIDE_ENGINE_POWER * s_power),
impulse_pos,
True,
)
self.lander.ApplyLinearImpulse(
(-ox * SIDE_ENGINE_POWER * s_power, -oy * SIDE_ENGINE_POWER * s_power),
impulse_pos,
True,
)
self.world.Step(1.0/FPS, 6*30, 2*30)
self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
pos = self.lander.position
vel = self.lander.linearVelocity
state = [
(pos.x - VIEWPORT_W/SCALE/2) / (VIEWPORT_W/SCALE/2),
(pos.y - (self.helipad_y+LEG_DOWN/SCALE)) / (VIEWPORT_H/SCALE/2),
vel.x*(VIEWPORT_W/SCALE/2)/FPS,
vel.y*(VIEWPORT_H/SCALE/2)/FPS,
(pos.x - VIEWPORT_W / SCALE / 2) / (VIEWPORT_W / SCALE / 2),
(pos.y - (self.helipad_y + LEG_DOWN / SCALE)) / (VIEWPORT_H / SCALE / 2),
vel.x * (VIEWPORT_W / SCALE / 2) / FPS,
vel.y * (VIEWPORT_H / SCALE / 2) / FPS,
self.lander.angle,
20.0*self.lander.angularVelocity/FPS,
20.0 * self.lander.angularVelocity / FPS,
1.0 if self.legs[0].ground_contact else 0.0,
1.0 if self.legs[1].ground_contact else 0.0
]
1.0 if self.legs[1].ground_contact else 0.0,
]
assert len(state) == 8
reward = 0
shaping = \
- 100*np.sqrt(state[0]*state[0] + state[1]*state[1]) \
- 100*np.sqrt(state[2]*state[2] + state[3]*state[3]) \
- 100*abs(state[4]) + 10*state[6] + 10*state[7] # And ten points for legs contact, the idea is if you
# lose contact again after landing, you get negative reward
shaping = (
-100 * np.sqrt(state[0] * state[0] + state[1] * state[1])
- 100 * np.sqrt(state[2] * state[2] + state[3] * state[3])
- 100 * abs(state[4])
+ 10 * state[6]
+ 10 * state[7]
) # And ten points for legs contact, the idea is if you
# lose contact again after landing, you get negative reward
if self.prev_shaping is not None:
reward = shaping - self.prev_shaping
self.prev_shaping = shaping
reward -= m_power*0.30 # less fuel spent is better, about -30 for heuristic landing
reward -= s_power*0.03
reward -= (
m_power * 0.30
) # less fuel spent is better, about -30 for heuristic landing
reward -= s_power * 0.03
done = False
if self.game_over or abs(state[0]) >= 1.0:
@@ -337,16 +386,25 @@ class LunarLander(gym.Env, EzPickle):
reward = +100
return np.array(state, dtype=np.float32), reward, done, {}
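
A minimal sketch of the shaping term above as a standalone potential function (the indices follow the 8-dimensional state built in step(); the function name is illustrative):

import numpy as np

def shaping_potential(state):
    # closer to the pad, slower, more upright, and with both legs down is better
    return (
        -100 * np.sqrt(state[0] ** 2 + state[1] ** 2)
        - 100 * np.sqrt(state[2] ** 2 + state[3] ** 2)
        - 100 * abs(state[4])
        + 10 * state[6]
        + 10 * state[7]
    )

# per-step reward contribution: shaping_potential(state) - shaping_potential(prev_state)
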
def render(self, mode='human'):
def render(self, mode="human"):
from gym.envs.classic_control import rendering
if self.viewer is None:
self.viewer = rendering.Viewer(VIEWPORT_W, VIEWPORT_H)
self.viewer.set_bounds(0, VIEWPORT_W/SCALE, 0, VIEWPORT_H/SCALE)
self.viewer.set_bounds(0, VIEWPORT_W / SCALE, 0, VIEWPORT_H / SCALE)
for obj in self.particles:
obj.ttl -= 0.15
obj.color1 = (max(0.2, 0.2+obj.ttl), max(0.2, 0.5*obj.ttl), max(0.2, 0.5*obj.ttl))
obj.color2 = (max(0.2, 0.2+obj.ttl), max(0.2, 0.5*obj.ttl), max(0.2, 0.5*obj.ttl))
obj.color1 = (
max(0.2, 0.2 + obj.ttl),
max(0.2, 0.5 * obj.ttl),
max(0.2, 0.5 * obj.ttl),
)
obj.color2 = (
max(0.2, 0.2 + obj.ttl),
max(0.2, 0.5 * obj.ttl),
max(0.2, 0.5 * obj.ttl),
)
self._clean_particles(False)
@@ -357,23 +415,33 @@ class LunarLander(gym.Env, EzPickle):
for f in obj.fixtures:
trans = f.body.transform
if type(f.shape) is circleShape:
t = rendering.Transform(translation=trans*f.shape.pos)
self.viewer.draw_circle(f.shape.radius, 20, color=obj.color1).add_attr(t)
self.viewer.draw_circle(f.shape.radius, 20, color=obj.color2, filled=False, linewidth=2).add_attr(t)
t = rendering.Transform(translation=trans * f.shape.pos)
self.viewer.draw_circle(
f.shape.radius, 20, color=obj.color1
).add_attr(t)
self.viewer.draw_circle(
f.shape.radius, 20, color=obj.color2, filled=False, linewidth=2
).add_attr(t)
else:
path = [trans*v for v in f.shape.vertices]
path = [trans * v for v in f.shape.vertices]
self.viewer.draw_polygon(path, color=obj.color1)
path.append(path[0])
self.viewer.draw_polyline(path, color=obj.color2, linewidth=2)
for x in [self.helipad_x1, self.helipad_x2]:
flagy1 = self.helipad_y
flagy2 = flagy1 + 50/SCALE
flagy2 = flagy1 + 50 / SCALE
self.viewer.draw_polyline([(x, flagy1), (x, flagy2)], color=(1, 1, 1))
self.viewer.draw_polygon([(x, flagy2), (x, flagy2-10/SCALE), (x + 25/SCALE, flagy2 - 5/SCALE)],
color=(0.8, 0.8, 0))
self.viewer.draw_polygon(
[
(x, flagy2),
(x, flagy2 - 10 / SCALE),
(x + 25 / SCALE, flagy2 - 5 / SCALE),
],
color=(0.8, 0.8, 0),
)
return self.viewer.render(return_rgb_array=mode == 'rgb_array')
return self.viewer.render(return_rgb_array=mode == "rgb_array")
def close(self):
if self.viewer is not None:
@@ -384,6 +452,7 @@ class LunarLander(gym.Env, EzPickle):
class LunarLanderContinuous(LunarLander):
continuous = True
def heuristic(env, s):
"""
The heuristic for
@@ -405,28 +474,38 @@ def heuristic(env, s):
a: The heuristic to be fed into the step function defined above to determine the next step and reward.
"""
angle_targ = s[0]*0.5 + s[2]*1.0 # angle should point towards center
if angle_targ > 0.4: angle_targ = 0.4 # more than 0.4 radians (22 degrees) is bad
if angle_targ < -0.4: angle_targ = -0.4
hover_targ = 0.55*np.abs(s[0]) # target y should be proportional to horizontal offset
angle_targ = s[0] * 0.5 + s[2] * 1.0 # angle should point towards center
if angle_targ > 0.4:
angle_targ = 0.4 # more than 0.4 radians (22 degrees) is bad
if angle_targ < -0.4:
angle_targ = -0.4
hover_targ = 0.55 * np.abs(
s[0]
) # target y should be proportional to horizontal offset
angle_todo = (angle_targ - s[4]) * 0.5 - (s[5])*1.0
hover_todo = (hover_targ - s[1])*0.5 - (s[3])*0.5
angle_todo = (angle_targ - s[4]) * 0.5 - (s[5]) * 1.0
hover_todo = (hover_targ - s[1]) * 0.5 - (s[3]) * 0.5
if s[6] or s[7]: # legs have contact
angle_todo = 0
hover_todo = -(s[3])*0.5 # override to reduce fall speed, that's all we need after contact
hover_todo = (
-(s[3]) * 0.5
) # override to reduce fall speed, that's all we need after contact
if env.continuous:
a = np.array([hover_todo*20 - 1, -angle_todo*20])
a = np.array([hover_todo * 20 - 1, -angle_todo * 20])
a = np.clip(a, -1, +1)
else:
a = 0
if hover_todo > np.abs(angle_todo) and hover_todo > 0.05: a = 2
elif angle_todo < -0.05: a = 3
elif angle_todo > +0.05: a = 1
if hover_todo > np.abs(angle_todo) and hover_todo > 0.05:
a = 2
elif angle_todo < -0.05:
a = 3
elif angle_todo > +0.05:
a = 1
return a
def demo_heuristic_lander(env, seed=None, render=False):
env.seed(seed)
total_reward = 0
@@ -439,17 +518,19 @@ def demo_heuristic_lander(env, seed=None, render=False):
if render:
still_open = env.render()
if still_open == False: break
if still_open == False:
break
if steps % 20 == 0 or done:
print("observations:", " ".join(["{:+0.2f}".format(x) for x in s]))
print("step {} total_reward {:+0.2f}".format(steps, total_reward))
steps += 1
if done: break
if done:
break
if render:
env.close()
return total_reward
if __name__ == '__main__':
if __name__ == "__main__":
demo_heuristic_lander(LunarLander(), render=True)
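
A short usage sketch of the scripted controller defined in this file, assuming Box2D is installed (the seeds and episode count are arbitrary):

from gym.envs.box2d.lunar_lander import LunarLanderContinuous, demo_heuristic_lander

for seed in range(3):
    # run the heuristic for one seeded episode and report its return
    total = demo_heuristic_lander(LunarLanderContinuous(), seed=seed, render=False)
    print("seed", seed, "total_reward", total)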

View File

@@ -1,4 +1,5 @@
import pytest
try:
import Box2D
from .lunar_lander import LunarLander, LunarLanderContinuous, demo_heuristic_lander
@@ -6,17 +7,17 @@ except ImportError:
Box2D = None
@pytest.mark.skipif(Box2D is None, reason='Box2D not installed')
@pytest.mark.skipif(Box2D is None, reason="Box2D not installed")
def test_lunar_lander():
_test_lander(LunarLander(), seed=0)
@pytest.mark.skipif(Box2D is None, reason='Box2D not installed')
@pytest.mark.skipif(Box2D is None, reason="Box2D not installed")
def test_lunar_lander_continuous():
_test_lander(LunarLanderContinuous(), seed=0)
@pytest.mark.skipif(Box2D is None, reason='Box2D not installed')
@pytest.mark.skipif(Box2D is None, reason="Box2D not installed")
def _test_lander(env, seed=None, render=False):
total_reward = demo_heuristic_lander(env, seed=seed, render=render)
assert total_reward > 100

View File

@@ -3,4 +3,3 @@ from gym.envs.classic_control.mountain_car import MountainCarEnv
from gym.envs.classic_control.continuous_mountain_car import Continuous_MountainCarEnv
from gym.envs.classic_control.pendulum import PendulumEnv
from gym.envs.classic_control.acrobot import AcrobotEnv

View File

@@ -6,14 +6,20 @@ from gym import core, spaces
from gym.utils import seeding
__copyright__ = "Copyright 2013, RLPy http://acl.mit.edu/RLPy"
__credits__ = ["Alborz Geramifard", "Robert H. Klein", "Christoph Dann",
"William Dabney", "Jonathan P. How"]
__credits__ = [
"Alborz Geramifard",
"Robert H. Klein",
"Christoph Dann",
"William Dabney",
"Jonathan P. How",
]
__license__ = "BSD 3-Clause"
__author__ = "Christoph Dann <cdann@cdann.de>"
# SOURCE:
# https://github.com/rlpy/rlpy/blob/master/rlpy/Domains/Acrobot.py
class AcrobotEnv(core.Env):
"""
@@ -55,27 +61,24 @@ class AcrobotEnv(core.Env):
see the AcrobotLegacy class.
"""
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second' : 15
}
metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 15}
dt = .2
dt = 0.2
LINK_LENGTH_1 = 1. # [m]
LINK_LENGTH_2 = 1. # [m]
LINK_MASS_1 = 1. #: [kg] mass of link 1
LINK_MASS_2 = 1. #: [kg] mass of link 2
LINK_LENGTH_1 = 1.0 # [m]
LINK_LENGTH_2 = 1.0 # [m]
LINK_MASS_1 = 1.0 #: [kg] mass of link 1
LINK_MASS_2 = 1.0 #: [kg] mass of link 2
LINK_COM_POS_1 = 0.5 #: [m] position of the center of mass of link 1
LINK_COM_POS_2 = 0.5 #: [m] position of the center of mass of link 2
LINK_MOI = 1. #: moments of inertia for both links
LINK_MOI = 1.0 #: moments of inertia for both links
MAX_VEL_1 = 4 * pi
MAX_VEL_2 = 9 * pi
AVAIL_TORQUE = [-1., 0., +1]
AVAIL_TORQUE = [-1.0, 0.0, +1]
torque_noise_max = 0.
torque_noise_max = 0.0
#: use dynamics equations from the nips paper or the book
book_or_nips = "book"
@@ -85,7 +88,9 @@ class AcrobotEnv(core.Env):
def __init__(self):
self.viewer = None
high = np.array([1.0, 1.0, 1.0, 1.0, self.MAX_VEL_1, self.MAX_VEL_2], dtype=np.float32)
high = np.array(
[1.0, 1.0, 1.0, 1.0, self.MAX_VEL_1, self.MAX_VEL_2], dtype=np.float32
)
low = -high
self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32)
self.action_space = spaces.Discrete(3)
@@ -106,7 +111,9 @@ class AcrobotEnv(core.Env):
# Add noise to the force action
if self.torque_noise_max > 0:
torque += self.np_random.uniform(-self.torque_noise_max, self.torque_noise_max)
torque += self.np_random.uniform(
-self.torque_noise_max, self.torque_noise_max
)
# Now, augment the state with our force action so it can be passed to
# _dsdt
@@ -127,7 +134,7 @@ class AcrobotEnv(core.Env):
ns[3] = bound(ns[3], -self.MAX_VEL_2, self.MAX_VEL_2)
self.state = ns
terminal = self._terminal()
reward = -1. if not terminal else 0.
reward = -1.0 if not terminal else 0.0
return (self._get_ob(), reward, terminal, {})
def _get_ob(self):
@@ -136,7 +143,7 @@ class AcrobotEnv(core.Env):
def _terminal(self):
s = self.state
return bool(-cos(s[0]) - cos(s[1] + s[0]) > 1.)
return bool(-cos(s[0]) - cos(s[1] + s[0]) > 1.0)
def _dsdt(self, s_augmented, t):
m1 = self.LINK_MASS_1
@@ -153,66 +160,76 @@ class AcrobotEnv(core.Env):
theta2 = s[1]
dtheta1 = s[2]
dtheta2 = s[3]
d1 = m1 * lc1 ** 2 + m2 * \
(l1 ** 2 + lc2 ** 2 + 2 * l1 * lc2 * cos(theta2)) + I1 + I2
d1 = (
m1 * lc1 ** 2
+ m2 * (l1 ** 2 + lc2 ** 2 + 2 * l1 * lc2 * cos(theta2))
+ I1
+ I2
)
d2 = m2 * (lc2 ** 2 + l1 * lc2 * cos(theta2)) + I2
phi2 = m2 * lc2 * g * cos(theta1 + theta2 - pi / 2.)
phi1 = - m2 * l1 * lc2 * dtheta2 ** 2 * sin(theta2) \
- 2 * m2 * l1 * lc2 * dtheta2 * dtheta1 * sin(theta2) \
+ (m1 * lc1 + m2 * l1) * g * cos(theta1 - pi / 2) + phi2
phi2 = m2 * lc2 * g * cos(theta1 + theta2 - pi / 2.0)
phi1 = (
-m2 * l1 * lc2 * dtheta2 ** 2 * sin(theta2)
- 2 * m2 * l1 * lc2 * dtheta2 * dtheta1 * sin(theta2)
+ (m1 * lc1 + m2 * l1) * g * cos(theta1 - pi / 2)
+ phi2
)
if self.book_or_nips == "nips":
# the following line is consistent with the description in the
# paper
ddtheta2 = (a + d2 / d1 * phi1 - phi2) / \
(m2 * lc2 ** 2 + I2 - d2 ** 2 / d1)
ddtheta2 = (a + d2 / d1 * phi1 - phi2) / (m2 * lc2 ** 2 + I2 - d2 ** 2 / d1)
else:
# the following line is consistent with the java implementation and the
# book
ddtheta2 = (a + d2 / d1 * phi1 - m2 * l1 * lc2 * dtheta1 ** 2 * sin(theta2) - phi2) \
/ (m2 * lc2 ** 2 + I2 - d2 ** 2 / d1)
ddtheta2 = (
a + d2 / d1 * phi1 - m2 * l1 * lc2 * dtheta1 ** 2 * sin(theta2) - phi2
) / (m2 * lc2 ** 2 + I2 - d2 ** 2 / d1)
ddtheta1 = -(d2 * ddtheta2 + phi1) / d1
return (dtheta1, dtheta2, ddtheta1, ddtheta2, 0.)
return (dtheta1, dtheta2, ddtheta1, ddtheta2, 0.0)
def render(self, mode='human'):
def render(self, mode="human"):
from gym.envs.classic_control import rendering
s = self.state
if self.viewer is None:
self.viewer = rendering.Viewer(500,500)
self.viewer = rendering.Viewer(500, 500)
bound = self.LINK_LENGTH_1 + self.LINK_LENGTH_2 + 0.2 # 2.2 for default
self.viewer.set_bounds(-bound,bound,-bound,bound)
self.viewer.set_bounds(-bound, bound, -bound, bound)
if s is None: return None
if s is None:
return None
p1 = [-self.LINK_LENGTH_1 *
cos(s[0]), self.LINK_LENGTH_1 * sin(s[0])]
p1 = [-self.LINK_LENGTH_1 * cos(s[0]), self.LINK_LENGTH_1 * sin(s[0])]
p2 = [p1[0] - self.LINK_LENGTH_2 * cos(s[0] + s[1]),
p1[1] + self.LINK_LENGTH_2 * sin(s[0] + s[1])]
p2 = [
p1[0] - self.LINK_LENGTH_2 * cos(s[0] + s[1]),
p1[1] + self.LINK_LENGTH_2 * sin(s[0] + s[1]),
]
xys = np.array([[0,0], p1, p2])[:,::-1]
thetas = [s[0]- pi/2, s[0]+s[1]-pi/2]
xys = np.array([[0, 0], p1, p2])[:, ::-1]
thetas = [s[0] - pi / 2, s[0] + s[1] - pi / 2]
link_lengths = [self.LINK_LENGTH_1, self.LINK_LENGTH_2]
self.viewer.draw_line((-2.2, 1), (2.2, 1))
for ((x,y),th,llen) in zip(xys, thetas, link_lengths):
l,r,t,b = 0, llen, .1, -.1
jtransform = rendering.Transform(rotation=th, translation=(x,y))
link = self.viewer.draw_polygon([(l,b), (l,t), (r,t), (r,b)])
for ((x, y), th, llen) in zip(xys, thetas, link_lengths):
l, r, t, b = 0, llen, 0.1, -0.1
jtransform = rendering.Transform(rotation=th, translation=(x, y))
link = self.viewer.draw_polygon([(l, b), (l, t), (r, t), (r, b)])
link.add_attr(jtransform)
link.set_color(0,.8, .8)
circ = self.viewer.draw_circle(.1)
circ.set_color(.8, .8, 0)
link.set_color(0, 0.8, 0.8)
circ = self.viewer.draw_circle(0.1)
circ.set_color(0.8, 0.8, 0)
circ.add_attr(jtransform)
return self.viewer.render(return_rgb_array = mode=='rgb_array')
return self.viewer.render(return_rgb_array=mode == "rgb_array")
def close(self):
if self.viewer:
self.viewer.close()
self.viewer = None
def wrap(x, m, M):
"""Wraps ``x`` so m <= x <= M; but unlike ``bound()`` which
truncates, ``wrap()`` wraps x around the coordinate system defined by m,M.\n
@@ -233,6 +250,7 @@ def wrap(x, m, M):
x = x + diff
return x
def bound(x, m, M=None):
"""Either have m as scalar, so bound(x,m,M) which returns m <= x <= M *OR*
have m as length 2 vector, bound(x,m, <IGNORED>) returns m[0] <= x <= m[1].
@@ -297,7 +315,6 @@ def rk4(derivs, y0, t, *args, **kwargs):
yout[0] = y0
for i in np.arange(len(t) - 1):
thist = t[i]

View File

@@ -58,21 +58,18 @@ class CartPoleEnv(gym.Env):
195.0 over 100 consecutive trials.
"""
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second': 50
}
metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 50}
def __init__(self):
self.gravity = 9.8
self.masscart = 1.0
self.masspole = 0.1
self.total_mass = (self.masspole + self.masscart)
self.total_mass = self.masspole + self.masscart
self.length = 0.5 # actually half the pole's length
self.polemass_length = (self.masspole * self.length)
self.polemass_length = self.masspole * self.length
self.force_mag = 10.0
self.tau = 0.02 # seconds between state updates
self.kinematics_integrator = 'euler'
self.kinematics_integrator = "euler"
# Angle at which to fail the episode
self.theta_threshold_radians = 12 * 2 * math.pi / 360
@@ -80,11 +77,15 @@ class CartPoleEnv(gym.Env):
# Angle limit set to 2 * theta_threshold_radians so failing observation
# is still within bounds.
high = np.array([self.x_threshold * 2,
np.finfo(np.float32).max,
self.theta_threshold_radians * 2,
np.finfo(np.float32).max],
dtype=np.float32)
high = np.array(
[
self.x_threshold * 2,
np.finfo(np.float32).max,
self.theta_threshold_radians * 2,
np.finfo(np.float32).max,
],
dtype=np.float32,
)
self.action_space = spaces.Discrete(2)
self.observation_space = spaces.Box(-high, high, dtype=np.float32)
@@ -110,11 +111,15 @@ class CartPoleEnv(gym.Env):
# For the interested reader:
# https://coneural.org/florian/papers/05_cart_pole.pdf
temp = (force + self.polemass_length * theta_dot ** 2 * sintheta) / self.total_mass
thetaacc = (self.gravity * sintheta - costheta * temp) / (self.length * (4.0 / 3.0 - self.masspole * costheta ** 2 / self.total_mass))
temp = (
force + self.polemass_length * theta_dot ** 2 * sintheta
) / self.total_mass
thetaacc = (self.gravity * sintheta - costheta * temp) / (
self.length * (4.0 / 3.0 - self.masspole * costheta ** 2 / self.total_mass)
)
xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass
if self.kinematics_integrator == 'euler':
if self.kinematics_integrator == "euler":
x = x + self.tau * x_dot
x_dot = x_dot + self.tau * xacc
theta = theta + self.tau * theta_dot
@@ -158,12 +163,12 @@ class CartPoleEnv(gym.Env):
self.steps_beyond_done = None
return np.array(self.state)
def render(self, mode='human'):
def render(self, mode="human"):
screen_width = 600
screen_height = 400
world_width = self.x_threshold * 2
scale = screen_width/world_width
scale = screen_width / world_width
carty = 100 # TOP OF CART
polewidth = 10.0
polelen = scale * (2 * self.length)
@@ -172,6 +177,7 @@ class CartPoleEnv(gym.Env):
if self.viewer is None:
from gym.envs.classic_control import rendering
self.viewer = rendering.Viewer(screen_width, screen_height)
l, r, t, b = -cartwidth / 2, cartwidth / 2, cartheight / 2, -cartheight / 2
axleoffset = cartheight / 4.0
@@ -179,17 +185,22 @@ class CartPoleEnv(gym.Env):
self.carttrans = rendering.Transform()
cart.add_attr(self.carttrans)
self.viewer.add_geom(cart)
l, r, t, b = -polewidth / 2, polewidth / 2, polelen - polewidth / 2, -polewidth / 2
l, r, t, b = (
-polewidth / 2,
polewidth / 2,
polelen - polewidth / 2,
-polewidth / 2,
)
pole = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)])
pole.set_color(.8, .6, .4)
pole.set_color(0.8, 0.6, 0.4)
self.poletrans = rendering.Transform(translation=(0, axleoffset))
pole.add_attr(self.poletrans)
pole.add_attr(self.carttrans)
self.viewer.add_geom(pole)
self.axle = rendering.make_circle(polewidth/2)
self.axle = rendering.make_circle(polewidth / 2)
self.axle.add_attr(self.poletrans)
self.axle.add_attr(self.carttrans)
self.axle.set_color(.5, .5, .8)
self.axle.set_color(0.5, 0.5, 0.8)
self.viewer.add_geom(self.axle)
self.track = rendering.Line((0, carty), (screen_width, carty))
self.track.set_color(0, 0, 0)
@@ -202,7 +213,12 @@ class CartPoleEnv(gym.Env):
# Edit the pole polygon vertex
pole = self._pole_geom
l, r, t, b = -polewidth / 2, polewidth / 2, polelen - polewidth / 2, -polewidth / 2
l, r, t, b = (
-polewidth / 2,
polewidth / 2,
polelen - polewidth / 2,
-polewidth / 2,
)
pole.v = [(l, b), (l, t), (r, t), (r, b)]
x = self.state
@@ -210,7 +226,7 @@ class CartPoleEnv(gym.Env):
self.carttrans.set_translation(cartx, carty)
self.poletrans.set_rotation(-x[2])
return self.viewer.render(return_rgb_array=mode == 'rgb_array')
return self.viewer.render(return_rgb_array=mode == "rgb_array")
def close(self):
if self.viewer:
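
A minimal sketch of the explicit Euler update used when kinematics_integrator == "euler", with the accelerations xacc and thetaacc taken as computed above (tau defaults to the 0.02 s set in __init__):

def euler_step(x, x_dot, theta, theta_dot, xacc, thetaacc, tau=0.02):
    # advance positions with the old velocities, then the velocities with the
    # accelerations from the cart-pole dynamics
    x = x + tau * x_dot
    x_dot = x_dot + tau * xacc
    theta = theta + tau * theta_dot
    theta_dot = theta_dot + tau * thetaacc
    return x, x_dot, theta, theta_dot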

View File

@@ -53,10 +53,8 @@ class Continuous_MountainCarEnv(gym.Env):
The car position is more than 0.45
Episode length is greater than 200
"""
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second': 30
}
metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 30}
def __init__(self, goal_velocity=0):
self.min_action = -1.0
@@ -64,7 +62,9 @@ class Continuous_MountainCarEnv(gym.Env):
self.min_position = -1.2
self.max_position = 0.6
self.max_speed = 0.07
self.goal_position = 0.45 # was 0.5 in gym, 0.45 in Arnaud de Broissia's version
self.goal_position = (
0.45 # was 0.5 in gym, 0.45 in Arnaud de Broissia's version
)
self.goal_velocity = goal_velocity
self.power = 0.0015
@@ -78,15 +78,10 @@ class Continuous_MountainCarEnv(gym.Env):
self.viewer = None
self.action_space = spaces.Box(
low=self.min_action,
high=self.max_action,
shape=(1,),
dtype=np.float32
low=self.min_action, high=self.max_action, shape=(1,), dtype=np.float32
)
self.observation_space = spaces.Box(
low=self.low_state,
high=self.high_state,
dtype=np.float32
low=self.low_state, high=self.high_state, dtype=np.float32
)
self.seed()
@@ -103,17 +98,20 @@ class Continuous_MountainCarEnv(gym.Env):
force = min(max(action[0], self.min_action), self.max_action)
velocity += force * self.power - 0.0025 * math.cos(3 * position)
if (velocity > self.max_speed): velocity = self.max_speed
if (velocity < -self.max_speed): velocity = -self.max_speed
if velocity > self.max_speed:
velocity = self.max_speed
if velocity < -self.max_speed:
velocity = -self.max_speed
position += velocity
if (position > self.max_position): position = self.max_position
if (position < self.min_position): position = self.min_position
if (position == self.min_position and velocity < 0): velocity = 0
if position > self.max_position:
position = self.max_position
if position < self.min_position:
position = self.min_position
if position == self.min_position and velocity < 0:
velocity = 0
# Convert a possible numpy bool to a Python bool.
done = bool(
position >= self.goal_position and velocity >= self.goal_velocity
)
done = bool(position >= self.goal_position and velocity >= self.goal_velocity)
reward = 0
if done:
@@ -128,23 +126,24 @@ class Continuous_MountainCarEnv(gym.Env):
return np.array(self.state)
def _height(self, xs):
return np.sin(3 * xs)*.45+.55
return np.sin(3 * xs) * 0.45 + 0.55
def render(self, mode='human'):
def render(self, mode="human"):
screen_width = 600
screen_height = 400
world_width = self.max_position - self.min_position
scale = screen_width/world_width
scale = screen_width / world_width
carwidth = 40
carheight = 20
if self.viewer is None:
from gym.envs.classic_control import rendering
self.viewer = rendering.Viewer(screen_width, screen_height)
xs = np.linspace(self.min_position, self.max_position, 100)
ys = self._height(xs)
xys = list(zip((xs-self.min_position)*scale, ys*scale))
xys = list(zip((xs - self.min_position) * scale, ys * scale))
self.track = rendering.make_polyline(xys)
self.track.set_linewidth(4)
@@ -159,7 +158,7 @@ class Continuous_MountainCarEnv(gym.Env):
car.add_attr(self.cartrans)
self.viewer.add_geom(car)
frontwheel = rendering.make_circle(carheight / 2.5)
frontwheel.set_color(.5, .5, .5)
frontwheel.set_color(0.5, 0.5, 0.5)
frontwheel.add_attr(
rendering.Transform(translation=(carwidth / 4, clearance))
)
@@ -170,26 +169,26 @@ class Continuous_MountainCarEnv(gym.Env):
rendering.Transform(translation=(-carwidth / 4, clearance))
)
backwheel.add_attr(self.cartrans)
backwheel.set_color(.5, .5, .5)
backwheel.set_color(0.5, 0.5, 0.5)
self.viewer.add_geom(backwheel)
flagx = (self.goal_position-self.min_position)*scale
flagy1 = self._height(self.goal_position)*scale
flagx = (self.goal_position - self.min_position) * scale
flagy1 = self._height(self.goal_position) * scale
flagy2 = flagy1 + 50
flagpole = rendering.Line((flagx, flagy1), (flagx, flagy2))
self.viewer.add_geom(flagpole)
flag = rendering.FilledPolygon(
[(flagx, flagy2), (flagx, flagy2 - 10), (flagx + 25, flagy2 - 5)]
)
flag.set_color(.8, .8, 0)
flag.set_color(0.8, 0.8, 0)
self.viewer.add_geom(flag)
pos = self.state[0]
self.cartrans.set_translation(
(pos-self.min_position) * scale, self._height(pos) * scale
(pos - self.min_position) * scale, self._height(pos) * scale
)
self.cartrans.set_rotation(math.cos(3 * pos))
return self.viewer.render(return_rgb_array=mode == 'rgb_array')
return self.viewer.render(return_rgb_array=mode == "rgb_array")
def close(self):
if self.viewer:

View File

@@ -52,10 +52,7 @@ class MountainCarEnv(gym.Env):
Episode length is greater than 200
"""
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second': 30
}
metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 30}
def __init__(self, goal_velocity=0):
self.min_position = -1.2
@@ -67,19 +64,13 @@ class MountainCarEnv(gym.Env):
self.force = 0.001
self.gravity = 0.0025
self.low = np.array(
[self.min_position, -self.max_speed], dtype=np.float32
)
self.high = np.array(
[self.max_position, self.max_speed], dtype=np.float32
)
self.low = np.array([self.min_position, -self.max_speed], dtype=np.float32)
self.high = np.array([self.max_position, self.max_speed], dtype=np.float32)
self.viewer = None
self.action_space = spaces.Discrete(3)
self.observation_space = spaces.Box(
self.low, self.high, dtype=np.float32
)
self.observation_space = spaces.Box(self.low, self.high, dtype=np.float32)
self.seed()
@@ -88,19 +79,20 @@ class MountainCarEnv(gym.Env):
return [seed]
def step(self, action):
assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action))
assert self.action_space.contains(action), "%r (%s) invalid" % (
action,
type(action),
)
position, velocity = self.state
velocity += (action - 1) * self.force + math.cos(3 * position) * (-self.gravity)
velocity = np.clip(velocity, -self.max_speed, self.max_speed)
position += velocity
position = np.clip(position, self.min_position, self.max_position)
if (position == self.min_position and velocity < 0):
if position == self.min_position and velocity < 0:
velocity = 0
done = bool(
position >= self.goal_position and velocity >= self.goal_velocity
)
done = bool(position >= self.goal_position and velocity >= self.goal_velocity)
reward = -1.0
self.state = (position, velocity)
@@ -111,9 +103,9 @@ class MountainCarEnv(gym.Env):
return np.array(self.state)
def _height(self, xs):
return np.sin(3 * xs) * .45 + .55
return np.sin(3 * xs) * 0.45 + 0.55
def render(self, mode='human'):
def render(self, mode="human"):
screen_width = 600
screen_height = 400
@@ -124,6 +116,7 @@ class MountainCarEnv(gym.Env):
if self.viewer is None:
from gym.envs.classic_control import rendering
self.viewer = rendering.Viewer(screen_width, screen_height)
xs = np.linspace(self.min_position, self.max_position, 100)
ys = self._height(xs)
@@ -142,7 +135,7 @@ class MountainCarEnv(gym.Env):
car.add_attr(self.cartrans)
self.viewer.add_geom(car)
frontwheel = rendering.make_circle(carheight / 2.5)
frontwheel.set_color(.5, .5, .5)
frontwheel.set_color(0.5, 0.5, 0.5)
frontwheel.add_attr(
rendering.Transform(translation=(carwidth / 4, clearance))
)
@@ -153,9 +146,9 @@ class MountainCarEnv(gym.Env):
rendering.Transform(translation=(-carwidth / 4, clearance))
)
backwheel.add_attr(self.cartrans)
backwheel.set_color(.5, .5, .5)
backwheel.set_color(0.5, 0.5, 0.5)
self.viewer.add_geom(backwheel)
flagx = (self.goal_position-self.min_position) * scale
flagx = (self.goal_position - self.min_position) * scale
flagy1 = self._height(self.goal_position) * scale
flagy2 = flagy1 + 50
flagpole = rendering.Line((flagx, flagy1), (flagx, flagy2))
@@ -163,16 +156,16 @@ class MountainCarEnv(gym.Env):
flag = rendering.FilledPolygon(
[(flagx, flagy2), (flagx, flagy2 - 10), (flagx + 25, flagy2 - 5)]
)
flag.set_color(.8, .8, 0)
flag.set_color(0.8, 0.8, 0)
self.viewer.add_geom(flag)
pos = self.state[0]
self.cartrans.set_translation(
(pos-self.min_position) * scale, self._height(pos) * scale
(pos - self.min_position) * scale, self._height(pos) * scale
)
self.cartrans.set_rotation(math.cos(3 * pos))
return self.viewer.render(return_rgb_array=mode == 'rgb_array')
return self.viewer.render(return_rgb_array=mode == "rgb_array")
def get_keys_to_action(self):
# Control with left and right arrow keys.
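
A minimal sketch of the hill physics in step(): the throttle nudges the velocity by ±force while gravity pulls along the slope of the cosine-shaped track (constants copied from __init__; the function name is illustrative):

import math
import numpy as np

def mountain_car_step(position, velocity, action, force=0.001, gravity=0.0025,
                      max_speed=0.07, min_position=-1.2, max_position=0.6):
    # action in {0, 1, 2} means push left / coast / push right
    velocity += (action - 1) * force + math.cos(3 * position) * (-gravity)
    velocity = np.clip(velocity, -max_speed, max_speed)
    position = np.clip(position + velocity, min_position, max_position)
    if position == min_position and velocity < 0:
        velocity = 0  # the car stops dead against the left wall
    return position, velocity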

View File

@@ -6,31 +6,22 @@ from os import path
class PendulumEnv(gym.Env):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second': 30
}
metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 30}
def __init__(self, g=10.0):
self.max_speed = 8
self.max_torque = 2.
self.dt = .05
self.max_torque = 2.0
self.dt = 0.05
self.g = g
self.m = 1.
self.l = 1.
self.m = 1.0
self.l = 1.0
self.viewer = None
high = np.array([1., 1., self.max_speed], dtype=np.float32)
high = np.array([1.0, 1.0, self.max_speed], dtype=np.float32)
self.action_space = spaces.Box(
low=-self.max_torque,
high=self.max_torque, shape=(1,),
dtype=np.float32
)
self.observation_space = spaces.Box(
low=-high,
high=high,
dtype=np.float32
low=-self.max_torque, high=self.max_torque, shape=(1,), dtype=np.float32
)
self.observation_space = spaces.Box(low=-high, high=high, dtype=np.float32)
self.seed()
@@ -48,9 +39,12 @@ class PendulumEnv(gym.Env):
u = np.clip(u, -self.max_torque, self.max_torque)[0]
self.last_u = u # for rendering
costs = angle_normalize(th) ** 2 + .1 * thdot ** 2 + .001 * (u ** 2)
costs = angle_normalize(th) ** 2 + 0.1 * thdot ** 2 + 0.001 * (u ** 2)
newthdot = thdot + (-3 * g / (2 * l) * np.sin(th + np.pi) + 3. / (m * l ** 2) * u) * dt
newthdot = (
thdot
+ (-3 * g / (2 * l) * np.sin(th + np.pi) + 3.0 / (m * l ** 2) * u) * dt
)
newth = th + newthdot * dt
newthdot = np.clip(newthdot, -self.max_speed, self.max_speed)
@@ -67,21 +61,22 @@ class PendulumEnv(gym.Env):
theta, thetadot = self.state
return np.array([np.cos(theta), np.sin(theta), thetadot])
def render(self, mode='human'):
def render(self, mode="human"):
if self.viewer is None:
from gym.envs.classic_control import rendering
self.viewer = rendering.Viewer(500, 500)
self.viewer.set_bounds(-2.2, 2.2, -2.2, 2.2)
rod = rendering.make_capsule(1, .2)
rod.set_color(.8, .3, .3)
rod = rendering.make_capsule(1, 0.2)
rod.set_color(0.8, 0.3, 0.3)
self.pole_transform = rendering.Transform()
rod.add_attr(self.pole_transform)
self.viewer.add_geom(rod)
axle = rendering.make_circle(.05)
axle = rendering.make_circle(0.05)
axle.set_color(0, 0, 0)
self.viewer.add_geom(axle)
fname = path.join(path.dirname(__file__), "assets/clockwise.png")
self.img = rendering.Image(fname, 1., 1.)
self.img = rendering.Image(fname, 1.0, 1.0)
self.imgtrans = rendering.Transform()
self.img.add_attr(self.imgtrans)
@@ -90,7 +85,7 @@ class PendulumEnv(gym.Env):
if self.last_u:
self.imgtrans.scale = (-self.last_u / 2, np.abs(self.last_u) / 2)
return self.viewer.render(return_rgb_array=mode == 'rgb_array')
return self.viewer.render(return_rgb_array=mode == "rgb_array")
def close(self):
if self.viewer:
@@ -99,4 +94,4 @@ class PendulumEnv(gym.Env):
def angle_normalize(x):
return (((x+np.pi) % (2*np.pi)) - np.pi)
return ((x + np.pi) % (2 * np.pi)) - np.pi
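
A minimal sketch of the torque-driven update in step(): the cost penalises angle, speed and applied torque, and the new angular velocity comes from the pendulum dynamics before being clipped to max_speed (constants match __init__; the function name is illustrative):

import numpy as np

def angle_normalize(x):
    # map any angle into [-pi, pi), as in the helper above
    return ((x + np.pi) % (2 * np.pi)) - np.pi

def pendulum_step(th, thdot, u, g=10.0, m=1.0, l=1.0, dt=0.05, max_speed=8):
    cost = angle_normalize(th) ** 2 + 0.1 * thdot ** 2 + 0.001 * (u ** 2)
    newthdot = thdot + (-3 * g / (2 * l) * np.sin(th + np.pi) + 3.0 / (m * l ** 2) * u) * dt
    newth = th + newthdot * dt
    newthdot = np.clip(newthdot, -max_speed, max_speed)
    return newth, newthdot, -cost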

View File

@@ -5,8 +5,8 @@ import os
import sys
if "Apple" in sys.version:
if 'DYLD_FALLBACK_LIBRARY_PATH' in os.environ:
os.environ['DYLD_FALLBACK_LIBRARY_PATH'] += ':/usr/lib'
if "DYLD_FALLBACK_LIBRARY_PATH" in os.environ:
os.environ["DYLD_FALLBACK_LIBRARY_PATH"] += ":/usr/lib"
# (JDS 2016/04/15): avoid bug on Anaconda 2.3.0 / Yosemite
from gym import error
@@ -14,28 +14,33 @@ from gym import error
try:
import pyglet
except ImportError as e:
raise ImportError('''
raise ImportError(
"""
Cannot import pyglet.
HINT: you can install pyglet directly via 'pip install pyglet'.
But if you really just want to install all Gym dependencies and not have to think about it,
'pip install -e .[all]' or 'pip install gym[all]' will do it.
''')
"""
)
try:
from pyglet.gl import *
except ImportError as e:
raise ImportError('''
raise ImportError(
"""
Error occurred while running `from pyglet.gl import *`
HINT: make sure you have OpenGL installed. On Ubuntu, you can run 'apt-get install python-opengl'.
If you're running on a server, you may need a virtual frame buffer; something like this should work:
'xvfb-run -s \"-screen 0 1400x900x24\" python <your_script.py>'
''')
"""
)
import math
import numpy as np
RAD2DEG = 57.29577951308232
def get_display(spec):
"""Convert a display specification (such as :0) into an actual Display
object.
@@ -49,17 +54,30 @@ def get_display(spec):
elif isinstance(spec, str):
return pyglet.canvas.Display(spec)
else:
raise error.Error('Invalid display specification: {}. (Must be a string like :0 or None.)'.format(spec))
raise error.Error(
"Invalid display specification: {}. (Must be a string like :0 or None.)".format(
spec
)
)
def get_window(width, height, display, **kwargs):
"""
Will create a pyglet window from the display specification provided.
"""
screen = display.get_screens() #available screens
config = screen[0].get_best_config() #selecting the first screen
context = config.create_context(None) #create GL context
screen = display.get_screens() # available screens
config = screen[0].get_best_config() # selecting the first screen
context = config.create_context(None) # create GL context
return pyglet.window.Window(
width=width,
height=height,
display=display,
config=config,
context=context,
**kwargs
)
return pyglet.window.Window(width=width, height=height, display=display, config=config, context=context, **kwargs)
class Viewer(object):
def __init__(self, width, height, display=None):
@@ -88,11 +106,11 @@ class Viewer(object):
def set_bounds(self, left, right, bottom, top):
assert right > left and top > bottom
scalex = self.width/(right-left)
scaley = self.height/(top-bottom)
scalex = self.width / (right - left)
scaley = self.height / (top - bottom)
self.transform = Transform(
translation=(-left*scalex, -bottom*scaley),
scale=(scalex, scaley))
translation=(-left * scalex, -bottom * scaley), scale=(scalex, scaley)
)
def add_geom(self, geom):
self.geoms.append(geom)
@@ -101,7 +119,7 @@ class Viewer(object):
self.onetime_geoms.append(geom)
def render(self, return_rgb_array=False):
glClearColor(1,1,1,1)
glClearColor(1, 1, 1, 1)
self.window.clear()
self.window.switch_to()
self.window.dispatch_events()
@@ -123,7 +141,7 @@ class Viewer(object):
# the boundary.) So we use the buffer height/width rather
# than the requested one.
arr = arr.reshape(buffer.height, buffer.width, 4)
arr = arr[::-1,:,0:3]
arr = arr[::-1, :, 0:3]
self.window.flip()
self.onetime_geoms = []
return arr if return_rgb_array else self.isopen
@@ -155,140 +173,181 @@ class Viewer(object):
def get_array(self):
self.window.flip()
image_data = pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
image_data = (
pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
)
self.window.flip()
arr = np.fromstring(image_data.get_data(), dtype=np.uint8, sep='')
arr = np.fromstring(image_data.get_data(), dtype=np.uint8, sep="")
arr = arr.reshape(self.height, self.width, 4)
return arr[::-1,:,0:3]
return arr[::-1, :, 0:3]
def __del__(self):
self.close()
def _add_attrs(geom, attrs):
if "color" in attrs:
geom.set_color(*attrs["color"])
if "linewidth" in attrs:
geom.set_linewidth(attrs["linewidth"])
class Geom(object):
def __init__(self):
self._color=Color((0, 0, 0, 1.0))
self._color = Color((0, 0, 0, 1.0))
self.attrs = [self._color]
def render(self):
for attr in reversed(self.attrs):
attr.enable()
self.render1()
for attr in self.attrs:
attr.disable()
def render1(self):
raise NotImplementedError
def add_attr(self, attr):
self.attrs.append(attr)
def set_color(self, r, g, b):
self._color.vec4 = (r, g, b, 1)
class Attr(object):
def enable(self):
raise NotImplementedError
def disable(self):
pass
class Transform(Attr):
def __init__(self, translation=(0.0, 0.0), rotation=0.0, scale=(1,1)):
def __init__(self, translation=(0.0, 0.0), rotation=0.0, scale=(1, 1)):
self.set_translation(*translation)
self.set_rotation(rotation)
self.set_scale(*scale)
def enable(self):
glPushMatrix()
        glTranslatef(self.translation[0], self.translation[1], 0) # translate to GL loc point
glTranslatef(
self.translation[0], self.translation[1], 0
        )  # translate to GL loc point
glRotatef(RAD2DEG * self.rotation, 0, 0, 1.0)
glScalef(self.scale[0], self.scale[1], 1)
def disable(self):
glPopMatrix()
def set_translation(self, newx, newy):
self.translation = (float(newx), float(newy))
def set_rotation(self, new):
self.rotation = float(new)
def set_scale(self, newx, newy):
self.scale = (float(newx), float(newy))
class Color(Attr):
def __init__(self, vec4):
self.vec4 = vec4
def enable(self):
glColor4f(*self.vec4)
class LineStyle(Attr):
def __init__(self, style):
self.style = style
def enable(self):
glEnable(GL_LINE_STIPPLE)
glLineStipple(1, self.style)
def disable(self):
glDisable(GL_LINE_STIPPLE)
class LineWidth(Attr):
def __init__(self, stroke):
self.stroke = stroke
def enable(self):
glLineWidth(self.stroke)
class Point(Geom):
def __init__(self):
Geom.__init__(self)
def render1(self):
glBegin(GL_POINTS) # draw point
glBegin(GL_POINTS) # draw point
glVertex3f(0.0, 0.0, 0.0)
glEnd()
class FilledPolygon(Geom):
def __init__(self, v):
Geom.__init__(self)
self.v = v
def render1(self):
if len(self.v) == 4 : glBegin(GL_QUADS)
elif len(self.v) > 4 : glBegin(GL_POLYGON)
else: glBegin(GL_TRIANGLES)
if len(self.v) == 4:
glBegin(GL_QUADS)
elif len(self.v) > 4:
glBegin(GL_POLYGON)
else:
glBegin(GL_TRIANGLES)
for p in self.v:
glVertex3f(p[0], p[1],0) # draw each vertex
glVertex3f(p[0], p[1], 0) # draw each vertex
glEnd()
def make_circle(radius=10, res=30, filled=True):
points = []
for i in range(res):
ang = 2*math.pi*i / res
points.append((math.cos(ang)*radius, math.sin(ang)*radius))
ang = 2 * math.pi * i / res
points.append((math.cos(ang) * radius, math.sin(ang) * radius))
if filled:
return FilledPolygon(points)
else:
return PolyLine(points, True)
def make_polygon(v, filled=True):
if filled: return FilledPolygon(v)
else: return PolyLine(v, True)
if filled:
return FilledPolygon(v)
else:
return PolyLine(v, True)
def make_polyline(v):
return PolyLine(v, False)
def make_capsule(length, width):
l, r, t, b = 0, length, width/2, -width/2
box = make_polygon([(l,b), (l,t), (r,t), (r,b)])
circ0 = make_circle(width/2)
circ1 = make_circle(width/2)
l, r, t, b = 0, length, width / 2, -width / 2
box = make_polygon([(l, b), (l, t), (r, t), (r, b)])
circ0 = make_circle(width / 2)
circ1 = make_circle(width / 2)
circ1.add_attr(Transform(translation=(length, 0)))
geom = Compound([box, circ0, circ1])
return geom
class Compound(Geom):
def __init__(self, gs):
Geom.__init__(self)
self.gs = gs
for g in self.gs:
g.attrs = [a for a in g.attrs if not isinstance(a, Color)]
def render1(self):
for g in self.gs:
g.render()
class PolyLine(Geom):
def __init__(self, v, close):
Geom.__init__(self)
@@ -296,14 +355,17 @@ class PolyLine(Geom):
self.close = close
self.linewidth = LineWidth(1)
self.add_attr(self.linewidth)
def render1(self):
glBegin(GL_LINE_LOOP if self.close else GL_LINE_STRIP)
for p in self.v:
glVertex3f(p[0], p[1],0) # draw each vertex
glVertex3f(p[0], p[1], 0) # draw each vertex
glEnd()
def set_linewidth(self, x):
self.linewidth.stroke = x
class Line(Geom):
def __init__(self, start=(0.0, 0.0), end=(0.0, 0.0)):
Geom.__init__(self)
@@ -318,6 +380,7 @@ class Line(Geom):
glVertex2f(*self.end)
glEnd()
class Image(Geom):
def __init__(self, fname, width, height):
Geom.__init__(self)
@@ -327,17 +390,23 @@ class Image(Geom):
img = pyglet.image.load(fname)
self.img = img
self.flip = False
def render1(self):
self.img.blit(-self.width/2, -self.height/2, width=self.width, height=self.height)
self.img.blit(
-self.width / 2, -self.height / 2, width=self.width, height=self.height
)
# ================================================================
class SimpleImageViewer(object):
def __init__(self, display=None, maxwidth=500):
self.window = None
self.isopen = False
self.display = get_display(display)
self.maxwidth = maxwidth
def imshow(self, arr):
if self.window is None:
height, width, _channels = arr.shape
@@ -345,7 +414,13 @@ class SimpleImageViewer(object):
scale = self.maxwidth / width
width = int(scale * width)
height = int(scale * height)
self.window = get_window(width=width, height=height, display=self.display, vsync=False, resizable=True)
self.window = get_window(
width=width,
height=height,
display=self.display,
vsync=False,
resizable=True,
)
self.width = width
self.height = height
self.isopen = True
@@ -360,18 +435,19 @@ class SimpleImageViewer(object):
self.isopen = False
assert len(arr.shape) == 3, "You passed in an image with the wrong number of dimensions"
image = pyglet.image.ImageData(arr.shape[1], arr.shape[0],
'RGB', arr.tobytes(), pitch=arr.shape[1]*-3)
gl.glTexParameteri(gl.GL_TEXTURE_2D,
gl.GL_TEXTURE_MAG_FILTER, gl.GL_NEAREST)
image = pyglet.image.ImageData(
arr.shape[1], arr.shape[0], "RGB", arr.tobytes(), pitch=arr.shape[1] * -3
)
gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MAG_FILTER, gl.GL_NEAREST)
texture = image.get_texture()
texture.width = self.width
texture.height = self.height
self.window.clear()
self.window.switch_to()
self.window.dispatch_events()
texture.blit(0, 0) # draw
texture.blit(0, 0) # draw
self.window.flip()
def close(self):
if self.isopen and sys.meta_path:
# ^^^ check sys.meta_path to avoid 'ImportError: sys.meta_path is None, Python is likely shutting down'
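A note on the `sys.meta_path` guard in `SimpleImageViewer.close()` above: when `close()` is reached from `__del__` during interpreter shutdown, the import machinery is already torn down and `sys.meta_path` is `None`, so touching pyglet at that point raises the quoted ImportError. A minimal sketch of the same guard pattern with a placeholder resource instead of a pyglet window (the class name is illustrative, not from this commit):

import sys


class GuardedViewer:
    def __init__(self):
        self.isopen = True

    def close(self):
        # Skip cleanup if Python is shutting down; sys.meta_path is cleared
        # late in interpreter teardown, which is what this check detects.
        if self.isopen and sys.meta_path:
            # ... release the window / GL context here ...
            self.isopen = False

    def __del__(self):
        # __del__ can run during shutdown, so close() must tolerate that case.
        self.close()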
View File
@@ -1,4 +1,5 @@
from gym.envs.mujoco.mujoco_env import MujocoEnv
# ^^^^^ so that the user gets the correct error
# message if mujoco is not installed correctly
from gym.envs.mujoco.ant import AntEnv
View File
@@ -2,42 +2,53 @@ import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
mujoco_env.MujocoEnv.__init__(self, 'ant.xml', 5)
mujoco_env.MujocoEnv.__init__(self, "ant.xml", 5)
utils.EzPickle.__init__(self)
def step(self, a):
xposbefore = self.get_body_com("torso")[0]
self.do_simulation(a, self.frame_skip)
xposafter = self.get_body_com("torso")[0]
forward_reward = (xposafter - xposbefore)/self.dt
ctrl_cost = .5 * np.square(a).sum()
contact_cost = 0.5 * 1e-3 * np.sum(
np.square(np.clip(self.sim.data.cfrc_ext, -1, 1)))
forward_reward = (xposafter - xposbefore) / self.dt
ctrl_cost = 0.5 * np.square(a).sum()
contact_cost = (
0.5 * 1e-3 * np.sum(np.square(np.clip(self.sim.data.cfrc_ext, -1, 1)))
)
survive_reward = 1.0
reward = forward_reward - ctrl_cost - contact_cost + survive_reward
state = self.state_vector()
notdone = np.isfinite(state).all() \
and state[2] >= 0.2 and state[2] <= 1.0
notdone = np.isfinite(state).all() and state[2] >= 0.2 and state[2] <= 1.0
done = not notdone
ob = self._get_obs()
return ob, reward, done, dict(
reward_forward=forward_reward,
reward_ctrl=-ctrl_cost,
reward_contact=-contact_cost,
reward_survive=survive_reward)
return (
ob,
reward,
done,
dict(
reward_forward=forward_reward,
reward_ctrl=-ctrl_cost,
reward_contact=-contact_cost,
reward_survive=survive_reward,
),
)
def _get_obs(self):
return np.concatenate([
self.sim.data.qpos.flat[2:],
self.sim.data.qvel.flat,
np.clip(self.sim.data.cfrc_ext, -1, 1).flat,
])
return np.concatenate(
[
self.sim.data.qpos.flat[2:],
self.sim.data.qvel.flat,
np.clip(self.sim.data.cfrc_ext, -1, 1).flat,
]
)
def reset_model(self):
qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq, low=-.1, high=.1)
qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1
qpos = self.init_qpos + self.np_random.uniform(
size=self.model.nq, low=-0.1, high=0.1
)
qvel = self.init_qvel + self.np_random.randn(self.model.nv) * 0.1
self.set_state(qpos, qvel)
return self._get_obs()
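The blackened `AntEnv.step()` above still returns the same four reward terms in its info dict. A quick usage sketch, assuming mujoco_py is installed and the environment is registered under its usual id `Ant-v2` (the id is not part of this file):

import gym

env = gym.make("Ant-v2")
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
# reward == forward_reward - ctrl_cost - contact_cost + survive_reward,
# and the individual terms are exposed in info:
print(info["reward_forward"], info["reward_ctrl"],
      info["reward_contact"], info["reward_survive"])
env.close()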
View File
@@ -4,21 +4,23 @@ from gym.envs.mujoco import mujoco_env
DEFAULT_CAMERA_CONFIG = {
'distance': 4.0,
"distance": 4.0,
}
class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self,
xml_file='ant.xml',
ctrl_cost_weight=0.5,
contact_cost_weight=5e-4,
healthy_reward=1.0,
terminate_when_unhealthy=True,
healthy_z_range=(0.2, 1.0),
contact_force_range=(-1.0, 1.0),
reset_noise_scale=0.1,
exclude_current_positions_from_observation=True):
def __init__(
self,
xml_file="ant.xml",
ctrl_cost_weight=0.5,
contact_cost_weight=5e-4,
healthy_reward=1.0,
terminate_when_unhealthy=True,
healthy_z_range=(0.2, 1.0),
contact_force_range=(-1.0, 1.0),
reset_noise_scale=0.1,
exclude_current_positions_from_observation=True,
):
utils.EzPickle.__init__(**locals())
self._ctrl_cost_weight = ctrl_cost_weight
@@ -33,16 +35,17 @@ class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
self._reset_noise_scale = reset_noise_scale
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation)
exclude_current_positions_from_observation
)
mujoco_env.MujocoEnv.__init__(self, xml_file, 5)
@property
def healthy_reward(self):
return float(
self.is_healthy
or self._terminate_when_unhealthy
) * self._healthy_reward
return (
float(self.is_healthy or self._terminate_when_unhealthy)
* self._healthy_reward
)
def control_cost(self, action):
control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
@@ -58,21 +61,20 @@ class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
@property
def contact_cost(self):
contact_cost = self._contact_cost_weight * np.sum(
np.square(self.contact_forces))
np.square(self.contact_forces)
)
return contact_cost
@property
def is_healthy(self):
state = self.state_vector()
min_z, max_z = self._healthy_z_range
is_healthy = (np.isfinite(state).all() and min_z <= state[2] <= max_z)
is_healthy = np.isfinite(state).all() and min_z <= state[2] <= max_z
return is_healthy
@property
def done(self):
done = (not self.is_healthy
if self._terminate_when_unhealthy
else False)
done = not self.is_healthy if self._terminate_when_unhealthy else False
return done
def step(self, action):
@@ -96,18 +98,16 @@ class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
done = self.done
observation = self._get_obs()
info = {
'reward_forward': forward_reward,
'reward_ctrl': -ctrl_cost,
'reward_contact': -contact_cost,
'reward_survive': healthy_reward,
'x_position': xy_position_after[0],
'y_position': xy_position_after[1],
'distance_from_origin': np.linalg.norm(xy_position_after, ord=2),
'x_velocity': x_velocity,
'y_velocity': y_velocity,
'forward_reward': forward_reward,
"reward_forward": forward_reward,
"reward_ctrl": -ctrl_cost,
"reward_contact": -contact_cost,
"reward_survive": healthy_reward,
"x_position": xy_position_after[0],
"y_position": xy_position_after[1],
"distance_from_origin": np.linalg.norm(xy_position_after, ord=2),
"x_velocity": x_velocity,
"y_velocity": y_velocity,
"forward_reward": forward_reward,
}
return observation, reward, done, info
@@ -129,9 +129,11 @@ class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq)
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = self.init_qvel + self._reset_noise_scale * self.np_random.randn(
self.model.nv)
self.model.nv
)
self.set_state(qpos, qvel)
observation = self._get_obs()
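The `utils.EzPickle.__init__(**locals())` call in the new-style constructor above is the pattern all of the reformatted v3 envs use: as I read it, EzPickle records the constructor arguments so that pickling the env simply re-runs `__init__` instead of trying to serialize MuJoCo state. A minimal sketch of that pattern with a made-up class (the description of EzPickle internals is an assumption, not something this commit changes):

import pickle

from gym import utils


class Configurable(utils.EzPickle):
    def __init__(self, xml_file="model.xml", ctrl_cost_weight=0.5):
        # locals() here is {"self": ..., "xml_file": ..., "ctrl_cost_weight": ...};
        # EzPickle stashes the keyword arguments and replays them on unpickling.
        utils.EzPickle.__init__(**locals())
        self.xml_file = xml_file
        self.ctrl_cost_weight = ctrl_cost_weight


env = Configurable(ctrl_cost_weight=1.0)
clone = pickle.loads(pickle.dumps(env))
print(clone.ctrl_cost_weight)  # 1.0, rebuilt by calling __init__ again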
View File
@@ -2,9 +2,10 @@ import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
mujoco_env.MujocoEnv.__init__(self, 'half_cheetah.xml', 5)
mujoco_env.MujocoEnv.__init__(self, "half_cheetah.xml", 5)
utils.EzPickle.__init__(self)
def step(self, action):
@@ -12,21 +13,25 @@ class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle):
self.do_simulation(action, self.frame_skip)
xposafter = self.sim.data.qpos[0]
ob = self._get_obs()
reward_ctrl = - 0.1 * np.square(action).sum()
reward_run = (xposafter - xposbefore)/self.dt
reward_ctrl = -0.1 * np.square(action).sum()
reward_run = (xposafter - xposbefore) / self.dt
reward = reward_ctrl + reward_run
done = False
return ob, reward, done, dict(reward_run=reward_run, reward_ctrl=reward_ctrl)
def _get_obs(self):
return np.concatenate([
self.sim.data.qpos.flat[1:],
self.sim.data.qvel.flat,
])
return np.concatenate(
[
self.sim.data.qpos.flat[1:],
self.sim.data.qvel.flat,
]
)
def reset_model(self):
qpos = self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq)
qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1
qpos = self.init_qpos + self.np_random.uniform(
low=-0.1, high=0.1, size=self.model.nq
)
qvel = self.init_qvel + self.np_random.randn(self.model.nv) * 0.1
self.set_state(qpos, qvel)
return self._get_obs()
View File
@@ -4,17 +4,19 @@ from gym.envs.mujoco import mujoco_env
DEFAULT_CAMERA_CONFIG = {
'distance': 4.0,
"distance": 4.0,
}
class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self,
xml_file='half_cheetah.xml',
forward_reward_weight=1.0,
ctrl_cost_weight=0.1,
reset_noise_scale=0.1,
exclude_current_positions_from_observation=True):
def __init__(
self,
xml_file="half_cheetah.xml",
forward_reward_weight=1.0,
ctrl_cost_weight=0.1,
reset_noise_scale=0.1,
exclude_current_positions_from_observation=True,
):
utils.EzPickle.__init__(**locals())
self._forward_reward_weight = forward_reward_weight
@@ -24,7 +26,8 @@ class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle):
self._reset_noise_scale = reset_noise_scale
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation)
exclude_current_positions_from_observation
)
mujoco_env.MujocoEnv.__init__(self, xml_file, 5)
@@ -36,8 +39,7 @@ class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle):
x_position_before = self.sim.data.qpos[0]
self.do_simulation(action, self.frame_skip)
x_position_after = self.sim.data.qpos[0]
x_velocity = ((x_position_after - x_position_before)
/ self.dt)
x_velocity = (x_position_after - x_position_before) / self.dt
ctrl_cost = self.control_cost(action)
@@ -47,11 +49,10 @@ class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle):
reward = forward_reward - ctrl_cost
done = False
info = {
'x_position': x_position_after,
'x_velocity': x_velocity,
'reward_run': forward_reward,
'reward_ctrl': -ctrl_cost
"x_position": x_position_after,
"x_velocity": x_velocity,
"reward_run": forward_reward,
"reward_ctrl": -ctrl_cost,
}
return observation, reward, done, info
@@ -71,9 +72,11 @@ class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle):
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq)
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = self.init_qvel + self._reset_noise_scale * self.np_random.randn(
self.model.nv)
self.model.nv
)
self.set_state(qpos, qvel)
View File
@@ -2,9 +2,10 @@ import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
mujoco_env.MujocoEnv.__init__(self, 'hopper.xml', 4)
mujoco_env.MujocoEnv.__init__(self, "hopper.xml", 4)
utils.EzPickle.__init__(self)
def step(self, a):
@@ -16,20 +17,27 @@ class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
reward += alive_bonus
reward -= 1e-3 * np.square(a).sum()
s = self.state_vector()
done = not (np.isfinite(s).all() and (np.abs(s[2:]) < 100).all() and
(height > .7) and (abs(ang) < .2))
done = not (
np.isfinite(s).all()
and (np.abs(s[2:]) < 100).all()
and (height > 0.7)
and (abs(ang) < 0.2)
)
ob = self._get_obs()
return ob, reward, done, {}
def _get_obs(self):
return np.concatenate([
self.sim.data.qpos.flat[1:],
np.clip(self.sim.data.qvel.flat, -10, 10)
])
return np.concatenate(
[self.sim.data.qpos.flat[1:], np.clip(self.sim.data.qvel.flat, -10, 10)]
)
def reset_model(self):
qpos = self.init_qpos + self.np_random.uniform(low=-.005, high=.005, size=self.model.nq)
qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
qpos = self.init_qpos + self.np_random.uniform(
low=-0.005, high=0.005, size=self.model.nq
)
qvel = self.init_qvel + self.np_random.uniform(
low=-0.005, high=0.005, size=self.model.nv
)
self.set_state(qpos, qvel)
return self._get_obs()
View File
@@ -4,25 +4,27 @@ from gym import utils
DEFAULT_CAMERA_CONFIG = {
'trackbodyid': 2,
'distance': 3.0,
'lookat': np.array((0.0, 0.0, 1.15)),
'elevation': -20.0,
"trackbodyid": 2,
"distance": 3.0,
"lookat": np.array((0.0, 0.0, 1.15)),
"elevation": -20.0,
}
class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self,
xml_file='hopper.xml',
forward_reward_weight=1.0,
ctrl_cost_weight=1e-3,
healthy_reward=1.0,
terminate_when_unhealthy=True,
healthy_state_range=(-100.0, 100.0),
healthy_z_range=(0.7, float('inf')),
healthy_angle_range=(-0.2, 0.2),
reset_noise_scale=5e-3,
exclude_current_positions_from_observation=True):
def __init__(
self,
xml_file="hopper.xml",
forward_reward_weight=1.0,
ctrl_cost_weight=1e-3,
healthy_reward=1.0,
terminate_when_unhealthy=True,
healthy_state_range=(-100.0, 100.0),
healthy_z_range=(0.7, float("inf")),
healthy_angle_range=(-0.2, 0.2),
reset_noise_scale=5e-3,
exclude_current_positions_from_observation=True,
):
utils.EzPickle.__init__(**locals())
self._forward_reward_weight = forward_reward_weight
@@ -39,16 +41,17 @@ class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
self._reset_noise_scale = reset_noise_scale
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation)
exclude_current_positions_from_observation
)
mujoco_env.MujocoEnv.__init__(self, xml_file, 4)
@property
def healthy_reward(self):
return float(
self.is_healthy
or self._terminate_when_unhealthy
) * self._healthy_reward
return (
float(self.is_healthy or self._terminate_when_unhealthy)
* self._healthy_reward
)
def control_cost(self, action):
control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
@@ -63,8 +66,7 @@ class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
min_z, max_z = self._healthy_z_range
min_angle, max_angle = self._healthy_angle_range
healthy_state = np.all(
np.logical_and(min_state < state, state < max_state))
healthy_state = np.all(np.logical_and(min_state < state, state < max_state))
healthy_z = min_z < z < max_z
healthy_angle = min_angle < angle < max_angle
@@ -74,15 +76,12 @@ class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
@property
def done(self):
done = (not self.is_healthy
if self._terminate_when_unhealthy
else False)
done = not self.is_healthy if self._terminate_when_unhealthy else False
return done
def _get_obs(self):
position = self.sim.data.qpos.flat.copy()
velocity = np.clip(
self.sim.data.qvel.flat.copy(), -10, 10)
velocity = np.clip(self.sim.data.qvel.flat.copy(), -10, 10)
if self._exclude_current_positions_from_observation:
position = position[1:]
@@ -94,8 +93,7 @@ class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
x_position_before = self.sim.data.qpos[0]
self.do_simulation(action, self.frame_skip)
x_position_after = self.sim.data.qpos[0]
x_velocity = ((x_position_after - x_position_before)
/ self.dt)
x_velocity = (x_position_after - x_position_before) / self.dt
ctrl_cost = self.control_cost(action)
@@ -109,8 +107,8 @@ class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
reward = rewards - costs
done = self.done
info = {
'x_position': x_position_after,
'x_velocity': x_velocity,
"x_position": x_position_after,
"x_velocity": x_velocity,
}
return observation, reward, done, info
@@ -120,9 +118,11 @@ class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq)
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = self.init_qvel + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nv)
low=noise_low, high=noise_high, size=self.model.nv
)
self.set_state(qpos, qvel)
View File
@@ -2,24 +2,30 @@ import numpy as np
from gym.envs.mujoco import mujoco_env
from gym import utils
def mass_center(model, sim):
mass = np.expand_dims(model.body_mass, 1)
xpos = sim.data.xipos
return (np.sum(mass * xpos, 0) / np.sum(mass))[0]
class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
mujoco_env.MujocoEnv.__init__(self, 'humanoid.xml', 5)
mujoco_env.MujocoEnv.__init__(self, "humanoid.xml", 5)
utils.EzPickle.__init__(self)
def _get_obs(self):
data = self.sim.data
return np.concatenate([data.qpos.flat[2:],
data.qvel.flat,
data.cinert.flat,
data.cvel.flat,
data.qfrc_actuator.flat,
data.cfrc_ext.flat])
return np.concatenate(
[
data.qpos.flat[2:],
data.qvel.flat,
data.cinert.flat,
data.cvel.flat,
data.qfrc_actuator.flat,
data.cfrc_ext.flat,
]
)
def step(self, a):
pos_before = mass_center(self.model, self.sim)
@@ -29,18 +35,33 @@ class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
data = self.sim.data
lin_vel_cost = 1.25 * (pos_after - pos_before) / self.dt
quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum()
quad_impact_cost = 0.5e-6 * np.square(data.cfrc_ext).sum()
quad_impact_cost = min(quad_impact_cost, 10)
reward = lin_vel_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus
qpos = self.sim.data.qpos
done = bool((qpos[2] < 1.0) or (qpos[2] > 2.0))
return self._get_obs(), reward, done, dict(reward_linvel=lin_vel_cost, reward_quadctrl=-quad_ctrl_cost, reward_alive=alive_bonus, reward_impact=-quad_impact_cost)
return (
self._get_obs(),
reward,
done,
dict(
reward_linvel=lin_vel_cost,
reward_quadctrl=-quad_ctrl_cost,
reward_alive=alive_bonus,
reward_impact=-quad_impact_cost,
),
)
def reset_model(self):
c = 0.01
self.set_state(
self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv,)
self.init_qvel
+ self.np_random.uniform(
low=-c,
high=c,
size=self.model.nv,
),
)
return self._get_obs()
View File
@@ -4,10 +4,10 @@ from gym import utils
DEFAULT_CAMERA_CONFIG = {
'trackbodyid': 1,
'distance': 4.0,
'lookat': np.array((0.0, 0.0, 2.0)),
'elevation': -20.0,
"trackbodyid": 1,
"distance": 4.0,
"lookat": np.array((0.0, 0.0, 2.0)),
"elevation": -20.0,
}
@@ -18,17 +18,19 @@ def mass_center(model, sim):
class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self,
xml_file='humanoid.xml',
forward_reward_weight=1.25,
ctrl_cost_weight=0.1,
contact_cost_weight=5e-7,
contact_cost_range=(-np.inf, 10.0),
healthy_reward=5.0,
terminate_when_unhealthy=True,
healthy_z_range=(1.0, 2.0),
reset_noise_scale=1e-2,
exclude_current_positions_from_observation=True):
def __init__(
self,
xml_file="humanoid.xml",
forward_reward_weight=1.25,
ctrl_cost_weight=0.1,
contact_cost_weight=5e-7,
contact_cost_range=(-np.inf, 10.0),
healthy_reward=5.0,
terminate_when_unhealthy=True,
healthy_z_range=(1.0, 2.0),
reset_noise_scale=1e-2,
exclude_current_positions_from_observation=True,
):
utils.EzPickle.__init__(**locals())
self._forward_reward_weight = forward_reward_weight
@@ -42,27 +44,26 @@ class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
self._reset_noise_scale = reset_noise_scale
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation)
exclude_current_positions_from_observation
)
mujoco_env.MujocoEnv.__init__(self, xml_file, 5)
@property
def healthy_reward(self):
return float(
self.is_healthy
or self._terminate_when_unhealthy
) * self._healthy_reward
return (
float(self.is_healthy or self._terminate_when_unhealthy)
* self._healthy_reward
)
def control_cost(self, action):
control_cost = self._ctrl_cost_weight * np.sum(
np.square(self.sim.data.ctrl))
control_cost = self._ctrl_cost_weight * np.sum(np.square(self.sim.data.ctrl))
return control_cost
@property
def contact_cost(self):
contact_forces = self.sim.data.cfrc_ext
contact_cost = self._contact_cost_weight * np.sum(
np.square(contact_forces))
contact_cost = self._contact_cost_weight * np.sum(np.square(contact_forces))
min_cost, max_cost = self._contact_cost_range
contact_cost = np.clip(contact_cost, min_cost, max_cost)
return contact_cost
@@ -76,9 +77,7 @@ class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
@property
def done(self):
done = ((not self.is_healthy)
if self._terminate_when_unhealthy
else False)
done = (not self.is_healthy) if self._terminate_when_unhealthy else False
return done
def _get_obs(self):
@@ -94,14 +93,16 @@ class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
if self._exclude_current_positions_from_observation:
position = position[2:]
return np.concatenate((
position,
velocity,
com_inertia,
com_velocity,
actuator_forces,
external_contact_forces,
))
return np.concatenate(
(
position,
velocity,
com_inertia,
com_velocity,
actuator_forces,
external_contact_forces,
)
)
def step(self, action):
xy_position_before = mass_center(self.model, self.sim)
@@ -124,18 +125,16 @@ class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
reward = rewards - costs
done = self.done
info = {
'reward_linvel': forward_reward,
'reward_quadctrl': -ctrl_cost,
'reward_alive': healthy_reward,
'reward_impact': -contact_cost,
'x_position': xy_position_after[0],
'y_position': xy_position_after[1],
'distance_from_origin': np.linalg.norm(xy_position_after, ord=2),
'x_velocity': x_velocity,
'y_velocity': y_velocity,
'forward_reward': forward_reward,
"reward_linvel": forward_reward,
"reward_quadctrl": -ctrl_cost,
"reward_alive": healthy_reward,
"reward_impact": -contact_cost,
"x_position": xy_position_after[0],
"y_position": xy_position_after[1],
"distance_from_origin": np.linalg.norm(xy_position_after, ord=2),
"x_velocity": x_velocity,
"y_velocity": y_velocity,
"forward_reward": forward_reward,
}
return observation, reward, done, info
@@ -145,9 +144,11 @@ class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq)
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = self.init_qvel + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nv)
low=noise_low, high=noise_high, size=self.model.nv
)
self.set_state(qpos, qvel)
observation = self._get_obs()
View File
@@ -2,19 +2,24 @@ from gym.envs.mujoco import mujoco_env
from gym import utils
import numpy as np
class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
mujoco_env.MujocoEnv.__init__(self, 'humanoidstandup.xml', 5)
mujoco_env.MujocoEnv.__init__(self, "humanoidstandup.xml", 5)
utils.EzPickle.__init__(self)
def _get_obs(self):
data = self.sim.data
return np.concatenate([data.qpos.flat[2:],
data.qvel.flat,
data.cinert.flat,
data.cvel.flat,
data.qfrc_actuator.flat,
data.cfrc_ext.flat])
return np.concatenate(
[
data.qpos.flat[2:],
data.qvel.flat,
data.cinert.flat,
data.cvel.flat,
data.qfrc_actuator.flat,
data.cfrc_ext.flat,
]
)
def step(self, a):
self.do_simulation(a, self.frame_skip)
@@ -23,18 +28,32 @@ class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
uph_cost = (pos_after - 0) / self.model.opt.timestep
quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum()
quad_impact_cost = 0.5e-6 * np.square(data.cfrc_ext).sum()
quad_impact_cost = min(quad_impact_cost, 10)
reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1
done = bool(False)
return self._get_obs(), reward, done, dict(reward_linup=uph_cost, reward_quadctrl=-quad_ctrl_cost, reward_impact=-quad_impact_cost)
return (
self._get_obs(),
reward,
done,
dict(
reward_linup=uph_cost,
reward_quadctrl=-quad_ctrl_cost,
reward_impact=-quad_impact_cost,
),
)
def reset_model(self):
c = 0.01
self.set_state(
self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv,)
self.init_qvel
+ self.np_random.uniform(
low=-c,
high=c,
size=self.model.nv,
),
)
return self._get_obs()
View File
@@ -2,10 +2,10 @@ import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
class InvertedDoublePendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
class InvertedDoublePendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
mujoco_env.MujocoEnv.__init__(self, 'inverted_double_pendulum.xml', 5)
mujoco_env.MujocoEnv.__init__(self, "inverted_double_pendulum.xml", 5)
utils.EzPickle.__init__(self)
def step(self, action):
@@ -14,25 +14,28 @@ class InvertedDoublePendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
x, _, y = self.sim.data.site_xpos[0]
dist_penalty = 0.01 * x ** 2 + (y - 2) ** 2
v1, v2 = self.sim.data.qvel[1:3]
vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2
vel_penalty = 1e-3 * v1 ** 2 + 5e-3 * v2 ** 2
alive_bonus = 10
r = alive_bonus - dist_penalty - vel_penalty
done = bool(y <= 1)
return ob, r, done, {}
def _get_obs(self):
return np.concatenate([
self.sim.data.qpos[:1], # cart x pos
np.sin(self.sim.data.qpos[1:]), # link angles
np.cos(self.sim.data.qpos[1:]),
np.clip(self.sim.data.qvel, -10, 10),
np.clip(self.sim.data.qfrc_constraint, -10, 10)
]).ravel()
return np.concatenate(
[
self.sim.data.qpos[:1], # cart x pos
np.sin(self.sim.data.qpos[1:]), # link angles
np.cos(self.sim.data.qpos[1:]),
np.clip(self.sim.data.qvel, -10, 10),
np.clip(self.sim.data.qfrc_constraint, -10, 10),
]
).ravel()
def reset_model(self):
self.set_state(
self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq),
self.init_qvel + self.np_random.randn(self.model.nv) * .1
self.init_qpos
+ self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq),
self.init_qvel + self.np_random.randn(self.model.nv) * 0.1,
)
return self._get_obs()
View File
@@ -2,22 +2,27 @@ import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
class InvertedPendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
utils.EzPickle.__init__(self)
mujoco_env.MujocoEnv.__init__(self, 'inverted_pendulum.xml', 2)
mujoco_env.MujocoEnv.__init__(self, "inverted_pendulum.xml", 2)
def step(self, a):
reward = 1.0
self.do_simulation(a, self.frame_skip)
ob = self._get_obs()
notdone = np.isfinite(ob).all() and (np.abs(ob[1]) <= .2)
notdone = np.isfinite(ob).all() and (np.abs(ob[1]) <= 0.2)
done = not notdone
return ob, reward, done, {}
def reset_model(self):
qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq, low=-0.01, high=0.01)
qvel = self.init_qvel + self.np_random.uniform(size=self.model.nv, low=-0.01, high=0.01)
qpos = self.init_qpos + self.np_random.uniform(
size=self.model.nq, low=-0.01, high=0.01
)
qvel = self.init_qvel + self.np_random.uniform(
size=self.model.nv, low=-0.01, high=0.01
)
self.set_state(qpos, qvel)
return self._get_obs()
View File
@@ -11,20 +11,28 @@ import gym
try:
import mujoco_py
except ImportError as e:
raise error.DependencyNotInstalled("{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(e))
raise error.DependencyNotInstalled(
"{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(
e
)
)
DEFAULT_SIZE = 500
def convert_observation_to_space(observation):
if isinstance(observation, dict):
space = spaces.Dict(OrderedDict([
(key, convert_observation_to_space(value))
for key, value in observation.items()
]))
space = spaces.Dict(
OrderedDict(
[
(key, convert_observation_to_space(value))
for key, value in observation.items()
]
)
)
elif isinstance(observation, np.ndarray):
low = np.full(observation.shape, -float('inf'), dtype=np.float32)
high = np.full(observation.shape, float('inf'), dtype=np.float32)
low = np.full(observation.shape, -float("inf"), dtype=np.float32)
high = np.full(observation.shape, float("inf"), dtype=np.float32)
space = spaces.Box(low, high, dtype=observation.dtype)
else:
raise NotImplementedError(type(observation), observation)
@@ -33,8 +41,7 @@ def convert_observation_to_space(observation):
class MujocoEnv(gym.Env):
"""Superclass for all MuJoCo environments.
"""
"""Superclass for all MuJoCo environments."""
def __init__(self, model_path, frame_skip):
if model_path.startswith("/"):
@@ -51,8 +58,8 @@ class MujocoEnv(gym.Env):
self._viewers = {}
self.metadata = {
'render.modes': ['human', 'rgb_array', 'depth_array'],
'video.frames_per_second': int(np.round(1.0 / self.dt))
"render.modes": ["human", "rgb_array", "depth_array"],
"video.frames_per_second": int(np.round(1.0 / self.dt)),
}
self.init_qpos = self.sim.data.qpos.ravel().copy()
@@ -110,8 +117,9 @@ class MujocoEnv(gym.Env):
def set_state(self, qpos, qvel):
assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,)
old_state = self.sim.get_state()
new_state = mujoco_py.MjSimState(old_state.time, qpos, qvel,
old_state.act, old_state.udd_state)
new_state = mujoco_py.MjSimState(
old_state.time, qpos, qvel, old_state.act, old_state.udd_state
)
self.sim.set_state(new_state)
self.sim.forward()
@@ -124,39 +132,43 @@ class MujocoEnv(gym.Env):
for _ in range(n_frames):
self.sim.step()
def render(self,
mode='human',
width=DEFAULT_SIZE,
height=DEFAULT_SIZE,
camera_id=None,
camera_name=None):
if mode == 'rgb_array' or mode == 'depth_array':
def render(
self,
mode="human",
width=DEFAULT_SIZE,
height=DEFAULT_SIZE,
camera_id=None,
camera_name=None,
):
if mode == "rgb_array" or mode == "depth_array":
if camera_id is not None and camera_name is not None:
raise ValueError("Both `camera_id` and `camera_name` cannot be"
" specified at the same time.")
raise ValueError(
"Both `camera_id` and `camera_name` cannot be"
" specified at the same time."
)
no_camera_specified = camera_name is None and camera_id is None
if no_camera_specified:
camera_name = 'track'
camera_name = "track"
if camera_id is None and camera_name in self.model._camera_name2id:
camera_id = self.model.camera_name2id(camera_name)
self._get_viewer(mode).render(width, height, camera_id=camera_id)
if mode == 'rgb_array':
if mode == "rgb_array":
# window size used for old mujoco-py:
data = self._get_viewer(mode).read_pixels(width, height, depth=False)
# original image is upside-down, so flip it
return data[::-1, :, :]
elif mode == 'depth_array':
elif mode == "depth_array":
self._get_viewer(mode).render(width, height)
# window size used for old mujoco-py:
# Extract depth part of the read_pixels() tuple
data = self._get_viewer(mode).read_pixels(width, height, depth=True)[1]
# original image is upside-down, so flip it
return data[::-1, :]
elif mode == 'human':
elif mode == "human":
self._get_viewer(mode).render()
def close(self):
@@ -168,9 +180,9 @@ class MujocoEnv(gym.Env):
def _get_viewer(self, mode):
self.viewer = self._viewers.get(mode)
if self.viewer is None:
if mode == 'human':
if mode == "human":
self.viewer = mujoco_py.MjViewer(self.sim)
elif mode == 'rgb_array' or mode == 'depth_array':
elif mode == "rgb_array" or mode == "depth_array":
self.viewer = mujoco_py.MjRenderContextOffscreen(self.sim, -1)
self.viewer_setup()
@@ -181,7 +193,4 @@ class MujocoEnv(gym.Env):
return self.data.get_body_xpos(body_name)
def state_vector(self):
return np.concatenate([
self.sim.data.qpos.flat,
self.sim.data.qvel.flat
])
return np.concatenate([self.sim.data.qpos.flat, self.sim.data.qvel.flat])
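For the reformatted `render()` above, the camera selection rules are easiest to see in use: `rgb_array` and `depth_array` accept either `camera_id` or `camera_name` (never both) and fall back to the `"track"` camera when neither is given. A short sketch, assuming a working mujoco_py install; the `HalfCheetah-v2` id is just an example:

import gym

env = gym.make("HalfCheetah-v2")
env.reset()

# Off-screen rendering with an explicit camera name:
frame = env.render(mode="rgb_array", width=256, height=256, camera_name="track")

# Depth rendering with the default ("track") camera:
depth = env.render(mode="depth_array", width=256, height=256)

# Supplying both selectors raises the ValueError shown above:
# env.render(mode="rgb_array", camera_id=0, camera_name="track")

env.close()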
View File
@@ -4,25 +4,25 @@ from gym.envs.mujoco import mujoco_env
import mujoco_py
class PusherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
utils.EzPickle.__init__(self)
mujoco_env.MujocoEnv.__init__(self, 'pusher.xml', 5)
mujoco_env.MujocoEnv.__init__(self, "pusher.xml", 5)
def step(self, a):
vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm")
vec_2 = self.get_body_com("object") - self.get_body_com("goal")
reward_near = - np.linalg.norm(vec_1)
reward_dist = - np.linalg.norm(vec_2)
reward_ctrl = - np.square(a).sum()
reward_near = -np.linalg.norm(vec_1)
reward_dist = -np.linalg.norm(vec_2)
reward_ctrl = -np.square(a).sum()
reward = reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near
self.do_simulation(a, self.frame_skip)
ob = self._get_obs()
done = False
return ob, reward, done, dict(reward_dist=reward_dist,
reward_ctrl=reward_ctrl)
return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl)
def viewer_setup(self):
self.viewer.cam.trackbodyid = -1
@@ -33,25 +33,31 @@ class PusherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
self.goal_pos = np.asarray([0, 0])
while True:
self.cylinder_pos = np.concatenate([
self.cylinder_pos = np.concatenate(
[
self.np_random.uniform(low=-0.3, high=0, size=1),
self.np_random.uniform(low=-0.2, high=0.2, size=1)])
self.np_random.uniform(low=-0.2, high=0.2, size=1),
]
)
if np.linalg.norm(self.cylinder_pos - self.goal_pos) > 0.17:
break
qpos[-4:-2] = self.cylinder_pos
qpos[-2:] = self.goal_pos
qvel = self.init_qvel + self.np_random.uniform(low=-0.005,
high=0.005, size=self.model.nv)
qvel = self.init_qvel + self.np_random.uniform(
low=-0.005, high=0.005, size=self.model.nv
)
qvel[-4:] = 0
self.set_state(qpos, qvel)
return self._get_obs()
def _get_obs(self):
return np.concatenate([
self.sim.data.qpos.flat[:7],
self.sim.data.qvel.flat[:7],
self.get_body_com("tips_arm"),
self.get_body_com("object"),
self.get_body_com("goal"),
])
return np.concatenate(
[
self.sim.data.qpos.flat[:7],
self.sim.data.qvel.flat[:7],
self.get_body_com("tips_arm"),
self.get_body_com("object"),
self.get_body_com("goal"),
]
)
View File
@@ -2,15 +2,16 @@ import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
class ReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
utils.EzPickle.__init__(self)
mujoco_env.MujocoEnv.__init__(self, 'reacher.xml', 2)
mujoco_env.MujocoEnv.__init__(self, "reacher.xml", 2)
def step(self, a):
vec = self.get_body_com("fingertip")-self.get_body_com("target")
reward_dist = - np.linalg.norm(vec)
reward_ctrl = - np.square(a).sum()
vec = self.get_body_com("fingertip") - self.get_body_com("target")
reward_dist = -np.linalg.norm(vec)
reward_ctrl = -np.square(a).sum()
reward = reward_dist + reward_ctrl
self.do_simulation(a, self.frame_skip)
ob = self._get_obs()
@@ -21,23 +22,30 @@ class ReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
self.viewer.cam.trackbodyid = 0
def reset_model(self):
qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
qpos = (
self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq)
+ self.init_qpos
)
while True:
self.goal = self.np_random.uniform(low=-.2, high=.2, size=2)
self.goal = self.np_random.uniform(low=-0.2, high=0.2, size=2)
if np.linalg.norm(self.goal) < 0.2:
break
qpos[-2:] = self.goal
qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
qvel = self.init_qvel + self.np_random.uniform(
low=-0.005, high=0.005, size=self.model.nv
)
qvel[-2:] = 0
self.set_state(qpos, qvel)
return self._get_obs()
def _get_obs(self):
theta = self.sim.data.qpos.flat[:2]
return np.concatenate([
np.cos(theta),
np.sin(theta),
self.sim.data.qpos.flat[2:],
self.sim.data.qvel.flat[:2],
self.get_body_com("fingertip") - self.get_body_com("target")
])
return np.concatenate(
[
np.cos(theta),
np.sin(theta),
self.sim.data.qpos.flat[2:],
self.sim.data.qvel.flat[:2],
self.get_body_com("fingertip") - self.get_body_com("target"),
]
)
View File
@@ -2,13 +2,14 @@ import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
class StrikerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
utils.EzPickle.__init__(self)
self._striked = False
self._min_strike_dist = np.inf
self.strike_threshold = 0.1
mujoco_env.MujocoEnv.__init__(self, 'striker.xml', 5)
mujoco_env.MujocoEnv.__init__(self, "striker.xml", 5)
def step(self, a):
vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm")
@@ -21,19 +22,18 @@ class StrikerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
if self._striked:
vec_3 = self.get_body_com("object") - self._strike_pos
reward_near = - np.linalg.norm(vec_3)
reward_near = -np.linalg.norm(vec_3)
else:
reward_near = - np.linalg.norm(vec_1)
reward_near = -np.linalg.norm(vec_1)
reward_dist = - np.linalg.norm(self._min_strike_dist)
reward_ctrl = - np.square(a).sum()
reward_dist = -np.linalg.norm(self._min_strike_dist)
reward_ctrl = -np.square(a).sum()
reward = 3 * reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near
self.do_simulation(a, self.frame_skip)
ob = self._get_obs()
done = False
return ob, reward, done, dict(reward_dist=reward_dist,
reward_ctrl=reward_ctrl)
return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl)
def viewer_setup(self):
self.viewer.cam.trackbodyid = 0
@@ -48,9 +48,12 @@ class StrikerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
self.ball = np.array([0.5, -0.175])
while True:
self.goal = np.concatenate([
self.goal = np.concatenate(
[
self.np_random.uniform(low=0.15, high=0.7, size=1),
self.np_random.uniform(low=0.1, high=1.0, size=1)])
self.np_random.uniform(low=0.1, high=1.0, size=1),
]
)
if np.linalg.norm(self.ball - self.goal) > 0.17:
break
@@ -59,17 +62,20 @@ class StrikerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
diff = self.ball - self.goal
angle = -np.arctan(diff[0] / (diff[1] + 1e-8))
qpos[-1] = angle / 3.14
qvel = self.init_qvel + self.np_random.uniform(low=-.1, high=.1,
size=self.model.nv)
qvel = self.init_qvel + self.np_random.uniform(
low=-0.1, high=0.1, size=self.model.nv
)
qvel[7:] = 0
self.set_state(qpos, qvel)
return self._get_obs()
def _get_obs(self):
return np.concatenate([
self.sim.data.qpos.flat[:7],
self.sim.data.qvel.flat[:7],
self.get_body_com("tips_arm"),
self.get_body_com("object"),
self.get_body_com("goal"),
])
return np.concatenate(
[
self.sim.data.qpos.flat[:7],
self.sim.data.qvel.flat[:7],
self.get_body_com("tips_arm"),
self.get_body_com("object"),
self.get_body_com("goal"),
]
)
View File
@@ -2,9 +2,10 @@ import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
mujoco_env.MujocoEnv.__init__(self, 'swimmer.xml', 4)
mujoco_env.MujocoEnv.__init__(self, "swimmer.xml", 4)
utils.EzPickle.__init__(self)
def step(self, a):
@@ -13,7 +14,7 @@ class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
self.do_simulation(a, self.frame_skip)
xposafter = self.sim.data.qpos[0]
reward_fwd = (xposafter - xposbefore) / self.dt
reward_ctrl = - ctrl_cost_coeff * np.square(a).sum()
reward_ctrl = -ctrl_cost_coeff * np.square(a).sum()
reward = reward_fwd + reward_ctrl
ob = self._get_obs()
return ob, reward, False, dict(reward_fwd=reward_fwd, reward_ctrl=reward_ctrl)
@@ -25,7 +26,9 @@ class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def reset_model(self):
self.set_state(
self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq),
self.init_qvel + self.np_random.uniform(low=-.1, high=.1, size=self.model.nv)
self.init_qpos
+ self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq),
self.init_qvel
+ self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nv),
)
return self._get_obs()
View File
@@ -7,12 +7,14 @@ DEFAULT_CAMERA_CONFIG = {}
class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self,
xml_file='swimmer.xml',
forward_reward_weight=1.0,
ctrl_cost_weight=1e-4,
reset_noise_scale=0.1,
exclude_current_positions_from_observation=True):
def __init__(
self,
xml_file="swimmer.xml",
forward_reward_weight=1.0,
ctrl_cost_weight=1e-4,
reset_noise_scale=0.1,
exclude_current_positions_from_observation=True,
):
utils.EzPickle.__init__(**locals())
self._forward_reward_weight = forward_reward_weight
@@ -21,7 +23,8 @@ class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
self._reset_noise_scale = reset_noise_scale
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation)
exclude_current_positions_from_observation
)
mujoco_env.MujocoEnv.__init__(self, xml_file, 4)
@@ -45,16 +48,14 @@ class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
reward = forward_reward - ctrl_cost
done = False
info = {
'reward_fwd': forward_reward,
'reward_ctrl': -ctrl_cost,
'x_position': xy_position_after[0],
'y_position': xy_position_after[1],
'distance_from_origin': np.linalg.norm(xy_position_after, ord=2),
'x_velocity': x_velocity,
'y_velocity': y_velocity,
'forward_reward': forward_reward,
"reward_fwd": forward_reward,
"reward_ctrl": -ctrl_cost,
"x_position": xy_position_after[0],
"y_position": xy_position_after[1],
"distance_from_origin": np.linalg.norm(xy_position_after, ord=2),
"x_velocity": x_velocity,
"y_velocity": y_velocity,
"forward_reward": forward_reward,
}
return observation, reward, done, info
@@ -74,9 +75,11 @@ class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq)
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = self.init_qvel + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nv)
low=noise_low, high=noise_high, size=self.model.nv
)
self.set_state(qpos, qvel)
View File
@@ -2,12 +2,13 @@ import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
class ThrowerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
utils.EzPickle.__init__(self)
self._ball_hit_ground = False
self._ball_hit_location = None
mujoco_env.MujocoEnv.__init__(self, 'thrower.xml', 5)
mujoco_env.MujocoEnv.__init__(self, "thrower.xml", 5)
def step(self, a):
ball_xy = self.get_body_com("ball")[:2]
@@ -22,14 +23,13 @@ class ThrowerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
reward_dist = -np.linalg.norm(ball_hit_xy - goal_xy)
else:
reward_dist = -np.linalg.norm(ball_xy - goal_xy)
reward_ctrl = - np.square(a).sum()
reward_ctrl = -np.square(a).sum()
reward = reward_dist + 0.002 * reward_ctrl
self.do_simulation(a, self.frame_skip)
ob = self._get_obs()
done = False
return ob, reward, done, dict(reward_dist=reward_dist,
reward_ctrl=reward_ctrl)
return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl)
def viewer_setup(self):
self.viewer.cam.trackbodyid = 0
@@ -40,21 +40,28 @@ class ThrowerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
self._ball_hit_location = None
qpos = self.init_qpos
self.goal = np.array([self.np_random.uniform(low=-0.3, high=0.3),
self.np_random.uniform(low=-0.3, high=0.3)])
self.goal = np.array(
[
self.np_random.uniform(low=-0.3, high=0.3),
self.np_random.uniform(low=-0.3, high=0.3),
]
)
qpos[-9:-7] = self.goal
qvel = self.init_qvel + self.np_random.uniform(low=-0.005,
high=0.005, size=self.model.nv)
qvel = self.init_qvel + self.np_random.uniform(
low=-0.005, high=0.005, size=self.model.nv
)
qvel[7:] = 0
self.set_state(qpos, qvel)
return self._get_obs()
def _get_obs(self):
return np.concatenate([
self.sim.data.qpos.flat[:7],
self.sim.data.qvel.flat[:7],
self.get_body_com("r_wrist_roll_link"),
self.get_body_com("ball"),
self.get_body_com("goal"),
])
return np.concatenate(
[
self.sim.data.qpos.flat[:7],
self.sim.data.qvel.flat[:7],
self.get_body_com("r_wrist_roll_link"),
self.get_body_com("ball"),
self.get_body_com("goal"),
]
)
View File
@@ -2,8 +2,8 @@ import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
mujoco_env.MujocoEnv.__init__(self, "walker2d.xml", 4)
utils.EzPickle.__init__(self)
@@ -13,11 +13,10 @@ class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
self.do_simulation(a, self.frame_skip)
posafter, height, ang = self.sim.data.qpos[0:3]
alive_bonus = 1.0
reward = ((posafter - posbefore) / self.dt)
reward = (posafter - posbefore) / self.dt
reward += alive_bonus
reward -= 1e-3 * np.square(a).sum()
done = not (height > 0.8 and height < 2.0 and
ang > -1.0 and ang < 1.0)
done = not (height > 0.8 and height < 2.0 and ang > -1.0 and ang < 1.0)
ob = self._get_obs()
return ob, reward, done, {}
@@ -28,8 +27,10 @@ class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def reset_model(self):
self.set_state(
self.init_qpos + self.np_random.uniform(low=-.005, high=.005, size=self.model.nq),
self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
self.init_qpos
+ self.np_random.uniform(low=-0.005, high=0.005, size=self.model.nq),
self.init_qvel
+ self.np_random.uniform(low=-0.005, high=0.005, size=self.model.nv),
)
return self._get_obs()
View File
@@ -4,24 +4,26 @@ from gym import utils
DEFAULT_CAMERA_CONFIG = {
'trackbodyid': 2,
'distance': 4.0,
'lookat': np.array((0.0, 0.0, 1.15)),
'elevation': -20.0,
"trackbodyid": 2,
"distance": 4.0,
"lookat": np.array((0.0, 0.0, 1.15)),
"elevation": -20.0,
}
class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self,
xml_file='walker2d.xml',
forward_reward_weight=1.0,
ctrl_cost_weight=1e-3,
healthy_reward=1.0,
terminate_when_unhealthy=True,
healthy_z_range=(0.8, 2.0),
healthy_angle_range=(-1.0, 1.0),
reset_noise_scale=5e-3,
exclude_current_positions_from_observation=True):
def __init__(
self,
xml_file="walker2d.xml",
forward_reward_weight=1.0,
ctrl_cost_weight=1e-3,
healthy_reward=1.0,
terminate_when_unhealthy=True,
healthy_z_range=(0.8, 2.0),
healthy_angle_range=(-1.0, 1.0),
reset_noise_scale=5e-3,
exclude_current_positions_from_observation=True,
):
utils.EzPickle.__init__(**locals())
self._forward_reward_weight = forward_reward_weight
@@ -36,16 +38,17 @@ class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
self._reset_noise_scale = reset_noise_scale
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation)
exclude_current_positions_from_observation
)
mujoco_env.MujocoEnv.__init__(self, xml_file, 4)
@property
def healthy_reward(self):
return float(
self.is_healthy
or self._terminate_when_unhealthy
) * self._healthy_reward
return (
float(self.is_healthy or self._terminate_when_unhealthy)
* self._healthy_reward
)
def control_cost(self, action):
control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
@@ -66,15 +69,12 @@ class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
@property
def done(self):
done = (not self.is_healthy
if self._terminate_when_unhealthy
else False)
done = not self.is_healthy if self._terminate_when_unhealthy else False
return done
def _get_obs(self):
position = self.sim.data.qpos.flat.copy()
velocity = np.clip(
self.sim.data.qvel.flat.copy(), -10, 10)
velocity = np.clip(self.sim.data.qvel.flat.copy(), -10, 10)
if self._exclude_current_positions_from_observation:
position = position[1:]
@@ -86,8 +86,7 @@ class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
x_position_before = self.sim.data.qpos[0]
self.do_simulation(action, self.frame_skip)
x_position_after = self.sim.data.qpos[0]
x_velocity = ((x_position_after - x_position_before)
/ self.dt)
x_velocity = (x_position_after - x_position_before) / self.dt
ctrl_cost = self.control_cost(action)
@@ -101,8 +100,8 @@ class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
reward = rewards - costs
done = self.done
info = {
'x_position': x_position_after,
'x_velocity': x_velocity,
"x_position": x_position_after,
"x_velocity": x_velocity,
}
return observation, reward, done, info
@@ -112,9 +111,11 @@ class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq)
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = self.init_qvel + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nv)
low=noise_low, high=noise_high, size=self.model.nv
)
self.set_state(qpos, qvel)
View File
@@ -10,7 +10,7 @@ from gym import error, logger
#
# 2016-10-31: We're experimentally expanding the environment ID format
# to include an optional username.
env_id_re = re.compile(r'^(?:[\w:-]+\/)?([\w:.-]+)-v(\d+)$')
env_id_re = re.compile(r"^(?:[\w:-]+\/)?([\w:.-]+)-v(\d+)$")
def load(name):
@@ -34,7 +34,15 @@ class EnvSpec(object):
"""
def __init__(self, id, entry_point=None, reward_threshold=None, nondeterministic=False, max_episode_steps=None, kwargs=None):
def __init__(
self,
id,
entry_point=None,
reward_threshold=None,
nondeterministic=False,
max_episode_steps=None,
kwargs=None,
):
self.id = id
self.entry_point = entry_point
self.reward_threshold = reward_threshold
@@ -44,13 +52,21 @@ class EnvSpec(object):
match = env_id_re.search(id)
if not match:
raise error.Error('Attempted to register malformed environment ID: {}. (Currently all IDs must be of the form {}.)'.format(id, env_id_re.pattern))
self._env_name = match.group(1)
raise error.Error(
"Attempted to register malformed environment ID: {}. (Currently all IDs must be of the form {}.)".format(
id, env_id_re.pattern
)
)
self._env_name = match.group(1)
def make(self, **kwargs):
"""Instantiates an instance of the environment with appropriate kwargs"""
if self.entry_point is None:
raise error.Error('Attempting to make deprecated env {}. (HINT: is there a newer registered version of this env?)'.format(self.id))
raise error.Error(
"Attempting to make deprecated env {}. (HINT: is there a newer registered version of this env?)".format(
self.id
)
)
_kwargs = self._kwargs.copy()
_kwargs.update(kwargs)
if callable(self.entry_point):
@@ -83,19 +99,24 @@ class EnvRegistry(object):
def make(self, path, **kwargs):
if len(kwargs) > 0:
logger.info('Making new env: %s (%s)', path, kwargs)
logger.info("Making new env: %s (%s)", path, kwargs)
else:
logger.info('Making new env: %s', path)
logger.info("Making new env: %s", path)
spec = self.spec(path)
env = spec.make(**kwargs)
# We used to have people override _reset/_step rather than
# reset/step. Set _gym_disable_underscore_compat = True on
# your environment if you use these methods and don't want
# compatibility code to be invoked.
if hasattr(env, "_reset") and hasattr(env, "_step") and not getattr(env, "_gym_disable_underscore_compat", False):
if (
hasattr(env, "_reset")
and hasattr(env, "_step")
and not getattr(env, "_gym_disable_underscore_compat", False)
):
patch_deprecated_methods(env)
if env.spec.max_episode_steps is not None:
from gym.wrappers.time_limit import TimeLimit
env = TimeLimit(env, max_episode_steps=env.spec.max_episode_steps)
return env
@@ -103,19 +124,27 @@ class EnvRegistry(object):
return self.env_specs.values()
def spec(self, path):
if ':' in path:
mod_name, _sep, id = path.partition(':')
if ":" in path:
mod_name, _sep, id = path.partition(":")
try:
importlib.import_module(mod_name)
# catch ImportError for python2.7 compatibility
except ImportError:
raise error.Error('A module ({}) was specified for the environment but was not found, make sure the package is installed with `pip install` before calling `gym.make()`'.format(mod_name))
raise error.Error(
"A module ({}) was specified for the environment but was not found, make sure the package is installed with `pip install` before calling `gym.make()`".format(
mod_name
)
)
else:
id = path
match = env_id_re.search(id)
if not match:
raise error.Error('Attempted to look up malformed environment ID: {}. (Currently all IDs must be of the form {}.)'.format(id.encode('utf-8'), env_id_re.pattern))
raise error.Error(
"Attempted to look up malformed environment ID: {}. (Currently all IDs must be of the form {}.)".format(
id.encode("utf-8"), env_id_re.pattern
)
)
try:
return self.env_specs[id]
@@ -123,32 +152,45 @@ class EnvRegistry(object):
# Parse the env name and check to see if it matches the non-version
# part of a valid env (could also check the exact number here)
env_name = match.group(1)
matching_envs = [valid_env_name for valid_env_name, valid_env_spec in self.env_specs.items()
if env_name == valid_env_spec._env_name]
matching_envs = [
valid_env_name
for valid_env_name, valid_env_spec in self.env_specs.items()
if env_name == valid_env_spec._env_name
]
if matching_envs:
raise error.DeprecatedEnv('Env {} not found (valid versions include {})'.format(id, matching_envs))
raise error.DeprecatedEnv(
"Env {} not found (valid versions include {})".format(
id, matching_envs
)
)
else:
raise error.UnregisteredEnv('No registered env with id: {}'.format(id))
raise error.UnregisteredEnv("No registered env with id: {}".format(id))
def register(self, id, **kwargs):
if id in self.env_specs:
raise error.Error('Cannot re-register id: {}'.format(id))
raise error.Error("Cannot re-register id: {}".format(id))
self.env_specs[id] = EnvSpec(id, **kwargs)
# Have a global registry
registry = EnvRegistry()
def register(id, **kwargs):
return registry.register(id, **kwargs)
def make(id, **kwargs):
return registry.make(id, **kwargs)
def spec(id):
return registry.spec(id)
warn_once = True
def patch_deprecated_methods(env):
"""
Methods renamed from '_method' to 'method', render() no longer has 'close' parameter, close is a separate method.
@@ -156,14 +198,20 @@ def patch_deprecated_methods(env):
"""
global warn_once
if warn_once:
logger.warn("Environment '%s' has deprecated methods '_step' and '_reset' rather than 'step' and 'reset'. Compatibility code invoked. Set _gym_disable_underscore_compat = True to disable this behavior." % str(type(env)))
logger.warn(
"Environment '%s' has deprecated methods '_step' and '_reset' rather than 'step' and 'reset'. Compatibility code invoked. Set _gym_disable_underscore_compat = True to disable this behavior."
% str(type(env))
)
warn_once = False
env.reset = env._reset
env.step = env._step
env.seed = env._seed
env.step = env._step
env.seed = env._seed
def render(mode):
return env._render(mode, close=False)
def close():
env._render("human", close=True)
env.render = render
env.close = close
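The registry code above is easiest to read alongside a usage sketch: environment ids must match `env_id_re` (a name plus `-v<number>`, with an optional username prefix), `register()` stores an `EnvSpec`, `make()` wraps the env in `TimeLimit` when `max_episode_steps` is set, and a `module:EnvId` path tells `spec()` to import that module first. A minimal sketch with a throwaway env (the `MyEnv` class and its id are hypothetical, used only for illustration):

import gym
from gym.envs.registration import make, register, spec


class MyEnv(gym.Env):
    # Deliberately tiny; just enough to exercise the registry calls.
    def reset(self):
        return 0

    def step(self, action):
        return 0, 0.0, True, {}


register(id="MyEnv-v0", entry_point=MyEnv, max_episode_steps=200)

env = make("MyEnv-v0")  # wrapped in TimeLimit because max_episode_steps is set
print(spec("MyEnv-v0").max_episode_steps)  # 200

# A "module:EnvId-v0" path makes spec() import the module first, so envs that
# register themselves in a third-party package resolve without a manual import:
# env = make("my_package:MyEnv-v0")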
View File
@@ -4,20 +4,30 @@ from gym.envs.robotics import fetch_env
# Ensure we get the path separator correct on windows
MODEL_XML_PATH = os.path.join('fetch', 'pick_and_place.xml')
MODEL_XML_PATH = os.path.join("fetch", "pick_and_place.xml")
class FetchPickAndPlaceEnv(fetch_env.FetchEnv, utils.EzPickle):
def __init__(self, reward_type='sparse'):
def __init__(self, reward_type="sparse"):
initial_qpos = {
'robot0:slide0': 0.405,
'robot0:slide1': 0.48,
'robot0:slide2': 0.0,
'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.],
"robot0:slide0": 0.405,
"robot0:slide1": 0.48,
"robot0:slide2": 0.0,
"object0:joint": [1.25, 0.53, 0.4, 1.0, 0.0, 0.0, 0.0],
}
fetch_env.FetchEnv.__init__(
self, MODEL_XML_PATH, has_object=True, block_gripper=False, n_substeps=20,
gripper_extra_height=0.2, target_in_the_air=True, target_offset=0.0,
obj_range=0.15, target_range=0.15, distance_threshold=0.05,
initial_qpos=initial_qpos, reward_type=reward_type)
self,
MODEL_XML_PATH,
has_object=True,
block_gripper=False,
n_substeps=20,
gripper_extra_height=0.2,
target_in_the_air=True,
target_offset=0.0,
obj_range=0.15,
target_range=0.15,
distance_threshold=0.05,
initial_qpos=initial_qpos,
reward_type=reward_type,
)
utils.EzPickle.__init__(self, reward_type=reward_type)


@@ -4,20 +4,30 @@ from gym.envs.robotics import fetch_env
# Ensure we get the path separator correct on windows
MODEL_XML_PATH = os.path.join('fetch', 'push.xml')
MODEL_XML_PATH = os.path.join("fetch", "push.xml")
class FetchPushEnv(fetch_env.FetchEnv, utils.EzPickle):
def __init__(self, reward_type='sparse'):
def __init__(self, reward_type="sparse"):
initial_qpos = {
'robot0:slide0': 0.405,
'robot0:slide1': 0.48,
'robot0:slide2': 0.0,
'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.],
"robot0:slide0": 0.405,
"robot0:slide1": 0.48,
"robot0:slide2": 0.0,
"object0:joint": [1.25, 0.53, 0.4, 1.0, 0.0, 0.0, 0.0],
}
fetch_env.FetchEnv.__init__(
self, MODEL_XML_PATH, has_object=True, block_gripper=True, n_substeps=20,
gripper_extra_height=0.0, target_in_the_air=False, target_offset=0.0,
obj_range=0.15, target_range=0.15, distance_threshold=0.05,
initial_qpos=initial_qpos, reward_type=reward_type)
self,
MODEL_XML_PATH,
has_object=True,
block_gripper=True,
n_substeps=20,
gripper_extra_height=0.0,
target_in_the_air=False,
target_offset=0.0,
obj_range=0.15,
target_range=0.15,
distance_threshold=0.05,
initial_qpos=initial_qpos,
reward_type=reward_type,
)
utils.EzPickle.__init__(self, reward_type=reward_type)


@@ -4,19 +4,29 @@ from gym.envs.robotics import fetch_env
# Ensure we get the path separator correct on windows
MODEL_XML_PATH = os.path.join('fetch', 'reach.xml')
MODEL_XML_PATH = os.path.join("fetch", "reach.xml")
class FetchReachEnv(fetch_env.FetchEnv, utils.EzPickle):
def __init__(self, reward_type='sparse'):
def __init__(self, reward_type="sparse"):
initial_qpos = {
'robot0:slide0': 0.4049,
'robot0:slide1': 0.48,
'robot0:slide2': 0.0,
"robot0:slide0": 0.4049,
"robot0:slide1": 0.48,
"robot0:slide2": 0.0,
}
fetch_env.FetchEnv.__init__(
self, MODEL_XML_PATH, has_object=False, block_gripper=True, n_substeps=20,
gripper_extra_height=0.2, target_in_the_air=True, target_offset=0.0,
obj_range=0.15, target_range=0.15, distance_threshold=0.05,
initial_qpos=initial_qpos, reward_type=reward_type)
self,
MODEL_XML_PATH,
has_object=False,
block_gripper=True,
n_substeps=20,
gripper_extra_height=0.2,
target_in_the_air=True,
target_offset=0.0,
obj_range=0.15,
target_range=0.15,
distance_threshold=0.05,
initial_qpos=initial_qpos,
reward_type=reward_type,
)
utils.EzPickle.__init__(self, reward_type=reward_type)


@@ -6,20 +6,30 @@ from gym.envs.robotics import fetch_env
# Ensure we get the path separator correct on windows
MODEL_XML_PATH = os.path.join('fetch', 'slide.xml')
MODEL_XML_PATH = os.path.join("fetch", "slide.xml")
class FetchSlideEnv(fetch_env.FetchEnv, utils.EzPickle):
def __init__(self, reward_type='sparse'):
def __init__(self, reward_type="sparse"):
initial_qpos = {
'robot0:slide0': 0.05,
'robot0:slide1': 0.48,
'robot0:slide2': 0.0,
'object0:joint': [1.7, 1.1, 0.41, 1., 0., 0., 0.],
"robot0:slide0": 0.05,
"robot0:slide1": 0.48,
"robot0:slide2": 0.0,
"object0:joint": [1.7, 1.1, 0.41, 1.0, 0.0, 0.0, 0.0],
}
fetch_env.FetchEnv.__init__(
self, MODEL_XML_PATH, has_object=True, block_gripper=True, n_substeps=20,
gripper_extra_height=-0.02, target_in_the_air=False, target_offset=np.array([0.4, 0.0, 0.0]),
obj_range=0.1, target_range=0.3, distance_threshold=0.05,
initial_qpos=initial_qpos, reward_type=reward_type)
self,
MODEL_XML_PATH,
has_object=True,
block_gripper=True,
n_substeps=20,
gripper_extra_height=-0.02,
target_in_the_air=False,
target_offset=np.array([0.4, 0.0, 0.0]),
obj_range=0.1,
target_range=0.3,
distance_threshold=0.05,
initial_qpos=initial_qpos,
reward_type=reward_type,
)
utils.EzPickle.__init__(self, reward_type=reward_type)


@@ -9,13 +9,22 @@ def goal_distance(goal_a, goal_b):
class FetchEnv(robot_env.RobotEnv):
"""Superclass for all Fetch environments.
"""
"""Superclass for all Fetch environments."""
def __init__(
self, model_path, n_substeps, gripper_extra_height, block_gripper,
has_object, target_in_the_air, target_offset, obj_range, target_range,
distance_threshold, initial_qpos, reward_type,
self,
model_path,
n_substeps,
gripper_extra_height,
block_gripper,
has_object,
target_in_the_air,
target_offset,
obj_range,
target_range,
distance_threshold,
initial_qpos,
reward_type,
):
"""Initializes a new Fetch environment.
@@ -44,8 +53,11 @@ class FetchEnv(robot_env.RobotEnv):
self.reward_type = reward_type
super(FetchEnv, self).__init__(
model_path=model_path, n_substeps=n_substeps, n_actions=4,
initial_qpos=initial_qpos)
model_path=model_path,
n_substeps=n_substeps,
n_actions=4,
initial_qpos=initial_qpos,
)
# GoalEnv methods
# ----------------------------
@@ -53,7 +65,7 @@ class FetchEnv(robot_env.RobotEnv):
def compute_reward(self, achieved_goal, goal, info):
# Compute distance between goal and the achieved goal.
d = goal_distance(achieved_goal, goal)
if self.reward_type == 'sparse':
if self.reward_type == "sparse":
return -(d > self.distance_threshold).astype(np.float32)
else:
return -d
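# A worked example of the branch above, with hypothetical numbers: for
# distance_threshold = 0.05, a goal distance of 0.12 yields -1.0 under the
# "sparse" reward and -0.12 under the dense one.
import numpy as np

d = np.array(0.12)  # hypothetical goal distance
distance_threshold = 0.05
sparse_reward = -(d > distance_threshold).astype(np.float32)  # -> -1.0
dense_reward = -d  # -> -0.12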
@@ -63,17 +75,24 @@ class FetchEnv(robot_env.RobotEnv):
def _step_callback(self):
if self.block_gripper:
self.sim.data.set_joint_qpos('robot0:l_gripper_finger_joint', 0.)
self.sim.data.set_joint_qpos('robot0:r_gripper_finger_joint', 0.)
self.sim.data.set_joint_qpos("robot0:l_gripper_finger_joint", 0.0)
self.sim.data.set_joint_qpos("robot0:r_gripper_finger_joint", 0.0)
self.sim.forward()
def _set_action(self, action):
assert action.shape == (4,)
action = action.copy() # ensure that we don't change the action outside of this scope
action = (
action.copy()
) # ensure that we don't change the action outside of this scope
pos_ctrl, gripper_ctrl = action[:3], action[3]
pos_ctrl *= 0.05 # limit maximum change in position
rot_ctrl = [1., 0., 1., 0.] # fixed rotation of the end effector, expressed as a quaternion
rot_ctrl = [
1.0,
0.0,
1.0,
0.0,
] # fixed rotation of the end effector, expressed as a quaternion
gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl])
assert gripper_ctrl.shape == (2,)
if self.block_gripper:
@@ -86,53 +105,66 @@ class FetchEnv(robot_env.RobotEnv):
def _get_obs(self):
# positions
grip_pos = self.sim.data.get_site_xpos('robot0:grip')
grip_pos = self.sim.data.get_site_xpos("robot0:grip")
dt = self.sim.nsubsteps * self.sim.model.opt.timestep
grip_velp = self.sim.data.get_site_xvelp('robot0:grip') * dt
grip_velp = self.sim.data.get_site_xvelp("robot0:grip") * dt
robot_qpos, robot_qvel = utils.robot_get_obs(self.sim)
if self.has_object:
object_pos = self.sim.data.get_site_xpos('object0')
object_pos = self.sim.data.get_site_xpos("object0")
# rotations
object_rot = rotations.mat2euler(self.sim.data.get_site_xmat('object0'))
object_rot = rotations.mat2euler(self.sim.data.get_site_xmat("object0"))
# velocities
object_velp = self.sim.data.get_site_xvelp('object0') * dt
object_velr = self.sim.data.get_site_xvelr('object0') * dt
object_velp = self.sim.data.get_site_xvelp("object0") * dt
object_velr = self.sim.data.get_site_xvelr("object0") * dt
# gripper state
object_rel_pos = object_pos - grip_pos
object_velp -= grip_velp
else:
object_pos = object_rot = object_velp = object_velr = object_rel_pos = np.zeros(0)
object_pos = (
object_rot
) = object_velp = object_velr = object_rel_pos = np.zeros(0)
gripper_state = robot_qpos[-2:]
gripper_vel = robot_qvel[-2:] * dt # change to a scalar if the gripper is made symmetric
gripper_vel = (
robot_qvel[-2:] * dt
) # change to a scalar if the gripper is made symmetric
if not self.has_object:
achieved_goal = grip_pos.copy()
else:
achieved_goal = np.squeeze(object_pos.copy())
obs = np.concatenate([
grip_pos, object_pos.ravel(), object_rel_pos.ravel(), gripper_state, object_rot.ravel(),
object_velp.ravel(), object_velr.ravel(), grip_velp, gripper_vel,
])
obs = np.concatenate(
[
grip_pos,
object_pos.ravel(),
object_rel_pos.ravel(),
gripper_state,
object_rot.ravel(),
object_velp.ravel(),
object_velr.ravel(),
grip_velp,
gripper_vel,
]
)
return {
'observation': obs.copy(),
'achieved_goal': achieved_goal.copy(),
'desired_goal': self.goal.copy(),
"observation": obs.copy(),
"achieved_goal": achieved_goal.copy(),
"desired_goal": self.goal.copy(),
}
def _viewer_setup(self):
body_id = self.sim.model.body_name2id('robot0:gripper_link')
body_id = self.sim.model.body_name2id("robot0:gripper_link")
lookat = self.sim.data.body_xpos[body_id]
for idx, value in enumerate(lookat):
self.viewer.cam.lookat[idx] = value
self.viewer.cam.distance = 2.5
self.viewer.cam.azimuth = 132.
self.viewer.cam.elevation = -14.
self.viewer.cam.azimuth = 132.0
self.viewer.cam.elevation = -14.0
def _render_callback(self):
# Visualize target.
sites_offset = (self.sim.data.site_xpos - self.sim.model.site_pos).copy()
site_id = self.sim.model.site_name2id('target0')
site_id = self.sim.model.site_name2id("target0")
self.sim.model.site_pos[site_id] = self.goal - sites_offset[0]
self.sim.forward()
@@ -143,24 +175,30 @@ class FetchEnv(robot_env.RobotEnv):
if self.has_object:
object_xpos = self.initial_gripper_xpos[:2]
while np.linalg.norm(object_xpos - self.initial_gripper_xpos[:2]) < 0.1:
object_xpos = self.initial_gripper_xpos[:2] + self.np_random.uniform(-self.obj_range, self.obj_range, size=2)
object_qpos = self.sim.data.get_joint_qpos('object0:joint')
object_xpos = self.initial_gripper_xpos[:2] + self.np_random.uniform(
-self.obj_range, self.obj_range, size=2
)
object_qpos = self.sim.data.get_joint_qpos("object0:joint")
assert object_qpos.shape == (7,)
object_qpos[:2] = object_xpos
self.sim.data.set_joint_qpos('object0:joint', object_qpos)
self.sim.data.set_joint_qpos("object0:joint", object_qpos)
self.sim.forward()
return True
def _sample_goal(self):
if self.has_object:
goal = self.initial_gripper_xpos[:3] + self.np_random.uniform(-self.target_range, self.target_range, size=3)
goal = self.initial_gripper_xpos[:3] + self.np_random.uniform(
-self.target_range, self.target_range, size=3
)
goal += self.target_offset
goal[2] = self.height_offset
if self.target_in_the_air and self.np_random.uniform() < 0.5:
goal[2] += self.np_random.uniform(0, 0.45)
else:
goal = self.initial_gripper_xpos[:3] + self.np_random.uniform(-self.target_range, self.target_range, size=3)
goal = self.initial_gripper_xpos[:3] + self.np_random.uniform(
-self.target_range, self.target_range, size=3
)
return goal.copy()
def _is_success(self, achieved_goal, desired_goal):
@@ -174,17 +212,19 @@ class FetchEnv(robot_env.RobotEnv):
self.sim.forward()
# Move end effector into position.
gripper_target = np.array([-0.498, 0.005, -0.431 + self.gripper_extra_height]) + self.sim.data.get_site_xpos('robot0:grip')
gripper_rotation = np.array([1., 0., 1., 0.])
self.sim.data.set_mocap_pos('robot0:mocap', gripper_target)
self.sim.data.set_mocap_quat('robot0:mocap', gripper_rotation)
gripper_target = np.array(
[-0.498, 0.005, -0.431 + self.gripper_extra_height]
) + self.sim.data.get_site_xpos("robot0:grip")
gripper_rotation = np.array([1.0, 0.0, 1.0, 0.0])
self.sim.data.set_mocap_pos("robot0:mocap", gripper_target)
self.sim.data.set_mocap_quat("robot0:mocap", gripper_rotation)
for _ in range(10):
self.sim.step()
# Extract information for sampling goals.
self.initial_gripper_xpos = self.sim.data.get_site_xpos('robot0:grip').copy()
self.initial_gripper_xpos = self.sim.data.get_site_xpos("robot0:grip").copy()
if self.has_object:
self.height_offset = self.sim.data.get_site_xpos('object0')[2]
self.height_offset = self.sim.data.get_site_xpos("object0")[2]
def render(self, mode='human', width=500, height=500):
def render(self, mode="human", width=500, height=500):
return super(FetchEnv, self).render(mode, width, height)


@@ -8,29 +8,42 @@ from gym.envs.robotics.utils import robot_get_obs
try:
import mujoco_py
except ImportError as e:
raise error.DependencyNotInstalled("{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(e))
raise error.DependencyNotInstalled(
"{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(
e
)
)
def quat_from_angle_and_axis(angle, axis):
assert axis.shape == (3,)
axis /= np.linalg.norm(axis)
quat = np.concatenate([[np.cos(angle / 2.)], np.sin(angle / 2.) * axis])
quat = np.concatenate([[np.cos(angle / 2.0)], np.sin(angle / 2.0) * axis])
quat /= np.linalg.norm(quat)
return quat
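# A quick numeric check of the helper above, using a 90-degree rotation about
# the z-axis (values rounded, purely illustrative).
import numpy as np

angle = np.pi / 2
axis = np.array([0.0, 0.0, 1.0])
quat = np.concatenate([[np.cos(angle / 2.0)], np.sin(angle / 2.0) * axis])
# quat ~= [0.7071, 0.0, 0.0, 0.7071]; already unit-norm because the axis is a unit vector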
# Ensure we get the path separator correct on windows
MANIPULATE_BLOCK_XML = os.path.join('hand', 'manipulate_block.xml')
MANIPULATE_EGG_XML = os.path.join('hand', 'manipulate_egg.xml')
MANIPULATE_PEN_XML = os.path.join('hand', 'manipulate_pen.xml')
MANIPULATE_BLOCK_XML = os.path.join("hand", "manipulate_block.xml")
MANIPULATE_EGG_XML = os.path.join("hand", "manipulate_egg.xml")
MANIPULATE_PEN_XML = os.path.join("hand", "manipulate_pen.xml")
class ManipulateEnv(hand_env.HandEnv):
def __init__(
self, model_path, target_position, target_rotation,
target_position_range, reward_type, initial_qpos=None,
randomize_initial_position=True, randomize_initial_rotation=True,
distance_threshold=0.01, rotation_threshold=0.1, n_substeps=20, relative_control=False,
self,
model_path,
target_position,
target_rotation,
target_position_range,
reward_type,
initial_qpos=None,
randomize_initial_position=True,
randomize_initial_rotation=True,
distance_threshold=0.01,
rotation_threshold=0.1,
n_substeps=20,
relative_control=False,
ignore_z_target_rotation=False,
):
"""Initializes a new Hand manipulation environment.
@@ -61,7 +74,9 @@ class ManipulateEnv(hand_env.HandEnv):
self.target_position = target_position
self.target_rotation = target_rotation
self.target_position_range = target_position_range
self.parallel_quats = [rotations.euler2quat(r) for r in rotations.get_parallel_rotations()]
self.parallel_quats = [
rotations.euler2quat(r) for r in rotations.get_parallel_rotations()
]
self.randomize_initial_rotation = randomize_initial_rotation
self.randomize_initial_position = randomize_initial_position
self.distance_threshold = distance_threshold
@@ -69,18 +84,21 @@ class ManipulateEnv(hand_env.HandEnv):
self.reward_type = reward_type
self.ignore_z_target_rotation = ignore_z_target_rotation
assert self.target_position in ['ignore', 'fixed', 'random']
assert self.target_rotation in ['ignore', 'fixed', 'xyz', 'z', 'parallel']
assert self.target_position in ["ignore", "fixed", "random"]
assert self.target_rotation in ["ignore", "fixed", "xyz", "z", "parallel"]
initial_qpos = initial_qpos or {}
hand_env.HandEnv.__init__(
self, model_path, n_substeps=n_substeps, initial_qpos=initial_qpos,
relative_control=relative_control)
self,
model_path,
n_substeps=n_substeps,
initial_qpos=initial_qpos,
relative_control=relative_control,
)
def _get_achieved_goal(self):
# Object position and rotation.
object_qpos = self.sim.data.get_joint_qpos('object:joint')
object_qpos = self.sim.data.get_joint_qpos("object:joint")
assert object_qpos.shape == (7,)
return object_qpos
@@ -90,11 +108,11 @@ class ManipulateEnv(hand_env.HandEnv):
d_pos = np.zeros_like(goal_a[..., 0])
d_rot = np.zeros_like(goal_b[..., 0])
if self.target_position != 'ignore':
if self.target_position != "ignore":
delta_pos = goal_a[..., :3] - goal_b[..., :3]
d_pos = np.linalg.norm(delta_pos, axis=-1)
if self.target_rotation != 'ignore':
if self.target_rotation != "ignore":
quat_a, quat_b = goal_a[..., 3:], goal_b[..., 3:]
if self.ignore_z_target_rotation:
@@ -109,7 +127,7 @@ class ManipulateEnv(hand_env.HandEnv):
# Subtract quaternions and extract angle between them.
quat_diff = rotations.quat_mul(quat_a, rotations.quat_conjugate(quat_b))
angle_diff = 2 * np.arccos(np.clip(quat_diff[..., 0], -1., 1.))
angle_diff = 2 * np.arccos(np.clip(quat_diff[..., 0], -1.0, 1.0))
d_rot = angle_diff
assert d_pos.shape == d_rot.shape
return d_pos, d_rot
@@ -118,14 +136,14 @@ class ManipulateEnv(hand_env.HandEnv):
# ----------------------------
def compute_reward(self, achieved_goal, goal, info):
if self.reward_type == 'sparse':
if self.reward_type == "sparse":
success = self._is_success(achieved_goal, goal).astype(np.float32)
return (success - 1.)
return success - 1.0
else:
d_pos, d_rot = self._goal_distance(achieved_goal, goal)
            # We weight the positional difference so that `d_pos` (in meters) is not
            # completely dominated by `d_rot` (in radians).
return -(10. * d_pos + d_rot)
return -(10.0 * d_pos + d_rot)
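# A small numeric illustration of the weighting above: a 2 cm position error
# and a 0.2 rad rotation error (hypothetical values) then contribute equally.
d_pos, d_rot = 0.02, 0.2
reward = -(10.0 * d_pos + d_rot)  # -> -0.4, with -0.2 from each term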
# RobotEnv methods
# ----------------------------
@@ -146,7 +164,7 @@ class ManipulateEnv(hand_env.HandEnv):
self.sim.set_state(self.initial_state)
self.sim.forward()
initial_qpos = self.sim.data.get_joint_qpos('object:joint').copy()
initial_qpos = self.sim.data.get_joint_qpos("object:joint").copy()
initial_pos, initial_quat = initial_qpos[:3], initial_qpos[3:]
assert initial_qpos.shape == (7,)
assert initial_pos.shape == (3,)
@@ -155,42 +173,46 @@ class ManipulateEnv(hand_env.HandEnv):
# Randomization initial rotation.
if self.randomize_initial_rotation:
if self.target_rotation == 'z':
if self.target_rotation == "z":
angle = self.np_random.uniform(-np.pi, np.pi)
axis = np.array([0., 0., 1.])
axis = np.array([0.0, 0.0, 1.0])
offset_quat = quat_from_angle_and_axis(angle, axis)
initial_quat = rotations.quat_mul(initial_quat, offset_quat)
elif self.target_rotation == 'parallel':
elif self.target_rotation == "parallel":
angle = self.np_random.uniform(-np.pi, np.pi)
axis = np.array([0., 0., 1.])
axis = np.array([0.0, 0.0, 1.0])
z_quat = quat_from_angle_and_axis(angle, axis)
parallel_quat = self.parallel_quats[self.np_random.randint(len(self.parallel_quats))]
parallel_quat = self.parallel_quats[
self.np_random.randint(len(self.parallel_quats))
]
offset_quat = rotations.quat_mul(z_quat, parallel_quat)
initial_quat = rotations.quat_mul(initial_quat, offset_quat)
elif self.target_rotation in ['xyz', 'ignore']:
elif self.target_rotation in ["xyz", "ignore"]:
angle = self.np_random.uniform(-np.pi, np.pi)
axis = self.np_random.uniform(-1., 1., size=3)
axis = self.np_random.uniform(-1.0, 1.0, size=3)
offset_quat = quat_from_angle_and_axis(angle, axis)
initial_quat = rotations.quat_mul(initial_quat, offset_quat)
elif self.target_rotation == 'fixed':
elif self.target_rotation == "fixed":
pass
else:
raise error.Error('Unknown target_rotation option "{}".'.format(self.target_rotation))
raise error.Error(
'Unknown target_rotation option "{}".'.format(self.target_rotation)
)
# Randomize initial position.
if self.randomize_initial_position:
if self.target_position != 'fixed':
if self.target_position != "fixed":
initial_pos += self.np_random.normal(size=3, scale=0.005)
initial_quat /= np.linalg.norm(initial_quat)
initial_qpos = np.concatenate([initial_pos, initial_quat])
self.sim.data.set_joint_qpos('object:joint', initial_qpos)
self.sim.data.set_joint_qpos("object:joint", initial_qpos)
def is_on_palm():
self.sim.forward()
cube_middle_idx = self.sim.model.site_name2id('object:center')
cube_middle_idx = self.sim.model.site_name2id("object:center")
cube_middle_pos = self.sim.data.site_xpos[cube_middle_idx]
is_on_palm = (cube_middle_pos[2] > 0.04)
is_on_palm = cube_middle_pos[2] > 0.04
return is_on_palm
# Run the simulation for a bunch of timesteps to let everything settle in.
@@ -205,38 +227,46 @@ class ManipulateEnv(hand_env.HandEnv):
def _sample_goal(self):
# Select a goal for the object position.
target_pos = None
if self.target_position == 'random':
if self.target_position == "random":
assert self.target_position_range.shape == (3, 2)
offset = self.np_random.uniform(self.target_position_range[:, 0], self.target_position_range[:, 1])
offset = self.np_random.uniform(
self.target_position_range[:, 0], self.target_position_range[:, 1]
)
assert offset.shape == (3,)
target_pos = self.sim.data.get_joint_qpos('object:joint')[:3] + offset
elif self.target_position in ['ignore', 'fixed']:
target_pos = self.sim.data.get_joint_qpos('object:joint')[:3]
target_pos = self.sim.data.get_joint_qpos("object:joint")[:3] + offset
elif self.target_position in ["ignore", "fixed"]:
target_pos = self.sim.data.get_joint_qpos("object:joint")[:3]
else:
raise error.Error('Unknown target_position option "{}".'.format(self.target_position))
raise error.Error(
'Unknown target_position option "{}".'.format(self.target_position)
)
assert target_pos is not None
assert target_pos.shape == (3,)
# Select a goal for the object rotation.
target_quat = None
if self.target_rotation == 'z':
if self.target_rotation == "z":
angle = self.np_random.uniform(-np.pi, np.pi)
axis = np.array([0., 0., 1.])
axis = np.array([0.0, 0.0, 1.0])
target_quat = quat_from_angle_and_axis(angle, axis)
elif self.target_rotation == 'parallel':
elif self.target_rotation == "parallel":
angle = self.np_random.uniform(-np.pi, np.pi)
axis = np.array([0., 0., 1.])
axis = np.array([0.0, 0.0, 1.0])
target_quat = quat_from_angle_and_axis(angle, axis)
parallel_quat = self.parallel_quats[self.np_random.randint(len(self.parallel_quats))]
parallel_quat = self.parallel_quats[
self.np_random.randint(len(self.parallel_quats))
]
target_quat = rotations.quat_mul(target_quat, parallel_quat)
elif self.target_rotation == 'xyz':
elif self.target_rotation == "xyz":
angle = self.np_random.uniform(-np.pi, np.pi)
axis = self.np_random.uniform(-1., 1., size=3)
axis = self.np_random.uniform(-1.0, 1.0, size=3)
target_quat = quat_from_angle_and_axis(angle, axis)
elif self.target_rotation in ['ignore', 'fixed']:
target_quat = self.sim.data.get_joint_qpos('object:joint')
elif self.target_rotation in ["ignore", "fixed"]:
target_quat = self.sim.data.get_joint_qpos("object:joint")
else:
raise error.Error('Unknown target_rotation option "{}".'.format(self.target_rotation))
raise error.Error(
'Unknown target_rotation option "{}".'.format(self.target_rotation)
)
assert target_quat is not None
assert target_quat.shape == (4,)
@@ -249,55 +279,76 @@ class ManipulateEnv(hand_env.HandEnv):
# is not obscured.
goal = self.goal.copy()
assert goal.shape == (7,)
if self.target_position == 'ignore':
if self.target_position == "ignore":
            # Move the object to the side since we do not care about its position.
goal[0] += 0.15
self.sim.data.set_joint_qpos('target:joint', goal)
self.sim.data.set_joint_qvel('target:joint', np.zeros(6))
self.sim.data.set_joint_qpos("target:joint", goal)
self.sim.data.set_joint_qvel("target:joint", np.zeros(6))
if 'object_hidden' in self.sim.model.geom_names:
hidden_id = self.sim.model.geom_name2id('object_hidden')
self.sim.model.geom_rgba[hidden_id, 3] = 1.
if "object_hidden" in self.sim.model.geom_names:
hidden_id = self.sim.model.geom_name2id("object_hidden")
self.sim.model.geom_rgba[hidden_id, 3] = 1.0
self.sim.forward()
def _get_obs(self):
robot_qpos, robot_qvel = robot_get_obs(self.sim)
object_qvel = self.sim.data.get_joint_qvel('object:joint')
achieved_goal = self._get_achieved_goal().ravel() # this contains the object position + rotation
observation = np.concatenate([robot_qpos, robot_qvel, object_qvel, achieved_goal])
object_qvel = self.sim.data.get_joint_qvel("object:joint")
achieved_goal = (
self._get_achieved_goal().ravel()
) # this contains the object position + rotation
observation = np.concatenate(
[robot_qpos, robot_qvel, object_qvel, achieved_goal]
)
return {
'observation': observation.copy(),
'achieved_goal': achieved_goal.copy(),
'desired_goal': self.goal.ravel().copy(),
"observation": observation.copy(),
"achieved_goal": achieved_goal.copy(),
"desired_goal": self.goal.ravel().copy(),
}
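# Note that the achieved goal above is the full 7-dimensional object pose
# (3 position components followed by a 4-component quaternion), i.e. the same
# layout as get_joint_qpos("object:joint").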
class HandBlockEnv(ManipulateEnv, utils.EzPickle):
def __init__(self, target_position='random', target_rotation='xyz', reward_type='sparse'):
def __init__(
self, target_position="random", target_rotation="xyz", reward_type="sparse"
):
utils.EzPickle.__init__(self, target_position, target_rotation, reward_type)
ManipulateEnv.__init__(self,
model_path=MANIPULATE_BLOCK_XML, target_position=target_position,
ManipulateEnv.__init__(
self,
model_path=MANIPULATE_BLOCK_XML,
target_position=target_position,
target_rotation=target_rotation,
target_position_range=np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)]),
reward_type=reward_type)
reward_type=reward_type,
)
class HandEggEnv(ManipulateEnv, utils.EzPickle):
def __init__(self, target_position='random', target_rotation='xyz', reward_type='sparse'):
def __init__(
self, target_position="random", target_rotation="xyz", reward_type="sparse"
):
utils.EzPickle.__init__(self, target_position, target_rotation, reward_type)
ManipulateEnv.__init__(self,
model_path=MANIPULATE_EGG_XML, target_position=target_position,
ManipulateEnv.__init__(
self,
model_path=MANIPULATE_EGG_XML,
target_position=target_position,
target_rotation=target_rotation,
target_position_range=np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)]),
reward_type=reward_type)
reward_type=reward_type,
)
class HandPenEnv(ManipulateEnv, utils.EzPickle):
def __init__(self, target_position='random', target_rotation='xyz', reward_type='sparse'):
def __init__(
self, target_position="random", target_rotation="xyz", reward_type="sparse"
):
utils.EzPickle.__init__(self, target_position, target_rotation, reward_type)
ManipulateEnv.__init__(self,
model_path=MANIPULATE_PEN_XML, target_position=target_position,
ManipulateEnv.__init__(
self,
model_path=MANIPULATE_PEN_XML,
target_position=target_position,
target_rotation=target_rotation,
target_position_range=np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)]),
randomize_initial_rotation=False, reward_type=reward_type,
ignore_z_target_rotation=True, distance_threshold=0.05)
randomize_initial_rotation=False,
reward_type=reward_type,
ignore_z_target_rotation=True,
distance_threshold=0.05,
)


@@ -5,18 +5,29 @@ from gym import utils, error, spaces
from gym.envs.robotics.hand import manipulate
# Ensure we get the path separator correct on windows
MANIPULATE_BLOCK_XML = os.path.join('hand', 'manipulate_block_touch_sensors.xml')
MANIPULATE_EGG_XML = os.path.join('hand', 'manipulate_egg_touch_sensors.xml')
MANIPULATE_PEN_XML = os.path.join('hand', 'manipulate_pen_touch_sensors.xml')
MANIPULATE_BLOCK_XML = os.path.join("hand", "manipulate_block_touch_sensors.xml")
MANIPULATE_EGG_XML = os.path.join("hand", "manipulate_egg_touch_sensors.xml")
MANIPULATE_PEN_XML = os.path.join("hand", "manipulate_pen_touch_sensors.xml")
class ManipulateTouchSensorsEnv(manipulate.ManipulateEnv):
def __init__(
self, model_path, target_position, target_rotation,
target_position_range, reward_type, initial_qpos={},
randomize_initial_position=True, randomize_initial_rotation=True,
distance_threshold=0.01, rotation_threshold=0.1, n_substeps=20, relative_control=False,
ignore_z_target_rotation=False, touch_visualisation="on_touch", touch_get_obs="sensordata",
self,
model_path,
target_position,
target_rotation,
target_position_range,
reward_type,
initial_qpos={},
randomize_initial_position=True,
randomize_initial_rotation=True,
distance_threshold=0.01,
rotation_threshold=0.1,
n_substeps=20,
relative_control=False,
ignore_z_target_rotation=False,
touch_visualisation="on_touch",
touch_get_obs="sensordata",
):
"""Initializes a new Hand manipulation environment with touch sensors.
@@ -40,34 +51,63 @@ class ManipulateTouchSensorsEnv(manipulate.ManipulateEnv):
self.notouch_color = [0, 0.5, 0, 0.2]
manipulate.ManipulateEnv.__init__(
self, model_path, target_position, target_rotation,
target_position_range, reward_type, initial_qpos=initial_qpos,
randomize_initial_position=randomize_initial_position, randomize_initial_rotation=randomize_initial_rotation,
distance_threshold=distance_threshold, rotation_threshold=rotation_threshold, n_substeps=n_substeps, relative_control=relative_control,
self,
model_path,
target_position,
target_rotation,
target_position_range,
reward_type,
initial_qpos=initial_qpos,
randomize_initial_position=randomize_initial_position,
randomize_initial_rotation=randomize_initial_rotation,
distance_threshold=distance_threshold,
rotation_threshold=rotation_threshold,
n_substeps=n_substeps,
relative_control=relative_control,
ignore_z_target_rotation=ignore_z_target_rotation,
)
for k, v in self.sim.model._sensor_name2id.items(): # get touch sensor site names and their ids
if 'robot0:TS_' in k:
self._touch_sensor_id_site_id.append((v, self.sim.model._site_name2id[k.replace('robot0:TS_', 'robot0:T_')]))
for (
k,
v,
) in (
self.sim.model._sensor_name2id.items()
): # get touch sensor site names and their ids
if "robot0:TS_" in k:
self._touch_sensor_id_site_id.append(
(
v,
self.sim.model._site_name2id[
k.replace("robot0:TS_", "robot0:T_")
],
)
)
self._touch_sensor_id.append(v)
if self.touch_visualisation == 'off': # set touch sensors rgba values
if self.touch_visualisation == "off": # set touch sensors rgba values
for _, site_id in self._touch_sensor_id_site_id:
self.sim.model.site_rgba[site_id][3] = 0.0
elif self.touch_visualisation == 'always':
elif self.touch_visualisation == "always":
pass
obs = self._get_obs()
self.observation_space = spaces.Dict(dict(
desired_goal=spaces.Box(-np.inf, np.inf, shape=obs['achieved_goal'].shape, dtype='float32'),
achieved_goal=spaces.Box(-np.inf, np.inf, shape=obs['achieved_goal'].shape, dtype='float32'),
observation=spaces.Box(-np.inf, np.inf, shape=obs['observation'].shape, dtype='float32'),
))
self.observation_space = spaces.Dict(
dict(
desired_goal=spaces.Box(
-np.inf, np.inf, shape=obs["achieved_goal"].shape, dtype="float32"
),
achieved_goal=spaces.Box(
-np.inf, np.inf, shape=obs["achieved_goal"].shape, dtype="float32"
),
observation=spaces.Box(
-np.inf, np.inf, shape=obs["observation"].shape, dtype="float32"
),
)
)
def _render_callback(self):
super(ManipulateTouchSensorsEnv, self)._render_callback()
if self.touch_visualisation == 'on_touch':
if self.touch_visualisation == "on_touch":
for touch_sensor_id, site_id in self._touch_sensor_id_site_id:
if self.sim.data.sensordata[touch_sensor_id] != 0.0:
self.sim.model.site_rgba[site_id] = self.touch_color
@@ -76,56 +116,92 @@ class ManipulateTouchSensorsEnv(manipulate.ManipulateEnv):
def _get_obs(self):
robot_qpos, robot_qvel = manipulate.robot_get_obs(self.sim)
object_qvel = self.sim.data.get_joint_qvel('object:joint')
achieved_goal = self._get_achieved_goal().ravel() # this contains the object position + rotation
object_qvel = self.sim.data.get_joint_qvel("object:joint")
achieved_goal = (
self._get_achieved_goal().ravel()
) # this contains the object position + rotation
touch_values = [] # get touch sensor readings. if there is one, set value to 1
if self.touch_get_obs == 'sensordata':
if self.touch_get_obs == "sensordata":
touch_values = self.sim.data.sensordata[self._touch_sensor_id]
elif self.touch_get_obs == 'boolean':
elif self.touch_get_obs == "boolean":
touch_values = self.sim.data.sensordata[self._touch_sensor_id] > 0.0
elif self.touch_get_obs == 'log':
elif self.touch_get_obs == "log":
touch_values = np.log(self.sim.data.sensordata[self._touch_sensor_id] + 1.0)
observation = np.concatenate([robot_qpos, robot_qvel, object_qvel, touch_values, achieved_goal])
observation = np.concatenate(
[robot_qpos, robot_qvel, object_qvel, touch_values, achieved_goal]
)
return {
'observation': observation.copy(),
'achieved_goal': achieved_goal.copy(),
'desired_goal': self.goal.ravel().copy(),
"observation": observation.copy(),
"achieved_goal": achieved_goal.copy(),
"desired_goal": self.goal.ravel().copy(),
}
class HandBlockTouchSensorsEnv(ManipulateTouchSensorsEnv, utils.EzPickle):
def __init__(self, target_position='random', target_rotation='xyz', touch_get_obs='sensordata', reward_type='sparse'):
utils.EzPickle.__init__(self, target_position, target_rotation, touch_get_obs, reward_type)
ManipulateTouchSensorsEnv.__init__(self,
def __init__(
self,
target_position="random",
target_rotation="xyz",
touch_get_obs="sensordata",
reward_type="sparse",
):
utils.EzPickle.__init__(
self, target_position, target_rotation, touch_get_obs, reward_type
)
ManipulateTouchSensorsEnv.__init__(
self,
model_path=MANIPULATE_BLOCK_XML,
touch_get_obs=touch_get_obs,
target_rotation=target_rotation,
target_position=target_position,
target_position_range=np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)]),
reward_type=reward_type)
reward_type=reward_type,
)
class HandEggTouchSensorsEnv(ManipulateTouchSensorsEnv, utils.EzPickle):
def __init__(self, target_position='random', target_rotation='xyz', touch_get_obs='sensordata', reward_type='sparse'):
utils.EzPickle.__init__(self, target_position, target_rotation, touch_get_obs, reward_type)
ManipulateTouchSensorsEnv.__init__(self,
def __init__(
self,
target_position="random",
target_rotation="xyz",
touch_get_obs="sensordata",
reward_type="sparse",
):
utils.EzPickle.__init__(
self, target_position, target_rotation, touch_get_obs, reward_type
)
ManipulateTouchSensorsEnv.__init__(
self,
model_path=MANIPULATE_EGG_XML,
touch_get_obs=touch_get_obs,
target_rotation=target_rotation,
target_position=target_position,
target_position_range=np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)]),
reward_type=reward_type)
reward_type=reward_type,
)
class HandPenTouchSensorsEnv(ManipulateTouchSensorsEnv, utils.EzPickle):
def __init__(self, target_position='random', target_rotation='xyz', touch_get_obs='sensordata', reward_type='sparse'):
utils.EzPickle.__init__(self, target_position, target_rotation, touch_get_obs, reward_type)
ManipulateTouchSensorsEnv.__init__(self,
def __init__(
self,
target_position="random",
target_rotation="xyz",
touch_get_obs="sensordata",
reward_type="sparse",
):
utils.EzPickle.__init__(
self, target_position, target_rotation, touch_get_obs, reward_type
)
ManipulateTouchSensorsEnv.__init__(
self,
model_path=MANIPULATE_PEN_XML,
touch_get_obs=touch_get_obs,
target_rotation=target_rotation,
target_position=target_position,
target_position_range=np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)]),
randomize_initial_rotation=False, reward_type=reward_type,
ignore_z_target_rotation=True, distance_threshold=0.05)
randomize_initial_rotation=False,
reward_type=reward_type,
ignore_z_target_rotation=True,
distance_threshold=0.05,
)


@@ -7,44 +7,44 @@ from gym.envs.robotics.utils import robot_get_obs
FINGERTIP_SITE_NAMES = [
'robot0:S_fftip',
'robot0:S_mftip',
'robot0:S_rftip',
'robot0:S_lftip',
'robot0:S_thtip',
"robot0:S_fftip",
"robot0:S_mftip",
"robot0:S_rftip",
"robot0:S_lftip",
"robot0:S_thtip",
]
DEFAULT_INITIAL_QPOS = {
'robot0:WRJ1': -0.16514339750464327,
'robot0:WRJ0': -0.31973286565062153,
'robot0:FFJ3': 0.14340512546557435,
'robot0:FFJ2': 0.32028208333591573,
'robot0:FFJ1': 0.7126053607727917,
'robot0:FFJ0': 0.6705281001412586,
'robot0:MFJ3': 0.000246444303701037,
'robot0:MFJ2': 0.3152655251085491,
'robot0:MFJ1': 0.7659800313729842,
'robot0:MFJ0': 0.7323156897425923,
'robot0:RFJ3': 0.00038520700007378114,
'robot0:RFJ2': 0.36743546201985233,
'robot0:RFJ1': 0.7119514095008576,
'robot0:RFJ0': 0.6699446327514138,
'robot0:LFJ4': 0.0525442258033891,
'robot0:LFJ3': -0.13615534724474673,
'robot0:LFJ2': 0.39872030433433003,
'robot0:LFJ1': 0.7415570009679252,
'robot0:LFJ0': 0.704096378652974,
'robot0:THJ4': 0.003673823825070126,
'robot0:THJ3': 0.5506291436028695,
'robot0:THJ2': -0.014515151997119306,
'robot0:THJ1': -0.0015229223564485414,
'robot0:THJ0': -0.7894883021600622,
"robot0:WRJ1": -0.16514339750464327,
"robot0:WRJ0": -0.31973286565062153,
"robot0:FFJ3": 0.14340512546557435,
"robot0:FFJ2": 0.32028208333591573,
"robot0:FFJ1": 0.7126053607727917,
"robot0:FFJ0": 0.6705281001412586,
"robot0:MFJ3": 0.000246444303701037,
"robot0:MFJ2": 0.3152655251085491,
"robot0:MFJ1": 0.7659800313729842,
"robot0:MFJ0": 0.7323156897425923,
"robot0:RFJ3": 0.00038520700007378114,
"robot0:RFJ2": 0.36743546201985233,
"robot0:RFJ1": 0.7119514095008576,
"robot0:RFJ0": 0.6699446327514138,
"robot0:LFJ4": 0.0525442258033891,
"robot0:LFJ3": -0.13615534724474673,
"robot0:LFJ2": 0.39872030433433003,
"robot0:LFJ1": 0.7415570009679252,
"robot0:LFJ0": 0.704096378652974,
"robot0:THJ4": 0.003673823825070126,
"robot0:THJ3": 0.5506291436028695,
"robot0:THJ2": -0.014515151997119306,
"robot0:THJ1": -0.0015229223564485414,
"robot0:THJ0": -0.7894883021600622,
}
# Ensure we get the path separator correct on windows
MODEL_XML_PATH = os.path.join('hand', 'reach.xml')
MODEL_XML_PATH = os.path.join("hand", "reach.xml")
def goal_distance(goal_a, goal_b):
@@ -54,16 +54,24 @@ def goal_distance(goal_a, goal_b):
class HandReachEnv(hand_env.HandEnv, utils.EzPickle):
def __init__(
self, distance_threshold=0.01, n_substeps=20, relative_control=False,
initial_qpos=DEFAULT_INITIAL_QPOS, reward_type='sparse',
self,
distance_threshold=0.01,
n_substeps=20,
relative_control=False,
initial_qpos=DEFAULT_INITIAL_QPOS,
reward_type="sparse",
):
utils.EzPickle.__init__(**locals())
self.distance_threshold = distance_threshold
self.reward_type = reward_type
hand_env.HandEnv.__init__(
self, MODEL_XML_PATH, n_substeps=n_substeps, initial_qpos=initial_qpos,
relative_control=relative_control)
self,
MODEL_XML_PATH,
n_substeps=n_substeps,
initial_qpos=initial_qpos,
relative_control=relative_control,
)
def _get_achieved_goal(self):
goal = [self.sim.data.get_site_xpos(name) for name in FINGERTIP_SITE_NAMES]
@@ -74,7 +82,7 @@ class HandReachEnv(hand_env.HandEnv, utils.EzPickle):
def compute_reward(self, achieved_goal, goal, info):
d = goal_distance(achieved_goal, goal)
if self.reward_type == 'sparse':
if self.reward_type == "sparse":
return -(d > self.distance_threshold).astype(np.float32)
else:
return -d
@@ -88,20 +96,22 @@ class HandReachEnv(hand_env.HandEnv, utils.EzPickle):
self.sim.forward()
self.initial_goal = self._get_achieved_goal().copy()
self.palm_xpos = self.sim.data.body_xpos[self.sim.model.body_name2id('robot0:palm')].copy()
self.palm_xpos = self.sim.data.body_xpos[
self.sim.model.body_name2id("robot0:palm")
].copy()
def _get_obs(self):
robot_qpos, robot_qvel = robot_get_obs(self.sim)
achieved_goal = self._get_achieved_goal().ravel()
observation = np.concatenate([robot_qpos, robot_qvel, achieved_goal])
return {
'observation': observation.copy(),
'achieved_goal': achieved_goal.copy(),
'desired_goal': self.goal.copy(),
"observation": observation.copy(),
"achieved_goal": achieved_goal.copy(),
"desired_goal": self.goal.copy(),
}
def _sample_goal(self):
thumb_name = 'robot0:S_thtip'
thumb_name = "robot0:S_thtip"
finger_names = [name for name in FINGERTIP_SITE_NAMES if name != thumb_name]
finger_name = self.np_random.choice(finger_names)
@@ -117,7 +127,7 @@ class HandReachEnv(hand_env.HandEnv, utils.EzPickle):
# overlap.
goal = self.initial_goal.copy().reshape(-1, 3)
for idx in [thumb_idx, finger_idx]:
offset_direction = (meeting_pos - goal[idx])
offset_direction = meeting_pos - goal[idx]
offset_direction /= np.linalg.norm(offset_direction)
goal[idx] = meeting_pos - 0.005 * offset_direction
@@ -136,14 +146,16 @@ class HandReachEnv(hand_env.HandEnv, utils.EzPickle):
sites_offset = (self.sim.data.site_xpos - self.sim.model.site_pos).copy()
goal = self.goal.reshape(5, 3)
for finger_idx in range(5):
site_name = 'target{}'.format(finger_idx)
site_name = "target{}".format(finger_idx)
site_id = self.sim.model.site_name2id(site_name)
self.sim.model.site_pos[site_id] = goal[finger_idx] - sites_offset[site_id]
# Visualize finger positions.
achieved_goal = self._get_achieved_goal().reshape(5, 3)
for finger_idx in range(5):
site_name = 'finger{}'.format(finger_idx)
site_name = "finger{}".format(finger_idx)
site_id = self.sim.model.site_name2id(site_name)
self.sim.model.site_pos[site_id] = achieved_goal[finger_idx] - sites_offset[site_id]
self.sim.model.site_pos[site_id] = (
achieved_goal[finger_idx] - sites_offset[site_id]
)
self.sim.forward()


@@ -13,8 +13,11 @@ class HandEnv(robot_env.RobotEnv):
self.relative_control = relative_control
super(HandEnv, self).__init__(
model_path=model_path, n_substeps=n_substeps, n_actions=20,
initial_qpos=initial_qpos)
model_path=model_path,
n_substeps=n_substeps,
n_actions=20,
initial_qpos=initial_qpos,
)
# RobotEnv methods
# ----------------------------
@@ -23,30 +26,35 @@ class HandEnv(robot_env.RobotEnv):
assert action.shape == (20,)
ctrlrange = self.sim.model.actuator_ctrlrange
actuation_range = (ctrlrange[:, 1] - ctrlrange[:, 0]) / 2.
actuation_range = (ctrlrange[:, 1] - ctrlrange[:, 0]) / 2.0
if self.relative_control:
actuation_center = np.zeros_like(action)
for i in range(self.sim.data.ctrl.shape[0]):
actuation_center[i] = self.sim.data.get_joint_qpos(
self.sim.model.actuator_names[i].replace(':A_', ':'))
for joint_name in ['FF', 'MF', 'RF', 'LF']:
self.sim.model.actuator_names[i].replace(":A_", ":")
)
for joint_name in ["FF", "MF", "RF", "LF"]:
act_idx = self.sim.model.actuator_name2id(
'robot0:A_{}J1'.format(joint_name))
"robot0:A_{}J1".format(joint_name)
)
actuation_center[act_idx] += self.sim.data.get_joint_qpos(
'robot0:{}J0'.format(joint_name))
"robot0:{}J0".format(joint_name)
)
else:
actuation_center = (ctrlrange[:, 1] + ctrlrange[:, 0]) / 2.
actuation_center = (ctrlrange[:, 1] + ctrlrange[:, 0]) / 2.0
self.sim.data.ctrl[:] = actuation_center + action * actuation_range
self.sim.data.ctrl[:] = np.clip(self.sim.data.ctrl, ctrlrange[:, 0], ctrlrange[:, 1])
self.sim.data.ctrl[:] = np.clip(
self.sim.data.ctrl, ctrlrange[:, 0], ctrlrange[:, 1]
)
def _viewer_setup(self):
body_id = self.sim.model.body_name2id('robot0:palm')
body_id = self.sim.model.body_name2id("robot0:palm")
lookat = self.sim.data.body_xpos[body_id]
for idx, value in enumerate(lookat):
self.viewer.cam.lookat[idx] = value
self.viewer.cam.distance = 0.5
self.viewer.cam.azimuth = 55.
self.viewer.cam.elevation = -25.
self.viewer.cam.azimuth = 55.0
self.viewer.cam.elevation = -25.0
def render(self, mode='human', width=500, height=500):
def render(self, mode="human", width=500, height=500):
return super(HandEnv, self).render(mode, width, height)


@@ -9,18 +9,23 @@ from gym.utils import seeding
try:
import mujoco_py
except ImportError as e:
raise error.DependencyNotInstalled("{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(e))
raise error.DependencyNotInstalled(
"{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(
e
)
)
DEFAULT_SIZE = 500
class RobotEnv(gym.GoalEnv):
def __init__(self, model_path, initial_qpos, n_actions, n_substeps):
if model_path.startswith('/'):
if model_path.startswith("/"):
fullpath = model_path
else:
fullpath = os.path.join(os.path.dirname(__file__), 'assets', model_path)
fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path)
if not os.path.exists(fullpath):
raise IOError('File {} does not exist'.format(fullpath))
raise IOError("File {} does not exist".format(fullpath))
model = mujoco_py.load_model_from_path(fullpath)
self.sim = mujoco_py.MjSim(model, nsubsteps=n_substeps)
@@ -28,8 +33,8 @@ class RobotEnv(gym.GoalEnv):
self._viewers = {}
self.metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second': int(np.round(1.0 / self.dt))
"render.modes": ["human", "rgb_array"],
"video.frames_per_second": int(np.round(1.0 / self.dt)),
}
self.seed()
@@ -38,12 +43,20 @@ class RobotEnv(gym.GoalEnv):
self.goal = self._sample_goal()
obs = self._get_obs()
self.action_space = spaces.Box(-1., 1., shape=(n_actions,), dtype='float32')
self.observation_space = spaces.Dict(dict(
desired_goal=spaces.Box(-np.inf, np.inf, shape=obs['achieved_goal'].shape, dtype='float32'),
achieved_goal=spaces.Box(-np.inf, np.inf, shape=obs['achieved_goal'].shape, dtype='float32'),
observation=spaces.Box(-np.inf, np.inf, shape=obs['observation'].shape, dtype='float32'),
))
self.action_space = spaces.Box(-1.0, 1.0, shape=(n_actions,), dtype="float32")
self.observation_space = spaces.Dict(
dict(
desired_goal=spaces.Box(
-np.inf, np.inf, shape=obs["achieved_goal"].shape, dtype="float32"
),
achieved_goal=spaces.Box(
-np.inf, np.inf, shape=obs["achieved_goal"].shape, dtype="float32"
),
observation=spaces.Box(
-np.inf, np.inf, shape=obs["observation"].shape, dtype="float32"
),
)
)
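# A minimal sketch of the dictionary observation these goal-based envs return,
# matching the Dict space constructed above (the shapes here are hypothetical).
import numpy as np

obs = {
    "observation": np.zeros(10, dtype=np.float32),
    "achieved_goal": np.zeros(3, dtype=np.float32),
    "desired_goal": np.zeros(3, dtype=np.float32),
}
# step() then computes the reward as compute_reward(obs["achieved_goal"],
# self.goal, info), as shown further below.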
@property
def dt(self):
@@ -65,9 +78,9 @@ class RobotEnv(gym.GoalEnv):
done = False
info = {
'is_success': self._is_success(obs['achieved_goal'], self.goal),
"is_success": self._is_success(obs["achieved_goal"], self.goal),
}
reward = self.compute_reward(obs['achieved_goal'], self.goal, info)
reward = self.compute_reward(obs["achieved_goal"], self.goal, info)
return obs, reward, done, info
def reset(self):
@@ -90,23 +103,23 @@ class RobotEnv(gym.GoalEnv):
self.viewer = None
self._viewers = {}
def render(self, mode='human', width=DEFAULT_SIZE, height=DEFAULT_SIZE):
def render(self, mode="human", width=DEFAULT_SIZE, height=DEFAULT_SIZE):
self._render_callback()
if mode == 'rgb_array':
if mode == "rgb_array":
self._get_viewer(mode).render(width, height)
# window size used for old mujoco-py:
data = self._get_viewer(mode).read_pixels(width, height, depth=False)
# original image is upside-down, so flip it
return data[::-1, :, :]
elif mode == 'human':
elif mode == "human":
self._get_viewer(mode).render()
def _get_viewer(self, mode):
self.viewer = self._viewers.get(mode)
if self.viewer is None:
if mode == 'human':
if mode == "human":
self.viewer = mujoco_py.MjViewer(self.sim)
elif mode == 'rgb_array':
elif mode == "rgb_array":
self.viewer = mujoco_py.MjRenderContextOffscreen(self.sim, device_id=-1)
self._viewer_setup()
self._viewers[mode] = self.viewer
@@ -126,23 +139,19 @@ class RobotEnv(gym.GoalEnv):
return True
def _get_obs(self):
"""Returns the observation.
"""
"""Returns the observation."""
raise NotImplementedError()
def _set_action(self, action):
"""Applies the given action to the simulation.
"""
"""Applies the given action to the simulation."""
raise NotImplementedError()
def _is_success(self, achieved_goal, desired_goal):
"""Indicates whether or not the achieved goal successfully achieved the desired goal.
"""
"""Indicates whether or not the achieved goal successfully achieved the desired goal."""
raise NotImplementedError()
def _sample_goal(self):
"""Samples a new goal and returns it.
"""
"""Samples a new goal and returns it."""
raise NotImplementedError()
def _env_setup(self, initial_qpos):


@@ -31,7 +31,7 @@
import numpy as np
import itertools
'''
"""
Rotations
=========
@@ -101,7 +101,7 @@ TODO / Missing
- Random sampling (e.g. sample uniform random rotation)
- Performance benchmarks/measurements
- (Maybe) define everything as to/from matricies, for simplicity
'''
"""
# For testing whether a number is close to zero
_FLOAT_EPS = np.finfo(np.float64).eps
@@ -109,7 +109,7 @@ _EPS4 = _FLOAT_EPS * 4.0
def euler2mat(euler):
""" Convert Euler Angles to Rotation Matrix. See rotation.py for notes """
"""Convert Euler Angles to Rotation Matrix. See rotation.py for notes"""
euler = np.asarray(euler, dtype=np.float64)
assert euler.shape[-1] == 3, "Invalid shaped euler {}".format(euler)
@@ -133,7 +133,7 @@ def euler2mat(euler):
def euler2quat(euler):
""" Convert Euler Angles to Quaternions. See rotation.py for notes """
"""Convert Euler Angles to Quaternions. See rotation.py for notes"""
euler = np.asarray(euler, dtype=np.float64)
assert euler.shape[-1] == 3, "Invalid shape euler {}".format(euler)
@@ -152,27 +152,29 @@ def euler2quat(euler):
def mat2euler(mat):
""" Convert Rotation Matrix to Euler Angles. See rotation.py for notes """
"""Convert Rotation Matrix to Euler Angles. See rotation.py for notes"""
mat = np.asarray(mat, dtype=np.float64)
assert mat.shape[-2:] == (3, 3), "Invalid shape matrix {}".format(mat)
cy = np.sqrt(mat[..., 2, 2] * mat[..., 2, 2] + mat[..., 1, 2] * mat[..., 1, 2])
condition = cy > _EPS4
euler = np.empty(mat.shape[:-1], dtype=np.float64)
euler[..., 2] = np.where(condition,
-np.arctan2(mat[..., 0, 1], mat[..., 0, 0]),
-np.arctan2(-mat[..., 1, 0], mat[..., 1, 1]))
euler[..., 1] = np.where(condition,
-np.arctan2(-mat[..., 0, 2], cy),
-np.arctan2(-mat[..., 0, 2], cy))
euler[..., 0] = np.where(condition,
-np.arctan2(mat[..., 1, 2], mat[..., 2, 2]),
0.0)
euler[..., 2] = np.where(
condition,
-np.arctan2(mat[..., 0, 1], mat[..., 0, 0]),
-np.arctan2(-mat[..., 1, 0], mat[..., 1, 1]),
)
euler[..., 1] = np.where(
condition, -np.arctan2(-mat[..., 0, 2], cy), -np.arctan2(-mat[..., 0, 2], cy)
)
euler[..., 0] = np.where(
condition, -np.arctan2(mat[..., 1, 2], mat[..., 2, 2]), 0.0
)
return euler
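# A tiny round-trip sketch for the two conversions above, assuming this module
# is importable as gym.envs.robotics.rotations and that the angles stay in the
# principal range, so the round trip should recover them.
import numpy as np
from gym.envs.robotics import rotations

euler = np.array([0.1, -0.2, 0.3])
recovered = rotations.mat2euler(rotations.euler2mat(euler))  # ~= [0.1, -0.2, 0.3]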
def mat2quat(mat):
""" Convert Rotation Matrix to Quaternion. See rotation.py for notes """
"""Convert Rotation Matrix to Quaternion. See rotation.py for notes"""
mat = np.asarray(mat, dtype=np.float64)
assert mat.shape[-2:] == (3, 3), "Invalid shape matrix {}".format(mat)
@@ -194,7 +196,7 @@ def mat2quat(mat):
K /= 3.0
# TODO: vectorize this -- probably could be made faster
q = np.empty(K.shape[:-2] + (4,))
it = np.nditer(q[..., 0], flags=['multi_index'])
it = np.nditer(q[..., 0], flags=["multi_index"])
while not it.finished:
# Use Hermitian eigenvectors, values for speed
vals, vecs = np.linalg.eigh(K[it.multi_index])
@@ -209,7 +211,7 @@ def mat2quat(mat):
def quat2euler(quat):
""" Convert Quaternion to Euler Angles. See rotation.py for notes """
"""Convert Quaternion to Euler Angles. See rotation.py for notes"""
return mat2euler(quat2mat(quat))
@@ -223,7 +225,7 @@ def subtract_euler(e1, e2):
def quat2mat(quat):
""" Convert Quaternion to Euler Angles. See rotation.py for notes """
"""Convert Quaternion to Euler Angles. See rotation.py for notes"""
quat = np.asarray(quat, dtype=np.float64)
assert quat.shape[-1] == 4, "Invalid shape quat {}".format(quat)
@@ -247,11 +249,13 @@ def quat2mat(quat):
mat[..., 2, 2] = 1.0 - (xX + yY)
return np.where((Nq > _FLOAT_EPS)[..., np.newaxis, np.newaxis], mat, np.eye(3))
def quat_conjugate(q):
inv_q = -q
inv_q[..., 0] *= -1
return inv_q
def quat_mul(q0, q1):
assert q0.shape == q1.shape
assert q0.shape[-1] == 4
@@ -277,83 +281,97 @@ def quat_mul(q0, q1):
assert q.shape == q0.shape
return q
def quat_rot_vec(q, v0):
q_v0 = np.array([0, v0[0], v0[1], v0[2]])
q_v = quat_mul(q, quat_mul(q_v0, quat_conjugate(q)))
v = q_v[1:]
return v
def quat_identity():
return np.array([1, 0, 0, 0])
def quat2axisangle(quat):
theta = 0;
axis = np.array([0, 0, 1]);
theta = 0
axis = np.array([0, 0, 1])
sin_theta = np.linalg.norm(quat[1:])
if (sin_theta > 0.0001):
if sin_theta > 0.0001:
theta = 2 * np.arcsin(sin_theta)
theta *= 1 if quat[0] >= 0 else -1
axis = quat[1:] / sin_theta
return axis, theta
def euler2point_euler(euler):
_euler = euler.copy()
if len(_euler.shape) < 2:
_euler = np.expand_dims(_euler,0)
assert(_euler.shape[1] == 3)
_euler = np.expand_dims(_euler, 0)
assert _euler.shape[1] == 3
_euler_sin = np.sin(_euler)
_euler_cos = np.cos(_euler)
return np.concatenate([_euler_sin, _euler_cos], axis=-1)
def point_euler2euler(euler):
_euler = euler.copy()
if len(_euler.shape) < 2:
_euler = np.expand_dims(_euler,0)
assert(_euler.shape[1] == 6)
_euler = np.expand_dims(_euler, 0)
assert _euler.shape[1] == 6
angle = np.arctan(_euler[..., :3] / _euler[..., 3:])
angle[_euler[..., 3:] < 0] += np.pi
return angle
def quat2point_quat(quat):
# Should be in qw, qx, qy, qz
_quat = quat.copy()
if len(_quat.shape) < 2:
_quat = np.expand_dims(_quat, 0)
assert(_quat.shape[1] == 4)
angle = np.arccos(_quat[:,[0]]) * 2
assert _quat.shape[1] == 4
angle = np.arccos(_quat[:, [0]]) * 2
xyz = _quat[:, 1:]
xyz[np.squeeze(np.abs(np.sin(angle/2))) >= 1e-5] = (xyz / np.sin(angle / 2))[np.squeeze(np.abs(np.sin(angle/2))) >= 1e-5]
return np.concatenate([np.sin(angle),np.cos(angle), xyz], axis=-1)
xyz[np.squeeze(np.abs(np.sin(angle / 2))) >= 1e-5] = (xyz / np.sin(angle / 2))[
np.squeeze(np.abs(np.sin(angle / 2))) >= 1e-5
]
return np.concatenate([np.sin(angle), np.cos(angle), xyz], axis=-1)
def point_quat2quat(quat):
_quat = quat.copy()
if len(_quat.shape) < 2:
_quat = np.expand_dims(_quat, 0)
assert(_quat.shape[1] == 5)
angle = np.arctan(_quat[:,[0]] / _quat[:,[1]])
assert _quat.shape[1] == 5
angle = np.arctan(_quat[:, [0]] / _quat[:, [1]])
qw = np.cos(angle / 2)
qxyz = _quat[:, 2:]
qxyz[np.squeeze(np.abs(np.sin(angle/2))) >= 1e-5] = (qxyz * np.sin(angle/2))[np.squeeze(np.abs(np.sin(angle/2))) >= 1e-5]
qxyz[np.squeeze(np.abs(np.sin(angle / 2))) >= 1e-5] = (qxyz * np.sin(angle / 2))[
np.squeeze(np.abs(np.sin(angle / 2))) >= 1e-5
]
return np.concatenate([qw, qxyz], axis=-1)
def normalize_angles(angles):
'''Puts angles in [-pi, pi] range.'''
"""Puts angles in [-pi, pi] range."""
angles = angles.copy()
if angles.size > 0:
angles = (angles + np.pi) % (2 * np.pi) - np.pi
assert -np.pi-1e-6 <= angles.min() and angles.max() <= np.pi+1e-6
assert -np.pi - 1e-6 <= angles.min() and angles.max() <= np.pi + 1e-6
return angles
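# A quick check of normalize_angles above (illustrative values): inputs outside
# [-pi, pi] wrap around into the range.
import numpy as np

angles = np.array([3.5, -4.0, 0.1])
wrapped = (angles + np.pi) % (2 * np.pi) - np.pi  # ~= [-2.78, 2.28, 0.1]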
def round_to_straight_angles(angles):
'''Returns closest angle modulo 90 degrees '''
"""Returns closest angle modulo 90 degrees"""
angles = np.round(angles / (np.pi / 2)) * (np.pi / 2)
return normalize_angles(angles)
def get_parallel_rotations():
mult90 = [0, np.pi/2, -np.pi/2, np.pi]
mult90 = [0, np.pi / 2, -np.pi / 2, np.pi]
parallel_rotations = []
for euler in itertools.product(mult90, repeat=3):
canonical = mat2euler(euler2mat(euler))


@@ -1,10 +1,15 @@
import numpy as np
from gym import error
try:
import mujoco_py
except ImportError as e:
raise error.DependencyNotInstalled("{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(e))
raise error.DependencyNotInstalled(
"{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(
e
)
)
def robot_get_obs(sim):
@@ -12,7 +17,7 @@ def robot_get_obs(sim):
a robot.
"""
if sim.data.qpos is not None and sim.model.joint_names:
names = [n for n in sim.model.joint_names if n.startswith('robot')]
names = [n for n in sim.model.joint_names if n.startswith("robot")]
return (
np.array([sim.data.get_joint_qpos(name) for name in names]),
np.array([sim.data.get_joint_qvel(name) for name in names]),
@@ -25,7 +30,7 @@ def ctrl_set_action(sim, action):
For position actuators it sets the target relative to the current qpos.
"""
if sim.model.nmocap > 0:
_, action = np.split(action, (sim.model.nmocap * 7, ))
_, action = np.split(action, (sim.model.nmocap * 7,))
if sim.data.ctrl is not None:
for i in range(action.shape[0]):
if sim.model.actuator_biastype[i] == 0:
@@ -45,7 +50,7 @@ def mocap_set_action(sim, action):
constraint optimizer tries to center the welded body on the mocap.
"""
if sim.model.nmocap > 0:
action, _ = np.split(action, (sim.model.nmocap * 7, ))
action, _ = np.split(action, (sim.model.nmocap * 7,))
action = action.reshape(sim.model.nmocap, 7)
pos_delta = action[:, :3]
@@ -57,13 +62,11 @@ def mocap_set_action(sim, action):
def reset_mocap_welds(sim):
"""Resets the mocap welds that we use for actuation.
"""
"""Resets the mocap welds that we use for actuation."""
if sim.model.nmocap > 0 and sim.model.eq_data is not None:
for i in range(sim.model.eq_data.shape[0]):
if sim.model.eq_type[i] == mujoco_py.const.EQ_WELD:
sim.model.eq_data[i, :] = np.array(
[0., 0., 0., 1., 0., 0., 0.])
sim.model.eq_data[i, :] = np.array([0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0])
sim.forward()
@@ -72,13 +75,15 @@ def reset_mocap2body_xpos(sim):
values as the bodies they're welded to.
"""
if (sim.model.eq_type is None or
sim.model.eq_obj1id is None or
sim.model.eq_obj2id is None):
if (
sim.model.eq_type is None
or sim.model.eq_obj1id is None
or sim.model.eq_obj2id is None
):
return
for eq_type, obj1_id, obj2_id in zip(sim.model.eq_type,
sim.model.eq_obj1id,
sim.model.eq_obj2id):
for eq_type, obj1_id, obj2_id in zip(
sim.model.eq_type, sim.model.eq_obj1id, sim.model.eq_obj2id
):
if eq_type != mujoco_py.const.EQ_WELD:
continue
@@ -91,6 +96,6 @@ def reset_mocap2body_xpos(sim):
mocap_id = sim.model.body_mocapid[obj2_id]
body_idx = obj1_id
assert (mocap_id != -1)
assert mocap_id != -1
sim.data.mocap_pos[mocap_id][:] = sim.data.body_xpos[body_idx]
sim.data.mocap_quat[mocap_id][:] = sim.data.body_xquat[body_idx]


@@ -4,40 +4,54 @@ import os
SKIP_MUJOCO_WARNING_MESSAGE = (
"Cannot run mujoco test (either license key not found or mujoco not"
"installed properly).")
"installed properly)."
)
skip_mujoco = not (os.environ.get('MUJOCO_KEY'))
skip_mujoco = not (os.environ.get("MUJOCO_KEY"))
if not skip_mujoco:
try:
import mujoco_py
except ImportError:
skip_mujoco = True
def should_skip_env_spec_for_tests(spec):
# We skip tests for envs that require dependencies or are otherwise
# troublesome to run frequently
ep = spec.entry_point
# Skip mujoco tests for pull request CI
if skip_mujoco and (ep.startswith('gym.envs.mujoco') or ep.startswith('gym.envs.robotics:')):
if skip_mujoco and (
ep.startswith("gym.envs.mujoco") or ep.startswith("gym.envs.robotics:")
):
return True
try:
import atari_py
except ImportError:
if ep.startswith('gym.envs.atari'):
if ep.startswith("gym.envs.atari"):
return True
try:
import Box2D
except ImportError:
if ep.startswith('gym.envs.box2d'):
if ep.startswith("gym.envs.box2d"):
return True
if ( 'GoEnv' in ep or
'HexEnv' in ep or
(ep.startswith("gym.envs.atari") and not spec.id.startswith("Pong") and not spec.id.startswith("Seaquest"))
if (
"GoEnv" in ep
or "HexEnv" in ep
or (
ep.startswith("gym.envs.atari")
and not spec.id.startswith("Pong")
and not spec.id.startswith("Seaquest")
)
):
logger.warn("Skipping tests for env {}".format(ep))
return True
return False
spec_list = [spec for spec in sorted(envs.registry.all(), key=lambda x: x.id) if spec.entry_point is not None and not should_skip_env_spec_for_tests(spec)]
spec_list = [
spec
for spec in sorted(envs.registry.all(), key=lambda x: x.id)
if spec.entry_point is not None and not should_skip_env_spec_for_tests(spec)
]
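For context, the `spec_list` built above is just a filtered, sorted view of the registry. A minimal sketch of enumerating the registered specs the same way (assuming a gym version from around this commit, where `envs.registry.all()` is available):

```python
from gym import envs

# Enumerate registered environment specs, sorted by id, as spec_list does
all_specs = sorted(envs.registry.all(), key=lambda s: s.id)
print(len(all_specs), "registered envs")
print([s.id for s in all_specs[:5]])  # a few ids, before any skipping logic
```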

View File

@@ -3,6 +3,7 @@ import pytest
from gym.envs.tests.spec_list import spec_list
@pytest.mark.parametrize("spec", spec_list)
def test_env(spec):
# Note that this precludes running this test in multiple
@@ -24,15 +25,21 @@ def test_env(spec):
step_responses2 = [env2.step(action) for action in action_samples2]
env2.close()
for i, (action_sample1, action_sample2) in enumerate(zip(action_samples1, action_samples2)):
for i, (action_sample1, action_sample2) in enumerate(
zip(action_samples1, action_samples2)
):
try:
assert_equals(action_sample1, action_sample2)
except AssertionError:
print('env1.action_space=', env1.action_space)
print('env2.action_space=', env2.action_space)
print('action_samples1=', action_samples1)
print('action_samples2=', action_samples2)
print('[{}] action_sample1: {}, action_sample2: {}'.format(i, action_sample1, action_sample2))
print("env1.action_space=", env1.action_space)
print("env2.action_space=", env2.action_space)
print("action_samples1=", action_samples1)
print("action_samples2=", action_samples2)
print(
"[{}] action_sample1: {}, action_sample2: {}".format(
i, action_sample1, action_sample2
)
)
raise
    # Don't check rollout equality if it's a nondeterministic
@@ -42,21 +49,26 @@ def test_env(spec):
assert_equals(initial_observation1, initial_observation2)
for i, ((o1, r1, d1, i1), (o2, r2, d2, i2)) in enumerate(zip(step_responses1, step_responses2)):
assert_equals(o1, o2, '[{}] '.format(i))
assert r1 == r2, '[{}] r1: {}, r2: {}'.format(i, r1, r2)
assert d1 == d2, '[{}] d1: {}, d2: {}'.format(i, d1, d2)
for i, ((o1, r1, d1, i1), (o2, r2, d2, i2)) in enumerate(
zip(step_responses1, step_responses2)
):
assert_equals(o1, o2, "[{}] ".format(i))
assert r1 == r2, "[{}] r1: {}, r2: {}".format(i, r1, r2)
assert d1 == d2, "[{}] d1: {}, d2: {}".format(i, d1, d2)
# Go returns a Pachi game board in info, which doesn't
# properly check equality. For now, we hack around this by
# just skipping Go.
if spec.id not in ['Go9x9-v0', 'Go19x19-v0']:
assert_equals(i1, i2, '[{}] '.format(i))
if spec.id not in ["Go9x9-v0", "Go19x19-v0"]:
assert_equals(i1, i2, "[{}] ".format(i))
def assert_equals(a, b, prefix=None):
assert type(a) == type(b), "{}Differing types: {} and {}".format(prefix, a, b)
if isinstance(a, dict):
assert list(a.keys()) == list(b.keys()), "{}Key sets differ: {} and {}".format(prefix, a, b)
assert list(a.keys()) == list(b.keys()), "{}Key sets differ: {} and {}".format(
prefix, a, b
)
for k in a.keys():
v_a = a[k]

View File

@@ -4,6 +4,7 @@ import numpy as np
from gym import envs
from gym.envs.tests.spec_list import spec_list
# This runs a smoketest on each official registered env. We may want
# to try also running environments which are not officially registered
# envs.
@@ -15,30 +16,33 @@ def test_env(spec):
# Check that dtype is explicitly declared for gym.Box spaces
for warning_msg in warnings:
assert not 'autodetected dtype' in str(warning_msg.message)
assert "autodetected dtype" not in str(warning_msg.message)
ob_space = env.observation_space
act_space = env.action_space
ob = env.reset()
assert ob_space.contains(ob), 'Reset observation: {!r} not in space'.format(ob)
assert ob_space.contains(ob), "Reset observation: {!r} not in space".format(ob)
a = act_space.sample()
observation, reward, done, _info = env.step(a)
assert ob_space.contains(observation), 'Step observation: {!r} not in space'.format(observation)
assert ob_space.contains(observation), "Step observation: {!r} not in space".format(
observation
)
assert np.isscalar(reward), "{} is not a scalar for {}".format(reward, env)
assert isinstance(done, bool), "Expected {} to be a boolean".format(done)
for mode in env.metadata.get('render.modes', []):
for mode in env.metadata.get("render.modes", []):
env.render(mode=mode)
# Make sure we can render the environment after close.
for mode in env.metadata.get('render.modes', []):
for mode in env.metadata.get("render.modes", []):
env.render(mode=mode)
env.close()
# Run a longer rollout on some environments
def test_random_rollout():
for env in [envs.make('CartPole-v0'), envs.make('FrozenLake-v0')]:
for env in [envs.make("CartPole-v0"), envs.make("FrozenLake-v0")]:
agent = lambda ob: env.action_space.sample()
ob = env.reset()
for _ in range(10):
@@ -46,19 +50,20 @@ def test_random_rollout():
a = agent(ob)
assert env.action_space.contains(a)
(ob, _reward, done, _info) = env.step(a)
if done: break
if done:
break
env.close()
def test_env_render_result_is_immutable():
environs = [
envs.make('Taxi-v3'),
envs.make('FrozenLake-v0'),
envs.make('Reverse-v0'),
envs.make("Taxi-v3"),
envs.make("FrozenLake-v0"),
envs.make("Reverse-v0"),
]
for env in environs:
env.reset()
output = env.render(mode='ansi')
output = env.render(mode="ansi")
assert isinstance(output, str)
env.close()
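The rollout pattern exercised by `test_random_rollout` is the standard reset/step loop; a minimal stand-alone sketch (using a classic-control env that ships with gym):

```python
import gym

env = gym.make("CartPole-v0")
ob = env.reset()
for _ in range(10):
    action = env.action_space.sample()          # random agent, as in the test
    ob, reward, done, info = env.step(action)
    if done:
        break
env.close()
```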

View File

@@ -17,14 +17,18 @@ ROLLOUT_STEPS = 100
episodes = ROLLOUT_STEPS
steps = ROLLOUT_STEPS
ROLLOUT_FILE = os.path.join(DATA_DIR, 'rollout.json')
ROLLOUT_FILE = os.path.join(DATA_DIR, "rollout.json")
if not os.path.isfile(ROLLOUT_FILE):
with open(ROLLOUT_FILE, "w") as outfile:
json.dump({}, outfile, indent=2)
def hash_object(unhashed):
return hashlib.sha256(str(unhashed).encode('utf-16')).hexdigest() # This is really bad, str could be same while values change
return hashlib.sha256(
str(unhashed).encode("utf-16")
).hexdigest() # This is really bad, str could be same while values change
def generate_rollout_hash(spec):
spaces.seed(0)
@@ -38,7 +42,8 @@ def generate_rollout_hash(spec):
total_steps = 0
for episode in range(episodes):
if total_steps >= ROLLOUT_STEPS: break
if total_steps >= ROLLOUT_STEPS:
break
observation = env.reset()
for step in range(steps):
@@ -51,9 +56,11 @@ def generate_rollout_hash(spec):
done_list.append(done)
total_steps += 1
if total_steps >= ROLLOUT_STEPS: break
if total_steps >= ROLLOUT_STEPS:
break
if done: break
if done:
break
observations_hash = hash_object(observation_list)
actions_hash = hash_object(action_list)
@@ -63,6 +70,7 @@ def generate_rollout_hash(spec):
env.close()
return observations_hash, actions_hash, rewards_hash, dones_hash
@pytest.mark.parametrize("spec", spec_list)
def test_env_semantics(spec):
logger.warn("Skipping this test. Existing hashes were generated in a bad way")
@@ -72,7 +80,11 @@ def test_env_semantics(spec):
if spec.id not in rollout_dict:
if not spec.nondeterministic:
logger.warn("Rollout does not exist for {}, run generate_json.py to generate rollouts for new envs".format(spec.id))
logger.warn(
"Rollout does not exist for {}, run generate_json.py to generate rollouts for new envs".format(
spec.id
)
)
return
logger.info("Testing rollout for {} environment...".format(spec.id))
@@ -80,14 +92,30 @@ def test_env_semantics(spec):
observations_now, actions_now, rewards_now, dones_now = generate_rollout_hash(spec)
errors = []
if rollout_dict[spec.id]['observations'] != observations_now:
errors.append('Observations not equal for {} -- expected {} but got {}'.format(spec.id, rollout_dict[spec.id]['observations'], observations_now))
if rollout_dict[spec.id]['actions'] != actions_now:
errors.append('Actions not equal for {} -- expected {} but got {}'.format(spec.id, rollout_dict[spec.id]['actions'], actions_now))
if rollout_dict[spec.id]['rewards'] != rewards_now:
errors.append('Rewards not equal for {} -- expected {} but got {}'.format(spec.id, rollout_dict[spec.id]['rewards'], rewards_now))
if rollout_dict[spec.id]['dones'] != dones_now:
errors.append('Dones not equal for {} -- expected {} but got {}'.format(spec.id, rollout_dict[spec.id]['dones'], dones_now))
if rollout_dict[spec.id]["observations"] != observations_now:
errors.append(
"Observations not equal for {} -- expected {} but got {}".format(
spec.id, rollout_dict[spec.id]["observations"], observations_now
)
)
if rollout_dict[spec.id]["actions"] != actions_now:
errors.append(
"Actions not equal for {} -- expected {} but got {}".format(
spec.id, rollout_dict[spec.id]["actions"], actions_now
)
)
if rollout_dict[spec.id]["rewards"] != rewards_now:
errors.append(
"Rewards not equal for {} -- expected {} but got {}".format(
spec.id, rollout_dict[spec.id]["rewards"], rewards_now
)
)
if rollout_dict[spec.id]["dones"] != dones_now:
errors.append(
"Dones not equal for {} -- expected {} but got {}".format(
spec.id, rollout_dict[spec.id]["dones"], dones_now
)
)
if len(errors):
for error in errors:
logger.warn(error)
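The `hash_object` helper above hashes `str(...)` of the rollout lists; a small sketch of why its inline comment calls this "really bad" (numpy truncates long array reprs, so arrays with different values can produce identical strings and therefore identical hashes):

```python
import hashlib

import numpy as np


def hash_object(unhashed):
    # Same idea as the helper above: hash the str() of an arbitrary object
    return hashlib.sha256(str(unhashed).encode("utf-16")).hexdigest()


a = np.zeros(10000)
b = np.zeros(10000)
b[5000] = 1.0                                # differs only in the elided middle
print(str(a) == str(b))                      # True: repr shows "[0. 0. 0. ... 0. 0. 0.]"
print(hash_object(a) == hash_object(b))      # True, even though a != b
```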

View File

@@ -3,25 +3,25 @@ import numpy as np
from gym.envs.toy_text.frozen_lake import generate_random_map
# Test that FrozenLake map generation creates valid maps of various sizes.
def test_frozenlake_dfs_map_generation():
def frozenlake_dfs_path_exists(res):
frontier, discovered = [], set()
frontier.append((0,0))
frontier.append((0, 0))
while frontier:
r, c = frontier.pop()
if not (r,c) in discovered:
discovered.add((r,c))
if not (r, c) in discovered:
discovered.add((r, c))
directions = [(1, 0), (0, 1), (-1, 0), (0, -1)]
for x, y in directions:
r_new = r + x
c_new = c + y
if r_new < 0 or r_new >= size or c_new < 0 or c_new >= size:
continue
if res[r_new][c_new] == 'G':
if res[r_new][c_new] == "G":
return True
if (res[r_new][c_new] not in '#H'):
if res[r_new][c_new] not in "#H":
frontier.append((r_new, c_new))
return False
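`generate_random_map` can also be used directly outside the test; a small sketch (size and hole probability are illustrative values):

```python
from gym.envs.toy_text.frozen_lake import FrozenLakeEnv, generate_random_map

random_map = generate_random_map(size=8, p=0.8)   # list of 8 strings over S/F/H/G
print("\n".join(random_map))
env = FrozenLakeEnv(desc=random_map, is_slippery=False)
env.reset()
env.render()
```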

View File

@@ -4,10 +4,9 @@ from gym import envs
from gym.envs.tests.spec_list import skip_mujoco, SKIP_MUJOCO_WARNING_MESSAGE
def verify_environments_match(old_environment_id,
new_environment_id,
seed=1,
num_actions=1000):
def verify_environments_match(
old_environment_id, new_environment_id, seed=1, num_actions=1000
):
old_environment = envs.make(old_environment_id)
new_environment = envs.make(new_environment_id)
@@ -21,10 +20,8 @@ def verify_environments_match(old_environment_id,
for i in range(num_actions):
action = old_environment.action_space.sample()
old_observation, old_reward, old_done, old_info = old_environment.step(
action)
new_observation, new_reward, new_done, new_info = new_environment.step(
action)
old_observation, old_reward, old_done, old_info = old_environment.step(action)
new_observation, new_reward, new_done, new_info = new_environment.step(action)
eps = 1e-6
np.testing.assert_allclose(old_observation, new_observation, atol=eps)
@@ -39,47 +36,29 @@ def verify_environments_match(old_environment_id,
class Mujocov2Tov3ConversionTest(unittest.TestCase):
def test_environments_match(self):
test_cases = (
{
'old_id': 'Swimmer-v2',
'new_id': 'Swimmer-v3'
},
{
'old_id': 'Hopper-v2',
'new_id': 'Hopper-v3'
},
{
'old_id': 'Walker2d-v2',
'new_id': 'Walker2d-v3'
},
{
'old_id': 'HalfCheetah-v2',
'new_id': 'HalfCheetah-v3'
},
{
'old_id': 'Ant-v2',
'new_id': 'Ant-v3'
},
{
'old_id': 'Humanoid-v2',
'new_id': 'Humanoid-v3'
},
{"old_id": "Swimmer-v2", "new_id": "Swimmer-v3"},
{"old_id": "Hopper-v2", "new_id": "Hopper-v3"},
{"old_id": "Walker2d-v2", "new_id": "Walker2d-v3"},
{"old_id": "HalfCheetah-v2", "new_id": "HalfCheetah-v3"},
{"old_id": "Ant-v2", "new_id": "Ant-v3"},
{"old_id": "Humanoid-v2", "new_id": "Humanoid-v3"},
)
for test_case in test_cases:
verify_environments_match(test_case['old_id'], test_case['new_id'])
verify_environments_match(test_case["old_id"], test_case["new_id"])
# Raises KeyError because the new envs have extra info
with self.assertRaises(KeyError):
verify_environments_match('Swimmer-v3', 'Swimmer-v2')
verify_environments_match("Swimmer-v3", "Swimmer-v2")
# Raises KeyError because the new envs have extra info
with self.assertRaises(KeyError):
verify_environments_match('Humanoid-v3', 'Humanoid-v2')
verify_environments_match("Humanoid-v3", "Humanoid-v2")
# Raises KeyError because the new envs have extra info
with self.assertRaises(KeyError):
verify_environments_match('Swimmer-v3', 'Swimmer-v2')
verify_environments_match("Swimmer-v3", "Swimmer-v2")
if __name__ == '__main__':
if __name__ == "__main__":
unittest.main()

View File

@@ -4,76 +4,87 @@ from gym import error, envs
from gym.envs import registration
from gym.envs.classic_control import cartpole
class ArgumentEnv(gym.Env):
def __init__(self, arg1, arg2, arg3):
self.arg1 = arg1
self.arg2 = arg2
self.arg3 = arg3
gym.register(
id='test.ArgumentEnv-v0',
entry_point='gym.envs.tests.test_registration:ArgumentEnv',
id="test.ArgumentEnv-v0",
entry_point="gym.envs.tests.test_registration:ArgumentEnv",
kwargs={
'arg1': 'arg1',
'arg2': 'arg2',
}
"arg1": "arg1",
"arg2": "arg2",
},
)
def test_make():
env = envs.make('CartPole-v0')
assert env.spec.id == 'CartPole-v0'
env = envs.make("CartPole-v0")
assert env.spec.id == "CartPole-v0"
assert isinstance(env.unwrapped, cartpole.CartPoleEnv)
def test_make_with_kwargs():
env = envs.make('test.ArgumentEnv-v0', arg2='override_arg2', arg3='override_arg3')
assert env.spec.id == 'test.ArgumentEnv-v0'
env = envs.make("test.ArgumentEnv-v0", arg2="override_arg2", arg3="override_arg3")
assert env.spec.id == "test.ArgumentEnv-v0"
assert isinstance(env.unwrapped, ArgumentEnv)
assert env.arg1 == 'arg1'
assert env.arg2 == 'override_arg2'
assert env.arg3 == 'override_arg3'
assert env.arg1 == "arg1"
assert env.arg2 == "override_arg2"
assert env.arg3 == "override_arg3"
def test_make_deprecated():
try:
envs.make('Humanoid-v0')
envs.make("Humanoid-v0")
except error.Error:
pass
else:
assert False
def test_spec():
spec = envs.spec('CartPole-v0')
assert spec.id == 'CartPole-v0'
spec = envs.spec("CartPole-v0")
assert spec.id == "CartPole-v0"
def test_spec_with_kwargs():
map_name_value = '8x8'
env = gym.make('FrozenLake-v0', map_name=map_name_value)
assert env.spec._kwargs['map_name'] == map_name_value
map_name_value = "8x8"
env = gym.make("FrozenLake-v0", map_name=map_name_value)
assert env.spec._kwargs["map_name"] == map_name_value
def test_missing_lookup():
registry = registration.EnvRegistry()
registry.register(id='Test-v0', entry_point=None)
registry.register(id='Test-v15', entry_point=None)
registry.register(id='Test-v9', entry_point=None)
registry.register(id='Other-v100', entry_point=None)
registry.register(id="Test-v0", entry_point=None)
registry.register(id="Test-v15", entry_point=None)
registry.register(id="Test-v9", entry_point=None)
registry.register(id="Other-v100", entry_point=None)
try:
registry.spec('Test-v1') # must match an env name but not the version above
registry.spec("Test-v1") # must match an env name but not the version above
except error.DeprecatedEnv:
pass
else:
assert False
try:
registry.spec('Unknown-v1')
registry.spec("Unknown-v1")
except error.UnregisteredEnv:
pass
else:
assert False
def test_malformed_lookup():
registry = registration.EnvRegistry()
try:
registry.spec(u'“Breakout-v0”')
registry.spec(u"“Breakout-v0”")
except error.Error as e:
assert 'malformed environment ID' in '{}'.format(e), 'Unexpected message: {}'.format(e)
assert "malformed environment ID" in "{}".format(
e
), "Unexpected message: {}".format(e)
else:
assert False

View File

@@ -8,5 +8,3 @@ from gym.envs.toy_text.kellycoinflip import KellyCoinflipEnv
from gym.envs.toy_text.kellycoinflip import KellyCoinflipGeneralizedEnv
from gym.envs.toy_text.cliffwalking import CliffWalkingEnv
from gym.envs.toy_text.taxi import TaxiEnv
from gym.envs.toy_text.guessing_game import GuessingGame
from gym.envs.toy_text.hotter_colder import HotterColder

View File

@@ -72,12 +72,12 @@ class BlackjackEnv(gym.Env):
by Sutton and Barto.
http://incompleteideas.net/book/the-book-2nd.html
"""
def __init__(self, natural=False):
self.action_space = spaces.Discrete(2)
self.observation_space = spaces.Tuple((
spaces.Discrete(32),
spaces.Discrete(11),
spaces.Discrete(2)))
self.observation_space = spaces.Tuple(
(spaces.Discrete(32), spaces.Discrete(11), spaces.Discrete(2))
)
self.seed()
# Flag to payout 1.5 on a "natural" blackjack win, like casino rules
@@ -96,16 +96,16 @@ class BlackjackEnv(gym.Env):
self.player.append(draw_card(self.np_random))
if is_bust(self.player):
done = True
reward = -1.
reward = -1.0
else:
done = False
reward = 0.
reward = 0.0
else: # stick: play out the dealers hand, and score
done = True
while sum_hand(self.dealer) < 17:
self.dealer.append(draw_card(self.np_random))
reward = cmp(score(self.player), score(self.dealer))
if self.natural and is_natural(self.player) and reward == 1.:
if self.natural and is_natural(self.player) and reward == 1.0:
reward = 1.5
return self._get_obs(), reward, done, {}
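A minimal sketch of playing the environment above with a fixed stick-on-17 policy ("Blackjack-v0" is the id this class is registered under):

```python
import gym

env = gym.make("Blackjack-v0")
obs = env.reset()                        # (player_sum, dealer_showing, usable_ace)
done = False
while not done:
    action = 0 if obs[0] >= 17 else 1    # 0 = stick, 1 = hit
    obs, reward, done, info = env.step(action)
print("final reward:", reward)           # +1 win, 0 draw, -1 loss (1.5 for a natural if enabled)
env.close()
```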

View File

@@ -30,7 +30,8 @@ class CliffWalkingEnv(discrete.DiscreteEnv):
Each time step incurs -1 reward, and stepping into the cliff incurs -100 reward
and a reset to the start. An episode terminates when the agent reaches the goal.
"""
metadata = {'render.modes': ['human', 'ansi']}
metadata = {"render.modes": ["human", "ansi"]}
def __init__(self):
self.shape = (4, 12)
@@ -89,8 +90,8 @@ class CliffWalkingEnv(discrete.DiscreteEnv):
is_done = tuple(new_position) == terminal_state
return [(1.0, new_state, -1, is_done)]
def render(self, mode='human'):
outfile = StringIO() if mode == 'ansi' else sys.stdout
def render(self, mode="human"):
outfile = StringIO() if mode == "ansi" else sys.stdout
for s in range(self.nS):
position = np.unravel_index(s, self.shape)
@@ -108,12 +109,12 @@ class CliffWalkingEnv(discrete.DiscreteEnv):
output = output.lstrip()
if position[1] == self.shape[1] - 1:
output = output.rstrip()
output += '\n'
output += "\n"
outfile.write(output)
outfile.write('\n')
outfile.write("\n")
# No need to return anything for human
if mode != 'human':
if mode != "human":
with closing(outfile):
return outfile.getvalue()
return outfile.getvalue()

View File

@@ -29,6 +29,7 @@ class DiscreteEnv(Env):
"""
def __init__(self, nS, nA, P, isd):
self.P = P
self.isd = isd

View File

@@ -13,12 +13,7 @@ RIGHT = 2
UP = 3
MAPS = {
"4x4": [
"SFFF",
"FHFH",
"FFFH",
"HFFG"
],
"4x4": ["SFFF", "FHFH", "FFFH", "HFFG"],
"8x8": [
"SFFFFFFF",
"FFFFFFFF",
@@ -27,7 +22,7 @@ MAPS = {
"FFFHFFFF",
"FHHFFFHF",
"FHFFHFHF",
"FFFHFFFG"
"FFFHFFFG",
],
}
@@ -53,17 +48,17 @@ def generate_random_map(size=8, p=0.8):
c_new = c + y
if r_new < 0 or r_new >= size or c_new < 0 or c_new >= size:
continue
if res[r_new][c_new] == 'G':
if res[r_new][c_new] == "G":
return True
if (res[r_new][c_new] != 'H'):
if res[r_new][c_new] != "H":
frontier.append((r_new, c_new))
return False
while not valid:
p = min(1, p)
res = np.random.choice(['F', 'H'], (size, size), p=[p, 1-p])
res[0][0] = 'S'
res[-1][-1] = 'G'
res = np.random.choice(["F", "H"], (size, size), p=[p, 1 - p])
res[0][0] = "S"
res[-1][-1] = "G"
valid = is_valid(res)
return ["".join(x) for x in res]
@@ -94,27 +89,27 @@ class FrozenLakeEnv(discrete.DiscreteEnv):
You receive a reward of 1 if you reach the goal, and zero otherwise.
"""
metadata = {'render.modes': ['human', 'ansi']}
metadata = {"render.modes": ["human", "ansi"]}
def __init__(self, desc=None, map_name="4x4", is_slippery=True):
if desc is None and map_name is None:
desc = generate_random_map()
elif desc is None:
desc = MAPS[map_name]
self.desc = desc = np.asarray(desc, dtype='c')
self.desc = desc = np.asarray(desc, dtype="c")
self.nrow, self.ncol = nrow, ncol = desc.shape
self.reward_range = (0, 1)
nA = 4
nS = nrow * ncol
isd = np.array(desc == b'S').astype('float64').ravel()
isd = np.array(desc == b"S").astype("float64").ravel()
isd /= isd.sum()
P = {s: {a: [] for a in range(nA)} for s in range(nS)}
def to_s(row, col):
return row*ncol + col
return row * ncol + col
def inc(row, col, a):
if a == LEFT:
@@ -131,8 +126,8 @@ class FrozenLakeEnv(discrete.DiscreteEnv):
newrow, newcol = inc(row, col, action)
newstate = to_s(newrow, newcol)
newletter = desc[newrow, newcol]
done = bytes(newletter) in b'GH'
reward = float(newletter == b'G')
done = bytes(newletter) in b"GH"
reward = float(newletter == b"G")
return newstate, reward, done
for row in range(nrow):
@@ -141,36 +136,34 @@ class FrozenLakeEnv(discrete.DiscreteEnv):
for a in range(4):
li = P[s][a]
letter = desc[row, col]
if letter in b'GH':
if letter in b"GH":
li.append((1.0, s, 0, True))
else:
if is_slippery:
for b in [(a - 1) % 4, a, (a + 1) % 4]:
li.append((
1. / 3.,
*update_probability_matrix(row, col, b)
))
li.append(
(1.0 / 3.0, *update_probability_matrix(row, col, b))
)
else:
li.append((
1., *update_probability_matrix(row, col, a)
))
li.append((1.0, *update_probability_matrix(row, col, a)))
super(FrozenLakeEnv, self).__init__(nS, nA, P, isd)
def render(self, mode='human'):
outfile = StringIO() if mode == 'ansi' else sys.stdout
def render(self, mode="human"):
outfile = StringIO() if mode == "ansi" else sys.stdout
row, col = self.s // self.ncol, self.s % self.ncol
desc = self.desc.tolist()
desc = [[c.decode('utf-8') for c in line] for line in desc]
desc = [[c.decode("utf-8") for c in line] for line in desc]
desc[row][col] = utils.colorize(desc[row][col], "red", highlight=True)
if self.lastaction is not None:
outfile.write(" ({})\n".format(
["Left", "Down", "Right", "Up"][self.lastaction]))
outfile.write(
" ({})\n".format(["Left", "Down", "Right", "Up"][self.lastaction])
)
else:
outfile.write("\n")
outfile.write("\n".join(''.join(line) for line in desc)+"\n")
outfile.write("\n".join("".join(line) for line in desc) + "\n")
if mode != 'human':
if mode != "human":
with closing(outfile):
return outfile.getvalue()

View File

@@ -37,12 +37,15 @@ class GuessingGame(gym.Env):
The perfect agent would likely learn the bounds of the action space (without referring
    to them explicitly) and then follow binary tree style exploration towards the goal number
"""
def __init__(self):
self.range = 1000 # Randomly selected number is within +/- this value
self.bounds = 10000
self.action_space = spaces.Box(low=np.array([-self.bounds]).astype(np.float32) ,
high=np.array([self.bounds]).astype(np.float32) )
self.action_space = spaces.Box(
low=np.array([-self.bounds]).astype(np.float32),
high=np.array([self.bounds]).astype(np.float32),
)
self.observation_space = spaces.Discrete(4)
self.number = 0
@@ -77,7 +80,11 @@ class GuessingGame(gym.Env):
reward = 0
done = False
if (self.number - self.range * 0.01) < action < (self.number + self.range * 0.01):
if (
(self.number - self.range * 0.01)
< action
< (self.number + self.range * 0.01)
):
reward = 1
done = True
@@ -85,7 +92,12 @@ class GuessingGame(gym.Env):
if self.guess_count >= self.guess_max:
done = True
return self.observation, reward, done, {"number": self.number, "guesses": self.guess_count}
return (
self.observation,
reward,
done,
{"number": self.number, "guesses": self.guess_count},
)
def reset(self):
self.number = self.np_random.uniform(-self.range, self.range)

View File

@@ -22,11 +22,15 @@ class HotterColder(gym.Env):
    increase the rate at which it guesses in that direction until the reward reaches
its maximum
"""
def __init__(self):
self.range = 1000 # +/- the value number can be between
self.bounds = 2000 # Action space bounds
self.action_space = spaces.Box(low=np.array([-self.bounds]).astype(np.float32), high=np.array([self.bounds]).astype(np.float32))
self.action_space = spaces.Box(
low=np.array([-self.bounds]).astype(np.float32),
high=np.array([self.bounds]).astype(np.float32),
)
self.observation_space = spaces.Discrete(4)
self.number = 0
@@ -58,12 +62,20 @@ class HotterColder(gym.Env):
elif action > self.number:
self.observation = 3
reward = ((min(action, self.number) + self.bounds) / (max(action, self.number) + self.bounds)) ** 2
reward = (
(min(action, self.number) + self.bounds)
/ (max(action, self.number) + self.bounds)
) ** 2
self.guess_count += 1
done = self.guess_count >= self.guess_max
return self.observation, reward[0], done, {"number": self.number, "guesses": self.guess_count}
return (
self.observation,
reward[0],
done,
{"number": self.number, "guesses": self.guess_count},
)
def reset(self):
self.number = self.np_random.uniform(-self.range, self.range)

View File

@@ -36,15 +36,19 @@ class KellyCoinflipEnv(gym.Env):
mistaken bet could end the episode immediately; it's not clear to me which version
would be better.) For a harder version which randomizes the 3 key parameters, see the
Generalized Kelly coinflip game."""
metadata = {'render.modes': ['human']}
metadata = {"render.modes": ["human"]}
def __init__(self, initial_wealth=25.0, edge=0.6, max_wealth=250.0, max_rounds=300):
self.action_space = spaces.Discrete(int(max_wealth * 100)) # betting in penny
# increments
self.observation_space = spaces.Tuple((
spaces.Box(0, max_wealth, [1], dtype=np.float32), # (w,b)
spaces.Discrete(max_rounds + 1)))
self.observation_space = spaces.Tuple(
(
spaces.Box(0, max_wealth, [1], dtype=np.float32), # (w,b)
spaces.Discrete(max_rounds + 1),
)
)
self.reward_range = (0, max_wealth)
self.edge = edge
self.wealth = initial_wealth
@@ -61,7 +65,9 @@ class KellyCoinflipEnv(gym.Env):
return [seed]
def step(self, action):
bet_in_dollars = min(action/100.0, self.wealth) # action = desired bet in pennies
bet_in_dollars = min(
action / 100.0, self.wealth
) # action = desired bet in pennies
self.rounds -= 1
coinflip = flip(self.edge, self.np_random)
@@ -80,7 +86,7 @@ class KellyCoinflipEnv(gym.Env):
self.wealth = self.initial_wealth
return self._get_obs()
def render(self, mode='human'):
def render(self, mode="human"):
print("Current wealth: ", self.wealth, "; Rounds left: ", self.rounds)
@@ -106,11 +112,21 @@ class KellyCoinflipGeneralizedEnv(gym.Env):
for the edge case alone suggests that the Bayes-optimal value may be very close to
what one would calculate using a decision tree for any specific case), and
represents a good challenge for RL agents."""
metadata = {'render.modes': ['human']}
def __init__(self, initial_wealth=25.0, edge_prior_alpha=7, edge_prior_beta=3,
max_wealth_alpha=5.0, max_wealth_m=200.0, max_rounds_mean=300.0,
max_rounds_sd=25.0, reseed=True, clip_distributions=False):
metadata = {"render.modes": ["human"]}
def __init__(
self,
initial_wealth=25.0,
edge_prior_alpha=7,
edge_prior_beta=3,
max_wealth_alpha=5.0,
max_wealth_m=200.0,
max_rounds_mean=300.0,
max_rounds_sd=25.0,
reseed=True,
clip_distributions=False,
):
# clip_distributions=True asserts that state and action space are not modified at reset()
# store the hyper-parameters for passing back into __init__() during resets so
@@ -126,28 +142,42 @@ class KellyCoinflipGeneralizedEnv(gym.Env):
self.max_rounds_sd = max_rounds_sd
self.clip_distributions = clip_distributions
if reseed or not hasattr(self, 'np_random'):
if reseed or not hasattr(self, "np_random"):
self.seed()
# draw this game's set of parameters:
edge = self.np_random.beta(edge_prior_alpha, edge_prior_beta)
if self.clip_distributions:
# (clip/resample some parameters to be able to fix obs/action space sizes/bounds)
max_wealth_bound = round(genpareto.ppf(0.85, max_wealth_alpha, max_wealth_m))
max_wealth_bound = round(
genpareto.ppf(0.85, max_wealth_alpha, max_wealth_m)
)
max_wealth = max_wealth_bound + 1.0
while max_wealth > max_wealth_bound:
max_wealth = round(genpareto.rvs(max_wealth_alpha, max_wealth_m,
random_state=self.np_random))
max_rounds_bound = int(round(norm.ppf(0.99, max_rounds_mean, max_rounds_sd)))
max_wealth = round(
genpareto.rvs(
max_wealth_alpha, max_wealth_m, random_state=self.np_random
)
)
max_rounds_bound = int(
round(norm.ppf(0.99, max_rounds_mean, max_rounds_sd))
)
max_rounds = max_rounds_bound + 1
while max_rounds > max_rounds_bound:
max_rounds = int(round(self.np_random.normal(max_rounds_mean, max_rounds_sd)))
max_rounds = int(
round(self.np_random.normal(max_rounds_mean, max_rounds_sd))
)
else:
max_wealth = round(genpareto.rvs(max_wealth_alpha, max_wealth_m,
random_state=self.np_random))
max_wealth = round(
genpareto.rvs(
max_wealth_alpha, max_wealth_m, random_state=self.np_random
)
)
max_wealth_bound = max_wealth
max_rounds = int(round(self.np_random.normal(max_rounds_mean, max_rounds_sd)))
max_rounds = int(
round(self.np_random.normal(max_rounds_mean, max_rounds_sd))
)
max_rounds_bound = max_rounds
# add an additional global variable which is the sufficient statistic for the
@@ -161,13 +191,18 @@ class KellyCoinflipGeneralizedEnv(gym.Env):
self.rounds_elapsed = 0
# the rest proceeds as before:
self.action_space = spaces.Discrete(int(max_wealth_bound*100))
self.observation_space = spaces.Tuple((
spaces.Box(0, max_wealth_bound, shape=[1], dtype=np.float32), # current wealth
spaces.Discrete(max_rounds_bound+1), # rounds elapsed
spaces.Discrete(max_rounds_bound+1), # wins
spaces.Discrete(max_rounds_bound+1), # losses
spaces.Box(0, max_wealth_bound, [1], dtype=np.float32))) # maximum observed wealth
self.action_space = spaces.Discrete(int(max_wealth_bound * 100))
self.observation_space = spaces.Tuple(
(
spaces.Box(
0, max_wealth_bound, shape=[1], dtype=np.float32
), # current wealth
spaces.Discrete(max_rounds_bound + 1), # rounds elapsed
spaces.Discrete(max_rounds_bound + 1), # wins
spaces.Discrete(max_rounds_bound + 1), # losses
spaces.Box(0, max_wealth_bound, [1], dtype=np.float32),
)
) # maximum observed wealth
self.reward_range = (0, max_wealth)
self.edge = edge
self.wealth = self.initial_wealth
@@ -180,7 +215,7 @@ class KellyCoinflipGeneralizedEnv(gym.Env):
return [seed]
def step(self, action):
bet_in_dollars = min(action/100.0, self.wealth)
bet_in_dollars = min(action / 100.0, self.wealth)
self.rounds -= 1
@@ -200,25 +235,42 @@ class KellyCoinflipGeneralizedEnv(gym.Env):
return self._get_obs(), reward, done, {}
def _get_obs(self):
return (np.array([float(self.wealth)]), self.rounds_elapsed, self.wins,
self.losses, np.array([float(self.max_ever_wealth)]))
return (
np.array([float(self.wealth)]),
self.rounds_elapsed,
self.wins,
self.losses,
np.array([float(self.max_ever_wealth)]),
)
def reset(self):
# re-init everything to draw new parameters etc, but preserve the RNG for
# reproducibility and pass in the same hyper-parameters as originally specified:
self.__init__(initial_wealth=self.initial_wealth,
edge_prior_alpha=self.edge_prior_alpha,
edge_prior_beta=self.edge_prior_beta,
max_wealth_alpha=self.max_wealth_alpha,
max_wealth_m=self.max_wealth_m,
max_rounds_mean=self.max_rounds_mean,
max_rounds_sd=self.max_rounds_sd,
reseed=False,
clip_distributions=self.clip_distributions)
self.__init__(
initial_wealth=self.initial_wealth,
edge_prior_alpha=self.edge_prior_alpha,
edge_prior_beta=self.edge_prior_beta,
max_wealth_alpha=self.max_wealth_alpha,
max_wealth_m=self.max_wealth_m,
max_rounds_mean=self.max_rounds_mean,
max_rounds_sd=self.max_rounds_sd,
reseed=False,
clip_distributions=self.clip_distributions,
)
return self._get_obs()
def render(self, mode='human'):
print("Current wealth: ", self.wealth, "; Rounds left: ", self.rounds,
"; True edge: ", self.edge, "; True max wealth: ", self.max_wealth,
"; True stopping time: ", self.max_rounds, "; Rounds left: ",
self.max_rounds - self.rounds_elapsed)
def render(self, mode="human"):
print(
"Current wealth: ",
self.wealth,
"; Rounds left: ",
self.rounds,
"; True edge: ",
self.edge,
"; True max wealth: ",
self.max_wealth,
"; True stopping time: ",
self.max_rounds,
"; Rounds left: ",
self.max_rounds - self.rounds_elapsed,
)
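A minimal interaction sketch for the simple (non-generalized) game above, using its registered id; as in `step`, the action is a bet in pennies and is capped at the current wealth:

```python
import gym

env = gym.make("KellyCoinflip-v0")
obs = env.reset()                            # ((wealth,), rounds_left)
done = False
while not done:
    obs, reward, done, info = env.step(500)  # bet 500 pennies ($5) every round
env.render()                                 # prints final wealth and rounds left
```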

View File

@@ -22,6 +22,7 @@ class NChainEnv(gym.Env):
A Bayesian Framework for Reinforcement Learning by Malcolm Strens (2000)
http://ceit.aut.ac.ir/~shiry/lecture/machine-learning/papers/BRL-2000.pdf
"""
def __init__(self, n=5, slip=0.2, small=2, large=10):
self.n = n
self.slip = slip # probability of 'slipping' an action

View File

@@ -14,6 +14,7 @@ class RouletteEnv(gym.Env):
The last action (38) stops the rollout for a return of 0 (walking away)
"""
def __init__(self, spots=37):
self.n = spots + 1
self.action_space = spaces.Discrete(self.n)

View File

@@ -26,7 +26,7 @@ class TaxiEnv(discrete.DiscreteEnv):
There are four designated locations in the grid world indicated by R(ed), G(reen), Y(ellow), and B(lue). When the episode starts, the taxi starts off at a random square and the passenger is at a random location. The taxi drives to the passenger's location, picks up the passenger, drives to the passenger's destination (another one of the four specified locations), and then drops off the passenger. Once the passenger is dropped off, the episode ends.
Observations:
There are 500 discrete states since there are 25 taxi positions, 5 possible locations of the passenger (including the case when the passenger is in the taxi), and 4 destination locations.
There are 500 discrete states since there are 25 taxi positions, 5 possible locations of the passenger (including the case when the passenger is in the taxi), and 4 destination locations.
Passenger locations:
- 0: R(ed)
@@ -65,10 +65,11 @@ class TaxiEnv(discrete.DiscreteEnv):
state space is represented by:
(taxi_row, taxi_col, passenger_location, destination)
"""
metadata = {'render.modes': ['human', 'ansi']}
metadata = {"render.modes": ["human", "ansi"]}
def __init__(self):
self.desc = np.asarray(MAP, dtype='c')
self.desc = np.asarray(MAP, dtype="c")
self.locs = locs = [(0, 0), (0, 4), (4, 0), (4, 3)]
@@ -79,8 +80,10 @@ class TaxiEnv(discrete.DiscreteEnv):
max_col = num_columns - 1
initial_state_distrib = np.zeros(num_states)
num_actions = 6
P = {state: {action: []
for action in range(num_actions)} for state in range(num_states)}
P = {
state: {action: [] for action in range(num_actions)}
for state in range(num_states)
}
for row in range(num_rows):
for col in range(num_columns):
for pass_idx in range(len(locs) + 1): # +1 for being inside taxi
@@ -91,7 +94,9 @@ class TaxiEnv(discrete.DiscreteEnv):
for action in range(num_actions):
# defaults
new_row, new_col, new_pass_idx = row, col, pass_idx
reward = -1 # default reward when there is no pickup/dropoff
reward = (
-1
) # default reward when there is no pickup/dropoff
done = False
taxi_loc = (row, col)
@@ -104,7 +109,7 @@ class TaxiEnv(discrete.DiscreteEnv):
elif action == 3 and self.desc[1 + row, 2 * col] == b":":
new_col = max(col - 1, 0)
elif action == 4: # pickup
if (pass_idx < 4 and taxi_loc == locs[pass_idx]):
if pass_idx < 4 and taxi_loc == locs[pass_idx]:
new_pass_idx = 4
else: # passenger not at location
reward = -10
@@ -118,12 +123,13 @@ class TaxiEnv(discrete.DiscreteEnv):
else: # dropoff at wrong location
reward = -10
new_state = self.encode(
new_row, new_col, new_pass_idx, dest_idx)
P[state][action].append(
(1.0, new_state, reward, done))
new_row, new_col, new_pass_idx, dest_idx
)
P[state][action].append((1.0, new_state, reward, done))
initial_state_distrib /= initial_state_distrib.sum()
discrete.DiscreteEnv.__init__(
self, num_states, num_actions, P, initial_state_distrib)
self, num_states, num_actions, P, initial_state_distrib
)
def encode(self, taxi_row, taxi_col, pass_loc, dest_idx):
# (5) 5, 5, 4
@@ -148,32 +154,44 @@ class TaxiEnv(discrete.DiscreteEnv):
assert 0 <= i < 5
return reversed(out)
def render(self, mode='human'):
outfile = StringIO() if mode == 'ansi' else sys.stdout
def render(self, mode="human"):
outfile = StringIO() if mode == "ansi" else sys.stdout
out = self.desc.copy().tolist()
out = [[c.decode('utf-8') for c in line] for line in out]
out = [[c.decode("utf-8") for c in line] for line in out]
taxi_row, taxi_col, pass_idx, dest_idx = self.decode(self.s)
def ul(x): return "_" if x == " " else x
def ul(x):
return "_" if x == " " else x
if pass_idx < 4:
out[1 + taxi_row][2 * taxi_col + 1] = utils.colorize(
out[1 + taxi_row][2 * taxi_col + 1], 'yellow', highlight=True)
out[1 + taxi_row][2 * taxi_col + 1], "yellow", highlight=True
)
pi, pj = self.locs[pass_idx]
out[1 + pi][2 * pj + 1] = utils.colorize(out[1 + pi][2 * pj + 1], 'blue', bold=True)
out[1 + pi][2 * pj + 1] = utils.colorize(
out[1 + pi][2 * pj + 1], "blue", bold=True
)
else: # passenger in taxi
out[1 + taxi_row][2 * taxi_col + 1] = utils.colorize(
ul(out[1 + taxi_row][2 * taxi_col + 1]), 'green', highlight=True)
ul(out[1 + taxi_row][2 * taxi_col + 1]), "green", highlight=True
)
di, dj = self.locs[dest_idx]
out[1 + di][2 * dj + 1] = utils.colorize(out[1 + di][2 * dj + 1], 'magenta')
out[1 + di][2 * dj + 1] = utils.colorize(out[1 + di][2 * dj + 1], "magenta")
outfile.write("\n".join(["".join(row) for row in out]) + "\n")
if self.lastaction is not None:
outfile.write(" ({})\n".format(["South", "North", "East", "West", "Pickup", "Dropoff"][self.lastaction]))
outfile.write(
" ({})\n".format(
["South", "North", "East", "West", "Pickup", "Dropoff"][
self.lastaction
]
)
)
else:
outfile.write("\n")
# No need to return anything for human
if mode != 'human':
if mode != "human":
with closing(outfile):
return outfile.getvalue()
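The `encode`/`decode` pair above packs (taxi_row, taxi_col, passenger_location, destination) into a single discrete state; a quick round-trip sketch using the location codes from the docstring:

```python
from gym.envs.toy_text.taxi import TaxiEnv

env = TaxiEnv()
state = env.encode(3, 1, 2, 0)   # taxi at (3, 1), passenger at Y(ellow), destination R(ed)
print(state)                     # 328 == ((3 * 5 + 1) * 5 + 2) * 4 + 0
assert list(env.decode(state)) == [3, 1, 2, 0]
```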

View File

@@ -2,4 +2,3 @@ from gym.envs.unittest.cube_crash import CubeCrash
from gym.envs.unittest.cube_crash import CubeCrashSparse
from gym.envs.unittest.cube_crash import CubeCrashScreenBecomesBlack
from gym.envs.unittest.memorize_digits import MemorizeDigits

View File

@@ -34,27 +34,30 @@ FIELD_W = 32
FIELD_H = 40
HOLE_WIDTH = 8
color_black = np.array((0,0,0)).astype('float32')
color_white = np.array((255,255,255)).astype('float32')
color_green = np.array((0,255,0)).astype('float32')
color_black = np.array((0, 0, 0)).astype("float32")
color_white = np.array((255, 255, 255)).astype("float32")
color_green = np.array((0, 255, 0)).astype("float32")
class CubeCrash(gym.Env):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second' : 60,
'video.res_w' : FIELD_W,
'video.res_h' : FIELD_H,
"render.modes": ["human", "rgb_array"],
"video.frames_per_second": 60,
"video.res_w": FIELD_W,
"video.res_h": FIELD_H,
}
use_shaped_reward = True
use_black_screen = False
use_random_colors = False # Makes env too hard
use_black_screen = False
use_random_colors = False # Makes env too hard
def __init__(self):
self.seed()
self.viewer = None
self.observation_space = spaces.Box(0, 255, (FIELD_H,FIELD_W,3), dtype=np.uint8)
self.observation_space = spaces.Box(
0, 255, (FIELD_H, FIELD_W, 3), dtype=np.uint8
)
self.action_space = spaces.Discrete(3)
self.reset()
@@ -64,39 +67,59 @@ class CubeCrash(gym.Env):
return [seed]
def random_color(self):
return np.array([
self.np_random.randint(low=0, high=255),
self.np_random.randint(low=0, high=255),
self.np_random.randint(low=0, high=255),
]).astype('uint8')
return np.array(
[
self.np_random.randint(low=0, high=255),
self.np_random.randint(low=0, high=255),
self.np_random.randint(low=0, high=255),
]
).astype("uint8")
def reset(self):
self.cube_x = self.np_random.randint(low=3, high=FIELD_W-3)
self.cube_y = self.np_random.randint(low=3, high=FIELD_H//6)
self.hole_x = self.np_random.randint(low=HOLE_WIDTH, high=FIELD_W-HOLE_WIDTH)
self.cube_x = self.np_random.randint(low=3, high=FIELD_W - 3)
self.cube_y = self.np_random.randint(low=3, high=FIELD_H // 6)
self.hole_x = self.np_random.randint(low=HOLE_WIDTH, high=FIELD_W - HOLE_WIDTH)
self.bg_color = self.random_color() if self.use_random_colors else color_black
self.potential = None
self.potential = None
self.step_n = 0
while 1:
self.wall_color = self.random_color() if self.use_random_colors else color_white
self.cube_color = self.random_color() if self.use_random_colors else color_green
if np.linalg.norm(self.wall_color - self.bg_color) < 50 or np.linalg.norm(self.cube_color - self.bg_color) < 50: continue
self.wall_color = (
self.random_color() if self.use_random_colors else color_white
)
self.cube_color = (
self.random_color() if self.use_random_colors else color_green
)
if (
np.linalg.norm(self.wall_color - self.bg_color) < 50
or np.linalg.norm(self.cube_color - self.bg_color) < 50
):
continue
break
return self.step(0)[0]
def step(self, action):
if action==0: pass
elif action==1: self.cube_x -= 1
elif action==2: self.cube_x += 1
else: assert 0, "Action %i is out of range" % action
if action == 0:
pass
elif action == 1:
self.cube_x -= 1
elif action == 2:
self.cube_x += 1
else:
assert 0, "Action %i is out of range" % action
self.cube_y += 1
self.step_n += 1
obs = np.zeros( (FIELD_H,FIELD_W,3), dtype=np.uint8 )
obs[:,:,:] = self.bg_color
obs[FIELD_H-5:FIELD_H,:,:] = self.wall_color
obs[FIELD_H-5:FIELD_H, self.hole_x-HOLE_WIDTH//2:self.hole_x+HOLE_WIDTH//2+1, :] = self.bg_color
obs[self.cube_y-1:self.cube_y+2, self.cube_x-1:self.cube_x+2, :] = self.cube_color
obs = np.zeros((FIELD_H, FIELD_W, 3), dtype=np.uint8)
obs[:, :, :] = self.bg_color
obs[FIELD_H - 5 : FIELD_H, :, :] = self.wall_color
obs[
FIELD_H - 5 : FIELD_H,
self.hole_x - HOLE_WIDTH // 2 : self.hole_x + HOLE_WIDTH // 2 + 1,
:,
] = self.bg_color
obs[
self.cube_y - 1 : self.cube_y + 2, self.cube_x - 1 : self.cube_x + 2, :
] = self.cube_color
if self.use_black_screen and self.step_n > 4:
obs[:] = np.zeros((3,), dtype=np.uint8)
@@ -107,11 +130,11 @@ class CubeCrash(gym.Env):
reward = (self.potential - dist) * 0.01
self.potential = dist
if self.cube_x-1 < 0 or self.cube_x+1 >= FIELD_W:
if self.cube_x - 1 < 0 or self.cube_x + 1 >= FIELD_W:
done = True
reward = -1
elif self.cube_y+1 >= FIELD_H-5:
if dist >= HOLE_WIDTH//2:
elif self.cube_y + 1 >= FIELD_H - 5:
if dist >= HOLE_WIDTH // 2:
done = True
reward = -1
elif self.cube_y == FIELD_H:
@@ -120,12 +143,13 @@ class CubeCrash(gym.Env):
self.last_obs = obs
return obs, reward, done, {}
def render(self, mode='human'):
if mode == 'rgb_array':
def render(self, mode="human"):
if mode == "rgb_array":
return self.last_obs
elif mode == 'human':
elif mode == "human":
from gym.envs.classic_control import rendering
if self.viewer is None:
self.viewer = rendering.SimpleImageViewer()
self.viewer.imshow(self.last_obs)
@@ -139,10 +163,11 @@ class CubeCrash(gym.Env):
self.viewer.close()
self.viewer = None
class CubeCrashSparse(CubeCrash):
use_shaped_reward = False
class CubeCrashScreenBecomesBlack(CubeCrash):
use_shaped_reward = False
use_black_screen = True

View File

@@ -32,88 +32,29 @@ from gym.utils import seeding
FIELD_W = 32
FIELD_H = 24
bogus_mnist = \
[[
" **** ",
"* *",
"* *",
"* *",
"* *",
" **** "
], [
" ** ",
" * * ",
" * ",
" * ",
" * ",
" *** "
], [
" **** ",
"* *",
" *",
" *** ",
"** ",
"******"
], [
" **** ",
"* *",
" ** ",
" *",
"* *",
" **** "
], [
" * * ",
" * * ",
" * * ",
" **** ",
" * ",
" * "
], [
" **** ",
" * ",
" **** ",
" * ",
" * ",
" **** "
], [
" *** ",
" * ",
" **** ",
" * * ",
" * * ",
" **** "
], [
" **** ",
" * ",
" * ",
" * ",
" * ",
" * "
], [
" **** ",
"* *",
" **** ",
"* *",
"* *",
" **** "
], [
" **** ",
"* *",
"* *",
" *****",
" *",
" **** "
]]
bogus_mnist = [
[" **** ", "* *", "* *", "* *", "* *", " **** "],
[" ** ", " * * ", " * ", " * ", " * ", " *** "],
[" **** ", "* *", " *", " *** ", "** ", "******"],
[" **** ", "* *", " ** ", " *", "* *", " **** "],
[" * * ", " * * ", " * * ", " **** ", " * ", " * "],
[" **** ", " * ", " **** ", " * ", " * ", " **** "],
[" *** ", " * ", " **** ", " * * ", " * * ", " **** "],
[" **** ", " * ", " * ", " * ", " * ", " * "],
[" **** ", "* *", " **** ", "* *", "* *", " **** "],
[" **** ", "* *", "* *", " *****", " *", " **** "],
]
color_black = np.array((0, 0, 0)).astype("float32")
color_white = np.array((255, 255, 255)).astype("float32")
color_black = np.array((0,0,0)).astype('float32')
color_white = np.array((255,255,255)).astype('float32')
class MemorizeDigits(gym.Env):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second' : 60,
'video.res_w' : FIELD_W,
'video.res_h' : FIELD_H,
"render.modes": ["human", "rgb_array"],
"video.frames_per_second": 60,
"video.res_w": FIELD_W,
"video.res_h": FIELD_H,
}
use_random_colors = False
@@ -121,12 +62,16 @@ class MemorizeDigits(gym.Env):
def __init__(self):
self.seed()
self.viewer = None
self.observation_space = spaces.Box(0, 255, (FIELD_H,FIELD_W,3), dtype=np.uint8)
self.observation_space = spaces.Box(
0, 255, (FIELD_H, FIELD_W, 3), dtype=np.uint8
)
self.action_space = spaces.Discrete(10)
self.bogus_mnist = np.zeros( (10,6,6), dtype=np.uint8 )
self.bogus_mnist = np.zeros((10, 6, 6), dtype=np.uint8)
for digit in range(10):
for y in range(6):
self.bogus_mnist[digit,y,:] = [ord(char) for char in bogus_mnist[digit][y]]
self.bogus_mnist[digit, y, :] = [
ord(char) for char in bogus_mnist[digit][y]
]
self.reset()
def seed(self, seed=None):
@@ -134,20 +79,25 @@ class MemorizeDigits(gym.Env):
return [seed]
def random_color(self):
return np.array([
self.np_random.randint(low=0, high=255),
self.np_random.randint(low=0, high=255),
self.np_random.randint(low=0, high=255),
]).astype('uint8')
return np.array(
[
self.np_random.randint(low=0, high=255),
self.np_random.randint(low=0, high=255),
self.np_random.randint(low=0, high=255),
]
).astype("uint8")
def reset(self):
self.digit_x = self.np_random.randint(low=FIELD_W//5, high=FIELD_W//5*4)
self.digit_y = self.np_random.randint(low=FIELD_H//5, high=FIELD_H//5*4)
self.digit_x = self.np_random.randint(low=FIELD_W // 5, high=FIELD_W // 5 * 4)
self.digit_y = self.np_random.randint(low=FIELD_H // 5, high=FIELD_H // 5 * 4)
self.color_bg = self.random_color() if self.use_random_colors else color_black
self.step_n = 0
while 1:
self.color_digit = self.random_color() if self.use_random_colors else color_white
if np.linalg.norm(self.color_digit - self.color_bg) < 50: continue
self.color_digit = (
self.random_color() if self.use_random_colors else color_white
)
if np.linalg.norm(self.color_digit - self.color_bg) < 50:
continue
break
self.digit = -1
return self.step(0)[0]
@@ -156,29 +106,32 @@ class MemorizeDigits(gym.Env):
reward = -1
done = False
self.step_n += 1
if self.digit==-1:
if self.digit == -1:
pass
else:
if self.digit==action:
if self.digit == action:
reward = +1
done = self.step_n > 20 and 0==self.np_random.randint(low=0, high=5)
done = self.step_n > 20 and 0 == self.np_random.randint(low=0, high=5)
self.digit = self.np_random.randint(low=0, high=10)
obs = np.zeros( (FIELD_H,FIELD_W,3), dtype=np.uint8 )
obs[:,:,:] = self.color_bg
digit_img = np.zeros( (6,6,3), dtype=np.uint8 )
obs = np.zeros((FIELD_H, FIELD_W, 3), dtype=np.uint8)
obs[:, :, :] = self.color_bg
digit_img = np.zeros((6, 6, 3), dtype=np.uint8)
digit_img[:] = self.color_bg
xxx = self.bogus_mnist[self.digit]==42
xxx = self.bogus_mnist[self.digit] == 42
digit_img[xxx] = self.color_digit
obs[self.digit_y-3:self.digit_y+3, self.digit_x-3:self.digit_x+3] = digit_img
obs[
self.digit_y - 3 : self.digit_y + 3, self.digit_x - 3 : self.digit_x + 3
] = digit_img
self.last_obs = obs
return obs, reward, done, {}
def render(self, mode='human'):
if mode == 'rgb_array':
def render(self, mode="human"):
if mode == "rgb_array":
return self.last_obs
elif mode == 'human':
elif mode == "human":
from gym.envs.classic_control import rendering
if self.viewer is None:
self.viewer = rendering.SimpleImageViewer()
self.viewer.imshow(self.last_obs)
@@ -191,4 +144,3 @@ class MemorizeDigits(gym.Env):
if self.viewer is not None:
self.viewer.close()
self.viewer = None

View File

@@ -1,87 +1,118 @@
import sys
class Error(Exception):
pass
# Local errors
class Unregistered(Error):
"""Raised when the user requests an item from the registry that does
not actually exist.
"""
pass
class UnregisteredEnv(Unregistered):
"""Raised when the user requests an env from the registry that does
not actually exist.
"""
pass
class UnregisteredBenchmark(Unregistered):
"""Raised when the user requests an env from the registry that does
not actually exist.
"""
pass
class DeprecatedEnv(Error):
"""Raised when the user requests an env from the registry with an
older version number than the latest env with the same name.
"""
pass
class UnseedableEnv(Error):
"""Raised when the user tries to seed an env that does not support
seeding.
"""
pass
class DependencyNotInstalled(Error):
pass
class UnsupportedMode(Exception):
"""Raised when the user requests a rendering mode not supported by the
environment.
"""
pass
class ResetNeeded(Exception):
"""When the monitor is active, raised when the user tries to step an
environment that's already done.
"""
pass
class ResetNotAllowed(Exception):
"""When the monitor is active, raised when the user tries to step an
environment that's not yet done.
"""
pass
class InvalidAction(Exception):
"""Raised when the user performs an action not contained within the
action space
"""
pass
# API errors
class APIError(Error):
def __init__(self, message=None, http_body=None, http_status=None,
json_body=None, headers=None):
def __init__(
self,
message=None,
http_body=None,
http_status=None,
json_body=None,
headers=None,
):
super(APIError, self).__init__(message)
if http_body and hasattr(http_body, 'decode'):
if http_body and hasattr(http_body, "decode"):
try:
http_body = http_body.decode('utf-8')
http_body = http_body.decode("utf-8")
except:
http_body = ('<Could not decode body as utf-8. '
'Please report to gym@openai.com>')
http_body = (
"<Could not decode body as utf-8. "
"Please report to gym@openai.com>"
)
self._message = message
self.http_body = http_body
self.http_status = http_status
self.json_body = json_body
self.headers = headers or {}
self.request_id = self.headers.get('request-id', None)
self.request_id = self.headers.get("request-id", None)
def __unicode__(self):
if self.request_id is not None:
@@ -91,8 +122,8 @@ class APIError(Error):
return self._message
def __str__(self):
try: # Python 2
return unicode(self).encode('utf-8')
try: # Python 2
return unicode(self).encode("utf-8")
except NameError: # Python 3
return self.__unicode__()
@@ -102,31 +133,43 @@ class APIConnectionError(APIError):
class InvalidRequestError(APIError):
def __init__(self, message, param, http_body=None,
http_status=None, json_body=None, headers=None):
def __init__(
self,
message,
param,
http_body=None,
http_status=None,
json_body=None,
headers=None,
):
super(InvalidRequestError, self).__init__(
message, http_body, http_status, json_body,
headers)
message, http_body, http_status, json_body, headers
)
self.param = param
class AuthenticationError(APIError):
pass
class RateLimitError(APIError):
pass
# Video errors
class VideoRecorderError(Error):
pass
class InvalidFrame(Error):
pass
# Wrapper errors
class DoubleWrapperError(Error):
pass
@@ -138,8 +181,10 @@ class WrapAfterConfigureError(Error):
class RetriesExceededError(Error):
pass
# Vectorized environments errors
class AlreadyPendingCallError(Exception):
"""
Raised when `reset`, or `step` is called asynchronously (e.g. with
@@ -147,28 +192,35 @@ class AlreadyPendingCallError(Exception):
`step_async` (respectively) is called again (without a complete call to
`reset_wait`, or `step_wait` respectively).
"""
def __init__(self, message, name):
super(AlreadyPendingCallError, self).__init__(message)
self.name = name
class NoAsyncCallError(Exception):
"""
Raised when an asynchronous `reset`, or `step` is not running, but
`reset_wait`, or `step_wait` (respectively) is called.
"""
def __init__(self, message, name):
super(NoAsyncCallError, self).__init__(message)
self.name = name
class ClosedEnvironmentError(Exception):
"""
Trying to call `reset`, or `step`, while the environment is closed.
"""
pass
class CustomSpaceError(Exception):
"""
The space is a custom gym.Space instance, and is not supported by
`AsyncVectorEnv` with `shared_memory=True`.
"""
pass
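Most of these exceptions surface through `gym.make`; a small sketch of catching the registry errors defined above (the env id is deliberately bogus):

```python
import gym
from gym import error

try:
    gym.make("DefinitelyNotARealEnv-v0")
except error.UnregisteredEnv:
    print("no env registered under that id")
except error.Error as e:
    print("other gym error:", e)
```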

View File

@@ -10,6 +10,7 @@ DISABLED = 50
MIN_LEVEL = 30
def set_level(level):
"""
Set logging threshold on current logger.
@@ -17,21 +18,26 @@ def set_level(level):
global MIN_LEVEL
MIN_LEVEL = level
def debug(msg, *args):
if MIN_LEVEL <= DEBUG:
print('%s: %s'%('DEBUG', msg % args))
print("%s: %s" % ("DEBUG", msg % args))
def info(msg, *args):
if MIN_LEVEL <= INFO:
print('%s: %s'%('INFO', msg % args))
print("%s: %s" % ("INFO", msg % args))
def warn(msg, *args):
if MIN_LEVEL <= WARN:
warnings.warn(colorize('%s: %s'%('WARN', msg % args), 'yellow'))
warnings.warn(colorize("%s: %s" % ("WARN", msg % args), "yellow"))
def error(msg, *args):
if MIN_LEVEL <= ERROR:
print(colorize('%s: %s'%('ERROR', msg % args), 'red'))
print(colorize("%s: %s" % ("ERROR", msg % args), "red"))
# DEPRECATED:
setLevel = set_level
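A usage sketch for the module above; messages below the current threshold are silently dropped:

```python
from gym import logger

logger.set_level(logger.INFO)                # lower the threshold from the default WARN
logger.debug("not shown at this level")
logger.info("episode %d finished", 3)
logger.warn("printed in yellow via warnings.warn")
```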

View File

@@ -11,4 +11,16 @@ from gym.spaces.utils import flatten_space
from gym.spaces.utils import flatten
from gym.spaces.utils import unflatten
__all__ = ["Space", "Box", "Discrete", "MultiDiscrete", "MultiBinary", "Tuple", "Dict", "flatdim", "flatten_space", "flatten", "unflatten"]
__all__ = [
"Space",
"Box",
"Discrete",
"MultiDiscrete",
"MultiBinary",
"Tuple",
"Dict",
"flatdim",
"flatten_space",
"flatten",
"unflatten",
]

View File

@@ -21,23 +21,34 @@ class Box(Space):
Box(2,)
"""
def __init__(self, low, high, shape=None, dtype=np.float32):
assert dtype is not None, 'dtype must be explicitly provided. '
assert dtype is not None, "dtype must be explicitly provided. "
self.dtype = np.dtype(dtype)
# determine shape if it isn't provided directly
if shape is not None:
shape = tuple(shape)
assert np.isscalar(low) or low.shape == shape, "low.shape doesn't match provided shape"
assert np.isscalar(high) or high.shape == shape, "high.shape doesn't match provided shape"
assert (
np.isscalar(low) or low.shape == shape
), "low.shape doesn't match provided shape"
assert (
np.isscalar(high) or high.shape == shape
), "high.shape doesn't match provided shape"
elif not np.isscalar(low):
shape = low.shape
assert np.isscalar(high) or high.shape == shape, "high.shape doesn't match low.shape"
assert (
np.isscalar(high) or high.shape == shape
), "high.shape doesn't match low.shape"
elif not np.isscalar(high):
shape = high.shape
assert np.isscalar(low) or low.shape == shape, "low.shape doesn't match high.shape"
assert (
np.isscalar(low) or low.shape == shape
), "low.shape doesn't match high.shape"
else:
raise ValueError("shape must be provided or inferred from the shapes of low or high")
raise ValueError(
"shape must be provided or inferred from the shapes of low or high"
)
if np.isscalar(low):
low = np.full(shape, low, dtype=dtype)
@@ -54,11 +65,14 @@ class Box(Space):
return np.finfo(dtype).precision
else:
return np.inf
low_precision = _get_precision(self.low.dtype)
high_precision = _get_precision(self.high.dtype)
dtype_precision = _get_precision(self.dtype)
if min(low_precision, high_precision) > dtype_precision:
logger.warn("Box bound precision lowered by casting to {}".format(self.dtype))
logger.warn(
"Box bound precision lowered by casting to {}".format(self.dtype)
)
self.low = self.low.astype(self.dtype)
self.high = self.high.astype(self.dtype)
@@ -92,32 +106,33 @@ class Box(Space):
* (-oo, b] : shifted negative exponential distribution
* (-oo, oo) : normal distribution
"""
high = self.high if self.dtype.kind == 'f' \
else self.high.astype('int64') + 1
high = self.high if self.dtype.kind == "f" else self.high.astype("int64") + 1
sample = np.empty(self.shape)
# Masking arrays which classify the coordinates according to interval
# type
unbounded = ~self.bounded_below & ~self.bounded_above
upp_bounded = ~self.bounded_below & self.bounded_above
low_bounded = self.bounded_below & ~self.bounded_above
bounded = self.bounded_below & self.bounded_above
unbounded = ~self.bounded_below & ~self.bounded_above
upp_bounded = ~self.bounded_below & self.bounded_above
low_bounded = self.bounded_below & ~self.bounded_above
bounded = self.bounded_below & self.bounded_above
# Vectorized sampling by interval type
sample[unbounded] = self.np_random.normal(
size=unbounded[unbounded].shape)
sample[unbounded] = self.np_random.normal(size=unbounded[unbounded].shape)
sample[low_bounded] = self.np_random.exponential(
size=low_bounded[low_bounded].shape) + self.low[low_bounded]
sample[low_bounded] = (
self.np_random.exponential(size=low_bounded[low_bounded].shape)
+ self.low[low_bounded]
)
sample[upp_bounded] = -self.np_random.exponential(
size=upp_bounded[upp_bounded].shape) + self.high[upp_bounded]
sample[upp_bounded] = (
-self.np_random.exponential(size=upp_bounded[upp_bounded].shape)
+ self.high[upp_bounded]
)
sample[bounded] = self.np_random.uniform(low=self.low[bounded],
high=high[bounded],
size=bounded[bounded].shape)
if self.dtype.kind == 'i':
sample[bounded] = self.np_random.uniform(
low=self.low[bounded], high=high[bounded], size=bounded[bounded].shape
)
if self.dtype.kind == "i":
sample = np.floor(sample)
return sample.astype(self.dtype)
@@ -125,7 +140,9 @@ class Box(Space):
def contains(self, x):
if isinstance(x, list):
x = np.array(x) # Promote list to array for contains check
return x.shape == self.shape and np.all(x >= self.low) and np.all(x <= self.high)
return (
x.shape == self.shape and np.all(x >= self.low) and np.all(x <= self.high)
)
def to_jsonable(self, sample_n):
return np.array(sample_n).tolist()
@@ -134,7 +151,14 @@ class Box(Space):
return [np.asarray(sample) for sample in sample_n]
def __repr__(self):
return "Box({}, {}, {}, {})".format(self.low.min(), self.high.max(), self.shape, self.dtype)
return "Box({}, {}, {}, {})".format(
self.low.min(), self.high.max(), self.shape, self.dtype
)
def __eq__(self, other):
return isinstance(other, Box) and (self.shape == other.shape) and np.allclose(self.low, other.low) and np.allclose(self.high, other.high)
return (
isinstance(other, Box)
and (self.shape == other.shape)
and np.allclose(self.low, other.low)
and np.allclose(self.high, other.high)
)
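As a quick illustration of the sampling rules documented above (normal for unbounded coordinates, shifted exponential for half-bounded ones, uniform for bounded ones), here is a minimal sketch using the public gym.spaces API; the concrete bounds and seed are illustrative only.

import numpy as np
from gym.spaces import Box

# Bounded below, unbounded above: samples come from a shifted exponential distribution.
space = Box(low=1.0, high=np.inf, shape=(3, 3), dtype=np.float32)
space.seed(0)
x = space.sample()
assert x.shape == (3, 3) and x.dtype == np.float32
assert (x >= space.low).all()    # every coordinate respects the lower bound
assert space.contains(x)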

View File

@@ -31,8 +31,11 @@ class Dict(Space):
})
})
"""
def __init__(self, spaces=None, **spaces_kwargs):
assert (spaces is None) or (not spaces_kwargs), 'Use either Dict(spaces=dict(...)) or Dict(foo=x, bar=z)'
assert (spaces is None) or (
not spaces_kwargs
), "Use either Dict(spaces=dict(...)) or Dict(foo=x, bar=z)"
if spaces is None:
spaces = spaces_kwargs
if isinstance(spaces, dict) and not isinstance(spaces, OrderedDict):
@@ -41,8 +44,12 @@ class Dict(Space):
spaces = OrderedDict(spaces)
self.spaces = spaces
for space in spaces.values():
assert isinstance(space, Space), 'Values of the dict should be instances of gym.Space'
super(Dict, self).__init__(None, None) # None for shape and dtype, since it'll require special handling
assert isinstance(
space, Space
), "Values of the dict should be instances of gym.Space"
super(Dict, self).__init__(
None, None
) # None for shape and dtype, since it'll require special handling
def seed(self, seed=None):
[space.seed(seed) for space in self.spaces.values()]
@@ -62,18 +69,24 @@ class Dict(Space):
def __getitem__(self, key):
return self.spaces[key]
def __iter__(self):
for key in self.spaces:
yield key
def __repr__(self):
return "Dict(" + ", ". join([str(k) + ":" + str(s) for k, s in self.spaces.items()]) + ")"
return (
"Dict("
+ ", ".join([str(k) + ":" + str(s) for k, s in self.spaces.items()])
+ ")"
)
def to_jsonable(self, sample_n):
# serialize as dict-repr of vectors
return {key: space.to_jsonable([sample[key] for sample in sample_n]) \
for key, space in self.spaces.items()}
return {
key: space.to_jsonable([sample[key] for sample in sample_n])
for key, space in self.spaces.items()
}
def from_jsonable(self, sample_n):
dict_of_list = {}
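To make the Dict space behaviour shown above concrete, a small usage sketch (the sub-spaces chosen here are illustrative):

import numpy as np
from gym.spaces import Box, Dict, Discrete

space = Dict(
    {
        "position": Discrete(5),
        "velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32),
    }
)
space.seed(0)
sample = space.sample()          # an OrderedDict with one entry per sub-space
assert set(sample.keys()) == {"position", "velocity"}
assert space.contains(sample)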

View File

@@ -3,13 +3,14 @@ from .space import Space
class Discrete(Space):
r"""A discrete space in :math:`\{ 0, 1, \\dots, n-1 \}`.
r"""A discrete space in :math:`\{ 0, 1, \\dots, n-1 \}`.
Example::
>>> Discrete(2)
"""
def __init__(self, n):
assert n >= 0
self.n = n
@@ -21,7 +22,9 @@ class Discrete(Space):
def contains(self, x):
if isinstance(x, int):
as_int = x
elif isinstance(x, (np.generic, np.ndarray)) and (x.dtype.char in np.typecodes['AllInteger'] and x.shape == ()):
elif isinstance(x, (np.generic, np.ndarray)) and (
x.dtype.char in np.typecodes["AllInteger"] and x.shape == ()
):
as_int = int(x)
else:
return False
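A short sketch of the contains logic above: plain Python ints and zero-dimensional integer scalars are accepted, everything else is rejected.

import numpy as np
from gym.spaces import Discrete

space = Discrete(3)
space.seed(0)
assert space.contains(space.sample())
assert space.contains(np.int64(2))            # zero-dimensional integer scalar
assert not space.contains(1.5)                # floats are rejected
assert not space.contains(np.array([1, 2]))   # non-scalar arrays are rejected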

View File

@@ -3,13 +3,13 @@ from .space import Space
class MultiBinary(Space):
'''
An n-shape binary space.
"""
An n-shape binary space.
The argument to MultiBinary defines n, which could be a number or a `list` of numbers.
Example Usage:
>> self.observation_space = spaces.MultiBinary(5)
>> self.observation_space.sample()
@@ -21,16 +21,17 @@ class MultiBinary(Space):
>> self.observation_space.sample()
array([[0, 0],
[0, 1],
[0, 1],
[1, 1]], dtype=int8)
'''
"""
def __init__(self, n):
self.n = n
if type(n) in [tuple, list, np.ndarray]:
input_n = n
else:
input_n = (n, )
input_n = (n,)
super(MultiBinary, self).__init__(input_n, np.int8)
def sample(self):
@@ -41,7 +42,7 @@ class MultiBinary(Space):
x = np.array(x) # Promote list to array for contains check
if self.shape != x.shape:
return False
return ((x==0) | (x==1)).all()
return ((x == 0) | (x == 1)).all()
def to_jsonable(self, sample_n):
return np.array(sample_n).tolist()
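A quick sketch of the contains check above: samples are int8 arrays, and lists are promoted to arrays before the 0/1 test.

from gym.spaces import MultiBinary

space = MultiBinary(5)
space.seed(0)
x = space.sample()                           # int8 array of shape (5,) with entries in {0, 1}
assert x.shape == (5,)
assert space.contains(x)
assert space.contains([0, 1, 1, 0, 1])       # a list is promoted to an array for the check
assert not space.contains([0, 1, 2, 0, 1])   # 2 is not a valid binary entry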

View File

@@ -22,18 +22,21 @@ class MultiDiscrete(Space):
MultiDiscrete([ 5, 2, 2 ])
"""
def __init__(self, nvec, dtype=np.int64):
"""
nvec: vector of counts of each categorical variable
"""
assert (np.array(nvec) > 0).all(), 'nvec (counts) have to be positive'
assert (np.array(nvec) > 0).all(), "nvec (counts) have to be positive"
self.nvec = np.asarray(nvec, dtype=dtype)
super(MultiDiscrete, self).__init__(self.nvec.shape, dtype)
def sample(self):
return (self.np_random.random_sample(self.nvec.shape)*self.nvec).astype(self.dtype)
return (self.np_random.random_sample(self.nvec.shape) * self.nvec).astype(
self.dtype
)
def contains(self, x):
if isinstance(x, list):

View File

@@ -15,8 +15,10 @@ class Space(object):
Moreover, some implementations of Reinforcement Learning algorithms might
not handle custom spaces properly. Use custom spaces with care.
"""
def __init__(self, shape=None, dtype=None):
import numpy as np # takes about 300-400ms to import, so we load lazily
self.shape = None if shape is None else tuple(shape)
self.dtype = None if dtype is None else np.dtype(dtype)
self._np_random = None
@@ -32,12 +34,12 @@ class Space(object):
return self._np_random
def sample(self):
"""Randomly sample an element of this space. Can be
"""Randomly sample an element of this space. Can be
uniform or non-uniform sampling based on boundedness of space."""
raise NotImplementedError
def seed(self, seed=None):
"""Seed the PRNG of this space. """
"""Seed the PRNG of this space."""
self._np_random, seed = seeding.np_random(seed)
return [seed]
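To illustrate the base-class contract above, a minimal custom space (a toy example, not part of Gym itself); note the caveat in the docstring that not every algorithm handles custom spaces.

import numpy as np
from gym.spaces import Space


class UnitCircle(Space):
    """Toy custom space: points on the unit circle in R^2."""

    def __init__(self):
        super().__init__(shape=(2,), dtype=np.float32)

    def sample(self):
        theta = self.np_random.uniform(0.0, 2.0 * np.pi)
        return np.array([np.cos(theta), np.sin(theta)], dtype=self.dtype)

    def contains(self, x):
        x = np.asarray(x)
        return x.shape == (2,) and bool(np.isclose(np.linalg.norm(x), 1.0))


space = UnitCircle()
space.seed(0)
assert space.contains(space.sample())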

View File

@@ -7,17 +7,31 @@ import pytest
from gym.spaces import Tuple, Box, Discrete, MultiDiscrete, MultiBinary, Dict
@pytest.mark.parametrize("space", [
Discrete(3),
Box(low=0., high=np.inf, shape=(2,2)),
Tuple([Discrete(5), Discrete(10)]),
Tuple([Discrete(5), Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)]),
Tuple((Discrete(5), Discrete(2), Discrete(2))),
MultiDiscrete([2, 2, 100]),
MultiBinary(10),
Dict({"position": Discrete(5),
"velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)}),
])
@pytest.mark.parametrize(
"space",
[
Discrete(3),
Box(low=0.0, high=np.inf, shape=(2, 2)),
Tuple([Discrete(5), Discrete(10)]),
Tuple(
[
Discrete(5),
Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32),
]
),
Tuple((Discrete(5), Discrete(2), Discrete(2))),
MultiDiscrete([2, 2, 100]),
MultiBinary(10),
Dict(
{
"position": Discrete(5),
"velocity": Box(
low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32
),
}
),
],
)
def test_roundtripping(space):
sample_1 = space.sample()
sample_2 = space.sample()
@@ -38,53 +52,77 @@ def test_roundtripping(space):
assert s2 == s2p, "Expected {} to equal {}".format(s2, s2p)
@pytest.mark.parametrize("space", [
Discrete(3),
Box(low=np.array([-10, 0]), high=np.array([10, 10]), dtype=np.float32),
Box(low=-np.inf, high=np.inf, shape=(1,3)),
Tuple([Discrete(5), Discrete(10)]),
Tuple([Discrete(5), Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)]),
Tuple((Discrete(5), Discrete(2), Discrete(2))),
MultiDiscrete([2, 2, 100]),
MultiBinary(6),
Dict({"position": Discrete(5),
"velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)}),
])
@pytest.mark.parametrize(
"space",
[
Discrete(3),
Box(low=np.array([-10, 0]), high=np.array([10, 10]), dtype=np.float32),
Box(low=-np.inf, high=np.inf, shape=(1, 3)),
Tuple([Discrete(5), Discrete(10)]),
Tuple(
[
Discrete(5),
Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32),
]
),
Tuple((Discrete(5), Discrete(2), Discrete(2))),
MultiDiscrete([2, 2, 100]),
MultiBinary(6),
Dict(
{
"position": Discrete(5),
"velocity": Box(
low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32
),
}
),
],
)
def test_equality(space):
space1 = space
space2 = copy(space)
assert space1 == space2, "Expected {} to equal {}".format(space1, space2)
@pytest.mark.parametrize("spaces", [
(Discrete(3), Discrete(4)),
(MultiDiscrete([2, 2, 100]), MultiDiscrete([2, 2, 8])),
(MultiBinary(8), MultiBinary(7)),
(Box(low=np.array([-10, 0]), high=np.array([10, 10]), dtype=np.float32),
Box(low=np.array([-10, 0]), high=np.array([10, 9]), dtype=np.float32)),
(Box(low=-np.inf,high=0., shape=(2,1)),
Box(low=0., high=np.inf, shape=(2,1))),
(Tuple([Discrete(5), Discrete(10)]), Tuple([Discrete(1), Discrete(10)])),
(Dict({"position": Discrete(5)}), Dict({"position": Discrete(4)})),
(Dict({"position": Discrete(5)}), Dict({"speed": Discrete(5)})),
])
@pytest.mark.parametrize(
"spaces",
[
(Discrete(3), Discrete(4)),
(MultiDiscrete([2, 2, 100]), MultiDiscrete([2, 2, 8])),
(MultiBinary(8), MultiBinary(7)),
(
Box(low=np.array([-10, 0]), high=np.array([10, 10]), dtype=np.float32),
Box(low=np.array([-10, 0]), high=np.array([10, 9]), dtype=np.float32),
),
(
Box(low=-np.inf, high=0.0, shape=(2, 1)),
Box(low=0.0, high=np.inf, shape=(2, 1)),
),
(Tuple([Discrete(5), Discrete(10)]), Tuple([Discrete(1), Discrete(10)])),
(Dict({"position": Discrete(5)}), Dict({"position": Discrete(4)})),
(Dict({"position": Discrete(5)}), Dict({"speed": Discrete(5)})),
],
)
def test_inequality(spaces):
space1, space2 = spaces
assert space1 != space2, "Expected {} != {}".format(space1, space2)
@pytest.mark.parametrize("space", [
Discrete(5),
Box(low=0, high=255, shape=(2,), dtype='uint8'),
Box(low=-np.inf, high=np.inf, shape=(3,3)),
Box(low=1., high=np.inf, shape=(3,3)),
Box(low=-np.inf, high=2., shape=(3,3)),
])
@pytest.mark.parametrize(
"space",
[
Discrete(5),
Box(low=0, high=255, shape=(2,), dtype="uint8"),
Box(low=-np.inf, high=np.inf, shape=(3, 3)),
Box(low=1.0, high=np.inf, shape=(3, 3)),
Box(low=-np.inf, high=2.0, shape=(3, 3)),
],
)
def test_sample(space):
space.seed(0)
n_trials = 100
samples = np.array([space.sample() for _ in range(n_trials)])
expected_mean = 0.
expected_mean = 0.0
if isinstance(space, Box):
if space.is_bounded():
expected_mean = (space.high + space.low) / 2
@@ -93,23 +131,35 @@ def test_sample(space):
elif space.is_bounded("above"):
expected_mean = -1 + space.high
else:
expected_mean = 0.
expected_mean = 0.0
elif isinstance(space, Discrete):
expected_mean = space.n / 2
else:
raise NotImplementedError
np.testing.assert_allclose(expected_mean, samples.mean(), atol=3.0 * samples.std())
@pytest.mark.parametrize("spaces", [
(Discrete(5), MultiBinary(5)),
(Box(low=np.array([-10, 0]), high=np.array([10,10]), dtype=np.float32), MultiDiscrete([2, 2, 8])),
(Box(low=0, high=255, shape=(64, 64, 3), dtype=np.uint8), Box(low=0, high=255, shape=(32, 32, 3), dtype=np.uint8)),
(Dict({"position": Discrete(5)}), Tuple([Discrete(5)])),
(Dict({"position": Discrete(5)}), Discrete(5)),
(Tuple((Discrete(5),)), Discrete(5)),
(Box(low=np.array([-np.inf,0.]), high=np.array([0., np.inf])),
Box(low=np.array([-np.inf, 1.]), high=np.array([0., np.inf])))
])
@pytest.mark.parametrize(
"spaces",
[
(Discrete(5), MultiBinary(5)),
(
Box(low=np.array([-10, 0]), high=np.array([10, 10]), dtype=np.float32),
MultiDiscrete([2, 2, 8]),
),
(
Box(low=0, high=255, shape=(64, 64, 3), dtype=np.uint8),
Box(low=0, high=255, shape=(32, 32, 3), dtype=np.uint8),
),
(Dict({"position": Discrete(5)}), Tuple([Discrete(5)])),
(Dict({"position": Discrete(5)}), Discrete(5)),
(Tuple((Discrete(5),)), Discrete(5)),
(
Box(low=np.array([-np.inf, 0.0]), high=np.array([0.0, np.inf])),
Box(low=np.array([-np.inf, 1.0]), high=np.array([0.0, np.inf])),
),
],
)
def test_class_inequality(spaces):
assert spaces[0] == spaces[0]
assert spaces[1] == spaces[1]
@@ -117,11 +167,14 @@ def test_class_inequality(spaces):
assert spaces[1] != spaces[0]
@pytest.mark.parametrize("space_fn", [
lambda: Dict(space1='abc'),
lambda: Dict({'space1': 'abc'}),
lambda: Tuple(['abc'])
])
@pytest.mark.parametrize(
"space_fn",
[
lambda: Dict(space1="abc"),
lambda: Dict({"space1": "abc"}),
lambda: Tuple(["abc"]),
],
)
def test_bad_space_calls(space_fn):
with pytest.raises(AssertionError):
space_fn()

View File

@@ -6,97 +6,181 @@ import pytest
from gym.spaces import Box, Dict, Discrete, MultiBinary, MultiDiscrete, Tuple, utils
@pytest.mark.parametrize(["space", "flatdim"], [
(Discrete(3), 3),
(Box(low=0., high=np.inf, shape=(2, 2)), 4),
(Tuple([Discrete(5), Discrete(10)]), 15),
(Tuple([Discrete(5), Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)]), 7),
(Tuple((Discrete(5), Discrete(2), Discrete(2))), 9),
(MultiDiscrete([2, 2, 100]), 3),
(MultiBinary(10), 10),
(Dict({"position": Discrete(5),
"velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)}), 7),
])
@pytest.mark.parametrize(
["space", "flatdim"],
[
(Discrete(3), 3),
(Box(low=0.0, high=np.inf, shape=(2, 2)), 4),
(Tuple([Discrete(5), Discrete(10)]), 15),
(
Tuple(
[
Discrete(5),
Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32),
]
),
7,
),
(Tuple((Discrete(5), Discrete(2), Discrete(2))), 9),
(MultiDiscrete([2, 2, 100]), 3),
(MultiBinary(10), 10),
(
Dict(
{
"position": Discrete(5),
"velocity": Box(
low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32
),
}
),
7,
),
],
)
def test_flatdim(space, flatdim):
dim = utils.flatdim(space)
assert dim == flatdim, "Expected {} to equal {}".format(dim, flatdim)
@pytest.mark.parametrize("space", [
Discrete(3),
Box(low=0., high=np.inf, shape=(2, 2)),
Tuple([Discrete(5), Discrete(10)]),
Tuple([Discrete(5), Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)]),
Tuple((Discrete(5), Discrete(2), Discrete(2))),
MultiDiscrete([2, 2, 100]),
MultiBinary(10),
Dict({"position": Discrete(5),
"velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)}),
])
@pytest.mark.parametrize(
"space",
[
Discrete(3),
Box(low=0.0, high=np.inf, shape=(2, 2)),
Tuple([Discrete(5), Discrete(10)]),
Tuple(
[
Discrete(5),
Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32),
]
),
Tuple((Discrete(5), Discrete(2), Discrete(2))),
MultiDiscrete([2, 2, 100]),
MultiBinary(10),
Dict(
{
"position": Discrete(5),
"velocity": Box(
low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32
),
}
),
],
)
def test_flatten_space_boxes(space):
flat_space = utils.flatten_space(space)
assert isinstance(flat_space, Box), "Expected {} to equal {}".format(type(flat_space), Box)
assert isinstance(flat_space, Box), "Expected {} to equal {}".format(
type(flat_space), Box
)
flatdim = utils.flatdim(space)
(single_dim, ) = flat_space.shape
(single_dim,) = flat_space.shape
assert single_dim == flatdim, "Expected {} to equal {}".format(single_dim, flatdim)
@pytest.mark.parametrize("space", [
Discrete(3),
Box(low=0., high=np.inf, shape=(2, 2)),
Tuple([Discrete(5), Discrete(10)]),
Tuple([Discrete(5), Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)]),
Tuple((Discrete(5), Discrete(2), Discrete(2))),
MultiDiscrete([2, 2, 100]),
MultiBinary(10),
Dict({"position": Discrete(5),
"velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)}),
])
@pytest.mark.parametrize(
"space",
[
Discrete(3),
Box(low=0.0, high=np.inf, shape=(2, 2)),
Tuple([Discrete(5), Discrete(10)]),
Tuple(
[
Discrete(5),
Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32),
]
),
Tuple((Discrete(5), Discrete(2), Discrete(2))),
MultiDiscrete([2, 2, 100]),
MultiBinary(10),
Dict(
{
"position": Discrete(5),
"velocity": Box(
low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32
),
}
),
],
)
def test_flat_space_contains_flat_points(space):
some_samples = [space.sample() for _ in range(10)]
flattened_samples = [utils.flatten(space, sample) for sample in some_samples]
flat_space = utils.flatten_space(space)
for i, flat_sample in enumerate(flattened_samples):
assert flat_sample in flat_space,\
'Expected sample #{} {} to be in {}'.format(i, flat_sample, flat_space)
assert flat_sample in flat_space, "Expected sample #{} {} to be in {}".format(
i, flat_sample, flat_space
)
@pytest.mark.parametrize("space", [
Discrete(3),
Box(low=0., high=np.inf, shape=(2, 2)),
Tuple([Discrete(5), Discrete(10)]),
Tuple([Discrete(5), Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)]),
Tuple((Discrete(5), Discrete(2), Discrete(2))),
MultiDiscrete([2, 2, 100]),
MultiBinary(10),
Dict({"position": Discrete(5),
"velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)}),
])
@pytest.mark.parametrize(
"space",
[
Discrete(3),
Box(low=0.0, high=np.inf, shape=(2, 2)),
Tuple([Discrete(5), Discrete(10)]),
Tuple(
[
Discrete(5),
Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32),
]
),
Tuple((Discrete(5), Discrete(2), Discrete(2))),
MultiDiscrete([2, 2, 100]),
MultiBinary(10),
Dict(
{
"position": Discrete(5),
"velocity": Box(
low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32
),
}
),
],
)
def test_flatten_dim(space):
sample = utils.flatten(space, space.sample())
(single_dim, ) = sample.shape
(single_dim,) = sample.shape
flatdim = utils.flatdim(space)
assert single_dim == flatdim, "Expected {} to equal {}".format(single_dim, flatdim)
@pytest.mark.parametrize("space", [
Discrete(3),
Box(low=0., high=np.inf, shape=(2, 2)),
Tuple([Discrete(5), Discrete(10)]),
Tuple([Discrete(5), Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)]),
Tuple((Discrete(5), Discrete(2), Discrete(2))),
MultiDiscrete([2, 2, 100]),
MultiBinary(10),
Dict({"position": Discrete(5),
"velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)}),
])
@pytest.mark.parametrize(
"space",
[
Discrete(3),
Box(low=0.0, high=np.inf, shape=(2, 2)),
Tuple([Discrete(5), Discrete(10)]),
Tuple(
[
Discrete(5),
Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32),
]
),
Tuple((Discrete(5), Discrete(2), Discrete(2))),
MultiDiscrete([2, 2, 100]),
MultiBinary(10),
Dict(
{
"position": Discrete(5),
"velocity": Box(
low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32
),
}
),
],
)
def test_flatten_roundtripping(space):
some_samples = [space.sample() for _ in range(10)]
flattened_samples = [utils.flatten(space, sample) for sample in some_samples]
roundtripped_samples = [utils.unflatten(space, sample) for sample in flattened_samples]
for i, (original, roundtripped) in enumerate(zip(some_samples, roundtripped_samples)):
assert compare_nested(original, roundtripped), \
'Expected sample #{} {} to equal {}'.format(i, original, roundtripped)
roundtripped_samples = [
utils.unflatten(space, sample) for sample in flattened_samples
]
for i, (original, roundtripped) in enumerate(
zip(some_samples, roundtripped_samples)
):
assert compare_nested(
original, roundtripped
), "Expected sample #{} {} to equal {}".format(i, original, roundtripped)
def compare_nested(left, right):
@@ -104,7 +188,9 @@ def compare_nested(left, right):
return np.allclose(left, right)
elif isinstance(left, OrderedDict) and isinstance(right, OrderedDict):
res = len(left) == len(right)
for ((left_key, left_value), (right_key, right_value)) in zip(left.items(), right.items()):
for ((left_key, left_value), (right_key, right_value)) in zip(
left.items(), right.items()
):
if not res:
return False
res = left_key == right_key and compare_nested(left_value, right_value)
@@ -119,25 +205,48 @@ def compare_nested(left, right):
else:
return left == right
'''
"""
Expected flattened types are based on:
1. The type that the space is hardcoded as (i.e. multi_discrete=np.int64, discrete=np.int64, multi_binary=np.int8)
2. The type that the space is instantiated with (i.e. box=np.float32 by default unless instantiated with a different type)
3. The smallest type that the composite space(tuple, dict) can be represented as. In flatten, this is determined
internally by numpy when np.concatenate is called.
'''
@pytest.mark.parametrize(["original_space", "expected_flattened_dtype"], [
(Discrete(3), np.int64),
(Box(low=0., high=np.inf, shape=(2, 2)), np.float32),
(Box(low=0., high=np.inf, shape=(2, 2), dtype=np.float16), np.float16),
(Tuple([Discrete(5), Discrete(10)]), np.int64),
(Tuple([Discrete(5), Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)]), np.float64),
(Tuple((Discrete(5), Discrete(2), Discrete(2))), np.int64),
(MultiDiscrete([2, 2, 100]), np.int64),
(MultiBinary(10), np.int8),
(Dict({"position": Discrete(5),
"velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float16)}), np.float64),
])
3. The smallest type that the composite space(tuple, dict) can be represented as. In flatten, this is determined
internally by numpy when np.concatenate is called.
"""
@pytest.mark.parametrize(
["original_space", "expected_flattened_dtype"],
[
(Discrete(3), np.int64),
(Box(low=0.0, high=np.inf, shape=(2, 2)), np.float32),
(Box(low=0.0, high=np.inf, shape=(2, 2), dtype=np.float16), np.float16),
(Tuple([Discrete(5), Discrete(10)]), np.int64),
(
Tuple(
[
Discrete(5),
Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32),
]
),
np.float64,
),
(Tuple((Discrete(5), Discrete(2), Discrete(2))), np.int64),
(MultiDiscrete([2, 2, 100]), np.int64),
(MultiBinary(10), np.int8),
(
Dict(
{
"position": Discrete(5),
"velocity": Box(
low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float16
),
}
),
np.float64,
),
],
)
def test_dtypes(original_space, expected_flattened_dtype):
flattened_space = utils.flatten_space(original_space)
@@ -145,28 +254,41 @@ def test_dtypes(original_space, expected_flattened_dtype):
flattened_sample = utils.flatten(original_space, original_sample)
unflattened_sample = utils.unflatten(original_space, flattened_sample)
assert flattened_space.contains(flattened_sample), "Expected flattened_space to contain flattened_sample"
assert flattened_space.dtype == expected_flattened_dtype, "Expected flattened_space's dtype to equal " \
"{}".format(expected_flattened_dtype)
assert flattened_space.contains(
flattened_sample
), "Expected flattened_space to contain flattened_sample"
assert (
flattened_space.dtype == expected_flattened_dtype
), "Expected flattened_space's dtype to equal " "{}".format(
expected_flattened_dtype
)
assert flattened_sample.dtype == flattened_space.dtype, "Expected flattened_space's dtype to equal " \
"flattened_sample's dtype "
assert flattened_sample.dtype == flattened_space.dtype, (
"Expected flattened_space's dtype to equal " "flattened_sample's dtype "
)
compare_sample_types(original_space, original_sample, unflattened_sample)
def compare_sample_types(original_space, original_sample, unflattened_sample):
if isinstance(original_space, Discrete):
assert isinstance(unflattened_sample, int), "Expected unflattened_sample to be an int. unflattened_sample: " \
"{} original_sample: {}".format(unflattened_sample, original_sample)
assert isinstance(unflattened_sample, int), (
"Expected unflattened_sample to be an int. unflattened_sample: "
"{} original_sample: {}".format(unflattened_sample, original_sample)
)
elif isinstance(original_space, Tuple):
for index in range(len(original_space)):
compare_sample_types(original_space.spaces[index], original_sample[index], unflattened_sample[index])
compare_sample_types(
original_space.spaces[index],
original_sample[index],
unflattened_sample[index],
)
elif isinstance(original_space, Dict):
for key, space in original_space.spaces.items():
compare_sample_types(space, original_sample[key], unflattened_sample[key])
else:
assert unflattened_sample.dtype == original_sample.dtype, "Expected unflattened_sample's dtype to equal " \
"original_sample's dtype. unflattened_sample: " \
"{} original_sample: {}".format(unflattened_sample,
original_sample)
assert unflattened_sample.dtype == original_sample.dtype, (
"Expected unflattened_sample's dtype to equal "
"original_sample's dtype. unflattened_sample: "
"{} original_sample: {}".format(unflattened_sample, original_sample)
)

View File

@@ -9,10 +9,13 @@ class Tuple(Space):
Example usage:
self.observation_space = spaces.Tuple((spaces.Discrete(2), spaces.Discrete(3)))
"""
def __init__(self, spaces):
self.spaces = spaces
for space in spaces:
assert isinstance(space, Space), "Elements of the tuple must be instances of gym.Space"
assert isinstance(
space, Space
), "Elements of the tuple must be instances of gym.Space"
super(Tuple, self).__init__(None, None)
def seed(self, seed=None):
@@ -24,25 +27,38 @@ class Tuple(Space):
def contains(self, x):
if isinstance(x, list):
x = tuple(x) # Promote list to tuple for contains check
return isinstance(x, tuple) and len(x) == len(self.spaces) and all(
space.contains(part) for (space,part) in zip(self.spaces,x))
return (
isinstance(x, tuple)
and len(x) == len(self.spaces)
and all(space.contains(part) for (space, part) in zip(self.spaces, x))
)
def __repr__(self):
return "Tuple(" + ", ". join([str(s) for s in self.spaces]) + ")"
return "Tuple(" + ", ".join([str(s) for s in self.spaces]) + ")"
def to_jsonable(self, sample_n):
# serialize as list-repr of tuple of vectors
return [space.to_jsonable([sample[i] for sample in sample_n]) \
for i, space in enumerate(self.spaces)]
return [
space.to_jsonable([sample[i] for sample in sample_n])
for i, space in enumerate(self.spaces)
]
def from_jsonable(self, sample_n):
return [sample for sample in zip(*[space.from_jsonable(sample_n[i]) for i, space in enumerate(self.spaces)])]
return [
sample
for sample in zip(
*[
space.from_jsonable(sample_n[i])
for i, space in enumerate(self.spaces)
]
)
]
def __getitem__(self, index):
return self.spaces[index]
def __len__(self):
return len(self.spaces)
def __eq__(self, other):
return isinstance(other, Tuple) and self.spaces == other.spaces
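A brief sketch exercising the contains and JSON round-trip logic above (the sub-spaces are illustrative):

from gym.spaces import Discrete, Tuple

space = Tuple((Discrete(5), Discrete(2)))
space.seed(0)
x = space.sample()                        # a tuple with one entry per sub-space
assert space.contains(x)
assert space.contains(list(x))            # a list is promoted to a tuple for the check

batch = [space.sample() for _ in range(3)]
as_json = space.to_jsonable(batch)        # one inner list per sub-space
restored = space.from_jsonable(as_json)   # back to a list of per-sample tuples
assert all(space.contains(s) for s in restored)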

View File

@@ -50,10 +50,10 @@ def flatten(space, x):
return onehot
elif isinstance(space, Tuple):
return np.concatenate(
[flatten(s, x_part) for x_part, s in zip(x, space.spaces)])
[flatten(s, x_part) for x_part, s in zip(x, space.spaces)]
)
elif isinstance(space, Dict):
return np.concatenate(
[flatten(s, x[key]) for key, s in space.spaces.items()])
return np.concatenate([flatten(s, x[key]) for key, s in space.spaces.items()])
elif isinstance(space, MultiBinary):
return np.asarray(x, dtype=space.dtype).flatten()
elif isinstance(space, MultiDiscrete):
@@ -89,8 +89,7 @@ def unflatten(space, x):
list_flattened = np.split(x, np.cumsum(dims)[:-1])
list_unflattened = [
(key, unflatten(s, flattened))
for flattened, (key,
s) in zip(list_flattened, space.spaces.items())
for flattened, (key, s) in zip(list_flattened, space.spaces.items())
]
return OrderedDict(list_unflattened)
elif isinstance(space, MultiBinary):
@@ -142,31 +141,23 @@ def flatten_space(space):
if isinstance(space, Box):
return Box(space.low.flatten(), space.high.flatten(), dtype=space.dtype)
if isinstance(space, Discrete):
return Box(low=0, high=1, shape=(space.n, ), dtype=space.dtype)
return Box(low=0, high=1, shape=(space.n,), dtype=space.dtype)
if isinstance(space, Tuple):
space = [flatten_space(s) for s in space.spaces]
return Box(
low=np.concatenate([s.low for s in space]),
high=np.concatenate([s.high for s in space]),
dtype=np.result_type(*[s.dtype for s in space])
dtype=np.result_type(*[s.dtype for s in space]),
)
if isinstance(space, Dict):
space = [flatten_space(s) for s in space.spaces.values()]
return Box(
low=np.concatenate([s.low for s in space]),
high=np.concatenate([s.high for s in space]),
dtype=np.result_type(*[s.dtype for s in space])
dtype=np.result_type(*[s.dtype for s in space]),
)
if isinstance(space, MultiBinary):
return Box(low=0,
high=1,
shape=(space.n, ),
dtype=space.dtype
)
return Box(low=0, high=1, shape=(space.n,), dtype=space.dtype)
if isinstance(space, MultiDiscrete):
return Box(
low=np.zeros_like(space.nvec),
high=space.nvec,
dtype=space.dtype
)
return Box(low=np.zeros_like(space.nvec), high=space.nvec, dtype=space.dtype)
raise NotImplementedError
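Putting flatdim, flatten, unflatten and flatten_space together, a small sketch (mirroring a space used in the tests above):

import numpy as np
from gym.spaces import Box, Dict, Discrete, utils

space = Dict(
    {
        "position": Discrete(5),
        "velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32),
    }
)
space.seed(0)

sample = space.sample()
flat = utils.flatten(space, sample)            # 1-D array: 5 one-hot entries + 2 box entries
assert flat.shape == (utils.flatdim(space),)   # flatdim is 7 for this space
flat_space = utils.flatten_space(space)        # an equivalent Box, built as above
assert flat_space.contains(flat)
restored = utils.unflatten(space, flat)        # an OrderedDict again
assert space.contains(restored)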

View File

@@ -1,5 +1,6 @@
from gym import core
class ArgumentEnv(core.Env):
calls = 0
@@ -7,9 +8,10 @@ class ArgumentEnv(core.Env):
self.calls += 1
self.arg = arg
def test_env_instantiation():
# This looks like a pretty trivial test, but given our usage of
# __new__, it's worth having.
env = ArgumentEnv('arg')
assert env.arg == 'arg'
env = ArgumentEnv("arg")
assert env.arg == "arg"
assert env.calls == 1

View File

@@ -12,10 +12,12 @@ from contextlib import contextmanager
# replace method which could result in the file temporarily
# disappearing.
import sys
if sys.version_info >= (3, 3):
# Python 3.3 and up have a native `replace` method
from os import replace
elif sys.platform.startswith("win"):
def replace(src, dst):
# TODO: on Windows, this will raise if the file is in use,
# which is possible. We'll need to make this more robust over
@@ -25,24 +27,27 @@ elif sys.platform.startswith("win"):
except OSError:
pass
os.rename(src, dst)
else:
# POSIX rename() is always atomic
from os import rename as replace
@contextmanager
def atomic_write(filepath, binary=False, fsync=False):
""" Writeable file object that atomically updates a file (using a temporary file). In some cases (namely Python < 3.3 on Windows), this could result in an existing file being temporarily unlinked.
"""Writeable file object that atomically updates a file (using a temporary file). In some cases (namely Python < 3.3 on Windows), this could result in an existing file being temporarily unlinked.
:param filepath: the file path to be opened
:param binary: whether to open the file in a binary mode instead of textual
:param fsync: whether to force write the file to disk
"""
tmppath = filepath + '~'
tmppath = filepath + "~"
while os.path.isfile(tmppath):
tmppath += '~'
tmppath += "~"
try:
with open(tmppath, 'wb' if binary else 'w') as file:
with open(tmppath, "wb" if binary else "w") as file:
yield file
if fsync:
file.flush()
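A usage sketch for the context manager above; the import path gym.utils.atomic_write is assumed (the file path is not shown here) and the file name is illustrative.

import json

from gym.utils.atomic_write import atomic_write  # assumed module path

stats = {"episodes": 10, "mean_reward": 1.5}
# Writes to a temporary "stats.json~" file first, then atomically replaces stats.json.
with atomic_write("stats.json") as f:
    json.dump(stats, f)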

View File

@@ -2,6 +2,7 @@ import atexit
import threading
import weakref
class Closer(object):
"""A registry that ensures your objects get closed, whether manually,
upon garbage collection, or upon exit. To work properly, your
@@ -48,7 +49,7 @@ class Closer(object):
Returns:
int: The registration ID of this object. It is the caller's responsibility to save this ID if early closing is desired.
"""
assert hasattr(closeable, 'close'), 'No close method for {}'.format(closeable)
assert hasattr(closeable, "close"), "No close method for {}".format(closeable)
next_id = self.generate_next_id()
self.closeables[next_id] = closeable
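A minimal sketch of registering an object with a Closer; only register() is exercised because that is the part visible above, and the import path gym.utils.closer is assumed.

from gym.utils.closer import Closer  # assumed module path


class Connection:
    def close(self):
        print("connection closed")


registry = Closer()
conn = Connection()
conn_id = registry.register(conn)  # keep the ID if you may want to close the object early
# Registered objects are closed automatically on garbage collection or at interpreter exit.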

View File

@@ -11,11 +11,11 @@ color2num = dict(
magenta=35,
cyan=36,
white=37,
crimson=38
crimson=38,
)
def colorize(string, color, bold=False, highlight = False):
def colorize(string, color, bold=False, highlight=False):
"""Return string surrounded by appropriate terminal color codes to
print colorized text. Valid colors: gray, red, green, yellow,
blue, magenta, cyan, white, crimson
@@ -23,8 +23,10 @@ def colorize(string, color, bold=False, highlight = False):
attr = []
num = color2num[color]
if highlight: num += 10
if highlight:
num += 10
attr.append(str(num))
if bold: attr.append('1')
attrs = ';'.join(attr)
return '\x1b[%sm%s\x1b[0m' % (attrs, string)
if bold:
attr.append("1")
attrs = ";".join(attr)
return "\x1b[%sm%s\x1b[0m" % (attrs, string)
