Clean some docstrings (#1854)

* add type of argument

* fix typos

* split lines for formatting

* reformat string, add ellipsis, remove r string

* make docstring stylistically consistent

* make docstrings a little more elaborate

* reduce by 1 space

* make line wrap 120

* remove unnecessary line

* add returns to docstring

* add docstring, make code more pep8 and delete some unused print functions

* more pep8

* file docstring instead of comments

* delete unused variables, add file docstring and add some pep8 spring cleaning

* add file docstring, fix typos and add some pep8 corrections

Co-authored-by: Dan <daniel.timbrell@ing.com>
This commit is contained in:
Dan Timbrell
2020-04-24 23:10:27 +02:00
committed by GitHub
parent f2c9793eb7
commit 3bd5ef71c2
7 changed files with 360 additions and 291 deletions

View File

@@ -11,12 +11,20 @@ def cem(f, th_mean, batch_size, n_iter, elite_frac, initial_std=1.0):
""" """
Generic implementation of the cross-entropy method for maximizing a black-box function Generic implementation of the cross-entropy method for maximizing a black-box function
f: a function mapping from vector -> scalar Args:
th_mean: initial mean over input distribution f: a function mapping from vector -> scalar
batch_size: number of samples of theta to evaluate per batch th_mean (np.array): initial mean over input distribution
n_iter: number of batches batch_size (int): number of samples of theta to evaluate per batch
elite_frac: each batch, select this fraction of the top-performing samples n_iter (int): number of batches
initial_std: initial standard deviation over parameter vectors elite_frac (float): each batch, select this fraction of the top-performing samples
initial_std (float): initial standard deviation over parameter vectors
returns:
A generator of dicts. Subsequent dicts correspond to iterations of CEM algorithm.
The dicts contain the following values:
'ys' : numpy array with values of function evaluated at current population
'ys_mean': mean value of function over current population
'theta_mean': mean value of the parameter vector over current population
""" """
n_elite = int(np.round(batch_size*elite_frac)) n_elite = int(np.round(batch_size*elite_frac))
th_std = np.ones_like(th_mean) * initial_std th_std = np.ones_like(th_mean) * initial_std
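For orientation, a minimal sketch consistent with the docstring above; the name cem_sketch and the quadratic objective are illustrative only, not part of this commit.

import numpy as np

def cem_sketch(f, th_mean, batch_size, n_iter, elite_frac, initial_std=1.0):
    # Sample a population around the current mean, keep the top elite_frac,
    # refit mean/std to the elite, and yield the dict described in the docstring.
    n_elite = int(np.round(batch_size * elite_frac))
    th_std = np.ones_like(th_mean) * initial_std
    for _ in range(n_iter):
        ths = th_mean + th_std[None, :] * np.random.randn(batch_size, th_mean.size)
        ys = np.array([f(th) for th in ths])
        elite_ths = ths[ys.argsort()[::-1][:n_elite]]
        th_mean, th_std = elite_ths.mean(axis=0), elite_ths.std(axis=0)
        yield {'ys': ys, 'ys_mean': ys.mean(), 'theta_mean': th_mean}

# Example: maximize a simple concave function of a 3-vector.
for it in cem_sketch(lambda th: -np.sum(th ** 2), np.zeros(3), batch_size=50, n_iter=5, elite_frac=0.2):
    print("%.3f" % it['ys_mean'], it['theta_mean'])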

View File

@@ -6,7 +6,7 @@ env_closer = closer.Closer()
 class Env(object):
-    r"""The main OpenAI Gym class. It encapsulates an environment with
+    """The main OpenAI Gym class. It encapsulates an environment with
     arbitrary behind-the-scenes dynamics. An environment can be
     partially or fully observed.
@@ -26,9 +26,7 @@ class Env(object):
     Note: a default reward range set to [-inf,+inf] already exists. Set it if you want a narrower range.
-    The methods are accessed publicly as "step", "reset", etc.. The
-    non-underscored versions are wrapper methods to which we may add
-    functionality over time.
+    The methods are accessed publicly as "step", "reset", etc...
     """
     # Set this in SOME subclasses
     metadata = {'render.modes': []}
@@ -174,9 +172,9 @@ class GoalEnv(Env):
     def compute_reward(self, achieved_goal, desired_goal, info):
         """Compute the step reward. This externalizes the reward function and makes
-        it dependent on an a desired goal and the one that was achieved. If you wish to include
+        it dependent on a desired goal and the one that was achieved. If you wish to include
         additional rewards that are independent of the goal, you can include the necessary values
-        to derive it in info and compute it accordingly.
+        to derive it in 'info' and compute it accordingly.
         Args:
             achieved_goal (object): the goal that was achieved during execution
@@ -194,7 +192,7 @@ class GoalEnv(Env):
 class Wrapper(Env):
-    r"""Wraps the environment to allow a modular transformation.
+    """Wraps the environment to allow a modular transformation.
     This class is the base class for all wrappers. The subclass could override
     some methods to change the behavior of the original environment without touching the
@@ -290,4 +288,4 @@ class ActionWrapper(Wrapper):
         raise NotImplementedError
     def reverse_action(self, action):
         raise NotImplementedError
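As a hedged illustration of the compute_reward contract described above (the class name and the 0.05 tolerance are invented for the example; a real implementation would subclass gym.GoalEnv):

import numpy as np

class ReachGoalSketch:
    # Reward depends only on the two goals and on values passed through `info`,
    # so it can be recomputed after the fact (e.g. for hindsight relabelling).
    def compute_reward(self, achieved_goal, desired_goal, info):
        distance = np.linalg.norm(np.asarray(achieved_goal) - np.asarray(desired_goal))
        return 0.0 if distance < 0.05 else -1.0

print(ReachGoalSketch().compute_reward([0.0, 0.0], [0.01, 0.02], info={}))  # 0.0, goal reached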

View File

@@ -1,34 +1,36 @@
"""
Top-down car dynamics simulation.
Some ideas are taken from this great tutorial http://www.iforce2d.net/b2dtut/top-down-car by Chris Campbell.
This simulation is a bit more detailed, with wheels rotation.
Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
"""
import numpy as np import numpy as np
import math import math
import Box2D import Box2D
from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener, shape) from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener, shape)
# Top-down car dynamics simulation.
#
# Some ideas are taken from this great tutorial http://www.iforce2d.net/b2dtut/top-down-car by Chris Campbell.
# This simulation is a bit more detailed, with wheels rotation.
#
# Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
SIZE = 0.02 SIZE = 0.02
ENGINE_POWER = 100000000*SIZE*SIZE ENGINE_POWER = 100000000*SIZE*SIZE
WHEEL_MOMENT_OF_INERTIA = 4000*SIZE*SIZE WHEEL_MOMENT_OF_INERTIA = 4000*SIZE*SIZE
FRICTION_LIMIT = 1000000*SIZE*SIZE # friction ~= mass ~= size^2 (calculated implicitly using density) FRICTION_LIMIT = 1000000*SIZE*SIZE # friction ~= mass ~= size^2 (calculated implicitly using density)
WHEEL_R = 27 WHEEL_R = 27
WHEEL_W = 14 WHEEL_W = 14
WHEELPOS = [ WHEELPOS = [
(-55,+80), (+55,+80), (-55, +80), (+55, +80),
(-55,-82), (+55,-82) (-55, -82), (+55, -82)
] ]
HULL_POLY1 =[ HULL_POLY1 = [
(-60,+130), (+60,+130), (-60, +130), (+60, +130),
(+60,+110), (-60,+110) (+60, +110), (-60, +110)
] ]
HULL_POLY2 =[ HULL_POLY2 = [
(-15,+120), (+15,+120), (-15, +120), (+15, +120),
(+20, +20), (-20, 20) (+20, +20), (-20, 20)
] ]
HULL_POLY3 =[ HULL_POLY3 = [
(+25, +20), (+25, +20),
(+50, -10), (+50, -10),
(+50, -40), (+50, -40),
@@ -38,41 +40,42 @@ HULL_POLY3 =[
(-50, -10), (-50, -10),
(-25, +20) (-25, +20)
] ]
HULL_POLY4 =[ HULL_POLY4 = [
(-50,-120), (+50,-120), (-50, -120), (+50, -120),
(+50,-90), (-50,-90) (+50, -90), (-50, -90)
] ]
WHEEL_COLOR = (0.0,0.0,0.0) WHEEL_COLOR = (0.0, 0.0, 0.0)
WHEEL_WHITE = (0.3,0.3,0.3) WHEEL_WHITE = (0.3, 0.3, 0.3)
MUD_COLOR = (0.4,0.4,0.0) MUD_COLOR = (0.4, 0.4, 0.0)
class Car: class Car:
def __init__(self, world, init_angle, init_x, init_y): def __init__(self, world, init_angle, init_x, init_y):
self.world = world self.world = world
self.hull = self.world.CreateDynamicBody( self.hull = self.world.CreateDynamicBody(
position = (init_x, init_y), position=(init_x, init_y),
angle = init_angle, angle=init_angle,
fixtures = [ fixtures=[
fixtureDef(shape = polygonShape(vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY1 ]), density=1.0), fixtureDef(shape=polygonShape(vertices=[(x*SIZE, y*SIZE) for x, y in HULL_POLY1]), density=1.0),
fixtureDef(shape = polygonShape(vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY2 ]), density=1.0), fixtureDef(shape=polygonShape(vertices=[(x*SIZE, y*SIZE) for x, y in HULL_POLY2]), density=1.0),
fixtureDef(shape = polygonShape(vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY3 ]), density=1.0), fixtureDef(shape=polygonShape(vertices=[(x*SIZE, y*SIZE) for x, y in HULL_POLY3]), density=1.0),
fixtureDef(shape = polygonShape(vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY4 ]), density=1.0) fixtureDef(shape=polygonShape(vertices=[(x*SIZE, y*SIZE) for x, y in HULL_POLY4]), density=1.0)
] ]
) )
self.hull.color = (0.8,0.0,0.0) self.hull.color = (0.8, 0.0, 0.0)
self.wheels = [] self.wheels = []
self.fuel_spent = 0.0 self.fuel_spent = 0.0
WHEEL_POLY = [ WHEEL_POLY = [
(-WHEEL_W,+WHEEL_R), (+WHEEL_W,+WHEEL_R), (-WHEEL_W, +WHEEL_R), (+WHEEL_W, +WHEEL_R),
(+WHEEL_W,-WHEEL_R), (-WHEEL_W,-WHEEL_R) (+WHEEL_W, -WHEEL_R), (-WHEEL_W, -WHEEL_R)
] ]
for wx,wy in WHEELPOS: for wx, wy in WHEELPOS:
front_k = 1.0 if wy > 0 else 1.0 front_k = 1.0 if wy > 0 else 1.0
w = self.world.CreateDynamicBody( w = self.world.CreateDynamicBody(
position = (init_x+wx*SIZE, init_y+wy*SIZE), position=(init_x+wx*SIZE, init_y+wy*SIZE),
angle = init_angle, angle=init_angle,
fixtures = fixtureDef( fixtures=fixtureDef(
shape=polygonShape(vertices=[ (x*front_k*SIZE,y*front_k*SIZE) for x,y in WHEEL_POLY ]), shape=polygonShape(vertices=[(x*front_k*SIZE,y*front_k*SIZE) for x, y in WHEEL_POLY]),
density=0.1, density=0.1,
categoryBits=0x0020, categoryBits=0x0020,
maskBits=0x001, maskBits=0x001,
@@ -80,7 +83,7 @@ class Car:
) )
w.wheel_rad = front_k*WHEEL_R*SIZE w.wheel_rad = front_k*WHEEL_R*SIZE
w.color = WHEEL_COLOR w.color = WHEEL_COLOR
w.gas = 0.0 w.gas = 0.0
w.brake = 0.0 w.brake = 0.0
w.steer = 0.0 w.steer = 0.0
w.phase = 0.0 # wheel angle w.phase = 0.0 # wheel angle
@@ -90,24 +93,28 @@ class Car:
rjd = revoluteJointDef( rjd = revoluteJointDef(
bodyA=self.hull, bodyA=self.hull,
bodyB=w, bodyB=w,
localAnchorA=(wx*SIZE,wy*SIZE), localAnchorA=(wx*SIZE, wy*SIZE),
localAnchorB=(0,0), localAnchorB=(0,0),
enableMotor=True, enableMotor=True,
enableLimit=True, enableLimit=True,
maxMotorTorque=180*900*SIZE*SIZE, maxMotorTorque=180*900*SIZE*SIZE,
motorSpeed = 0, motorSpeed=0,
lowerAngle = -0.4, lowerAngle=-0.4,
upperAngle = +0.4, upperAngle=+0.4,
) )
w.joint = self.world.CreateJoint(rjd) w.joint = self.world.CreateJoint(rjd)
w.tiles = set() w.tiles = set()
w.userData = w w.userData = w
self.wheels.append(w) self.wheels.append(w)
self.drawlist = self.wheels + [self.hull] self.drawlist = self.wheels + [self.hull]
self.particles = [] self.particles = []
     def gas(self, gas):
-        'control: rear wheel drive'
+        """control: rear wheel drive
+
+        Args:
+            gas (float): How much gas gets applied. Gets clipped between 0 and 1.
+        """
         gas = np.clip(gas, 0, 1)
         for w in self.wheels[2:4]:
             diff = gas - w.gas
@@ -115,12 +122,18 @@ class Car:
             w.gas += diff
     def brake(self, b):
-        'control: brake b=0..1, more than 0.9 blocks wheels to zero rotation'
+        """control: brake
+
+        Args:
+            b (0..1): Degree to which the brakes are applied. More than 0.9 blocks the wheels to zero rotation"""
         for w in self.wheels:
             w.brake = b
     def steer(self, s):
-        'control: steer s=-1..1, it takes time to rotate steering wheel from side to side, s is target position'
+        """control: steer
+
+        Args:
+            s (-1..1): target position, it takes time to rotate steering wheel from side-to-side"""
         self.wheels[0].steer = s
         self.wheels[1].steer = s
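A small usage sketch for these control methods, assuming Box2D is installed; the standalone b2World below stands in for the world that CarRacing normally creates:

import Box2D
from gym.envs.box2d.car_dynamics import Car

world = Box2D.b2World((0, 0))                      # zero-gravity, top-down world
car = Car(world, init_angle=0.0, init_x=0.0, init_y=0.0)
car.gas(0.6)      # clipped to [0, 1]; only the two rear wheels are driven
car.brake(0.0)    # values above 0.9 lock the wheels
car.steer(-0.3)   # target steering position in [-1, 1]
car.step(1.0/50)  # advance the wheel/friction simulation by one 50 FPS frame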
@@ -148,7 +161,9 @@ class Car:
# WHEEL_MOMENT_OF_INERTIA*np.square(w.omega)/2 = E -- energy # WHEEL_MOMENT_OF_INERTIA*np.square(w.omega)/2 = E -- energy
# WHEEL_MOMENT_OF_INERTIA*w.omega * domega/dt = dE/dt = W -- power # WHEEL_MOMENT_OF_INERTIA*w.omega * domega/dt = dE/dt = W -- power
# domega = dt*W/WHEEL_MOMENT_OF_INERTIA/w.omega # domega = dt*W/WHEEL_MOMENT_OF_INERTIA/w.omega
w.omega += dt*ENGINE_POWER*w.gas/WHEEL_MOMENT_OF_INERTIA/(abs(w.omega)+5.0) # small coef not to divide by zero
# add small coef not to divide by zero
w.omega += dt*ENGINE_POWER*w.gas/WHEEL_MOMENT_OF_INERTIA/(abs(w.omega)+5.0)
self.fuel_spent += dt*ENGINE_POWER*w.gas self.fuel_spent += dt*ENGINE_POWER*w.gas
if w.brake >= 0.9: if w.brake >= 0.9:
@@ -167,13 +182,15 @@ class Car:
# Physically correct is to always apply friction_limit until speed is equal. # Physically correct is to always apply friction_limit until speed is equal.
# But dt is finite, that will lead to oscillations if difference is already near zero. # But dt is finite, that will lead to oscillations if difference is already near zero.
f_force *= 205000*SIZE*SIZE # Random coefficient to cut oscillations in few steps (have no effect on friction_limit)
# Random coefficient to cut oscillations in few steps (have no effect on friction_limit)
f_force *= 205000*SIZE*SIZE
p_force *= 205000*SIZE*SIZE p_force *= 205000*SIZE*SIZE
force = np.sqrt(np.square(f_force) + np.square(p_force)) force = np.sqrt(np.square(f_force) + np.square(p_force))
# Skid trace # Skid trace
if abs(force) > 2.0*friction_limit: if abs(force) > 2.0*friction_limit:
if w.skid_particle and w.skid_particle.grass==grass and len(w.skid_particle.poly) < 30: if w.skid_particle and w.skid_particle.grass == grass and len(w.skid_particle.poly) < 30:
w.skid_particle.poly.append( (w.position[0], w.position[1]) ) w.skid_particle.poly.append( (w.position[0], w.position[1]) )
elif w.skid_start is None: elif w.skid_start is None:
w.skid_start = w.position w.skid_start = w.position
@@ -213,9 +230,9 @@ class Car:
s2 = math.sin(a2) s2 = math.sin(a2)
c1 = math.cos(a1) c1 = math.cos(a1)
c2 = math.cos(a2) c2 = math.cos(a2)
if s1>0 and s2>0: continue if s1 > 0 and s2 > 0: continue
if s1>0: c1 = np.sign(c1) if s1 > 0: c1 = np.sign(c1)
if s2>0: c2 = np.sign(c2) if s2 > 0: c2 = np.sign(c2)
white_poly = [ white_poly = [
(-WHEEL_W*SIZE, +WHEEL_R*c1*SIZE), (+WHEEL_W*SIZE, +WHEEL_R*c1*SIZE), (-WHEEL_W*SIZE, +WHEEL_R*c1*SIZE), (+WHEEL_W*SIZE, +WHEEL_R*c1*SIZE),
(+WHEEL_W*SIZE, +WHEEL_R*c2*SIZE), (-WHEEL_W*SIZE, +WHEEL_R*c2*SIZE) (+WHEEL_W*SIZE, +WHEEL_R*c2*SIZE), (-WHEEL_W*SIZE, +WHEEL_R*c2*SIZE)
@@ -228,7 +245,7 @@ class Car:
p = Particle() p = Particle()
p.color = WHEEL_COLOR if not grass else MUD_COLOR p.color = WHEEL_COLOR if not grass else MUD_COLOR
p.ttl = 1 p.ttl = 1
p.poly = [(point1[0],point1[1]), (point2[0],point2[1])] p.poly = [(point1[0], point1[1]), (point2[0], point2[1])]
p.grass = grass p.grass = grass
self.particles.append(p) self.particles.append(p)
while len(self.particles) > 30: while len(self.particles) > 30:

View File

@@ -1,3 +1,33 @@
"""
Easiest continuous control task to learn from pixels, a top-down racing environment.
Discrete control is reasonable in this environment as well, on/off discretization is
fine.
State consists of STATE_W x STATE_H pixels.
The reward is -0.1 every frame and +1000/N for every track tile visited, where N is
the total number of tiles visited in the track. For example, if you have finished in 732 frames,
your reward is 1000 - 0.1*732 = 926.8 points.
The game is solved when the agent consistently gets 900+ points. The generated track is random every episode.
The episode finishes when all the tiles are visited. The car also can go outside of the PLAYFIELD - that
is far off the track, then it will get -100 and die.
Some indicators are shown at the bottom of the window along with the state RGB buffer. From
left to right: the true speed, four ABS sensors, the steering wheel position and gyroscope.
To play yourself (it's rather fast for humans), type:
python gym/envs/box2d/car_racing.py
Remember it's a powerful rear-wheel drive car - don't press the accelerator and turn at the
same time.
Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
"""
import sys, math import sys, math
import numpy as np import numpy as np
@@ -12,33 +42,6 @@ from gym.utils import colorize, seeding, EzPickle
import pyglet import pyglet
from pyglet import gl from pyglet import gl
# Easiest continuous control task to learn from pixels, a top-down racing environment.
# Discrete control is reasonable in this environment as well, on/off discretization is
# fine.
#
# State consists of STATE_W x STATE_H pixels.
#
# Reward is -0.1 every frame and +1000/N for every track tile visited, where N is
# the total number of tiles visited in the track. For example, if you have finished in 732 frames,
# your reward is 1000 - 0.1*732 = 926.8 points.
#
# Game is solved when agent consistently gets 900+ points. Track generated is random every episode.
#
# Episode finishes when all tiles are visited. Car also can go outside of PLAYFIELD, that
# is far off the track, then it will get -100 and die.
#
# Some indicators shown at the bottom of the window and the state RGB buffer. From
# left to right: true speed, four ABS sensors, steering wheel position and gyroscope.
#
# To play yourself (it's rather fast for humans), type:
#
# python gym/envs/box2d/car_racing.py
#
# Remember it's powerful rear-wheel drive car, don't press accelerator and turn at the
# same time.
#
# Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
STATE_W = 96 # less than Atari 160x192 STATE_W = 96 # less than Atari 160x192
STATE_H = 96 STATE_H = 96
VIDEO_W = 600 VIDEO_W = 600
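To make the reward description above concrete, the worked numbers from the docstring:

frames = 732                       # frames used while visiting all N track tiles
tile_bonus = 1000.0                # +1000/N per tile sums to +1000 once every tile is visited
print(tile_bonus - 0.1 * frames)   # 926.8, matching the docstring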
@@ -46,12 +49,12 @@ VIDEO_H = 400
WINDOW_W = 1000 WINDOW_W = 1000
WINDOW_H = 800 WINDOW_H = 800
SCALE = 6.0 # Track scale SCALE = 6.0 # Track scale
TRACK_RAD = 900/SCALE # Track is heavily morphed circle with this radius TRACK_RAD = 900/SCALE # Track is heavily morphed circle with this radius
PLAYFIELD = 2000/SCALE # Game over boundary PLAYFIELD = 2000/SCALE # Game over boundary
FPS = 50 # Frames per second FPS = 50 # Frames per second
ZOOM = 2.7 # Camera zoom ZOOM = 2.7 # Camera zoom
ZOOM_FOLLOW = True # Set to False for fixed view (don't use zoom) ZOOM_FOLLOW = True # Set to False for fixed view (don't use zoom)
TRACK_DETAIL_STEP = 21/SCALE TRACK_DETAIL_STEP = 21/SCALE
@@ -62,14 +65,18 @@ BORDER_MIN_COUNT = 4
ROAD_COLOR = [0.4, 0.4, 0.4] ROAD_COLOR = [0.4, 0.4, 0.4]
class FrictionDetector(contactListener): class FrictionDetector(contactListener):
def __init__(self, env): def __init__(self, env):
contactListener.__init__(self) contactListener.__init__(self)
self.env = env self.env = env
def BeginContact(self, contact): def BeginContact(self, contact):
self._contact(contact, True) self._contact(contact, True)
def EndContact(self, contact): def EndContact(self, contact):
self._contact(contact, False) self._contact(contact, False)
def _contact(self, contact, begin): def _contact(self, contact, begin):
tile = None tile = None
obj = None obj = None
@@ -77,10 +84,10 @@ class FrictionDetector(contactListener):
u2 = contact.fixtureB.body.userData u2 = contact.fixtureB.body.userData
if u1 and "road_friction" in u1.__dict__: if u1 and "road_friction" in u1.__dict__:
tile = u1 tile = u1
obj = u2 obj = u2
if u2 and "road_friction" in u2.__dict__: if u2 and "road_friction" in u2.__dict__:
tile = u2 tile = u2
obj = u1 obj = u1
if not tile: if not tile:
return return
@@ -91,14 +98,12 @@ class FrictionDetector(contactListener):
return return
if begin: if begin:
obj.tiles.add(tile) obj.tiles.add(tile)
# print tile.road_friction, "ADD", len(obj.tiles)
if not tile.road_visited: if not tile.road_visited:
tile.road_visited = True tile.road_visited = True
self.env.reward += 1000.0/len(self.env.track) self.env.reward += 1000.0/len(self.env.track)
self.env.tile_visited_count += 1 self.env.tile_visited_count += 1
else: else:
obj.tiles.remove(tile) obj.tiles.remove(tile)
# print tile.road_friction, "DEL", len(obj.tiles) -- should delete to zero when on grass (this works)
class CarRacing(gym.Env, EzPickle): class CarRacing(gym.Env, EzPickle):
metadata = { metadata = {
@@ -120,10 +125,12 @@ class CarRacing(gym.Env, EzPickle):
self.prev_reward = 0.0 self.prev_reward = 0.0
self.verbose = verbose self.verbose = verbose
self.fd_tile = fixtureDef( self.fd_tile = fixtureDef(
shape = polygonShape(vertices= shape=polygonShape(vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]))
[(0, 0),(1, 0),(1, -1),(0, -1)]))
self.action_space = spaces.Box(np.array([-1, 0, 0]),
np.array([+1, +1, +1]),
dtype=np.float32) # steer, gas, brake
self.action_space = spaces.Box( np.array([-1,0,0]), np.array([+1,+1,+1]), dtype=np.float32) # steer, gas, brake
self.observation_space = spaces.Box(low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8) self.observation_space = spaces.Box(low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8)
def seed(self, seed=None): def seed(self, seed=None):
@@ -146,19 +153,14 @@ class CarRacing(gym.Env, EzPickle):
for c in range(CHECKPOINTS): for c in range(CHECKPOINTS):
alpha = 2*math.pi*c/CHECKPOINTS + self.np_random.uniform(0, 2*math.pi*1/CHECKPOINTS) alpha = 2*math.pi*c/CHECKPOINTS + self.np_random.uniform(0, 2*math.pi*1/CHECKPOINTS)
rad = self.np_random.uniform(TRACK_RAD/3, TRACK_RAD) rad = self.np_random.uniform(TRACK_RAD/3, TRACK_RAD)
if c==0: if c == 0:
alpha = 0 alpha = 0
rad = 1.5*TRACK_RAD rad = 1.5*TRACK_RAD
if c==CHECKPOINTS-1: if c == CHECKPOINTS-1:
alpha = 2*math.pi*c/CHECKPOINTS alpha = 2*math.pi*c/CHECKPOINTS
self.start_alpha = 2*math.pi*(-0.5)/CHECKPOINTS self.start_alpha = 2*math.pi*(-0.5)/CHECKPOINTS
rad = 1.5*TRACK_RAD rad = 1.5*TRACK_RAD
checkpoints.append( (alpha, rad*math.cos(alpha), rad*math.sin(alpha)) ) checkpoints.append((alpha, rad*math.cos(alpha), rad*math.sin(alpha)))
# print "\n".join(str(h) for h in checkpoints)
# self.road_poly = [ ( # uncomment this to see checkpoints
# [ (tx,ty) for a,tx,ty in checkpoints ],
# (0.7,0.7,0.9) ) ]
self.road = [] self.road = []
# Go from one checkpoint to another to create track # Go from one checkpoint to another to create track
@@ -197,43 +199,42 @@ class CarRacing(gym.Env, EzPickle):
dest_dx = dest_x - x # vector towards destination dest_dx = dest_x - x # vector towards destination
dest_dy = dest_y - y dest_dy = dest_y - y
proj = r1x*dest_dx + r1y*dest_dy # destination vector projected on rad proj = r1x*dest_dx + r1y*dest_dy # destination vector projected on rad
while beta - alpha > 1.5*math.pi: while beta - alpha > 1.5*math.pi:
beta -= 2*math.pi beta -= 2*math.pi
while beta - alpha < -1.5*math.pi: while beta - alpha < -1.5*math.pi:
beta += 2*math.pi beta += 2*math.pi
prev_beta = beta prev_beta = beta
proj *= SCALE proj *= SCALE
if proj > 0.3: if proj > 0.3:
beta -= min(TRACK_TURN_RATE, abs(0.001*proj)) beta -= min(TRACK_TURN_RATE, abs(0.001*proj))
if proj < -0.3: if proj < -0.3:
beta += min(TRACK_TURN_RATE, abs(0.001*proj)) beta += min(TRACK_TURN_RATE, abs(0.001*proj))
x += p1x*TRACK_DETAIL_STEP x += p1x*TRACK_DETAIL_STEP
y += p1y*TRACK_DETAIL_STEP y += p1y*TRACK_DETAIL_STEP
track.append( (alpha,prev_beta*0.5 + beta*0.5,x,y) ) track.append((alpha,prev_beta*0.5 + beta*0.5,x,y))
if laps > 4: if laps > 4:
break break
no_freeze -= 1 no_freeze -= 1
if no_freeze==0: if no_freeze == 0:
break break
# print "\n".join([str(t) for t in enumerate(track)])
# Find closed loop range i1..i2, first loop should be ignored, second is OK # Find closed loop range i1..i2, first loop should be ignored, second is OK
i1, i2 = -1, -1 i1, i2 = -1, -1
i = len(track) i = len(track)
while True: while True:
i -= 1 i -= 1
if i==0: if i == 0:
return False # Failed return False # Failed
pass_through_start = track[i][0] > self.start_alpha and track[i-1][0] <= self.start_alpha pass_through_start = track[i][0] > self.start_alpha and track[i-1][0] <= self.start_alpha
if pass_through_start and i2==-1: if pass_through_start and i2 == -1:
i2 = i i2 = i
elif pass_through_start and i1==-1: elif pass_through_start and i1 == -1:
i1 = i i1 = i
break break
if self.verbose == 1: if self.verbose == 1:
print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2-i1)) print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2-i1))
assert i1!=-1 assert i1 != -1
assert i2!=-1 assert i2 != -1
track = track[i1:i2-1] track = track[i1:i2-1]
@@ -242,8 +243,8 @@ class CarRacing(gym.Env, EzPickle):
first_perp_y = math.sin(first_beta) first_perp_y = math.sin(first_beta)
# Length of perpendicular jump to put together head and tail # Length of perpendicular jump to put together head and tail
well_glued_together = np.sqrt( well_glued_together = np.sqrt(
np.square( first_perp_x*(track[0][2] - track[-1][2]) ) + np.square(first_perp_x*(track[0][2] - track[-1][2])) +
np.square( first_perp_y*(track[0][3] - track[-1][3]) )) np.square(first_perp_y*(track[0][3] - track[-1][3])))
if well_glued_together > TRACK_DETAIL_STEP: if well_glued_together > TRACK_DETAIL_STEP:
return False return False
@@ -284,11 +285,13 @@ class CarRacing(gym.Env, EzPickle):
self.road.append(t) self.road.append(t)
if border[i]: if border[i]:
side = np.sign(beta2 - beta1) side = np.sign(beta2 - beta1)
b1_l = (x1 + side* TRACK_WIDTH *math.cos(beta1), y1 + side* TRACK_WIDTH *math.sin(beta1)) b1_l = (x1 + side * TRACK_WIDTH * math.cos(beta1), y1 + side * TRACK_WIDTH * math.sin(beta1))
b1_r = (x1 + side*(TRACK_WIDTH+BORDER)*math.cos(beta1), y1 + side*(TRACK_WIDTH+BORDER)*math.sin(beta1)) b1_r = (x1 + side * (TRACK_WIDTH+BORDER) * math.cos(beta1),
b2_l = (x2 + side* TRACK_WIDTH *math.cos(beta2), y2 + side* TRACK_WIDTH *math.sin(beta2)) y1 + side * (TRACK_WIDTH+BORDER)*math.sin(beta1))
b2_r = (x2 + side*(TRACK_WIDTH+BORDER)*math.cos(beta2), y2 + side*(TRACK_WIDTH+BORDER)*math.sin(beta2)) b2_l = (x2 + side * TRACK_WIDTH * math.cos(beta2), y2 + side * TRACK_WIDTH * math.sin(beta2))
self.road_poly.append(( [b1_l, b1_r, b2_r, b2_l], (1,1,1) if i%2==0 else (1,0,0) )) b2_r = (x2 + side * (TRACK_WIDTH+BORDER) * math.cos(beta2),
y2 + side * (TRACK_WIDTH+BORDER) * math.sin(beta2))
self.road_poly.append(([b1_l, b1_r, b2_r, b2_l], (1, 1, 1) if i % 2 == 0 else (1, 0, 0)))
self.track = track self.track = track
return True return True
@@ -305,7 +308,7 @@ class CarRacing(gym.Env, EzPickle):
if success: if success:
break break
if self.verbose == 1: if self.verbose == 1:
print("retry to generate track (normal if there are not many of this messages)") print("retry to generate track (normal if there are not many instances of this message)")
self.car = Car(self.world, *self.track[0][1:4]) self.car = Car(self.world, *self.track[0][1:4])
return self.step(None)[0] return self.step(None)[0]
@@ -331,7 +334,7 @@ class CarRacing(gym.Env, EzPickle):
self.car.fuel_spent = 0.0 self.car.fuel_spent = 0.0
step_reward = self.reward - self.prev_reward step_reward = self.reward - self.prev_reward
self.prev_reward = self.reward self.prev_reward = self.reward
if self.tile_visited_count==len(self.track): if self.tile_visited_count == len(self.track):
done = True done = True
x, y = self.car.hull.position x, y = self.car.hull.position
if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD: if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
@@ -353,8 +356,6 @@ class CarRacing(gym.Env, EzPickle):
if "t" not in self.__dict__: return # reset() not called yet if "t" not in self.__dict__: return # reset() not called yet
zoom = 0.1*SCALE*max(1-self.t, 0) + ZOOM*SCALE*min(self.t, 1) # Animate zoom first second zoom = 0.1*SCALE*max(1-self.t, 0) + ZOOM*SCALE*min(self.t, 1) # Animate zoom first second
zoom_state = ZOOM*SCALE*STATE_W/WINDOW_W
zoom_video = ZOOM*SCALE*VIDEO_W/WINDOW_W
scroll_x = self.car.hull.position[0] scroll_x = self.car.hull.position[0]
scroll_y = self.car.hull.position[1] scroll_y = self.car.hull.position[1]
angle = -self.car.hull.angle angle = -self.car.hull.angle
@@ -364,10 +365,10 @@ class CarRacing(gym.Env, EzPickle):
self.transform.set_scale(zoom, zoom) self.transform.set_scale(zoom, zoom)
self.transform.set_translation( self.transform.set_translation(
WINDOW_W/2 - (scroll_x*zoom*math.cos(angle) - scroll_y*zoom*math.sin(angle)), WINDOW_W/2 - (scroll_x*zoom*math.cos(angle) - scroll_y*zoom*math.sin(angle)),
WINDOW_H/4 - (scroll_x*zoom*math.sin(angle) + scroll_y*zoom*math.cos(angle)) ) WINDOW_H/4 - (scroll_x*zoom*math.sin(angle) + scroll_y*zoom*math.cos(angle)))
self.transform.set_rotation(angle) self.transform.set_rotation(angle)
self.car.draw(self.viewer, mode!="state_pixels") self.car.draw(self.viewer, mode != "state_pixels")
arr = None arr = None
win = self.viewer.window win = self.viewer.window
@@ -376,7 +377,7 @@ class CarRacing(gym.Env, EzPickle):
win.clear() win.clear()
t = self.transform t = self.transform
if mode=='rgb_array': if mode == 'rgb_array':
VP_W = VIDEO_W VP_W = VIDEO_W
VP_H = VIDEO_H VP_H = VIDEO_H
elif mode == 'state_pixels': elif mode == 'state_pixels':
@@ -439,17 +440,19 @@ class CarRacing(gym.Env, EzPickle):
gl.glBegin(gl.GL_QUADS) gl.glBegin(gl.GL_QUADS)
s = W/40.0 s = W/40.0
h = H/40.0 h = H/40.0
gl.glColor4f(0,0,0,1) gl.glColor4f(0, 0, 0, 1)
gl.glVertex3f(W, 0, 0) gl.glVertex3f(W, 0, 0)
gl.glVertex3f(W, 5*h, 0) gl.glVertex3f(W, 5*h, 0)
gl.glVertex3f(0, 5*h, 0) gl.glVertex3f(0, 5*h, 0)
gl.glVertex3f(0, 0, 0) gl.glVertex3f(0, 0, 0)
def vertical_ind(place, val, color): def vertical_ind(place, val, color):
gl.glColor4f(color[0], color[1], color[2], 1) gl.glColor4f(color[0], color[1], color[2], 1)
gl.glVertex3f((place+0)*s, h + h*val, 0) gl.glVertex3f((place+0)*s, h + h*val, 0)
gl.glVertex3f((place+1)*s, h + h*val, 0) gl.glVertex3f((place+1)*s, h + h*val, 0)
gl.glVertex3f((place+1)*s, h, 0) gl.glVertex3f((place+1)*s, h, 0)
gl.glVertex3f((place+0)*s, h, 0) gl.glVertex3f((place+0)*s, h, 0)
def horiz_ind(place, val, color): def horiz_ind(place, val, color):
gl.glColor4f(color[0], color[1], color[2], 1) gl.glColor4f(color[0], color[1], color[2], 1)
gl.glVertex3f((place+0)*s, 4*h , 0) gl.glVertex3f((place+0)*s, 4*h , 0)
@@ -457,13 +460,13 @@ class CarRacing(gym.Env, EzPickle):
gl.glVertex3f((place+val)*s, 2*h, 0) gl.glVertex3f((place+val)*s, 2*h, 0)
gl.glVertex3f((place+0)*s, 2*h, 0) gl.glVertex3f((place+0)*s, 2*h, 0)
true_speed = np.sqrt(np.square(self.car.hull.linearVelocity[0]) + np.square(self.car.hull.linearVelocity[1])) true_speed = np.sqrt(np.square(self.car.hull.linearVelocity[0]) + np.square(self.car.hull.linearVelocity[1]))
vertical_ind(5, 0.02*true_speed, (1,1,1)) vertical_ind(5, 0.02*true_speed, (1, 1, 1))
vertical_ind(7, 0.01*self.car.wheels[0].omega, (0.0,0,1)) # ABS sensors vertical_ind(7, 0.01*self.car.wheels[0].omega, (0.0, 0, 1)) # ABS sensors
vertical_ind(8, 0.01*self.car.wheels[1].omega, (0.0,0,1)) vertical_ind(8, 0.01*self.car.wheels[1].omega, (0.0, 0, 1))
vertical_ind(9, 0.01*self.car.wheels[2].omega, (0.2,0,1)) vertical_ind(9, 0.01*self.car.wheels[2].omega, (0.2, 0, 1))
vertical_ind(10,0.01*self.car.wheels[3].omega, (0.2,0,1)) vertical_ind(10,0.01*self.car.wheels[3].omega, (0.2, 0, 1))
horiz_ind(20, -10.0*self.car.wheels[0].joint.angle, (0,1,0)) horiz_ind(20, -10.0*self.car.wheels[0].joint.angle, (0, 1, 0))
horiz_ind(30, -0.8*self.car.hull.angularVelocity, (1,0,0)) horiz_ind(30, -0.8*self.car.hull.angularVelocity, (1, 0, 0))
gl.glEnd() gl.glEnd()
self.score_label.text = "%04i" % self.reward self.score_label.text = "%04i" % self.reward
self.score_label.draw() self.score_label.draw()
@@ -471,19 +474,21 @@ class CarRacing(gym.Env, EzPickle):
if __name__=="__main__": if __name__=="__main__":
from pyglet.window import key from pyglet.window import key
a = np.array( [0.0, 0.0, 0.0] ) a = np.array([0.0, 0.0, 0.0])
def key_press(k, mod): def key_press(k, mod):
global restart global restart
if k==0xff0d: restart = True if k == 0xff0d: restart = True
if k==key.LEFT: a[0] = -1.0 if k == key.LEFT: a[0] = -1.0
if k==key.RIGHT: a[0] = +1.0 if k == key.RIGHT: a[0] = +1.0
if k==key.UP: a[1] = +1.0 if k == key.UP: a[1] = +1.0
if k==key.DOWN: a[2] = +0.8 # set 1.0 for wheels to block to zero rotation if k == key.DOWN: a[2] = +0.8 # set 1.0 for wheels to block to zero rotation
def key_release(k, mod): def key_release(k, mod):
if k==key.LEFT and a[0]==-1.0: a[0] = 0 if k == key.LEFT and a[0] == -1.0: a[0] = 0
if k==key.RIGHT and a[0]==+1.0: a[0] = 0 if k == key.RIGHT and a[0] == +1.0: a[0] = 0
if k==key.UP: a[1] = 0 if k == key.UP: a[1] = 0
if k==key.DOWN: a[2] = 0 if k == key.DOWN: a[2] = 0
env = CarRacing() env = CarRacing()
env.render() env.render()
env.viewer.window.on_key_press = key_press env.viewer.window.on_key_press = key_press
@@ -504,9 +509,6 @@ if __name__=="__main__":
if steps % 200 == 0 or done: if steps % 200 == 0 or done:
print("\naction " + str(["{:+0.2f}".format(x) for x in a])) print("\naction " + str(["{:+0.2f}".format(x) for x in a]))
print("step {} total_reward {:+0.2f}".format(steps, total_reward)) print("step {} total_reward {:+0.2f}".format(steps, total_reward))
#import matplotlib.pyplot as plt
#plt.imshow(s)
#plt.savefig("test.jpeg")
steps += 1 steps += 1
isopen = env.render() isopen = env.render()
if done or restart or isopen == False: if done or restart or isopen == False:
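For completeness, a hedged rollout sketch for this environment; it assumes Box2D and pyglet are installed and that a display is available, since the state buffer is rendered:

import numpy as np
from gym.envs.box2d.car_racing import CarRacing

env = CarRacing()
obs = env.reset()                       # 96x96x3 uint8 state, as documented above
action = np.array([0.0, 0.2, 0.0])      # [steer, gas, brake], matching the Box action space
for _ in range(10):
    obs, reward, done, info = env.step(action)
print(obs.shape, reward)
env.close()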

View File

@@ -1,3 +1,31 @@
"""
Rocket trajectory optimization is a classic topic in Optimal Control.
According to Pontryagin's maximum principle it's optimal to fire engine full throttle or
turn it off. That's the reason this environment is OK to have discreet actions (engine on or off).
The landing pad is always at coordinates (0,0). The coordinates are the first two numbers in the state vector.
Reward for moving from the top of the screen to the landing pad and zero speed is about 100..140 points.
If the lander moves away from the landing pad it loses reward. The episode finishes if the lander crashes or
comes to rest, receiving an additional -100 or +100 points. Each leg with ground contact is +10 points.
Firing the main engine is -0.3 points each frame. Firing the side engine is -0.03 points each frame.
Solved is 200 points.
Landing outside the landing pad is possible. Fuel is infinite, so an agent can learn to fly and then land
on its first attempt. Please see the source code for details.
To see a heuristic landing, run:
python gym/envs/box2d/lunar_lander.py
To play yourself, run:
python examples/agents/keyboard_agent.py LunarLander-v2
Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
"""
import sys, math import sys, math
import numpy as np import numpy as np
@@ -8,41 +36,17 @@ import gym
from gym import spaces from gym import spaces
from gym.utils import seeding, EzPickle from gym.utils import seeding, EzPickle
# Rocket trajectory optimization is a classic topic in Optimal Control. FPS = 50
# SCALE = 30.0 # affects how fast-paced the game is, forces should be adjusted as well
# According to Pontryagin's maximum principle it's optimal to fire engine full throttle or
# turn it off. That's the reason this environment is OK to have discreet actions (engine on or off).
#
# Landing pad is always at coordinates (0,0). Coordinates are the first two numbers in state vector.
# Reward for moving from the top of the screen to landing pad and zero speed is about 100..140 points.
# If lander moves away from landing pad it loses reward back. Episode finishes if the lander crashes or
# comes to rest, receiving additional -100 or +100 points. Each leg ground contact is +10. Firing main
# engine is -0.3 points each frame. Firing side engine is -0.03 points each frame. Solved is 200 points.
#
# Landing outside landing pad is possible. Fuel is infinite, so an agent can learn to fly and then land
# on its first attempt. Please see source code for details.
#
# To see heuristic landing, run:
#
# python gym/envs/box2d/lunar_lander.py
#
# To play yourself, run:
#
# python examples/agents/keyboard_agent.py LunarLander-v2
#
# Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
FPS = 50 MAIN_ENGINE_POWER = 13.0
SCALE = 30.0 # affects how fast-paced the game is, forces should be adjusted as well SIDE_ENGINE_POWER = 0.6
MAIN_ENGINE_POWER = 13.0
SIDE_ENGINE_POWER = 0.6
INITIAL_RANDOM = 1000.0 # Set 1500 to make game harder INITIAL_RANDOM = 1000.0 # Set 1500 to make game harder
LANDER_POLY =[ LANDER_POLY =[
(-14,+17), (-17,0), (-17,-10), (-14, +17), (-17, 0), (-17 ,-10),
(+17,-10), (+17,0), (+14,+17) (+17, -10), (+17, 0), (+14, +17)
] ]
LEG_AWAY = 20 LEG_AWAY = 20
LEG_DOWN = 18 LEG_DOWN = 18
@@ -50,26 +54,30 @@ LEG_W, LEG_H = 2, 8
LEG_SPRING_TORQUE = 40 LEG_SPRING_TORQUE = 40
SIDE_ENGINE_HEIGHT = 14.0 SIDE_ENGINE_HEIGHT = 14.0
SIDE_ENGINE_AWAY = 12.0 SIDE_ENGINE_AWAY = 12.0
VIEWPORT_W = 600 VIEWPORT_W = 600
VIEWPORT_H = 400 VIEWPORT_H = 400
class ContactDetector(contactListener): class ContactDetector(contactListener):
def __init__(self, env): def __init__(self, env):
contactListener.__init__(self) contactListener.__init__(self)
self.env = env self.env = env
def BeginContact(self, contact): def BeginContact(self, contact):
if self.env.lander==contact.fixtureA.body or self.env.lander==contact.fixtureB.body: if self.env.lander == contact.fixtureA.body or self.env.lander == contact.fixtureB.body:
self.env.game_over = True self.env.game_over = True
for i in range(2): for i in range(2):
if self.env.legs[i] in [contact.fixtureA.body, contact.fixtureB.body]: if self.env.legs[i] in [contact.fixtureA.body, contact.fixtureB.body]:
self.env.legs[i].ground_contact = True self.env.legs[i].ground_contact = True
def EndContact(self, contact): def EndContact(self, contact):
for i in range(2): for i in range(2):
if self.env.legs[i] in [contact.fixtureA.body, contact.fixtureB.body]: if self.env.legs[i] in [contact.fixtureA.body, contact.fixtureB.body]:
self.env.legs[i].ground_contact = False self.env.legs[i].ground_contact = False
class LunarLander(gym.Env, EzPickle): class LunarLander(gym.Env, EzPickle):
metadata = { metadata = {
'render.modes': ['human', 'rgb_array'], 'render.modes': ['human', 'rgb_array'],
@@ -131,11 +139,11 @@ class LunarLander(gym.Env, EzPickle):
# terrain # terrain
CHUNKS = 11 CHUNKS = 11
height = self.np_random.uniform(0, H/2, size=(CHUNKS+1,) ) height = self.np_random.uniform(0, H/2, size=(CHUNKS+1,))
chunk_x = [W/(CHUNKS-1)*i for i in range(CHUNKS)] chunk_x = [W/(CHUNKS-1)*i for i in range(CHUNKS)]
self.helipad_x1 = chunk_x[CHUNKS//2-1] self.helipad_x1 = chunk_x[CHUNKS//2-1]
self.helipad_x2 = chunk_x[CHUNKS//2+1] self.helipad_x2 = chunk_x[CHUNKS//2+1]
self.helipad_y = H/4 self.helipad_y = H/4
height[CHUNKS//2-2] = self.helipad_y height[CHUNKS//2-2] = self.helipad_y
height[CHUNKS//2-1] = self.helipad_y height[CHUNKS//2-1] = self.helipad_y
height[CHUNKS//2+0] = self.helipad_y height[CHUNKS//2+0] = self.helipad_y
@@ -143,45 +151,45 @@ class LunarLander(gym.Env, EzPickle):
height[CHUNKS//2+2] = self.helipad_y height[CHUNKS//2+2] = self.helipad_y
smooth_y = [0.33*(height[i-1] + height[i+0] + height[i+1]) for i in range(CHUNKS)] smooth_y = [0.33*(height[i-1] + height[i+0] + height[i+1]) for i in range(CHUNKS)]
self.moon = self.world.CreateStaticBody( shapes=edgeShape(vertices=[(0, 0), (W, 0)]) ) self.moon = self.world.CreateStaticBody(shapes=edgeShape(vertices=[(0, 0), (W, 0)]))
self.sky_polys = [] self.sky_polys = []
for i in range(CHUNKS-1): for i in range(CHUNKS-1):
p1 = (chunk_x[i], smooth_y[i]) p1 = (chunk_x[i], smooth_y[i])
p2 = (chunk_x[i+1], smooth_y[i+1]) p2 = (chunk_x[i+1], smooth_y[i+1])
self.moon.CreateEdgeFixture( self.moon.CreateEdgeFixture(
vertices=[p1,p2], vertices=[p1,p2],
density=0, density=0,
friction=0.1) friction=0.1)
self.sky_polys.append( [p1, p2, (p2[0],H), (p1[0],H)] ) self.sky_polys.append([p1, p2, (p2[0], H), (p1[0], H)])
self.moon.color1 = (0.0,0.0,0.0) self.moon.color1 = (0.0, 0.0, 0.0)
self.moon.color2 = (0.0,0.0,0.0) self.moon.color2 = (0.0, 0.0, 0.0)
initial_y = VIEWPORT_H/SCALE initial_y = VIEWPORT_H/SCALE
self.lander = self.world.CreateDynamicBody( self.lander = self.world.CreateDynamicBody(
position = (VIEWPORT_W/SCALE/2, initial_y), position=(VIEWPORT_W/SCALE/2, initial_y),
angle=0.0, angle=0.0,
fixtures = fixtureDef( fixtures = fixtureDef(
shape=polygonShape(vertices=[ (x/SCALE,y/SCALE) for x,y in LANDER_POLY ]), shape=polygonShape(vertices=[(x/SCALE, y/SCALE) for x, y in LANDER_POLY]),
density=5.0, density=5.0,
friction=0.1, friction=0.1,
categoryBits=0x0010, categoryBits=0x0010,
maskBits=0x001, # collide only with ground maskBits=0x001, # collide only with ground
restitution=0.0) # 0.99 bouncy restitution=0.0) # 0.99 bouncy
) )
self.lander.color1 = (0.5,0.4,0.9) self.lander.color1 = (0.5, 0.4, 0.9)
self.lander.color2 = (0.3,0.3,0.5) self.lander.color2 = (0.3, 0.3, 0.5)
self.lander.ApplyForceToCenter( ( self.lander.ApplyForceToCenter( (
self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM),
self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM) self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM)
), True) ), True)
self.legs = [] self.legs = []
for i in [-1,+1]: for i in [-1, +1]:
leg = self.world.CreateDynamicBody( leg = self.world.CreateDynamicBody(
position = (VIEWPORT_W/SCALE/2 - i*LEG_AWAY/SCALE, initial_y), position=(VIEWPORT_W/SCALE/2 - i*LEG_AWAY/SCALE, initial_y),
angle = (i*0.05), angle=(i * 0.05),
fixtures = fixtureDef( fixtures=fixtureDef(
shape=polygonShape(box=(LEG_W/SCALE, LEG_H/SCALE)), shape=polygonShape(box=(LEG_W/SCALE, LEG_H/SCALE)),
density=1.0, density=1.0,
restitution=0.0, restitution=0.0,
@@ -189,20 +197,20 @@ class LunarLander(gym.Env, EzPickle):
maskBits=0x001) maskBits=0x001)
) )
leg.ground_contact = False leg.ground_contact = False
leg.color1 = (0.5,0.4,0.9) leg.color1 = (0.5, 0.4, 0.9)
leg.color2 = (0.3,0.3,0.5) leg.color2 = (0.3, 0.3, 0.5)
rjd = revoluteJointDef( rjd = revoluteJointDef(
bodyA=self.lander, bodyA=self.lander,
bodyB=leg, bodyB=leg,
localAnchorA=(0, 0), localAnchorA=(0, 0),
localAnchorB=(i*LEG_AWAY/SCALE, LEG_DOWN/SCALE), localAnchorB=(i * LEG_AWAY/SCALE, LEG_DOWN/SCALE),
enableMotor=True, enableMotor=True,
enableLimit=True, enableLimit=True,
maxMotorTorque=LEG_SPRING_TORQUE, maxMotorTorque=LEG_SPRING_TORQUE,
motorSpeed=+0.3*i # low enough not to jump back into the sky motorSpeed=+0.3 * i # low enough not to jump back into the sky
) )
if i==-1: if i == -1:
rjd.lowerAngle = +0.9 - 0.5 # Yes, the most esoteric numbers here, angles legs have freedom to travel within rjd.lowerAngle = +0.9 - 0.5 # The most esoteric numbers here, angled legs have freedom to travel within
rjd.upperAngle = +0.9 rjd.upperAngle = +0.9
else: else:
rjd.lowerAngle = -0.9 rjd.lowerAngle = -0.9
@@ -212,14 +220,14 @@ class LunarLander(gym.Env, EzPickle):
self.drawlist = [self.lander] + self.legs self.drawlist = [self.lander] + self.legs
return self.step(np.array([0,0]) if self.continuous else 0)[0] return self.step(np.array([0, 0]) if self.continuous else 0)[0]
def _create_particle(self, mass, x, y, ttl): def _create_particle(self, mass, x, y, ttl):
p = self.world.CreateDynamicBody( p = self.world.CreateDynamicBody(
position = (x,y), position = (x, y),
angle=0.0, angle=0.0,
fixtures = fixtureDef( fixtures = fixtureDef(
shape=circleShape(radius=2/SCALE, pos=(0,0)), shape=circleShape(radius=2/SCALE, pos=(0, 0)),
density=mass, density=mass,
friction=0.1, friction=0.1,
categoryBits=0x0100, categoryBits=0x0100,
@@ -232,7 +240,7 @@ class LunarLander(gym.Env, EzPickle):
return p return p
def _clean_particles(self, all): def _clean_particles(self, all):
while self.particles and (all or self.particles[0].ttl<0): while self.particles and (all or self.particles[0].ttl < 0):
self.world.DestroyBody(self.particles.pop(0)) self.world.DestroyBody(self.particles.pop(0))
def step(self, action): def step(self, action):
@@ -243,40 +251,53 @@ class LunarLander(gym.Env, EzPickle):
# Engines # Engines
tip = (math.sin(self.lander.angle), math.cos(self.lander.angle)) tip = (math.sin(self.lander.angle), math.cos(self.lander.angle))
side = (-tip[1], tip[0]); side = (-tip[1], tip[0])
dispersion = [self.np_random.uniform(-1.0, +1.0) / SCALE for _ in range(2)] dispersion = [self.np_random.uniform(-1.0, +1.0) / SCALE for _ in range(2)]
m_power = 0.0 m_power = 0.0
if (self.continuous and action[0] > 0.0) or (not self.continuous and action==2): if (self.continuous and action[0] > 0.0) or (not self.continuous and action == 2):
# Main engine # Main engine
if self.continuous: if self.continuous:
m_power = (np.clip(action[0], 0.0,1.0) + 1.0)*0.5 # 0.5..1.0 m_power = (np.clip(action[0], 0.0,1.0) + 1.0)*0.5 # 0.5..1.0
assert m_power>=0.5 and m_power <= 1.0 assert m_power >= 0.5 and m_power <= 1.0
else: else:
m_power = 1.0 m_power = 1.0
ox = tip[0]*(4/SCALE + 2*dispersion[0]) + side[0]*dispersion[1] # 4 is move a bit downwards, +-2 for randomness ox = (tip[0] * (4/SCALE + 2 * dispersion[0]) +
oy = -tip[1]*(4/SCALE + 2*dispersion[0]) - side[1]*dispersion[1] side[0] * dispersion[1]) # 4 is move a bit downwards, +-2 for randomness
oy = -tip[1] * (4/SCALE + 2 * dispersion[0]) - side[1] * dispersion[1]
impulse_pos = (self.lander.position[0] + ox, self.lander.position[1] + oy) impulse_pos = (self.lander.position[0] + ox, self.lander.position[1] + oy)
p = self._create_particle(3.5, impulse_pos[0], impulse_pos[1], m_power) # particles are just a decoration, 3.5 is here to make particle speed adequate p = self._create_particle(3.5, # 3.5 is here to make particle speed adequate
p.ApplyLinearImpulse( ( ox*MAIN_ENGINE_POWER*m_power, oy*MAIN_ENGINE_POWER*m_power), impulse_pos, True) impulse_pos[0],
self.lander.ApplyLinearImpulse( (-ox*MAIN_ENGINE_POWER*m_power, -oy*MAIN_ENGINE_POWER*m_power), impulse_pos, True) impulse_pos[1],
m_power) # particles are just a decoration
p.ApplyLinearImpulse((ox * MAIN_ENGINE_POWER * m_power, oy * MAIN_ENGINE_POWER * m_power),
impulse_pos,
True)
self.lander.ApplyLinearImpulse((-ox * MAIN_ENGINE_POWER * m_power, -oy * MAIN_ENGINE_POWER * m_power),
impulse_pos,
True)
s_power = 0.0 s_power = 0.0
if (self.continuous and np.abs(action[1]) > 0.5) or (not self.continuous and action in [1,3]): if (self.continuous and np.abs(action[1]) > 0.5) or (not self.continuous and action in [1, 3]):
# Orientation engines # Orientation engines
if self.continuous: if self.continuous:
direction = np.sign(action[1]) direction = np.sign(action[1])
s_power = np.clip(np.abs(action[1]), 0.5,1.0) s_power = np.clip(np.abs(action[1]), 0.5, 1.0)
assert s_power>=0.5 and s_power <= 1.0 assert s_power >= 0.5 and s_power <= 1.0
else: else:
direction = action-2 direction = action-2
s_power = 1.0 s_power = 1.0
ox = tip[0]*dispersion[0] + side[0]*(3*dispersion[1]+direction*SIDE_ENGINE_AWAY/SCALE) ox = tip[0] * dispersion[0] + side[0] * (3 * dispersion[1] + direction * SIDE_ENGINE_AWAY/SCALE)
oy = -tip[1]*dispersion[0] - side[1]*(3*dispersion[1]+direction*SIDE_ENGINE_AWAY/SCALE) oy = -tip[1] * dispersion[0] - side[1] * (3 * dispersion[1] + direction * SIDE_ENGINE_AWAY/SCALE)
impulse_pos = (self.lander.position[0] + ox - tip[0]*17/SCALE, self.lander.position[1] + oy + tip[1]*SIDE_ENGINE_HEIGHT/SCALE) impulse_pos = (self.lander.position[0] + ox - tip[0] * 17/SCALE,
self.lander.position[1] + oy + tip[1] * SIDE_ENGINE_HEIGHT/SCALE)
p = self._create_particle(0.7, impulse_pos[0], impulse_pos[1], s_power) p = self._create_particle(0.7, impulse_pos[0], impulse_pos[1], s_power)
p.ApplyLinearImpulse( ( ox*SIDE_ENGINE_POWER*s_power, oy*SIDE_ENGINE_POWER*s_power), impulse_pos, True) p.ApplyLinearImpulse((ox * SIDE_ENGINE_POWER * s_power, oy * SIDE_ENGINE_POWER * s_power),
self.lander.ApplyLinearImpulse( (-ox*SIDE_ENGINE_POWER*s_power, -oy*SIDE_ENGINE_POWER*s_power), impulse_pos, True) impulse_pos
, True)
self.lander.ApplyLinearImpulse((-ox * SIDE_ENGINE_POWER * s_power, -oy * SIDE_ENGINE_POWER * s_power),
impulse_pos,
True)
self.world.Step(1.0/FPS, 6*30, 2*30) self.world.Step(1.0/FPS, 6*30, 2*30)
@@ -292,27 +313,27 @@ class LunarLander(gym.Env, EzPickle):
1.0 if self.legs[0].ground_contact else 0.0, 1.0 if self.legs[0].ground_contact else 0.0,
1.0 if self.legs[1].ground_contact else 0.0 1.0 if self.legs[1].ground_contact else 0.0
] ]
assert len(state)==8 assert len(state) == 8
reward = 0 reward = 0
shaping = \ shaping = \
- 100*np.sqrt(state[0]*state[0] + state[1]*state[1]) \ - 100*np.sqrt(state[0]*state[0] + state[1]*state[1]) \
- 100*np.sqrt(state[2]*state[2] + state[3]*state[3]) \ - 100*np.sqrt(state[2]*state[2] + state[3]*state[3]) \
- 100*abs(state[4]) + 10*state[6] + 10*state[7] # And ten points for legs contact, the idea is if you - 100*abs(state[4]) + 10*state[6] + 10*state[7] # And ten points for legs contact, the idea is if you
# lose contact again after landing, you get negative reward # lose contact again after landing, you get negative reward
if self.prev_shaping is not None: if self.prev_shaping is not None:
reward = shaping - self.prev_shaping reward = shaping - self.prev_shaping
self.prev_shaping = shaping self.prev_shaping = shaping
reward -= m_power*0.30 # less fuel spent is better, about -30 for heurisic landing reward -= m_power*0.30 # less fuel spent is better, about -30 for heuristic landing
reward -= s_power*0.03 reward -= s_power*0.03
done = False done = False
if self.game_over or abs(state[0]) >= 1.0: if self.game_over or abs(state[0]) >= 1.0:
done = True done = True
reward = -100 reward = -100
if not self.lander.awake: if not self.lander.awake:
done = True done = True
reward = +100 reward = +100
return np.array(state, dtype=np.float32), reward, done, {} return np.array(state, dtype=np.float32), reward, done, {}
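The shaping term above is potential-based: the per-step reward is the change in shaping minus the fuel costs. A hedged numeric illustration (the two example states are arbitrary):

import numpy as np

def shaping(s):
    # Same expression as in step(): distance, speed and tilt penalties plus leg-contact bonuses.
    return (-100 * np.sqrt(s[0]**2 + s[1]**2)
            - 100 * np.sqrt(s[2]**2 + s[3]**2)
            - 100 * abs(s[4]) + 10 * s[6] + 10 * s[7])

prev = shaping([0.40, 0.90, 0.0, -0.40, 0.05, 0.0, 0.0, 0.0])
curr = shaping([0.38, 0.85, 0.0, -0.40, 0.03, 0.0, 0.0, 0.0])
m_power, s_power = 1.0, 0.0                            # main engine fired, side engines idle
print(curr - prev - 0.30 * m_power - 0.03 * s_power)   # per-step reward before any terminal bonus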
@@ -324,13 +345,13 @@ class LunarLander(gym.Env, EzPickle):
for obj in self.particles: for obj in self.particles:
obj.ttl -= 0.15 obj.ttl -= 0.15
obj.color1 = (max(0.2,0.2+obj.ttl), max(0.2,0.5*obj.ttl), max(0.2,0.5*obj.ttl)) obj.color1 = (max(0.2, 0.2+obj.ttl), max(0.2, 0.5*obj.ttl), max(0.2, 0.5*obj.ttl))
obj.color2 = (max(0.2,0.2+obj.ttl), max(0.2,0.5*obj.ttl), max(0.2,0.5*obj.ttl)) obj.color2 = (max(0.2, 0.2+obj.ttl), max(0.2, 0.5*obj.ttl), max(0.2, 0.5*obj.ttl))
self._clean_particles(False) self._clean_particles(False)
for p in self.sky_polys: for p in self.sky_polys:
self.viewer.draw_polygon(p, color=(0,0,0)) self.viewer.draw_polygon(p, color=(0, 0, 0))
for obj in self.particles + self.drawlist: for obj in self.particles + self.drawlist:
for f in obj.fixtures: for f in obj.fixtures:
@@ -348,42 +369,56 @@ class LunarLander(gym.Env, EzPickle):
for x in [self.helipad_x1, self.helipad_x2]: for x in [self.helipad_x1, self.helipad_x2]:
flagy1 = self.helipad_y flagy1 = self.helipad_y
flagy2 = flagy1 + 50/SCALE flagy2 = flagy1 + 50/SCALE
self.viewer.draw_polyline( [(x, flagy1), (x, flagy2)], color=(1,1,1) ) self.viewer.draw_polyline([(x, flagy1), (x, flagy2)], color=(1, 1, 1))
self.viewer.draw_polygon( [(x, flagy2), (x, flagy2-10/SCALE), (x+25/SCALE, flagy2-5/SCALE)], color=(0.8,0.8,0) ) self.viewer.draw_polygon([(x, flagy2), (x, flagy2-10/SCALE), (x + 25/SCALE, flagy2 - 5/SCALE)],
color=(0.8, 0.8, 0))
return self.viewer.render(return_rgb_array = mode=='rgb_array') return self.viewer.render(return_rgb_array=mode == 'rgb_array')
def close(self): def close(self):
if self.viewer is not None: if self.viewer is not None:
self.viewer.close() self.viewer.close()
self.viewer = None self.viewer = None
class LunarLanderContinuous(LunarLander): class LunarLanderContinuous(LunarLander):
continuous = True continuous = True
 def heuristic(env, s):
-    # Heuristic for:
-    # 1. Testing.
-    # 2. Demonstration rollout.
-    angle_targ = s[0]*0.5 + s[2]*1.0         # angle should point towards center (s[0] is horizontal coordinate, s[2] hor speed)
-    if angle_targ > 0.4: angle_targ = 0.4    # more than 0.4 radians (22 degrees) is bad
+    """
+    The heuristic for
+    1. Testing
+    2. Demonstration rollout.
+
+    Args:
+        env: The environment
+        s (list): The state. Attributes:
+                  s[0] is the horizontal coordinate
+                  s[1] is the vertical coordinate
+                  s[2] is the horizontal speed
+                  s[3] is the vertical speed
+                  s[4] is the angle
+                  s[5] is the angular speed
+                  s[6] 1 if first leg has contact, else 0
+                  s[7] 1 if second leg has contact, else 0
+    returns:
+        a: The action computed by the heuristic, to be fed into the step function defined above to determine the next step and reward.
+    """
+    angle_targ = s[0]*0.5 + s[2]*1.0         # angle should point towards center
+    if angle_targ > 0.4: angle_targ = 0.4    # more than 0.4 radians (22 degrees) is bad
     if angle_targ < -0.4: angle_targ = -0.4
-    hover_targ = 0.55*np.abs(s[0])           # target y should be proporional to horizontal offset
+    hover_targ = 0.55*np.abs(s[0])           # target y should be proportional to horizontal offset
-    # PID controller: s[4] angle, s[5] angularSpeed
-    angle_todo = (angle_targ - s[4])*0.5 - (s[5])*1.0
-    #print("angle_targ=%0.2f, angle_todo=%0.2f" % (angle_targ, angle_todo))
-    # PID controller: s[1] vertical coordinate s[3] vertical speed
+    angle_todo = (angle_targ - s[4]) * 0.5 - (s[5])*1.0
     hover_todo = (hover_targ - s[1])*0.5 - (s[3])*0.5
-    #print("hover_targ=%0.2f, hover_todo=%0.2f" % (hover_targ, hover_todo))
     if s[6] or s[7]:  # legs have contact
         angle_todo = 0
         hover_todo = -(s[3])*0.5  # override to reduce fall speed, that's all we need after contact
     if env.continuous:
-        a = np.array( [hover_todo*20 - 1, -angle_todo*20] )
+        a = np.array([hover_todo*20 - 1, -angle_todo*20])
         a = np.clip(a, -1, +1)
     else:
         a = 0
@@ -416,5 +451,3 @@ def demo_heuristic_lander(env, seed=None, render=False):
if __name__ == '__main__': if __name__ == '__main__':
demo_heuristic_lander(LunarLander(), render=True) demo_heuristic_lander(LunarLander(), render=True)
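A quick way to exercise the heuristic documented above without opening a window; the score comment is a rough expectation, not a guarantee:

from gym.envs.box2d.lunar_lander import LunarLander, demo_heuristic_lander

# demo_heuristic_lander is the helper shown at the end of this file; it steps the
# environment with heuristic() until done and is assumed here to return the total reward.
total_reward = demo_heuristic_lander(LunarLander(), seed=0, render=False)
print(total_reward)   # usually in the vicinity of the 200-point "solved" threshold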

View File

@@ -214,13 +214,17 @@ class AcrobotEnv(core.Env):
     self.viewer = None
 def wrap(x, m, M):
-    """
-    :param x: a scalar
-    :param m: minimum possible value in range
-    :param M: maximum possible value in range
-    Wraps ``x`` so m <= x <= M; but unlike ``bound()`` which
+    """Wraps ``x`` so m <= x <= M; but unlike ``bound()`` which
     truncates, ``wrap()`` wraps x around the coordinate system defined by m,M.\n
     For example, m = -180, M = 180 (degrees), x = 360 --> returns 0.
+
+    Args:
+        x: a scalar
+        m: minimum possible value in range
+        M: maximum possible value in range
+
+    Returns:
+        x: a scalar, wrapped
     """
     diff = M - m
     while x > M:
@@ -230,10 +234,14 @@ def wrap(x, m, M):
     return x
 def bound(x, m, M=None):
-    """
-    :param x: scalar
-    Either have m as scalar, so bound(x,m,M) which returns m <= x <= M *OR*
+    """Either have m as scalar, so bound(x,m,M) which returns m <= x <= M *OR*
     have m as length 2 vector, bound(x,m, <IGNORED>) returns m[0] <= x <= m[1].
+
+    Args:
+        x: scalar
+
+    Returns:
+        x: scalar, bound between min (m) and Max (M)
     """
     if M is None:
         M = m[1]
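The docstring's degree example, run through the module's own helpers:

from gym.envs.classic_control.acrobot import wrap, bound

print(wrap(360, -180, 180))   # 0: wrapped around the [-180, 180) circle rather than truncated
print(bound(3.5, 0, 1))       # 1: truncated into [m, M]
print(bound(3.5, (0, 1)))     # 1: same call with m given as a length-2 vector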
@@ -248,17 +256,14 @@ def rk4(derivs, y0, t, *args, **kwargs):
     This is a toy implementation which may be useful if you find
     yourself stranded on a system w/o scipy. Otherwise use
     :func:`scipy.integrate`.
-    *y0*
-        initial state vector
-    *t*
-        sample times
-    *derivs*
-        returns the derivative of the system and has the
-        signature ``dy = derivs(yi, ti)``
-    *args*
-        additional arguments passed to the derivative function
-    *kwargs*
-        additional keyword arguments passed to the derivative function
+
+    Args:
+        derivs: the derivative of the system and has the signature ``dy = derivs(yi, ti)``
+        y0: initial state vector
+        t: sample times
+        args: additional arguments passed to the derivative function
+        kwargs: additional keyword arguments passed to the derivative function
+
     Example 1 ::
         ## 2D system
         def derivs6(x,t):
@@ -278,6 +283,9 @@ def rk4(derivs, y0, t, *args, **kwargs):
         yout = rk4(derivs, y0, t)
     If you have access to scipy, you should probably be using the
     scipy.integrate tools rather than this function.
+
+    Returns:
+        yout: Runge-Kutta approximation of the ODE
     """
     try:
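And a small, hedged check of rk4 on a one-dimensional ODE with a known solution (dx/dt = -x):

import numpy as np
from gym.envs.classic_control.acrobot import rk4

def derivs(x, t):
    return -x                        # analytic solution is x(t) = exp(-t)

t = np.linspace(0.0, 2.0, 201)       # sample times, as described in the docstring
yout = rk4(derivs, np.array([1.0]), t)
print(yout[-1])                      # ~0.1353, close to exp(-2)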

View File

@@ -13,17 +13,18 @@ import numpy as np
 class CartPoleEnv(gym.Env):
     """
     Description:
-        A pole is attached by an un-actuated joint to a cart, which moves along a frictionless track. The pendulum starts upright, and the goal is to prevent it from falling over by increasing and reducing the cart's velocity.
+        A pole is attached by an un-actuated joint to a cart, which moves along a frictionless track. The pendulum
+        starts upright, and the goal is to prevent it from falling over by increasing and reducing the cart's velocity.
     Source:
         This environment corresponds to the version of the cart-pole problem described by Barto, Sutton, and Anderson
     Observation:
         Type: Box(4)
         Num     Observation               Min             Max
         0       Cart Position             -4.8            4.8
         1       Cart Velocity             -Inf            Inf
         2       Pole Angle                -24 deg         24 deg
         3       Pole Velocity At Tip      -Inf            Inf
     Actions:
@@ -32,7 +33,9 @@ class CartPoleEnv(gym.Env):
         0     Push cart to the left
         1     Push cart to the right
-        Note: The amount the velocity that is reduced or increased is not fixed; it depends on the angle the pole is pointing. This is because the center of gravity of the pole increases the amount of energy needed to move the cart underneath it
+        Note: The amount the velocity that is reduced or increased is not fixed; it depends on the angle the pole is
+        pointing. This is because the center of gravity of the pole increases the amount of energy needed to move the
+        cart underneath it
     Reward:
         Reward is 1 for every step taken, including the termination step
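A minimal check of the spaces documented above, using the registered CartPole-v1 id:

import gym

env = gym.make("CartPole-v1")
print(env.observation_space)           # Box(4,) with the limits from the table above
print(env.action_space)                # Discrete(2): 0 pushes left, 1 pushes right
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
print(reward)                          # 1.0 for every step, including the terminating one
env.close()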