Mirror of https://github.com/Farama-Foundation/Gymnasium.git, synced 2025-08-23 23:12:46 +00:00
Clean some docstrings (#1854)
* add type of argument
* fix typos
* split lines for formatting
* reformat string, add ellipsis, remove r string
* make docstring stylistically consistent
* make docstrings a little more elaborate
* reduce by 1 space
* make line wrap 120
* remove unnecessary line
* add returns to docstring
* add docstring, make code more pep8 and delete some unused print functions
* more pep8
* file docstring instead of comments
* delete unused variables, add file docstring and add some pep8 spring cleaning
* add file docstring, fix typos and add some pep8 corrections

Co-authored-by: Dan <daniel.timbrell@ing.com>
examples/agents/cem.py
@@ -11,12 +11,20 @@ def cem(f, th_mean, batch_size, n_iter, elite_frac, initial_std=1.0):
 """
 Generic implementation of the cross-entropy method for maximizing a black-box function

 Args:
 f: a function mapping from vector -> scalar
-th_mean: initial mean over input distribution
-batch_size: number of samples of theta to evaluate per batch
-n_iter: number of batches
-elite_frac: each batch, select this fraction of the top-performing samples
-initial_std: initial standard deviation over parameter vectors
+th_mean (np.array): initial mean over input distribution
+batch_size (int): number of samples of theta to evaluate per batch
+n_iter (int): number of batches
+elite_frac (float): each batch, select this fraction of the top-performing samples
+initial_std (float): initial standard deviation over parameter vectors
+
+returns:
+A generator of dicts. Subsequent dicts correspond to iterations of CEM algorithm.
+The dicts contain the following values:
+'ys' : numpy array with values of function evaluated at current population
+'ys_mean': mean value of function over current population
+'theta_mean': mean value of the parameter vector over current population
 """
 n_elite = int(np.round(batch_size*elite_frac))
 th_std = np.ones_like(th_mean) * initial_std
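To make the documented interface concrete, here is a minimal usage sketch of the generator described above; the quadratic objective and the import assumption (examples/agents/cem.py on the import path) are illustrative, not part of the diff.

    import numpy as np
    from cem import cem  # assumes examples/agents/cem.py is importable

    # Toy black-box objective: maximized at theta == 1.
    f = lambda th: -np.sum(np.square(th - 1.0))

    for it, stats in enumerate(cem(f, th_mean=np.zeros(5), batch_size=50,
                                   n_iter=10, elite_frac=0.2, initial_std=1.0)):
        # Each yielded dict carries 'ys', 'ys_mean' and 'theta_mean', as documented.
        print(it, stats['ys_mean'], stats['theta_mean'])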
gym/core.py
@@ -6,7 +6,7 @@ env_closer = closer.Closer()


 class Env(object):
-r"""The main OpenAI Gym class. It encapsulates an environment with
+"""The main OpenAI Gym class. It encapsulates an environment with
 arbitrary behind-the-scenes dynamics. An environment can be
 partially or fully observed.

@@ -26,9 +26,7 @@ class Env(object):

 Note: a default reward range set to [-inf,+inf] already exists. Set it if you want a narrower range.

-The methods are accessed publicly as "step", "reset", etc.. The
-non-underscored versions are wrapper methods to which we may add
-functionality over time.
+The methods are accessed publicly as "step", "reset", etc...
 """
 # Set this in SOME subclasses
 metadata = {'render.modes': []}
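As a quick illustration of the public interface the docstring refers to ("step", "reset", etc.), a hedged sketch of a toy subclass follows; the coin-flip dynamics and class name are invented for illustration only.

    import random
    import gym
    from gym import spaces

    class CoinFlipEnv(gym.Env):
        """Toy environment: guess the outcome of a coin flip (illustrative only)."""
        metadata = {'render.modes': []}

        def __init__(self):
            self.action_space = spaces.Discrete(2)       # 0 = heads, 1 = tails
            self.observation_space = spaces.Discrete(1)  # single dummy observation

        def reset(self):
            return 0

        def step(self, action):
            reward = float(action == random.randint(0, 1))
            return 0, reward, True, {}  # observation, reward, done, info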
@@ -174,9 +172,9 @@ class GoalEnv(Env):

 def compute_reward(self, achieved_goal, desired_goal, info):
 """Compute the step reward. This externalizes the reward function and makes
-it dependent on an a desired goal and the one that was achieved. If you wish to include
+it dependent on a desired goal and the one that was achieved. If you wish to include
 additional rewards that are independent of the goal, you can include the necessary values
-to derive it in info and compute it accordingly.
+to derive it in 'info' and compute it accordingly.

 Args:
 achieved_goal (object): the goal that was achieved during execution
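As an illustration of the externalized reward described above, a goal-based subclass might compute a sparse, distance-based reward like this; the NumPy array goals and the 0.05 threshold are assumptions, not part of the diff.

    import numpy as np
    import gym

    class ReachGoalEnv(gym.GoalEnv):  # hypothetical subclass, sketch only
        def compute_reward(self, achieved_goal, desired_goal, info):
            # Sparse reward: 0 when the achieved goal is within 5 cm of the
            # desired goal, -1 otherwise. Goal-independent terms could be
            # derived from values stored in info.
            distance = np.linalg.norm(achieved_goal - desired_goal, axis=-1)
            return -(distance > 0.05).astype(np.float32)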
@@ -194,7 +192,7 @@ class GoalEnv(Env):


 class Wrapper(Env):
-r"""Wraps the environment to allow a modular transformation.
+"""Wraps the environment to allow a modular transformation.

 This class is the base class for all wrappers. The subclass could override
 some methods to change the behavior of the original environment without touching the
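A minimal sketch of the modular transformation idea: a wrapper that overrides one method and leaves the wrapped environment untouched. The reward scaling is purely illustrative.

    import gym

    class ScaleRewardWrapper(gym.Wrapper):
        def __init__(self, env, scale=0.1):
            super().__init__(env)
            self.scale = scale

        def step(self, action):
            observation, reward, done, info = self.env.step(action)
            return observation, reward * self.scale, done, info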
gym/envs/box2d/car_dynamics.py
@@ -1,15 +1,17 @@
+"""
+Top-down car dynamics simulation.
+
+Some ideas are taken from this great tutorial http://www.iforce2d.net/b2dtut/top-down-car by Chris Campbell.
+This simulation is a bit more detailed, with wheels rotation.
+
+Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
+"""
+
 import numpy as np
 import math
 import Box2D
 from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener, shape)

-# Top-down car dynamics simulation.
-#
-# Some ideas are taken from this great tutorial http://www.iforce2d.net/b2dtut/top-down-car by Chris Campbell.
-# This simulation is a bit more detailed, with wheels rotation.
-#
-# Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
-
 SIZE = 0.02
 ENGINE_POWER = 100000000*SIZE*SIZE
 WHEEL_MOMENT_OF_INERTIA = 4000*SIZE*SIZE
@@ -46,6 +48,7 @@ WHEEL_COLOR = (0.0,0.0,0.0)
 WHEEL_WHITE = (0.3, 0.3, 0.3)
 MUD_COLOR = (0.4, 0.4, 0.0)

 class Car:
 def __init__(self, world, init_angle, init_x, init_y):
 self.world = world
@@ -107,7 +110,11 @@ class Car:
 self.particles = []

 def gas(self, gas):
-'control: rear wheel drive'
+"""control: rear wheel drive
+
+Args:
+gas (float): How much gas gets applied. Gets clipped between 0 and 1.
+"""
 gas = np.clip(gas, 0, 1)
 for w in self.wheels[2:4]:
 diff = gas - w.gas
@@ -115,12 +122,18 @@ class Car:
 w.gas += diff

 def brake(self, b):
-'control: brake b=0..1, more than 0.9 blocks wheels to zero rotation'
+"""control: brake
+
+Args:
+b (0..1): Degree to which the brakes are applied. More than 0.9 blocks the wheels to zero rotation"""
 for w in self.wheels:
 w.brake = b

 def steer(self, s):
-'control: steer s=-1..1, it takes time to rotate steering wheel from side to side, s is target position'
+"""control: steer
+
+Args:
+s (-1..1): target position, it takes time to rotate steering wheel from side-to-side"""
 self.wheels[0].steer = s
 self.wheels[1].steer = s
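Taken together, the gas/brake/steer docstrings above describe the car's control surface. The following is a hedged usage sketch; the Box2D world setup, the time step, and the call to Car.step are assumptions about the surrounding module, not code shown in this diff.

    import Box2D
    from gym.envs.box2d.car_dynamics import Car

    world = Box2D.b2World((0, 0))  # top-down simulation: no gravity
    car = Car(world, init_angle=0.0, init_x=0.0, init_y=0.0)

    car.gas(0.5)     # clipped to [0, 1], applied to the rear wheels
    car.steer(-0.3)  # target steering position in [-1, 1]
    car.brake(0.0)   # values above 0.9 lock the wheels
    for _ in range(10):
        car.step(1.0 / 50)                    # advance the wheel/friction model
        world.Step(1.0 / 50, 6 * 30, 2 * 30)  # advance the Box2D world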
@@ -148,7 +161,9 @@ class Car:
 # WHEEL_MOMENT_OF_INERTIA*np.square(w.omega)/2 = E -- energy
 # WHEEL_MOMENT_OF_INERTIA*w.omega * domega/dt = dE/dt = W -- power
 # domega = dt*W/WHEEL_MOMENT_OF_INERTIA/w.omega
-w.omega += dt*ENGINE_POWER*w.gas/WHEEL_MOMENT_OF_INERTIA/(abs(w.omega)+5.0)  # small coef not to divide by zero
+
+# add small coef not to divide by zero
+w.omega += dt*ENGINE_POWER*w.gas/WHEEL_MOMENT_OF_INERTIA/(abs(w.omega)+5.0)
 self.fuel_spent += dt*ENGINE_POWER*w.gas

 if w.brake >= 0.9:
@@ -167,7 +182,9 @@ class Car:

 # Physically correct is to always apply friction_limit until speed is equal.
 # But dt is finite, that will lead to oscillations if difference is already near zero.
-f_force *= 205000*SIZE*SIZE  # Random coefficient to cut oscillations in few steps (have no effect on friction_limit)
+
+# Random coefficient to cut oscillations in few steps (have no effect on friction_limit)
+f_force *= 205000*SIZE*SIZE
 p_force *= 205000*SIZE*SIZE
 force = np.sqrt(np.square(f_force) + np.square(p_force))
gym/envs/box2d/car_racing.py
@@ -1,3 +1,33 @@
+"""
+Easiest continuous control task to learn from pixels, a top-down racing environment.
+Discrete control is reasonable in this environment as well, on/off discretization is
+fine.
+
+State consists of STATE_W x STATE_H pixels.
+
+The reward is -0.1 every frame and +1000/N for every track tile visited, where N is
+the total number of tiles visited in the track. For example, if you have finished in 732 frames,
+your reward is 1000 - 0.1*732 = 926.8 points.
+
+The game is solved when the agent consistently gets 900+ points. The generated track is random every episode.
+
+The episode finishes when all the tiles are visited. The car also can go outside of the PLAYFIELD - that
+is far off the track, then it will get -100 and die.
+
+Some indicators are shown at the bottom of the window along with the state RGB buffer. From
+left to right: the true speed, four ABS sensors, the steering wheel position and gyroscope.
+
+To play yourself (it's rather fast for humans), type:
+
+python gym/envs/box2d/car_racing.py
+
+Remember it's a powerful rear-wheel drive car - don't press the accelerator and turn at the
+same time.
+
+Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
+"""
 import sys, math
 import numpy as np
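The reward arithmetic in the new docstring can be verified directly, and the environment can be exercised with the [steer, gas, brake] action defined later in this file. A hedged sketch; instantiating CarRacing directly with verbose=0 is an assumption about the constructor, not something this hunk shows.

    import numpy as np
    from gym.envs.box2d.car_racing import CarRacing

    # Docstring example: all tiles visited, finishing in 732 frames.
    # The per-tile bonuses sum to 1000 regardless of N, so:
    print(1000 - 0.1 * 732)  # -> 926.8, matching the docstring

    env = CarRacing(verbose=0)
    obs = env.reset()
    obs, reward, done, info = env.step(np.array([0.0, 0.2, 0.0]))  # steer, gas, brake
    env.close()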
@@ -12,33 +42,6 @@ from gym.utils import colorize, seeding, EzPickle
 import pyglet
 from pyglet import gl

-# Easiest continuous control task to learn from pixels, a top-down racing environment.
-# Discrete control is reasonable in this environment as well, on/off discretization is
-# fine.
-#
-# State consists of STATE_W x STATE_H pixels.
-#
-# Reward is -0.1 every frame and +1000/N for every track tile visited, where N is
-# the total number of tiles visited in the track. For example, if you have finished in 732 frames,
-# your reward is 1000 - 0.1*732 = 926.8 points.
-#
-# Game is solved when agent consistently gets 900+ points. Track generated is random every episode.
-#
-# Episode finishes when all tiles are visited. Car also can go outside of PLAYFIELD, that
-# is far off the track, then it will get -100 and die.
-#
-# Some indicators shown at the bottom of the window and the state RGB buffer. From
-# left to right: true speed, four ABS sensors, steering wheel position and gyroscope.
-#
-# To play yourself (it's rather fast for humans), type:
-#
-# python gym/envs/box2d/car_racing.py
-#
-# Remember it's powerful rear-wheel drive car, don't press accelerator and turn at the
-# same time.
-#
-# Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
-
 STATE_W = 96  # less than Atari 160x192
 STATE_H = 96
 VIDEO_W = 600
@@ -62,14 +65,18 @@ BORDER_MIN_COUNT = 4

 ROAD_COLOR = [0.4, 0.4, 0.4]


 class FrictionDetector(contactListener):
 def __init__(self, env):
 contactListener.__init__(self)
 self.env = env

 def BeginContact(self, contact):
 self._contact(contact, True)

 def EndContact(self, contact):
 self._contact(contact, False)

 def _contact(self, contact, begin):
 tile = None
 obj = None
@@ -91,14 +98,12 @@ class FrictionDetector(contactListener):
 return
 if begin:
 obj.tiles.add(tile)
-# print tile.road_friction, "ADD", len(obj.tiles)
 if not tile.road_visited:
 tile.road_visited = True
 self.env.reward += 1000.0/len(self.env.track)
 self.env.tile_visited_count += 1
 else:
 obj.tiles.remove(tile)
-# print tile.road_friction, "DEL", len(obj.tiles) -- should delete to zero when on grass (this works)

 class CarRacing(gym.Env, EzPickle):
 metadata = {
@@ -120,10 +125,12 @@ class CarRacing(gym.Env, EzPickle):
 self.prev_reward = 0.0
 self.verbose = verbose
 self.fd_tile = fixtureDef(
-shape = polygonShape(vertices=
-[(0, 0),(1, 0),(1, -1),(0, -1)]))
+shape=polygonShape(vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]))

-self.action_space = spaces.Box( np.array([-1,0,0]), np.array([+1,+1,+1]), dtype=np.float32)  # steer, gas, brake
+self.action_space = spaces.Box(np.array([-1, 0, 0]),
+np.array([+1, +1, +1]),
+dtype=np.float32)  # steer, gas, brake
 self.observation_space = spaces.Box(low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8)

 def seed(self, seed=None):
@@ -154,11 +161,6 @@ class CarRacing(gym.Env, EzPickle):
 self.start_alpha = 2*math.pi*(-0.5)/CHECKPOINTS
 rad = 1.5*TRACK_RAD
 checkpoints.append((alpha, rad*math.cos(alpha), rad*math.sin(alpha)))

-# print "\n".join(str(h) for h in checkpoints)
-# self.road_poly = [ ( # uncomment this to see checkpoints
-# [ (tx,ty) for a,tx,ty in checkpoints ],
-# (0.7,0.7,0.9) ) ]
 self.road = []

 # Go from one checkpoint to another to create track
@@ -215,7 +217,6 @@ class CarRacing(gym.Env, EzPickle):
 no_freeze -= 1
 if no_freeze == 0:
 break
-# print "\n".join([str(t) for t in enumerate(track)])

 # Find closed loop range i1..i2, first loop should be ignored, second is OK
 i1, i2 = -1, -1
@@ -285,9 +286,11 @@ class CarRacing(gym.Env, EzPickle):
 if border[i]:
 side = np.sign(beta2 - beta1)
 b1_l = (x1 + side * TRACK_WIDTH * math.cos(beta1), y1 + side * TRACK_WIDTH * math.sin(beta1))
-b1_r = (x1 + side*(TRACK_WIDTH+BORDER)*math.cos(beta1), y1 + side*(TRACK_WIDTH+BORDER)*math.sin(beta1))
+b1_r = (x1 + side * (TRACK_WIDTH+BORDER) * math.cos(beta1),
+y1 + side * (TRACK_WIDTH+BORDER)*math.sin(beta1))
 b2_l = (x2 + side * TRACK_WIDTH * math.cos(beta2), y2 + side * TRACK_WIDTH * math.sin(beta2))
-b2_r = (x2 + side*(TRACK_WIDTH+BORDER)*math.cos(beta2), y2 + side*(TRACK_WIDTH+BORDER)*math.sin(beta2))
+b2_r = (x2 + side * (TRACK_WIDTH+BORDER) * math.cos(beta2),
+y2 + side * (TRACK_WIDTH+BORDER) * math.sin(beta2))
 self.road_poly.append(([b1_l, b1_r, b2_r, b2_l], (1, 1, 1) if i % 2 == 0 else (1, 0, 0)))
 self.track = track
 return True
@@ -305,7 +308,7 @@ class CarRacing(gym.Env, EzPickle):
 if success:
 break
 if self.verbose == 1:
-print("retry to generate track (normal if there are not many of this messages)")
+print("retry to generate track (normal if there are not many instances of this message)")
 self.car = Car(self.world, *self.track[0][1:4])

 return self.step(None)[0]
@@ -353,8 +356,6 @@ class CarRacing(gym.Env, EzPickle):
 if "t" not in self.__dict__: return  # reset() not called yet

 zoom = 0.1*SCALE*max(1-self.t, 0) + ZOOM*SCALE*min(self.t, 1)  # Animate zoom first second
 zoom_state = ZOOM*SCALE*STATE_W/WINDOW_W
 zoom_video = ZOOM*SCALE*VIDEO_W/WINDOW_W
 scroll_x = self.car.hull.position[0]
 scroll_y = self.car.hull.position[1]
 angle = -self.car.hull.angle
@@ -444,12 +445,14 @@ class CarRacing(gym.Env, EzPickle):
 gl.glVertex3f(W, 5*h, 0)
 gl.glVertex3f(0, 5*h, 0)
 gl.glVertex3f(0, 0, 0)

 def vertical_ind(place, val, color):
 gl.glColor4f(color[0], color[1], color[2], 1)
 gl.glVertex3f((place+0)*s, h + h*val, 0)
 gl.glVertex3f((place+1)*s, h + h*val, 0)
 gl.glVertex3f((place+1)*s, h, 0)
 gl.glVertex3f((place+0)*s, h, 0)

 def horiz_ind(place, val, color):
 gl.glColor4f(color[0], color[1], color[2], 1)
 gl.glVertex3f((place+0)*s, 4*h , 0)
@@ -472,6 +475,7 @@ class CarRacing(gym.Env, EzPickle):
 if __name__=="__main__":
 from pyglet.window import key
 a = np.array([0.0, 0.0, 0.0])

 def key_press(k, mod):
 global restart
 if k == 0xff0d: restart = True
@@ -479,6 +483,7 @@ if __name__=="__main__":
 if k == key.RIGHT: a[0] = +1.0
 if k == key.UP: a[1] = +1.0
 if k == key.DOWN: a[2] = +0.8  # set 1.0 for wheels to block to zero rotation

 def key_release(k, mod):
 if k == key.LEFT and a[0] == -1.0: a[0] = 0
 if k == key.RIGHT and a[0] == +1.0: a[0] = 0
@@ -504,9 +509,6 @@ if __name__=="__main__":
 if steps % 200 == 0 or done:
 print("\naction " + str(["{:+0.2f}".format(x) for x in a]))
 print("step {} total_reward {:+0.2f}".format(steps, total_reward))
-#import matplotlib.pyplot as plt
-#plt.imshow(s)
-#plt.savefig("test.jpeg")
 steps += 1
 isopen = env.render()
 if done or restart or isopen == False:
gym/envs/box2d/lunar_lander.py
@@ -1,3 +1,31 @@
+"""
+Rocket trajectory optimization is a classic topic in Optimal Control.
+
+According to Pontryagin's maximum principle it's optimal to fire engine full throttle or
+turn it off. That's the reason this environment is OK to have discreet actions (engine on or off).
+
+The landing pad is always at coordinates (0,0). The coordinates are the first two numbers in the state vector.
+Reward for moving from the top of the screen to the landing pad and zero speed is about 100..140 points.
+If the lander moves away from the landing pad it loses reward. The episode finishes if the lander crashes or
+comes to rest, receiving an additional -100 or +100 points. Each leg with ground contact is +10 points.
+Firing the main engine is -0.3 points each frame. Firing the side engine is -0.03 points each frame.
+Solved is 200 points.
+
+Landing outside the landing pad is possible. Fuel is infinite, so an agent can learn to fly and then land
+on its first attempt. Please see the source code for details.
+
+To see a heuristic landing, run:
+
+python gym/envs/box2d/lunar_lander.py
+
+To play yourself, run:
+
+python examples/agents/keyboard_agent.py LunarLander-v2
+
+Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
+"""

 import sys, math
 import numpy as np
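Besides the command-line invocation shown in the docstring, the scripted landing can also be triggered from Python via the demo helper that appears later in this file. A small hedged sketch, assuming the helper returns the episode's total reward as in the gym source:

    from gym.envs.box2d.lunar_lander import LunarLander, demo_heuristic_lander

    # Runs the heuristic landing described above; a total reward around
    # 200 corresponds to a solved episode.
    total_reward = demo_heuristic_lander(LunarLander(), seed=0, render=False)
    print(total_reward)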
@@ -8,30 +36,6 @@ import gym
 from gym import spaces
 from gym.utils import seeding, EzPickle

-# Rocket trajectory optimization is a classic topic in Optimal Control.
-#
-# According to Pontryagin's maximum principle it's optimal to fire engine full throttle or
-# turn it off. That's the reason this environment is OK to have discreet actions (engine on or off).
-#
-# Landing pad is always at coordinates (0,0). Coordinates are the first two numbers in state vector.
-# Reward for moving from the top of the screen to landing pad and zero speed is about 100..140 points.
-# If lander moves away from landing pad it loses reward back. Episode finishes if the lander crashes or
-# comes to rest, receiving additional -100 or +100 points. Each leg ground contact is +10. Firing main
-# engine is -0.3 points each frame. Firing side engine is -0.03 points each frame. Solved is 200 points.
-#
-# Landing outside landing pad is possible. Fuel is infinite, so an agent can learn to fly and then land
-# on its first attempt. Please see source code for details.
-#
-# To see heuristic landing, run:
-#
-# python gym/envs/box2d/lunar_lander.py
-#
-# To play yourself, run:
-#
-# python examples/agents/keyboard_agent.py LunarLander-v2
-#
-# Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
-
 FPS = 50
 SCALE = 30.0  # affects how fast-paced the game is, forces should be adjusted as well

@@ -55,21 +59,25 @@ SIDE_ENGINE_AWAY = 12.0
 VIEWPORT_W = 600
 VIEWPORT_H = 400


 class ContactDetector(contactListener):
 def __init__(self, env):
 contactListener.__init__(self)
 self.env = env

 def BeginContact(self, contact):
 if self.env.lander == contact.fixtureA.body or self.env.lander == contact.fixtureB.body:
 self.env.game_over = True
 for i in range(2):
 if self.env.legs[i] in [contact.fixtureA.body, contact.fixtureB.body]:
 self.env.legs[i].ground_contact = True

 def EndContact(self, contact):
 for i in range(2):
 if self.env.legs[i] in [contact.fixtureA.body, contact.fixtureB.body]:
 self.env.legs[i].ground_contact = False


 class LunarLander(gym.Env, EzPickle):
 metadata = {
 'render.modes': ['human', 'rgb_array'],
@@ -202,7 +210,7 @@ class LunarLander(gym.Env, EzPickle):
 motorSpeed=+0.3 * i  # low enough not to jump back into the sky
 )
 if i == -1:
-rjd.lowerAngle = +0.9 - 0.5  # Yes, the most esoteric numbers here, angles legs have freedom to travel within
+rjd.lowerAngle = +0.9 - 0.5  # The most esoteric numbers here, angled legs have freedom to travel within
 rjd.upperAngle = +0.9
 else:
 rjd.lowerAngle = -0.9
@@ -243,7 +251,7 @@ class LunarLander(gym.Env, EzPickle):

 # Engines
 tip = (math.sin(self.lander.angle), math.cos(self.lander.angle))
-side = (-tip[1], tip[0]);
+side = (-tip[1], tip[0])
 dispersion = [self.np_random.uniform(-1.0, +1.0) / SCALE for _ in range(2)]

 m_power = 0.0
@@ -254,12 +262,20 @@ class LunarLander(gym.Env, EzPickle):
 assert m_power >= 0.5 and m_power <= 1.0
 else:
 m_power = 1.0
-ox = tip[0]*(4/SCALE + 2*dispersion[0]) + side[0]*dispersion[1]  # 4 is move a bit downwards, +-2 for randomness
+ox = (tip[0] * (4/SCALE + 2 * dispersion[0]) +
+side[0] * dispersion[1])  # 4 is move a bit downwards, +-2 for randomness
 oy = -tip[1] * (4/SCALE + 2 * dispersion[0]) - side[1] * dispersion[1]
 impulse_pos = (self.lander.position[0] + ox, self.lander.position[1] + oy)
-p = self._create_particle(3.5, impulse_pos[0], impulse_pos[1], m_power)  # particles are just a decoration, 3.5 is here to make particle speed adequate
-p.ApplyLinearImpulse( ( ox*MAIN_ENGINE_POWER*m_power, oy*MAIN_ENGINE_POWER*m_power), impulse_pos, True)
-self.lander.ApplyLinearImpulse( (-ox*MAIN_ENGINE_POWER*m_power, -oy*MAIN_ENGINE_POWER*m_power), impulse_pos, True)
+p = self._create_particle(3.5,  # 3.5 is here to make particle speed adequate
+impulse_pos[0],
+impulse_pos[1],
+m_power)  # particles are just a decoration
+p.ApplyLinearImpulse((ox * MAIN_ENGINE_POWER * m_power, oy * MAIN_ENGINE_POWER * m_power),
+impulse_pos,
+True)
+self.lander.ApplyLinearImpulse((-ox * MAIN_ENGINE_POWER * m_power, -oy * MAIN_ENGINE_POWER * m_power),
+impulse_pos,
+True)

 s_power = 0.0
 if (self.continuous and np.abs(action[1]) > 0.5) or (not self.continuous and action in [1, 3]):
@@ -273,10 +289,15 @@ class LunarLander(gym.Env, EzPickle):
 s_power = 1.0
 ox = tip[0] * dispersion[0] + side[0] * (3 * dispersion[1] + direction * SIDE_ENGINE_AWAY/SCALE)
 oy = -tip[1] * dispersion[0] - side[1] * (3 * dispersion[1] + direction * SIDE_ENGINE_AWAY/SCALE)
-impulse_pos = (self.lander.position[0] + ox - tip[0]*17/SCALE, self.lander.position[1] + oy + tip[1]*SIDE_ENGINE_HEIGHT/SCALE)
+impulse_pos = (self.lander.position[0] + ox - tip[0] * 17/SCALE,
+self.lander.position[1] + oy + tip[1] * SIDE_ENGINE_HEIGHT/SCALE)
 p = self._create_particle(0.7, impulse_pos[0], impulse_pos[1], s_power)
-p.ApplyLinearImpulse( ( ox*SIDE_ENGINE_POWER*s_power, oy*SIDE_ENGINE_POWER*s_power), impulse_pos, True)
-self.lander.ApplyLinearImpulse( (-ox*SIDE_ENGINE_POWER*s_power, -oy*SIDE_ENGINE_POWER*s_power), impulse_pos, True)
+p.ApplyLinearImpulse((ox * SIDE_ENGINE_POWER * s_power, oy * SIDE_ENGINE_POWER * s_power),
+impulse_pos
+, True)
+self.lander.ApplyLinearImpulse((-ox * SIDE_ENGINE_POWER * s_power, -oy * SIDE_ENGINE_POWER * s_power),
+impulse_pos,
+True)

 self.world.Step(1.0/FPS, 6*30, 2*30)

@@ -304,7 +325,7 @@ class LunarLander(gym.Env, EzPickle):
 reward = shaping - self.prev_shaping
 self.prev_shaping = shaping

-reward -= m_power*0.30  # less fuel spent is better, about -30 for heurisic landing
+reward -= m_power*0.30  # less fuel spent is better, about -30 for heuristic landing
 reward -= s_power*0.03

 done = False
@@ -349,7 +370,8 @@ class LunarLander(gym.Env, EzPickle):
 flagy1 = self.helipad_y
 flagy2 = flagy1 + 50/SCALE
 self.viewer.draw_polyline([(x, flagy1), (x, flagy2)], color=(1, 1, 1))
-self.viewer.draw_polygon( [(x, flagy2), (x, flagy2-10/SCALE), (x+25/SCALE, flagy2-5/SCALE)], color=(0.8,0.8,0) )
+self.viewer.draw_polygon([(x, flagy2), (x, flagy2-10/SCALE), (x + 25/SCALE, flagy2 - 5/SCALE)],
+color=(0.8, 0.8, 0))

 return self.viewer.render(return_rgb_array=mode == 'rgb_array')
@@ -358,25 +380,38 @@ class LunarLander(gym.Env, EzPickle):
 self.viewer.close()
 self.viewer = None


 class LunarLanderContinuous(LunarLander):
 continuous = True

 def heuristic(env, s):
-# Heuristic for:
-# 1. Testing.
-# 2. Demonstration rollout.
-angle_targ = s[0]*0.5 + s[2]*1.0  # angle should point towards center (s[0] is horizontal coordinate, s[2] hor speed)
+"""
+The heuristic for
+1. Testing
+2. Demonstration rollout.
+
+Args:
+env: The environment
+s (list): The state. Attributes:
+s[0] is the horizontal coordinate
+s[1] is the vertical coordinate
+s[2] is the horizontal speed
+s[3] is the vertical speed
+s[4] is the angle
+s[5] is the angular speed
+s[6] 1 if first leg has contact, else 0
+s[7] 1 if second leg has contact, else 0
+returns:
+a: The heuristic to be fed into the step function defined above to determine the next step and reward.
+"""
+
+angle_targ = s[0]*0.5 + s[2]*1.0  # angle should point towards center
 if angle_targ > 0.4: angle_targ = 0.4  # more than 0.4 radians (22 degrees) is bad
 if angle_targ < -0.4: angle_targ = -0.4
-hover_targ = 0.55*np.abs(s[0])  # target y should be proporional to horizontal offset
+hover_targ = 0.55*np.abs(s[0])  # target y should be proportional to horizontal offset

 # PID controller: s[4] angle, s[5] angularSpeed
 angle_todo = (angle_targ - s[4]) * 0.5 - (s[5])*1.0
 #print("angle_targ=%0.2f, angle_todo=%0.2f" % (angle_targ, angle_todo))

 # PID controller: s[1] vertical coordinate s[3] vertical speed
 hover_todo = (hover_targ - s[1])*0.5 - (s[3])*0.5
 #print("hover_targ=%0.2f, hover_todo=%0.2f" % (hover_targ, hover_todo))

 if s[6] or s[7]:  # legs have contact
 angle_todo = 0
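The returns section above says the heuristic's output is fed straight back into the step function. A minimal rollout sketch along those lines; the loop structure is illustrative and not copied from demo_heuristic_lander.

    from gym.envs.box2d.lunar_lander import LunarLander, heuristic

    env = LunarLander()
    s = env.reset()
    total_reward, done = 0.0, False
    while not done:
        a = heuristic(env, s)           # action derived from the 8-element state
        s, r, done, info = env.step(a)
        total_reward += r
    print(total_reward)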
@@ -416,5 +451,3 @@ def demo_heuristic_lander(env, seed=None, render=False):

 if __name__ == '__main__':
 demo_heuristic_lander(LunarLander(), render=True)
gym/envs/classic_control/acrobot.py
@@ -214,13 +214,17 @@ class AcrobotEnv(core.Env):
 self.viewer = None

 def wrap(x, m, M):
-"""
-:param x: a scalar
-:param m: minimum possible value in range
-:param M: maximum possible value in range
-Wraps ``x`` so m <= x <= M; but unlike ``bound()`` which
+"""Wraps ``x`` so m <= x <= M; but unlike ``bound()`` which
 truncates, ``wrap()`` wraps x around the coordinate system defined by m,M.\n
 For example, m = -180, M = 180 (degrees), x = 360 --> returns 0.

+Args:
+x: a scalar
+m: minimum possible value in range
+M: maximum possible value in range
+
+Returns:
+x: a scalar, wrapped
 """
 diff = M - m
 while x > M:
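For reference, the behaviour the new docstring describes can be written out as a standalone sketch that mirrors the loop started above; this is an illustration, not the exact gym implementation.

    def wrap_sketch(x, m, M):
        """Wrap x so that m <= x <= M by shifting in steps of (M - m)."""
        diff = M - m
        while x > M:
            x -= diff
        while x < m:
            x += diff
        return x

    print(wrap_sketch(360, -180, 180))  # -> 0, the example from the docstring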
@@ -230,10 +234,14 @@ def wrap(x, m, M):
 return x

 def bound(x, m, M=None):
-"""
-:param x: scalar
-Either have m as scalar, so bound(x,m,M) which returns m <= x <= M *OR*
+"""Either have m as scalar, so bound(x,m,M) which returns m <= x <= M *OR*
 have m as length 2 vector, bound(x,m, <IGNORED>) returns m[0] <= x <= m[1].

+Args:
+x: scalar
+
+Returns:
+x: scalar, bound between min (m) and Max (M)
 """
 if M is None:
 M = m[1]
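The two calling conventions documented above amount to a clip; a hedged sketch, equivalent in spirit to the gym helper but not the exact implementation:

    def bound_sketch(x, m, M=None):
        """Clip x to [m, M], or to [m[0], m[1]] when m is a length-2 vector."""
        if M is None:
            m, M = m[0], m[1]
        return min(max(x, m), M)

    print(bound_sketch(5, 0, 1))      # -> 1
    print(bound_sketch(-3, (0, 10)))  # -> 0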
@@ -248,17 +256,14 @@ def rk4(derivs, y0, t, *args, **kwargs):
 This is a toy implementation which may be useful if you find
 yourself stranded on a system w/o scipy. Otherwise use
 :func:`scipy.integrate`.
-*y0*
-initial state vector
-*t*
-sample times
-*derivs*
-returns the derivative of the system and has the
-signature ``dy = derivs(yi, ti)``
-*args*
-additional arguments passed to the derivative function
-*kwargs*
-additional keyword arguments passed to the derivative function

+Args:
+derivs: the derivative of the system and has the signature ``dy = derivs(yi, ti)``
+y0: initial state vector
+t: sample times
+args: additional arguments passed to the derivative function
+kwargs: additional keyword arguments passed to the derivative function

 Example 1 ::
 ## 2D system
 def derivs6(x,t):
@@ -278,6 +283,9 @@ def rk4(derivs, y0, t, *args, **kwargs):
 yout = rk4(derivs, y0, t)
 If you have access to scipy, you should probably be using the
 scipy.integrate tools rather than this function.

+Returns:
+yout: Runge-Kutta approximation of the ODE
 """

 try:
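To make the documented signature concrete, here is a self-contained sketch of the classical fourth-order Runge-Kutta update for a derivative function derivs(yi, ti), in the same spirit as the Example 1 block above; it is a generic reimplementation for illustration, not the gym function itself.

    import numpy as np

    def rk4_sketch(derivs, y0, t):
        """Integrate dy/dt = derivs(y, t) over the sample times t, starting from y0."""
        yout = np.zeros((len(t),) + np.shape(y0))
        yout[0] = y0
        for i in range(len(t) - 1):
            dt = t[i + 1] - t[i]
            y = yout[i]
            k1 = np.asarray(derivs(y, t[i]))
            k2 = np.asarray(derivs(y + dt / 2 * k1, t[i] + dt / 2))
            k3 = np.asarray(derivs(y + dt / 2 * k2, t[i] + dt / 2))
            k4 = np.asarray(derivs(y + dt * k3, t[i] + dt))
            yout[i + 1] = y + dt / 6 * (k1 + 2 * k2 + 2 * k3 + k4)
        return yout

    # Exponential decay dy/dt = -y: the final value is close to exp(-1) ~= 0.368.
    yout = rk4_sketch(lambda y, t: -y, np.array([1.0]), np.linspace(0, 1, 101))
    print(yout[-1])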
gym/envs/classic_control/cartpole.py
@@ -13,7 +13,8 @@ import numpy as np
 class CartPoleEnv(gym.Env):
 """
 Description:
-A pole is attached by an un-actuated joint to a cart, which moves along a frictionless track. The pendulum starts upright, and the goal is to prevent it from falling over by increasing and reducing the cart's velocity.
+A pole is attached by an un-actuated joint to a cart, which moves along a frictionless track. The pendulum
+starts upright, and the goal is to prevent it from falling over by increasing and reducing the cart's velocity.

 Source:
 This environment corresponds to the version of the cart-pole problem described by Barto, Sutton, and Anderson
@@ -32,7 +33,9 @@ class CartPoleEnv(gym.Env):
 0   Push cart to the left
 1   Push cart to the right

-Note: The amount the velocity that is reduced or increased is not fixed; it depends on the angle the pole is pointing. This is because the center of gravity of the pole increases the amount of energy needed to move the cart underneath it
+Note: The amount the velocity that is reduced or increased is not fixed; it depends on the angle the pole is
+pointing. This is because the center of gravity of the pole increases the amount of energy needed to move the
+cart underneath it

 Reward:
 Reward is 1 for every step taken, including the termination step
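The action and reward conventions spelled out above translate into the usual interaction loop; a hedged sketch, assuming the commonly registered CartPole-v1 id and the naive "push toward the lean" rule for illustration.

    import gym

    env = gym.make('CartPole-v1')
    obs = env.reset()
    total_reward, done = 0.0, False
    while not done:
        action = 0 if obs[2] < 0 else 1              # push toward the side the pole leans
        obs, reward, done, info = env.step(action)   # reward is 1 per step, including termination
        total_reward += reward
    print(total_reward)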