Box2d initial, second attempt

2025-08-19 13:32:03 +00:00 · 2016-05-03 22:27:42 +03:00
parent 386096f60a
commit 3b19acdfce
7 changed files with 1009 additions and 3 deletions
--- a/examples/agents/keyboard_agent.py
+++ b/examples/agents/keyboard_agent.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python
+import sys, gym
+
+#
+# Test yourself as a learning agent! Pass environment name as a command-line argument.
+#
+
+env = gym.make('LunarLander-v0' if len(sys.argv)<2 else sys.argv[1])
+
+ACTIONS = env.action_space.n
+ROLLOUT_TIME = 1000
+SKIP_CONTROL = 0    # Use previous control decision SKIP_CONTROL times, that's how you
+                    # can test what skip is still usable.
+
+human_agent_action = 0
+human_wants_restart = False
+human_sets_pause = False
+
+def key_press(key, mod):
+    # Key-down handler: 0xff0d flags a restart, 32 (space) toggles pause, digits 1..ACTIONS-1 select an action.
+    global human_agent_action, human_wants_restart, human_sets_pause
+    if key == 0xff0d: human_wants_restart = True
+    if key == 32: human_sets_pause = not human_sets_pause
+    digit = key - ord('0')
+    if 0 < digit < ACTIONS: human_agent_action = digit
+
+def key_release(key, mod):
+    # Key-up handler: fall back to action 0 when the digit key of the currently active action is released.
+    global human_agent_action
+    digit = key - ord('0')
+    if 0 < digit < ACTIONS and human_agent_action == digit:
+        human_agent_action = 0
+
+env.render()
+env.viewer.window.on_key_press = key_press
+env.viewer.window.on_key_release = key_release
+
+def rollout(env):
+    # Play one episode (at most ROLLOUT_TIME steps) using the action currently held on the keyboard.
+    global human_agent_action, human_wants_restart, human_sets_pause
+    import time  # imported once here instead of on every pass of the pause loop
+    human_wants_restart = False
+    obser = env.reset()
+    skip = 0
+    for t in range(ROLLOUT_TIME):  # range() instead of xrange(): also runs on Python 3
+        if not skip:
+            a = human_agent_action
+            skip = SKIP_CONTROL
+        else:
+            skip -= 1  # keep repeating the previous action for SKIP_CONTROL steps
+
+        obser, r, done, info = env.step(a)
+        env.render()
+        if done: break
+        if human_wants_restart: break
+        while human_sets_pause:
+            env.render()  # presumably render() pumps window events, so space can unpause -- TODO confirm
+            time.sleep(0.1)
+
+print("ACTIONS={}".format(ACTIONS))
+print("Press keys 1 2 3 ... to take actions 1 2 3 ...")
+print("No keys pressed is taking action 0")
+
+while True:  # episodes repeat back to back; there is no exit condition in this script
+    rollout(env)
--- a/examples/agents/random_agent.py
+++ b/examples/agents/random_agent.py
@@ -1,5 +1,5 @@
 import logging
-import os
+import os, sys

 import gym

@@ -14,11 +14,11 @@ class RandomAgent(object):
 if __name__ == '__main__':
    # You can optionally set up the logger. Also fine to set the level
    # to logging.DEBUG or logging.WARN if you want to change the
-    # amount of outut.
+    # amount of output.
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

-    env = gym.make('CartPole-v0')
+    env = gym.make('CartPole-v0' if len(sys.argv)<2 else sys.argv[1])
    agent = RandomAgent(env.action_space)

    # You provide the directory to write to (can be an existing
--- a/gym/envs/init.py
+++ b/gym/envs/init.py
@@ -79,6 +79,37 @@ register(
    reward_threshold=-100
 )

+# Box2d
+# ----------------------------------------
+
+register(
+    id='CartPoleSwingUp-v0',
+    entry_point='gym.envs.box2d:CartPoleSwingUp',
+    timestep_limit=200,
+    reward_threshold=100,
+)
+
+register(
+    id='LunarLander-v0',
+    entry_point='gym.envs.box2d:LunarLander',
+    timestep_limit=300,
+    reward_threshold=1,
+)
+
+register(
+    id='BipedalWalker-v0',
+    entry_point='gym.envs.box2d:BipedalWalker',
+    timestep_limit=1000,
+    reward_threshold=1.5,
+)
+
+register(
+    id='BipedalWalkerHardcore-v0',
+    entry_point='gym.envs.box2d:BipedalWalkerHardcore',
+    timestep_limit=1000,
+    reward_threshold=1.5,
+)
+
 # Toy Text
 # ----------------------------------------

--- a/gym/envs/box2d/init.py
+++ b/gym/envs/box2d/init.py
@@ -0,0 +1,5 @@
+from gym.envs.box2d.lunar_lander import LunarLander
+from gym.envs.box2d.cartpole_swingup import CartPoleSwingUp
+from gym.envs.box2d.bipedal_walker import BipedalWalker, BipedalWalkerHardcore
+
+
--- a/gym/envs/box2d/bipedal_walker.py
+++ b/gym/envs/box2d/bipedal_walker.py
@@ -0,0 +1,442 @@
+import sys
+import numpy as np
+
+import gym
+import math
+
+import Box2D
+from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener)
+
+from gym import spaces
+import numpy as np
+
+FPS    = 50
+SCALE  = 30.0   # affects how fast-paced the game is, forces should be adjusted as well
+
+MOTORS_TORQUE = 40
+SPEED_HIP     = 2
+SPEED_KNEE    = 3
+LIDAR_RANGE   = 140/SCALE
+
+INITIAL_RANDOM = 5
+
+HULL_POLY =[
+    (-30,+9), (+6,+9), (+34,+1),
+    (+34,-8), (-30,-8)
+    ]
+LEG_DOWN = -8/SCALE
+LEG_W, LEG_H = 8/SCALE, 34/SCALE
+
+VIEWPORT_W = 600
+VIEWPORT_H = 400
+
+TERRAIN_STEP   = 14/SCALE  # horizontal distance between two consecutive terrain samples
+TERRAIN_LENGTH = 200     # in steps
+TERRAIN_HEIGHT = VIEWPORT_H/SCALE/4  # baseline ground level the terrain starts from
+TERRAIN_GRASS    = 10    # how long grass spots are, in steps
+TERRAIN_STARTPAD = 20    # in steps
+
+class ContactDetector(contactListener):  # flags env.game_over once a contact begins on the hull body
+    def __init__(self, env):
+        contactListener.__init__(self)
+        self.env = env
+    def BeginContact(self, contact):
+        bodies = (contact.fixtureA.body, contact.fixtureB.body)
+        if any(self.env.hull == b for b in bodies): self.env.game_over = True
+
+class BipedalWalker(gym.Env):
+    metadata = {
+        'render.modes': ['human', 'rgb_array'],
+        'video.frames_per_second' : FPS
+    }
+
+    hardcore = False
+
+    def __init__(self):  # builds the Box2D world and the spaces, then generates the first episode
+        self.viewer = None
+        # _step() returns 12 hull/joint readings plus 10 lidar fractions: 22 values, not the 5 declared before.
+        high = np.array([np.inf]*22)
+        self.action_space = spaces.Box( np.array([-1,-1,-1,-1]), np.array([+1,+1,+1,+1]) )
+        self.observation_space = spaces.Box(-high, high)
+
+        self.world = Box2D.b2World(contactListener=ContactDetector(self))
+        self.terrain = None
+        self.hull = None
+
+        self.prev_shaping = None
+        self._reset()
+
+    def _destroy(self):
+        # Remove the previous episode's bodies from the world; does nothing while no terrain exists yet.
+        if not self.terrain:
+            return
+        destroy = self.world.DestroyBody
+        for body in self.terrain + [self.hull] + self.legs:
+            destroy(body)
+        self.terrain = []
+        self.hull = None
+        self.legs = []
+        self.joints = []  # joints are not destroyed explicitly here, only the references are dropped
+
+    def _generate_terrain(self, hardcore):  # fills self.terrain, terrain_x, terrain_y and terrain_poly
+        GRASS, STUMP, STAIRS, PIT, _STATES_ = range(5)
+        state    = GRASS
+        velocity = 0.0
+        y        = TERRAIN_HEIGHT
+        counter  = TERRAIN_STARTPAD
+        oneshot  = False
+        self.terrain   = []
+        self.terrain_x = []
+        self.terrain_y = []
+        for i in range(TERRAIN_LENGTH):
+            x = i*TERRAIN_STEP
+            self.terrain_x.append(x)
+            # oneshot is True only on the first step after a state change; obstacle bodies are built on that step
+            if state==GRASS and not oneshot:
+                velocity = 0.8*velocity + 0.01*np.sign(TERRAIN_HEIGHT - y)
+                if i > TERRAIN_STARTPAD: velocity += np.random.uniform(-1, 1)/SCALE   #1
+                y += velocity
+
+            elif state==PIT and oneshot:
+                counter = np.random.randint(3, 5)
+                poly = [
+                    (x,              y),
+                    (x+TERRAIN_STEP, y),
+                    (x+TERRAIN_STEP, y-4*TERRAIN_STEP),
+                    (x,              y-4*TERRAIN_STEP),
+                    ]
+                t = self.world.CreateStaticBody(
+                    fixtures = fixtureDef(
+                        shape=polygonShape(vertices=poly),
+                        friction = 0.1
+                    ))
+                t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
+                self.terrain.append(t)
+                t = self.world.CreateStaticBody(
+                    fixtures = fixtureDef(
+                        shape=polygonShape(vertices=[(p[0]+TERRAIN_STEP*counter,p[1]) for p in poly]),
+                        friction = 0.1
+                    ))
+                t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
+                self.terrain.append(t)
+                counter += 2
+                original_y = y
+
+            elif state==PIT and not oneshot:
+                y = original_y
+                if counter > 1:
+                    y -= 4*TERRAIN_STEP
+
+            elif state==STUMP and oneshot:
+                counter = np.random.randint(1, 3)
+                poly = [
+                    (x,                      y),
+                    (x+counter*TERRAIN_STEP, y),
+                    (x+counter*TERRAIN_STEP, y+counter*TERRAIN_STEP),
+                    (x,                      y+counter*TERRAIN_STEP),
+                    ]
+                t = self.world.CreateStaticBody(
+                    fixtures = fixtureDef(
+                        shape=polygonShape(vertices=poly),
+                        friction = 0.1
+                    ))
+                t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
+                self.terrain.append(t)
+
+            elif state==STAIRS and oneshot:
+                stair_height = +1 if np.random.ranf() > 0.5 else -1
+                stair_width = np.random.randint(4, 5)
+                stair_steps = np.random.randint(3, 5)
+                original_y = y
+                for s in range(stair_steps):
+                    poly = [
+                        (x+(    s*stair_width)*TERRAIN_STEP, y+(   s*stair_height)*TERRAIN_STEP),
+                        (x+((1+s)*stair_width)*TERRAIN_STEP, y+(   s*stair_height)*TERRAIN_STEP),
+                        (x+((1+s)*stair_width)*TERRAIN_STEP, y+(-1+s*stair_height)*TERRAIN_STEP),
+                        (x+(    s*stair_width)*TERRAIN_STEP, y+(-1+s*stair_height)*TERRAIN_STEP),
+                        ]
+                    t = self.world.CreateStaticBody(
+                        fixtures = fixtureDef(
+                            shape=polygonShape(vertices=poly),
+                            friction = 0.1
+                        ))
+                    t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
+                    self.terrain.append(t)
+                counter = stair_steps*stair_width
+
+            elif state==STAIRS and not oneshot:
+                s = stair_steps*stair_width - counter - stair_height
+                n = s//stair_width  # whole stair index; '//' keeps integer division on Python 3 too
+                y = original_y + (n*stair_height)*TERRAIN_STEP
+
+            oneshot = False
+            self.terrain_y.append(y)
+            counter -= 1
+            if counter==0:
+                counter = np.random.randint(TERRAIN_GRASS//2, TERRAIN_GRASS)  # integer bounds on Python 3 too
+                if state==GRASS and hardcore:
+                    state = np.random.randint(1, _STATES_)
+                    oneshot = True
+                else:
+                    state = GRASS
+                    oneshot = True
+
+        self.terrain_poly = []
+        for i in range(TERRAIN_LENGTH-1):
+            poly = [
+                (self.terrain_x[i],   self.terrain_y[i]),
+                (self.terrain_x[i+1], self.terrain_y[i+1])
+                ]
+            t = self.world.CreateStaticBody(
+                fixtures = fixtureDef(
+                    shape=edgeShape(vertices=poly),
+                    friction = 0.1,
+                    categoryBits=0x0001,
+                ))
+            color = (0.3, 1.0 if i%2==0 else 0.8, 0.3)
+            t.color1 = color
+            t.color2 = color
+            self.terrain.append(t)
+            color = (0.4, 0.6, 0.3)
+            poly += [ (poly[1][0], 0), (poly[0][0], 0) ]
+            self.terrain_poly.append( (poly, color) )
+        self.terrain.reverse()
+
+    def _generate_clouds(self):
+        # Random 5-point polygons stored with their x-extent (sorry for the clouds, couldn't resist)
+        self.cloud_poly   = []
+        for i in range(TERRAIN_LENGTH//20):  # '//' and range() keep this loop valid on Python 3 as well
+            x = np.random.uniform(0, TERRAIN_LENGTH)*TERRAIN_STEP
+            y = VIEWPORT_H/SCALE*3/4
+            poly = [
+                (x+15*TERRAIN_STEP*math.sin(3.14*2*a/5)+np.random.uniform(0,5*TERRAIN_STEP),
+                 y+ 5*TERRAIN_STEP*math.cos(3.14*2*a/5)+np.random.uniform(0,5*TERRAIN_STEP) )
+                for a in range(5) ]
+            x1 = min( [p[0] for p in poly] )
+            x2 = max( [p[0] for p in poly] )
+            self.cloud_poly.append( (poly,x1,x2) )
+
+    def _reset(self):  # rebuilds terrain, clouds and walker bodies, then returns the first observation
+        self._destroy()
+        self.game_over = False
+        self.prev_shaping = None
+        self.scroll = 0.0
+
+        W = VIEWPORT_W/SCALE  # NOTE(review): W and H are not used anywhere below
+        H = VIEWPORT_H/SCALE
+
+        self._generate_terrain(self.hardcore)
+        self._generate_clouds()
+
+        init_x = TERRAIN_STEP*TERRAIN_STARTPAD/2
+        init_y = TERRAIN_HEIGHT+2*LEG_H
+        self.hull = self.world.CreateDynamicBody(
+            position = (init_x, init_y),
+            fixtures = fixtureDef(
+                shape=polygonShape(vertices=[ (x/SCALE,y/SCALE) for x,y in HULL_POLY ]),
+                density=5.0,
+                friction=0.1,
+                categoryBits=0x0020,
+                maskBits=0x001,  # collide only with ground
+                restitution=0.0) # 0.99 bouncy
+                )
+        self.hull.color1 = (0.5,0.4,0.9)
+        self.hull.color2 = (0.3,0.3,0.5)
+        self.hull.ApplyForceToCenter((np.random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), 0), True)  # random horizontal push
+
+        self.legs = []
+        self.joints = []
+        for i in [-1,+1]:  # one upper+lower leg pair per pass; i varies only angle, colors and hip motorSpeed
+            leg = self.world.CreateDynamicBody(
+                position = (init_x, init_y - LEG_H/2 - LEG_DOWN),
+                angle = (i*0.05),
+                fixtures = fixtureDef(
+                    shape=polygonShape(box=(LEG_W/2, LEG_H/2)),
+                    density=1.0,
+                    restitution=0.0,
+                    categoryBits=0x0020,
+                    maskBits=0x001)
+                )
+            leg.color1 = (0.6-i/10., 0.3-i/10., 0.5-i/10.)
+            leg.color2 = (0.4-i/10., 0.2-i/10., 0.3-i/10.)
+            rjd = revoluteJointDef(
+                bodyA=self.hull,
+                bodyB=leg,
+                localAnchorA=(0, LEG_DOWN),
+                localAnchorB=(0, LEG_H/2),
+                enableMotor=True,
+                enableLimit=True,
+                maxMotorTorque=MOTORS_TORQUE,
+                motorSpeed = i,
+                lowerAngle = -0.8,
+                upperAngle = 1.1,
+                )
+            self.legs.append(leg)
+            self.joints.append(self.world.CreateJoint(rjd))  # joints end up ordered hip, knee, hip, knee
+
+            lower = self.world.CreateDynamicBody(
+                position = (init_x, init_y - LEG_H*3/2 - LEG_DOWN),
+                angle = (i*0.05),
+                fixtures = fixtureDef(
+                    shape=polygonShape(box=(0.8*LEG_W/2, LEG_H/2)),
+                    density=1.0,
+                    restitution=0.0,
+                    categoryBits=0x0020,
+                    maskBits=0x001)
+                )
+            lower.color1 = (0.6-i/10., 0.3-i/10., 0.5-i/10.)
+            lower.color2 = (0.4-i/10., 0.2-i/10., 0.3-i/10.)
+            rjd = revoluteJointDef(
+                bodyA=leg,
+                bodyB=lower,
+                localAnchorA=(0, -LEG_H/2),
+                localAnchorB=(0, LEG_H/2),
+                enableMotor=True,
+                enableLimit=True,
+                maxMotorTorque=MOTORS_TORQUE,
+                motorSpeed = 1,
+                lowerAngle = -1.6,
+                upperAngle = -0.1,
+                )
+            self.legs.append(lower)
+            self.joints.append(self.world.CreateJoint(rjd))
+
+        self.drawlist = self.terrain + self.legs + [self.hull]  # iterated in this order by _render()
+
+        return self._step(np.array([0,0,0,0]))[0]  # one zero-action step (advances physics a frame) yields the observation
+
+    def _step(self, action):  # advances the world by one frame; returns (state, reward, done, info)
+        #self.hull.ApplyForceToCenter((0, 20), True) -- Uncomment this to receive a bit of stability help
+        control_speed = False  # Should be easier as well
+        if control_speed:
+            self.joints[0].motorSpeed = SPEED_HIP  * np.clip(action[0], -1, 1)
+            self.joints[1].motorSpeed = SPEED_KNEE * np.clip(action[1], -1, 1)
+            self.joints[2].motorSpeed = SPEED_HIP  * np.clip(action[2], -1, 1)
+            self.joints[3].motorSpeed = SPEED_KNEE * np.clip(action[3], -1, 1)
+        else:
+            self.joints[0].motorSpeed     = SPEED_HIP     * np.sign(action[0])
+            self.joints[0].maxMotorTorque = MOTORS_TORQUE * np.clip(np.abs(action[0]), 0, 1)
+            self.joints[1].motorSpeed     = SPEED_KNEE    * np.sign(action[1])
+            self.joints[1].maxMotorTorque = MOTORS_TORQUE * np.clip(np.abs(action[1]), 0, 1)
+            self.joints[2].motorSpeed     = SPEED_HIP     * np.sign(action[2])
+            self.joints[2].maxMotorTorque = MOTORS_TORQUE * np.clip(np.abs(action[2]), 0, 1)
+            self.joints[3].motorSpeed     = SPEED_KNEE    * np.sign(action[3])
+            self.joints[3].maxMotorTorque = MOTORS_TORQUE * np.clip(np.abs(action[3]), 0, 1)
+
+        self.world.Step(1.0/FPS, 6*30, 2*30)
+
+        pos = self.hull.position
+        vel = self.hull.linearVelocity
+
+        class LidarCallback(Box2D.b2.rayCastCallback):
+            def ReportFixture(self, fixture, point, normal, fraction):
+                if (fixture.filterData.categoryBits & 1) == 0:
+                    return 1
+                self.p2 = point
+                self.fraction = fraction
+                return 0
+        self.lidar = [LidarCallback() for _ in range(10)]
+        for i in range(10):
+            self.lidar[i].fraction = 1.0
+            self.lidar[i].p1 = pos
+            self.lidar[i].p2 = (
+                pos[0] + math.sin(1.5*i/10.0)*LIDAR_RANGE,
+                pos[1] - math.cos(1.5*i/10.0)*LIDAR_RANGE)
+            self.world.RayCast(self.lidar[i], self.lidar[i].p1, self.lidar[i].p2)
+
+        state = [
+            self.hull.angle,        # Normal angles up to 0.5 here, but sure more is possible.
+            0.2*self.hull.angularVelocity,
+            vel.x*(VIEWPORT_W/SCALE)/FPS,
+            vel.y*(VIEWPORT_H/SCALE)/FPS,
+            self.joints[0].angle,   # This will give 1.1 on high up, but it's still OK (and there should be spikes on hiting the ground, that's normal too)
+            self.joints[0].speed / SPEED_HIP,
+            self.joints[1].angle + 1.0,
+            self.joints[1].speed / SPEED_KNEE,
+            self.joints[2].angle,
+            self.joints[2].speed / SPEED_HIP,
+            self.joints[3].angle + 1.0,
+            self.joints[3].speed / SPEED_KNEE
+            ]
+        state += [l.fraction for l in self.lidar]
+        # state now holds 12 hull/joint readings followed by the 10 lidar fractions (22 values)
+
+        self.scroll = pos.x - VIEWPORT_W/SCALE/5
+
+
+        shaping  = pos[0]/SCALE       # moving forward is a way to receive reward (up to 2.0 on 1000 rollout time)
+        shaping -= 0.1*abs(state[0])  # keep head straight, other than that and falling, any behavior is unpunished
+        # reward is the change in shaping since the previous step (0 on the first step after a reset)
+
+        reward = 0
+        if self.prev_shaping is not None:
+            reward = shaping - self.prev_shaping
+        self.prev_shaping = shaping
+
+        done = False
+        if self.game_over or pos[0] < 0:
+            done   = True
+            reward = -1
+        return np.array(state), reward, done, {}
+
+    def _render(self, mode='human', close=False):  # draws one frame; returns pixels only for mode 'rgb_array'
+        if close:
+            if self.viewer is not None:
+                self.viewer.close()
+            return
+
+        from gym.envs.classic_control import rendering
+        if self.viewer is None:
+            self.viewer = rendering.Viewer(VIEWPORT_W, VIEWPORT_H)
+        self.viewer.set_bounds(self.scroll, VIEWPORT_W/SCALE + self.scroll, 0, VIEWPORT_H/SCALE)
+
+        self.viewer.draw_polygon( [
+            (self.scroll,                  0),
+            (self.scroll+VIEWPORT_W/SCALE, 0),
+            (self.scroll+VIEWPORT_W/SCALE, VIEWPORT_H/SCALE),
+            (self.scroll,                  VIEWPORT_H/SCALE),
+            ], color=(0.9, 0.9, 1.0) )
+        for poly,x1,x2 in self.cloud_poly:  # clouds are shifted by scroll/2, i.e. they move at half the scroll rate
+            if x2 < self.scroll/2: continue
+            if x1 > self.scroll/2 + VIEWPORT_W/SCALE: continue
+            self.viewer.draw_polygon( [(p[0]+self.scroll/2, p[1]) for p in poly], color=(1,1,1))
+        for poly, color in self.terrain_poly:  # skip terrain strips that lie outside the visible x-range
+            if poly[1][0] < self.scroll: continue
+            if poly[0][0] > self.scroll + VIEWPORT_W/SCALE: continue
+            self.viewer.draw_polygon(poly, color=color)
+
+        if np.random.random() > 0.5:  # on about half of the frames, draw one randomly chosen lidar ray
+            l = np.random.choice(self.lidar)
+            self.viewer.draw_polyline( [l.p1, l.p2], color=(1,0,0), linewidth=1 )
+
+        for obj in self.drawlist:
+            for f in obj.fixtures:
+                trans = f.body.transform
+                if type(f.shape) is circleShape:
+                    t = rendering.Transform(translation=trans*f.shape.pos)
+                    self.viewer.draw_circle(f.shape.radius, 30, color=obj.color1).add_attr(t)
+                    self.viewer.draw_circle(f.shape.radius, 30, color=obj.color2, filled=False, linewidth=2).add_attr(t)
+                else:
+                    path = [trans*v for v in f.shape.vertices]
+                    self.viewer.draw_polygon(path, color=obj.color1)
+                    path.append(path[0])
+                    self.viewer.draw_polyline(path, color=obj.color2, linewidth=2)
+
+        flagy1 = TERRAIN_HEIGHT
+        flagy2 = flagy1 + 50/SCALE
+        x = TERRAIN_STEP*3
+        self.viewer.draw_polyline( [(x, flagy1), (x, flagy2)], color=(0,0,0), linewidth=2 )
+        f = [(x, flagy2), (x, flagy2-10/SCALE), (x+25/SCALE, flagy2-5/SCALE)]
+        self.viewer.draw_polygon(f, color=(0.9,0.2,0) )
+        self.viewer.draw_polyline(f + [f[0]], color=(0,0,0), linewidth=2 )
+
+        self.viewer.render()
+        if mode == 'rgb_array':
+            return self.viewer.get_array()
+        elif mode == 'human':  # '==' compares the value; 'is' only matched when the string happened to be interned
+            pass
+        else:
+            return super(BipedalWalker, self).render(mode=mode)
+
+class BipedalWalkerHardcore(BipedalWalker):
+    hardcore = True  # _reset() passes this flag to _generate_terrain(), which then also picks stumps, stairs and pits
--- a/gym/envs/box2d/cartpole_swingup.py
+++ b/gym/envs/box2d/cartpole_swingup.py
@@ -0,0 +1,178 @@
+import numpy as np
+import gym
+import math
+
+import Box2D
+from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef)
+
+from gym import spaces
+
+FPS   = 50
+SCALE = 120.0  # affects how fast-paced the game is
+FORCE = 1.9    # 1.6 for underpowered setting, will need 3-4 swings to go up, longer episode
+
+CART_WIDTH  = 25 / SCALE
+CART_HEIGHT = 8  / SCALE
+POLE_LENGTH = 60 / SCALE
+POLE_WIDTH  = 2  / SCALE
+
+VIEWPORT_W = 600
+VIEWPORT_H = 400
+
+INITIAL_RANDOM = 10.0
+
+class CartPoleSwingUp(gym.Env):
+    metadata = {
+        'render.modes': ['human', 'rgb_array'],
+        'video.frames_per_second' : FPS
+    }
+
+    def __init__(self):  # creates the world, the static floor body and the spaces, then calls _reset()
+        self.viewer = None
+
+        high = np.array([np.inf, np.inf, np.inf, np.inf, np.inf]) # useful range is -1 .. +1
+        self.action_space = spaces.Discrete(3)  # nop, left, right
+        self.observation_space = spaces.Box(-high, high)  # 5 entries, same length as the state list built in _step()
+
+        self.world = Box2D.b2World()
+        self.state = np.random.uniform(low=-0.05, high=0.05, size=(4,))  # NOTE(review): not read anywhere in the visible code
+        self.cart = None
+        self.pole = None
+
+        self.floor_body = self.world.CreateStaticBody(
+            position = (VIEWPORT_W/SCALE/2, VIEWPORT_H/SCALE/4 - CART_HEIGHT),
+            fixtures = fixtureDef(
+                shape=polygonShape(box=(VIEWPORT_W/SCALE/2, VIEWPORT_H/SCALE/4)),
+                friction=0.1
+                )
+            )
+        self.floor_body.color1 = (0.6,0.9,0.6)
+        self.floor_body.color2 = (0.6,0.9,0.6)
+
+        self.prev_estimate = None
+        self._reset()
+
+    def _destroy(self):
+        # Tear down the previous episode's cart and pole; a no-op until a cart has been created.
+        if self.cart:
+            for body in (self.cart, self.pole):
+                self.world.DestroyBody(body)
+            self.cart = self.pole = None
+            self.joint = None  # joint itself destroyed with bodies
+
+    def _reset(self):  # recreates cart, pole and their joint, then returns the first observation
+        self._destroy()
+
+        self.cart = self.world.CreateDynamicBody(
+            position = (VIEWPORT_W/SCALE/2, VIEWPORT_H/SCALE/2 + CART_HEIGHT/2),
+            #angle=1.1,
+            fixtures = fixtureDef(
+                shape=polygonShape(box=(CART_WIDTH,CART_HEIGHT)),
+                density=5.0,
+                friction=0.1,
+                categoryBits=0x0010,
+                maskBits=0x001,  # collide only with ground
+                restitution=0.3) # 0.99 bouncy
+                )
+        self.cart.color1 = (0.5,0.4,0.9)
+        self.cart.color2 = (0.3,0.3,0.5)
+        self.cart.ApplyForceToCenter( (np.random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), 0), True)  # random horizontal push
+
+        self.pole = self.world.CreateDynamicBody(
+            position = (VIEWPORT_W/SCALE/2, VIEWPORT_H/SCALE/2 + CART_HEIGHT/2 - POLE_LENGTH),
+            angle = (0.0),
+            fixtures = [
+                fixtureDef(
+                    shape=polygonShape(box=(POLE_WIDTH,POLE_LENGTH)),
+                    density=1.0,
+                    categoryBits=0x0020,
+                    maskBits=0x000), # don't collide at all
+                fixtureDef(
+                    shape=circleShape(radius=POLE_WIDTH*2, pos=(0,-POLE_LENGTH)),
+                    density=1.0,
+                    categoryBits=0x0020,
+                    maskBits=0x000)],
+                )
+        self.pole.color1 = (1.0,0,0.0)
+        self.pole.color2 = (0.6,0,0.0)
+
+        rjd = revoluteJointDef(
+            bodyA=self.cart,
+            bodyB=self.pole,
+            localAnchorA=(0, 0),
+            localAnchorB=(0, POLE_LENGTH)
+            )
+        self.joint = self.world.CreateJoint(rjd)
+
+        self.drawlist = [self.floor_body, self.cart, self.pole]  # iterated in this order by _render()
+
+        return self._step(0)[0]  # one no-op step (advances physics a frame) yields the observation
+
+    def _step(self, action):  # action: 0 nop, 1 push left, 2 push right; returns (state, reward, done, info)
+        assert action==0 or action==1 or action==2, "%r (%s) invalid " % (action,type(action))
+
+        if action != 0:
+            self.cart.ApplyForceToCenter((-FORCE if action==1 else +FORCE, 0), True)
+
+        self.world.Step(1.0/FPS, 6*30, 2*30)
+
+        pos = self.cart.position
+        vel = self.cart.linearVelocity
+        state = [
+            (pos.x - VIEWPORT_W/SCALE/2) / (VIEWPORT_W/SCALE/2),
+            vel.x*(VIEWPORT_W/SCALE/2)/FPS,
+            math.sin( self.pole.angle ),
+            math.cos( self.pole.angle ),
+            0.2*self.pole.angularVelocity
+            ]
+
+        estimate  = -state[3]        # state[3] is -1 when the pole is up, so it's +1.0 in up position
+        estimate -= abs(state[0])    # reduced by offset from center
+
+        potential = False   # potential is easier to train
+        if potential:
+            # total reward received will be 2.0: from -1.0 (downward in center) to +1.0 (upward in center)
+            reward = 0
+            if self.prev_estimate is not None:
+                reward = estimate - self.prev_estimate
+            self.prev_estimate = estimate
+        else:
+            reward = estimate
+
+        done = abs(state[0]) >= 1.0
+        # reward was chosen above; overwriting it with `estimate` here would make the `potential` switch dead code
+        if done: reward = -1.0
+        return np.array(state), reward, done, {}
+
+    def _render(self, mode='human', close=False):  # draws one frame; returns pixels only for mode 'rgb_array'
+        if close:
+            if self.viewer is not None:
+                self.viewer.close()
+            return
+
+        from gym.envs.classic_control import rendering
+        if self.viewer is None:
+            self.viewer = rendering.Viewer(VIEWPORT_W, VIEWPORT_H)
+            self.viewer.set_bounds(0, VIEWPORT_W/SCALE, 0, VIEWPORT_H/SCALE)
+
+        for obj in self.drawlist:
+            for f in obj.fixtures:
+                trans = f.body.transform
+                if type(f.shape) is circleShape:
+                    t = rendering.Transform(translation=trans*f.shape.pos)
+                    self.viewer.draw_circle(f.shape.radius, 30, color=obj.color1).add_attr(t)
+                    self.viewer.draw_circle(f.shape.radius, 30, color=obj.color2, filled=False, linewidth=2).add_attr(t)
+                else:
+                    path = [trans*v for v in f.shape.vertices]
+                    self.viewer.draw_polygon(path, color=obj.color1)
+                    path.append(path[0])
+                    self.viewer.draw_polyline(path, color=obj.color2, linewidth=2)
+
+        self.viewer.render()
+        if mode == 'rgb_array':
+            return self.viewer.get_array()
+        elif mode == 'human':  # '==' compares the value; 'is' only matched when the string happened to be interned
+            pass
+        else:
+            return super(CartPoleSwingUp, self).render(mode=mode)
+
--- a/gym/envs/box2d/lunar_lander.py
+++ b/gym/envs/box2d/lunar_lander.py
@@ -0,0 +1,285 @@
+import sys, math
+import numpy as np
+
+import Box2D
+from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener)
+
+import gym
+from gym import spaces
+
+FPS    = 50     # the world is stepped by 1.0/FPS per _step; also reported as video.frames_per_second
+SCALE  = 30.0   # affects how fast-paced the game is, forces should be adjusted as well; viewport and shape sizes are divided by it
+
+MAIN_ENGINE_POWER  = 13.0   # multiplier of the impulse applied when action 2 (main engine) fires
+SIDE_ENGINE_POWER  =  0.6   # multiplier of the impulse applied when action 1 or 3 (side engine) fires
+
+INITIAL_RANDOM = 500.0   # bound of the uniform random force pushed onto the lander in _reset
+
+LANDER_POLY =[   # lander hull vertices; each coordinate is divided by SCALE when the body is built
+    (-14,+17), (-17,0), (-17,-10),
+    (+17,-10), (+17,0), (+14,+17)
+    ]
+LEG_AWAY = 20   # horizontal leg offset (divided by SCALE) used for the leg position and joint anchor
+LEG_DOWN = 18   # vertical leg offset (divided by SCALE) used for the joint anchor and the y observation
+LEG_W, LEG_H = 2, 8   # passed (divided by SCALE) as the box dimensions of each leg's polygonShape
+LEG_SPRING_TORQUE = 40   # maxMotorTorque of the revolute joint holding each leg
+
+SIDE_ENGINE_HEIGHT = 14.0   # offset along the lander's tip axis where the side-engine impulse is applied
+SIDE_ENGINE_AWAY   = 12.0   # sideways offset of the side-engine exhaust, signed by the firing direction
+
+VIEWPORT_W = 600   # viewer width handed to rendering.Viewer
+VIEWPORT_H = 400   # viewer height handed to rendering.Viewer
+
+class ContactDetector(contactListener):
+    def __init__(self, env):
+            contactListener.__init__(self)
+            self.env = env
+    def BeginContact(self, contact):
+        if self.env.lander==contact.fixtureA.body or self.env.lander==contact.fixtureB.body:
+            self.env.game_over = True
+
+class LunarLander(gym.Env):
+    metadata = {
+        'render.modes': ['human', 'rgb_array'],
+        'video.frames_per_second' : FPS
+    }
+
+    def __init__(self):
+        self.viewer = None
+
+        high = np.array([np.inf, np.inf, np.inf, np.inf, np.inf]) # useful range is -1 .. +1
+        self.action_space = spaces.Discrete(4)                    # nop, fire left engine, main engine, right engine
+        self.observation_space = spaces.Box(-high, high)
+
+        self.world = Box2D.b2World(contactListener=ContactDetector(self))
+        self.moon = None
+        self.lander = None
+        self.particles = []
+
+        self.prev_reward = None
+        self._reset()
+
+    def _destroy(self):
+        if not self.moon: return
+        self._clean_particles(True)
+        self.world.DestroyBody(self.moon)
+        self.moon = None
+        self.world.DestroyBody(self.lander)
+        self.lander = None
+        self.world.DestroyBody(self.legs[0])
+        self.world.DestroyBody(self.legs[1])
+
+    def _reset(self):
+        self._destroy()
+        self.game_over = False
+        self.prev_shaping = None
+
+        W = VIEWPORT_W/SCALE
+        H = VIEWPORT_H/SCALE
+
+        # terrain
+        CHUNKS = 11
+        height = np.random.uniform(0, H/2, size=(CHUNKS+1,) )
+        chunk_x  = [W/(CHUNKS-1)*i for i in xrange(CHUNKS)]
+        self.helipad_x1 = chunk_x[CHUNKS//2-1]
+        self.helipad_x2 = chunk_x[CHUNKS//2+1]
+        self.helipad_y  = H/4
+        height[CHUNKS//2-2] = self.helipad_y
+        height[CHUNKS//2-1] = self.helipad_y
+        height[CHUNKS//2+0] = self.helipad_y
+        height[CHUNKS//2+1] = self.helipad_y
+        height[CHUNKS//2+2] = self.helipad_y
+        smooth_y = [0.33*(height[i-1] + height[i+0] + height[i+1]) for i in xrange(CHUNKS)]
+
+        self.moon = self.world.CreateStaticBody( shapes=edgeShape(vertices=[(0, 0), (W, 0)]) )
+        self.sky_polys = []
+        for i in xrange(CHUNKS-1):
+            p1 = (chunk_x[i],   smooth_y[i])
+            p2 = (chunk_x[i+1], smooth_y[i+1])
+            self.moon.CreateEdgeFixture(
+                vertices=[p1,p2],
+                density=0,
+                friction=0.1)
+            self.sky_polys.append( [p1, p2, (p2[0],H), (p1[0],H)] )
+
+        self.moon.color1 = (0.0,0.0,0.0)
+        self.moon.color2 = (0.0,0.0,0.0)
+
+        initial_y = VIEWPORT_H/SCALE
+        self.lander = self.world.CreateDynamicBody(
+            position = (VIEWPORT_W/SCALE/2, initial_y),
+            angle=0.0,
+            fixtures = fixtureDef(
+                shape=polygonShape(vertices=[ (x/SCALE,y/SCALE) for x,y in LANDER_POLY ]),
+                density=5.0,
+                friction=0.1,
+                categoryBits=0x0010,
+                maskBits=0x001,  # collide only with ground
+                restitution=0.0) # 0.99 bouncy
+                )
+        self.lander.color1 = (0.5,0.4,0.9)
+        self.lander.color2 = (0.3,0.3,0.5)
+        self.lander.ApplyForceToCenter( (
+            np.random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM),
+            np.random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM)
+            ), True)
+
+        self.legs = []
+        for i in [-1,+1]:
+            leg = self.world.CreateDynamicBody(
+                position = (VIEWPORT_W/SCALE/2 - i*LEG_AWAY/SCALE, initial_y),
+                angle = (i*0.05),
+                fixtures = fixtureDef(
+                    shape=polygonShape(box=(LEG_W/SCALE, LEG_H/SCALE)),
+                    density=1.0,
+                    restitution=0.0,
+                    categoryBits=0x0020,
+                    maskBits=0x001)
+                )
+            leg.color1 = (0.5,0.4,0.9)
+            leg.color2 = (0.3,0.3,0.5)
+            rjd = revoluteJointDef(
+                bodyA=self.lander,
+                bodyB=leg,
+                localAnchorA=(0, 0),
+                localAnchorB=(i*LEG_AWAY/SCALE, LEG_DOWN/SCALE),
+                enableMotor=True,
+                enableLimit=True,
+                maxMotorTorque=LEG_SPRING_TORQUE,
+                motorSpeed=+0.3*i  # low enough not to jump back into the sky
+                )
+            if i==-1:
+                rjd.lowerAngle = +0.9 - 0.5  # Yes, the most esoteric numbers here, angles legs have freedom to travel within
+                rjd.upperAngle = +0.9
+            else:
+                rjd.lowerAngle = -0.9
+                rjd.upperAngle = -0.9 + 0.5
+            leg.joint = self.world.CreateJoint(rjd)
+            self.legs.append(leg)
+
+        self.drawlist = [self.lander] + self.legs
+
+        return self._step(0)[0]
+
+    def _create_particle(self, mass, x, y):
+        p = self.world.CreateDynamicBody(
+            position = (x,y),
+            angle=0.0,
+            fixtures = fixtureDef(
+                shape=circleShape(radius=2/SCALE, pos=(0,0)),
+                density=mass,
+                friction=0.1,
+                categoryBits=0x0100,
+                maskBits=0x001,  # collide only with ground
+                restitution=0.9)
+                )
+        p.ttl = 1
+        self.particles.append(p)
+        self._clean_particles(False)
+        return p
+
+    def _clean_particles(self, all):
+        while self.particles and (all or self.particles[0].ttl<0):
+            self.world.DestroyBody(self.particles.pop(0))
+
+    def _step(self, action):
+        assert action in [0,1,2,3], "%r (%s) invalid " % (action,type(action))
+
+        # Engines
+        tip  = (math.sin(self.lander.angle), math.cos(self.lander.angle))
+        side = (-tip[1], tip[0]);
+        dispersion = [np.random.uniform(-1.0, +1.0) / SCALE for _ in xrange(2)]
+        if action==2: # Main engine
+            ox =  tip[0]*(4/SCALE + 2*dispersion[0]) + side[0]*dispersion[1]   # 4 is move a bit downwards, +-2 for randomness
+            oy = -tip[1]*(4/SCALE + 2*dispersion[0]) - side[1]*dispersion[1]
+            impulse_pos = (self.lander.position[0] + ox, self.lander.position[1] + oy)
+            p = self._create_particle(3.5, *impulse_pos)    # particles are just a decoration, 3.5 is here to make particle speed adequate
+            p.ApplyLinearImpulse(           ( ox*MAIN_ENGINE_POWER,  oy*MAIN_ENGINE_POWER), impulse_pos, True)
+            self.lander.ApplyLinearImpulse( (-ox*MAIN_ENGINE_POWER, -oy*MAIN_ENGINE_POWER), impulse_pos, True)
+
+        if action==1 or action==3: # Orientation engines
+            direction = action-2
+            ox =  tip[0]*dispersion[0] + side[0]*(3*dispersion[1]+direction*SIDE_ENGINE_AWAY/SCALE)
+            oy = -tip[1]*dispersion[0] - side[1]*(3*dispersion[1]+direction*SIDE_ENGINE_AWAY/SCALE)
+            impulse_pos = (self.lander.position[0] + ox - tip[0]*17/SCALE, self.lander.position[1] + oy + tip[1]*SIDE_ENGINE_HEIGHT/SCALE)
+            p = self._create_particle(0.7, *impulse_pos)
+            p.ApplyLinearImpulse(           ( ox*SIDE_ENGINE_POWER,  oy*SIDE_ENGINE_POWER), impulse_pos, True)
+            self.lander.ApplyLinearImpulse( (-ox*SIDE_ENGINE_POWER, -oy*SIDE_ENGINE_POWER), impulse_pos, True)
+
+        self.world.Step(1.0/FPS, 6*30, 2*30)
+
+        pos = self.lander.position
+        vel = self.lander.linearVelocity
+        state = [
+            (pos.x - VIEWPORT_W/SCALE/2) / (VIEWPORT_W/SCALE/2),
+            (pos.y - (self.helipad_y+LEG_DOWN/SCALE)) / (VIEWPORT_W/SCALE/2),
+            vel.x*(VIEWPORT_W/SCALE/2)/FPS,
+            vel.y*(VIEWPORT_H/SCALE/2)/FPS,
+            0.2*self.lander.angularVelocity
+            ]
+        #print np.array(state)
+
+        reward = 0
+        shaping = - abs(state[0]) - abs(state[1]) - abs(state[2]) - abs(state[3]) - abs(state[4])
+        #print "shaping", shaping
+        if self.prev_shaping is not None:
+            reward = shaping - self.prev_shaping
+        self.prev_shaping = shaping
+
+        done = False
+        if self.game_over or abs(state[0]) >= 1.0:
+            done   = True
+            reward = -1
+        if not self.lander.awake:
+            done   = True
+            reward = +1
+        #print "REWARD", reward
+        return np.array(state), reward, done, {}
+
+    def _render(self, mode='human', close=False):
+        if close:
+            if self.viewer is not None:
+                self.viewer.close()
+            return
+
+        from gym.envs.classic_control import rendering
+        if self.viewer is None:
+            self.viewer = rendering.Viewer(VIEWPORT_W, VIEWPORT_H)
+            self.viewer.set_bounds(0, VIEWPORT_W/SCALE, 0, VIEWPORT_H/SCALE)
+
+        for obj in self.particles:
+            obj.ttl -= 0.05
+            obj.color1 = (max(0.2,obj.ttl), 0.2, 0.2)
+            obj.color2 = (max(0.2,obj.ttl), 0.2, 0.2)
+
+        self._clean_particles(False)
+
+        for p in self.sky_polys:
+            self.viewer.draw_polygon(p, color=(0,0,0))
+
+        for obj in self.particles + self.drawlist:
+            for f in obj.fixtures:
+                trans = f.body.transform
+                if type(f.shape) is circleShape:
+                    t = rendering.Transform(translation=trans*f.shape.pos)
+                    self.viewer.draw_circle(f.shape.radius, 30, color=obj.color1).add_attr(t)
+                    self.viewer.draw_circle(f.shape.radius, 30, color=obj.color2, filled=False, linewidth=2).add_attr(t)
+                else:
+                    path = [trans*v for v in f.shape.vertices]
+                    self.viewer.draw_polygon(path, color=obj.color1)
+                    path.append(path[0])
+                    self.viewer.draw_polyline(path, color=obj.color2, linewidth=2)
+
+        for x in [self.helipad_x1, self.helipad_x2]:
+            flagy1 = self.helipad_y
+            flagy2 = flagy1 + 50/SCALE
+            self.viewer.draw_polyline( [(x, flagy1), (x, flagy2)], color=(1,1,1) )
+            self.viewer.draw_polygon( [(x, flagy2), (x, flagy2-10/SCALE), (x+25/SCALE, flagy2-5/SCALE)], color=(0.8,0.8,0) )
+
+        self.viewer.render()
+        if mode == 'rgb_array':
+            return self.viewer.get_array()
+        elif mode is 'human':
+            pass
+        else:
+            return super(LunarLander, self).render(mode=mode)