Files
Gymnasium/gym/envs/box2d/bipedal_walker.py
2016-05-15 17:22:38 -07:00

445 lines
17 KiB
Python

import sys
import numpy as np
from six.moves import xrange
import gym
import math
import Box2D
from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener)
from gym import spaces
import numpy as np
FPS = 50
SCALE = 30.0 # affects how fast-paced the game is, forces should be adjusted as well
MOTORS_TORQUE = 40
SPEED_HIP = 2
SPEED_KNEE = 3
LIDAR_RANGE = 140/SCALE
INITIAL_RANDOM = 5
HULL_POLY =[
(-30,+9), (+6,+9), (+34,+1),
(+34,-8), (-30,-8)
]
LEG_DOWN = -8/SCALE
LEG_W, LEG_H = 8/SCALE, 34/SCALE
VIEWPORT_W = 600
VIEWPORT_H = 400
TERRAIN_STEP = 14/SCALE
TERRAIN_LENGTH = 200 # in steps
TERRAIN_HEIGHT = VIEWPORT_H/SCALE/4
TERRAIN_GRASS = 10 # low long are grass spots, in steps
TERRAIN_STARTPAD = 20 # in steps
class ContactDetector(contactListener):
def __init__(self, env):
contactListener.__init__(self)
self.env = env
def BeginContact(self, contact):
if self.env.hull==contact.fixtureA.body or self.env.hull==contact.fixtureB.body:
self.env.game_over = True
class BipedalWalker(gym.Env):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second' : FPS
}
hardcore = False
def __init__(self):
self.viewer = None
high = np.array([np.inf]*22)
self.action_space = spaces.Box( np.array([-1,-1,-1,-1]), np.array([+1,+1,+1,+1]) )
self.observation_space = spaces.Box(-high, high)
self.world = Box2D.b2World(contactListener=ContactDetector(self))
self.terrain = None
self.hull = None
self.prev_shaping = None
self._reset()
def _destroy(self):
if not self.terrain: return
for t in self.terrain:
self.world.DestroyBody(t)
self.terrain = []
self.world.DestroyBody(self.hull)
self.hull = None
for leg in self.legs:
self.world.DestroyBody(leg)
self.legs = []
self.joints = []
def _generate_terrain(self, hardcore):
GRASS, STUMP, STAIRS, PIT, _STATES_ = xrange(5)
state = GRASS
velocity = 0.0
y = TERRAIN_HEIGHT
counter = TERRAIN_STARTPAD
oneshot = False
self.terrain = []
self.terrain_x = []
self.terrain_y = []
for i in xrange(TERRAIN_LENGTH):
x = i*TERRAIN_STEP
self.terrain_x.append(x)
if state==GRASS and not oneshot:
velocity = 0.8*velocity + 0.01*np.sign(TERRAIN_HEIGHT - y)
if i > TERRAIN_STARTPAD: velocity += np.random.uniform(-1, 1)/SCALE #1
y += velocity
elif state==PIT and oneshot:
counter = np.random.randint(3, 5)
poly = [
(x, y),
(x+TERRAIN_STEP, y),
(x+TERRAIN_STEP, y-4*TERRAIN_STEP),
(x, y-4*TERRAIN_STEP),
]
t = self.world.CreateStaticBody(
fixtures = fixtureDef(
shape=polygonShape(vertices=poly),
friction = 0.1
))
t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
self.terrain.append(t)
t = self.world.CreateStaticBody(
fixtures = fixtureDef(
shape=polygonShape(vertices=[(p[0]+TERRAIN_STEP*counter,p[1]) for p in poly]),
friction = 0.1
))
t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
self.terrain.append(t)
counter += 2
original_y = y
elif state==PIT and not oneshot:
y = original_y
if counter > 1:
y -= 4*TERRAIN_STEP
elif state==STUMP and oneshot:
counter = np.random.randint(1, 3)
poly = [
(x, y),
(x+counter*TERRAIN_STEP, y),
(x+counter*TERRAIN_STEP, y+counter*TERRAIN_STEP),
(x, y+counter*TERRAIN_STEP),
]
t = self.world.CreateStaticBody(
fixtures = fixtureDef(
shape=polygonShape(vertices=poly),
friction = 0.1
))
t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
self.terrain.append(t)
elif state==STAIRS and oneshot:
stair_height = +1 if np.random.ranf() > 0.5 else -1
stair_width = np.random.randint(4, 5)
stair_steps = np.random.randint(3, 5)
original_y = y
for s in xrange(stair_steps):
poly = [
(x+( s*stair_width)*TERRAIN_STEP, y+( s*stair_height)*TERRAIN_STEP),
(x+((1+s)*stair_width)*TERRAIN_STEP, y+( s*stair_height)*TERRAIN_STEP),
(x+((1+s)*stair_width)*TERRAIN_STEP, y+(-1+s*stair_height)*TERRAIN_STEP),
(x+( s*stair_width)*TERRAIN_STEP, y+(-1+s*stair_height)*TERRAIN_STEP),
]
t = self.world.CreateStaticBody(
fixtures = fixtureDef(
shape=polygonShape(vertices=poly),
friction = 0.1
))
t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
self.terrain.append(t)
counter = stair_steps*stair_width
elif state==STAIRS and not oneshot:
s = stair_steps*stair_width - counter - stair_height
n = s/stair_width
y = original_y + (n*stair_height)*TERRAIN_STEP
oneshot = False
self.terrain_y.append(y)
counter -= 1
if counter==0:
counter = np.random.randint(TERRAIN_GRASS/2, TERRAIN_GRASS)
if state==GRASS and hardcore:
state = np.random.randint(1, _STATES_)
oneshot = True
else:
state = GRASS
oneshot = True
self.terrain_poly = []
for i in xrange(TERRAIN_LENGTH-1):
poly = [
(self.terrain_x[i], self.terrain_y[i]),
(self.terrain_x[i+1], self.terrain_y[i+1])
]
t = self.world.CreateStaticBody(
fixtures = fixtureDef(
shape=edgeShape(vertices=poly),
friction = 0.1,
categoryBits=0x0001,
))
color = (0.3, 1.0 if i%2==0 else 0.8, 0.3)
t.color1 = color
t.color2 = color
self.terrain.append(t)
color = (0.4, 0.6, 0.3)
poly += [ (poly[1][0], 0), (poly[0][0], 0) ]
self.terrain_poly.append( (poly, color) )
self.terrain.reverse()
def _generate_clouds(self):
# Sorry for the clouds, couldn't resist
self.cloud_poly = []
for i in xrange(TERRAIN_LENGTH//20):
x = np.random.uniform(0, TERRAIN_LENGTH)*TERRAIN_STEP
y = VIEWPORT_H/SCALE*3/4
poly = [
(x+15*TERRAIN_STEP*math.sin(3.14*2*a/5)+np.random.uniform(0,5*TERRAIN_STEP),
y+ 5*TERRAIN_STEP*math.cos(3.14*2*a/5)+np.random.uniform(0,5*TERRAIN_STEP) )
for a in xrange(5) ]
x1 = min( [p[0] for p in poly] )
x2 = max( [p[0] for p in poly] )
self.cloud_poly.append( (poly,x1,x2) )
def _reset(self):
self._destroy()
self.game_over = False
self.prev_shaping = None
self.scroll = 0.0
W = VIEWPORT_W/SCALE
H = VIEWPORT_H/SCALE
self._generate_terrain(self.hardcore)
self._generate_clouds()
init_x = TERRAIN_STEP*TERRAIN_STARTPAD/2
init_y = TERRAIN_HEIGHT+2*LEG_H
self.hull = self.world.CreateDynamicBody(
position = (init_x, init_y),
fixtures = fixtureDef(
shape=polygonShape(vertices=[ (x/SCALE,y/SCALE) for x,y in HULL_POLY ]),
density=5.0,
friction=0.1,
categoryBits=0x0020,
maskBits=0x001, # collide only with ground
restitution=0.0) # 0.99 bouncy
)
self.hull.color1 = (0.5,0.4,0.9)
self.hull.color2 = (0.3,0.3,0.5)
self.hull.ApplyForceToCenter((np.random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), 0), True)
self.legs = []
self.joints = []
for i in [-1,+1]:
leg = self.world.CreateDynamicBody(
position = (init_x, init_y - LEG_H/2 - LEG_DOWN),
angle = (i*0.05),
fixtures = fixtureDef(
shape=polygonShape(box=(LEG_W/2, LEG_H/2)),
density=1.0,
restitution=0.0,
categoryBits=0x0020,
maskBits=0x001)
)
leg.color1 = (0.6-i/10., 0.3-i/10., 0.5-i/10.)
leg.color2 = (0.4-i/10., 0.2-i/10., 0.3-i/10.)
rjd = revoluteJointDef(
bodyA=self.hull,
bodyB=leg,
localAnchorA=(0, LEG_DOWN),
localAnchorB=(0, LEG_H/2),
enableMotor=True,
enableLimit=True,
maxMotorTorque=MOTORS_TORQUE,
motorSpeed = i,
lowerAngle = -0.8,
upperAngle = 1.1,
)
self.legs.append(leg)
self.joints.append(self.world.CreateJoint(rjd))
lower = self.world.CreateDynamicBody(
position = (init_x, init_y - LEG_H*3/2 - LEG_DOWN),
angle = (i*0.05),
fixtures = fixtureDef(
shape=polygonShape(box=(0.8*LEG_W/2, LEG_H/2)),
density=1.0,
restitution=0.0,
categoryBits=0x0020,
maskBits=0x001)
)
lower.color1 = (0.6-i/10., 0.3-i/10., 0.5-i/10.)
lower.color2 = (0.4-i/10., 0.2-i/10., 0.3-i/10.)
rjd = revoluteJointDef(
bodyA=leg,
bodyB=lower,
localAnchorA=(0, -LEG_H/2),
localAnchorB=(0, LEG_H/2),
enableMotor=True,
enableLimit=True,
maxMotorTorque=MOTORS_TORQUE,
motorSpeed = 1,
lowerAngle = -1.6,
upperAngle = -0.1,
)
self.legs.append(lower)
self.joints.append(self.world.CreateJoint(rjd))
self.drawlist = self.terrain + self.legs + [self.hull]
return self._step(np.array([0,0,0,0]))[0]
def _step(self, action):
#self.hull.ApplyForceToCenter((0, 20), True) -- Uncomment this to receive a bit of stability help
control_speed = False # Should be easier as well
if control_speed:
self.joints[0].motorSpeed = float(SPEED_HIP * np.clip(-1, 1, action[0]))
self.joints[1].motorSpeed = float(SPEED_KNEE * np.clip(-1, 1, action[1]))
self.joints[2].motorSpeed = float(SPEED_HIP * np.clip(-1, 1, action[2]))
self.joints[3].motorSpeed = float(SPEED_KNEE * np.clip(-1, 1, action[3]))
else:
self.joints[0].motorSpeed = float(SPEED_HIP * np.sign(action[0]))
self.joints[0].maxMotorTorque = float(MOTORS_TORQUE * np.clip(np.abs(action[0]), 0, 1))
self.joints[1].motorSpeed = float(SPEED_KNEE * np.sign(action[1]))
self.joints[1].maxMotorTorque = float(MOTORS_TORQUE * np.clip(np.abs(action[1]), 0, 1))
self.joints[2].motorSpeed = float(SPEED_HIP * np.sign(action[2]))
self.joints[2].maxMotorTorque = float(MOTORS_TORQUE * np.clip(np.abs(action[2]), 0, 1))
self.joints[3].motorSpeed = float(SPEED_KNEE * np.sign(action[3]))
self.joints[3].maxMotorTorque = float(MOTORS_TORQUE * np.clip(np.abs(action[3]), 0, 1))
self.world.Step(1.0/FPS, 6*30, 2*30)
pos = self.hull.position
vel = self.hull.linearVelocity
class LidarCallback(Box2D.b2.rayCastCallback):
def ReportFixture(self, fixture, point, normal, fraction):
if (fixture.filterData.categoryBits & 1) == 0:
return 1
self.p2 = point
self.fraction = fraction
return 0
self.lidar = [LidarCallback() for _ in xrange(10)]
for i in xrange(10):
self.lidar[i].fraction = 1.0
self.lidar[i].p1 = pos
self.lidar[i].p2 = (
pos[0] + math.sin(1.5*i/10.0)*LIDAR_RANGE,
pos[1] - math.cos(1.5*i/10.0)*LIDAR_RANGE)
self.world.RayCast(self.lidar[i], self.lidar[i].p1, self.lidar[i].p2)
state = [
self.hull.angle, # Normal angles up to 0.5 here, but sure more is possible.
0.2*self.hull.angularVelocity,
vel.x*(VIEWPORT_W/SCALE)/FPS,
vel.y*(VIEWPORT_H/SCALE)/FPS,
self.joints[0].angle, # This will give 1.1 on high up, but it's still OK (and there should be spikes on hiting the ground, that's normal too)
self.joints[0].speed / SPEED_HIP,
self.joints[1].angle + 1.0,
self.joints[1].speed / SPEED_KNEE,
self.joints[2].angle,
self.joints[2].speed / SPEED_HIP,
self.joints[3].angle + 1.0,
self.joints[3].speed / SPEED_KNEE
]
state += [l.fraction for l in self.lidar]
#print " ".join( ["%+0.2f" % x for x in state] )
self.scroll = pos.x - VIEWPORT_W/SCALE/5
shaping = pos[0]/SCALE # moving forward is a way to receive reward (up to 2.0 on 1000 rollout time)
shaping -= 0.1*abs(state[0]) # keep head straight, other than that and falling, any behavior is unpunished
#print "shaping", shaping
reward = 0
if self.prev_shaping is not None:
reward = shaping - self.prev_shaping
self.prev_shaping = shaping
done = False
if self.game_over or pos[0] < 0:
done = True
reward = -1
return np.array(state), reward, done, {}
def _render(self, mode='human', close=False):
if close:
if self.viewer is not None:
self.viewer.close()
self.viewer = None
return
from gym.envs.classic_control import rendering
if self.viewer is None:
self.viewer = rendering.Viewer(VIEWPORT_W, VIEWPORT_H)
self.viewer.set_bounds(self.scroll, VIEWPORT_W/SCALE + self.scroll, 0, VIEWPORT_H/SCALE)
self.viewer.draw_polygon( [
(self.scroll, 0),
(self.scroll+VIEWPORT_W/SCALE, 0),
(self.scroll+VIEWPORT_W/SCALE, VIEWPORT_H/SCALE),
(self.scroll, VIEWPORT_H/SCALE),
], color=(0.9, 0.9, 1.0) )
for poly,x1,x2 in self.cloud_poly:
if x2 < self.scroll/2: continue
if x1 > self.scroll/2 + VIEWPORT_W/SCALE: continue
self.viewer.draw_polygon( [(p[0]+self.scroll/2, p[1]) for p in poly], color=(1,1,1))
for poly, color in self.terrain_poly:
if poly[1][0] < self.scroll: continue
if poly[0][0] > self.scroll + VIEWPORT_W/SCALE: continue
self.viewer.draw_polygon(poly, color=color)
if np.random.random() > 0.5:
l = np.random.choice(self.lidar)
self.viewer.draw_polyline( [l.p1, l.p2], color=(1,0,0), linewidth=1 )
for obj in self.drawlist:
for f in obj.fixtures:
trans = f.body.transform
if type(f.shape) is circleShape:
t = rendering.Transform(translation=trans*f.shape.pos)
self.viewer.draw_circle(f.shape.radius, 30, color=obj.color1).add_attr(t)
self.viewer.draw_circle(f.shape.radius, 30, color=obj.color2, filled=False, linewidth=2).add_attr(t)
else:
path = [trans*v for v in f.shape.vertices]
self.viewer.draw_polygon(path, color=obj.color1)
path.append(path[0])
self.viewer.draw_polyline(path, color=obj.color2, linewidth=2)
flagy1 = TERRAIN_HEIGHT
flagy2 = flagy1 + 50/SCALE
x = TERRAIN_STEP*3
self.viewer.draw_polyline( [(x, flagy1), (x, flagy2)], color=(0,0,0), linewidth=2 )
f = [(x, flagy2), (x, flagy2-10/SCALE), (x+25/SCALE, flagy2-5/SCALE)]
self.viewer.draw_polygon(f, color=(0.9,0.2,0) )
self.viewer.draw_polyline(f + [f[0]], color=(0,0,0), linewidth=2 )
self.viewer.render()
if mode == 'rgb_array':
return self.viewer.get_array()
elif mode is 'human':
pass
else:
return super(BipedalWalker, self).render(mode=mode)
class BipedalWalkerHardcore(BipedalWalker):
hardcore = True