Files
Gymnasium/gym/envs/box2d/bipedal_walker.py

443 lines
17 KiB
Python
Raw Normal View History

2016-05-03 22:27:42 +03:00
import sys
import numpy as np
import gym
import math
import Box2D
from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener)
from gym import spaces
import numpy as np
FPS = 50
SCALE = 30.0 # affects how fast-paced the game is, forces should be adjusted as well
MOTORS_TORQUE = 40
SPEED_HIP = 2
SPEED_KNEE = 3
LIDAR_RANGE = 140/SCALE
INITIAL_RANDOM = 5
HULL_POLY =[
(-30,+9), (+6,+9), (+34,+1),
(+34,-8), (-30,-8)
]
LEG_DOWN = -8/SCALE
LEG_W, LEG_H = 8/SCALE, 34/SCALE
VIEWPORT_W = 600
VIEWPORT_H = 400
TERRAIN_STEP = 14/SCALE
TERRAIN_LENGTH = 200 # in steps
TERRAIN_HEIGHT = VIEWPORT_H/SCALE/4
TERRAIN_GRASS = 10 # low long are grass spots, in steps
TERRAIN_STARTPAD = 20 # in steps
class ContactDetector(contactListener):
def __init__(self, env):
contactListener.__init__(self)
self.env = env
def BeginContact(self, contact):
if self.env.hull==contact.fixtureA.body or self.env.hull==contact.fixtureB.body:
self.env.game_over = True
class BipedalWalker(gym.Env):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second' : FPS
}
hardcore = False
def __init__(self):
self.viewer = None
high = np.array([np.inf, np.inf, np.inf, np.inf, np.inf])
self.action_space = spaces.Box( np.array([-1,-1,-1,-1]), np.array([+1,+1,+1,+1]) )
self.observation_space = spaces.Box(-high, high)
self.world = Box2D.b2World(contactListener=ContactDetector(self))
self.terrain = None
self.hull = None
self.prev_shaping = None
self._reset()
def _destroy(self):
if not self.terrain: return
for t in self.terrain:
self.world.DestroyBody(t)
self.terrain = []
self.world.DestroyBody(self.hull)
self.hull = None
for leg in self.legs:
self.world.DestroyBody(leg)
self.legs = []
self.joints = []
def _generate_terrain(self, hardcore):
GRASS, STUMP, STAIRS, PIT, _STATES_ = xrange(5)
state = GRASS
velocity = 0.0
y = TERRAIN_HEIGHT
counter = TERRAIN_STARTPAD
oneshot = False
self.terrain = []
self.terrain_x = []
self.terrain_y = []
for i in xrange(TERRAIN_LENGTH):
x = i*TERRAIN_STEP
self.terrain_x.append(x)
if state==GRASS and not oneshot:
velocity = 0.8*velocity + 0.01*np.sign(TERRAIN_HEIGHT - y)
if i > TERRAIN_STARTPAD: velocity += np.random.uniform(-1, 1)/SCALE #1
y += velocity
elif state==PIT and oneshot:
counter = np.random.randint(3, 5)
poly = [
(x, y),
(x+TERRAIN_STEP, y),
(x+TERRAIN_STEP, y-4*TERRAIN_STEP),
(x, y-4*TERRAIN_STEP),
]
t = self.world.CreateStaticBody(
fixtures = fixtureDef(
shape=polygonShape(vertices=poly),
friction = 0.1
))
t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
self.terrain.append(t)
t = self.world.CreateStaticBody(
fixtures = fixtureDef(
shape=polygonShape(vertices=[(p[0]+TERRAIN_STEP*counter,p[1]) for p in poly]),
friction = 0.1
))
t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
self.terrain.append(t)
counter += 2
original_y = y
elif state==PIT and not oneshot:
y = original_y
if counter > 1:
y -= 4*TERRAIN_STEP
elif state==STUMP and oneshot:
counter = np.random.randint(1, 3)
poly = [
(x, y),
(x+counter*TERRAIN_STEP, y),
(x+counter*TERRAIN_STEP, y+counter*TERRAIN_STEP),
(x, y+counter*TERRAIN_STEP),
]
t = self.world.CreateStaticBody(
fixtures = fixtureDef(
shape=polygonShape(vertices=poly),
friction = 0.1
))
t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
self.terrain.append(t)
elif state==STAIRS and oneshot:
stair_height = +1 if np.random.ranf() > 0.5 else -1
stair_width = np.random.randint(4, 5)
stair_steps = np.random.randint(3, 5)
original_y = y
for s in xrange(stair_steps):
poly = [
(x+( s*stair_width)*TERRAIN_STEP, y+( s*stair_height)*TERRAIN_STEP),
(x+((1+s)*stair_width)*TERRAIN_STEP, y+( s*stair_height)*TERRAIN_STEP),
(x+((1+s)*stair_width)*TERRAIN_STEP, y+(-1+s*stair_height)*TERRAIN_STEP),
(x+( s*stair_width)*TERRAIN_STEP, y+(-1+s*stair_height)*TERRAIN_STEP),
]
t = self.world.CreateStaticBody(
fixtures = fixtureDef(
shape=polygonShape(vertices=poly),
friction = 0.1
))
t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
self.terrain.append(t)
counter = stair_steps*stair_width
elif state==STAIRS and not oneshot:
s = stair_steps*stair_width - counter - stair_height
n = s/stair_width
y = original_y + (n*stair_height)*TERRAIN_STEP
oneshot = False
self.terrain_y.append(y)
counter -= 1
if counter==0:
counter = np.random.randint(TERRAIN_GRASS/2, TERRAIN_GRASS)
if state==GRASS and hardcore:
state = np.random.randint(1, _STATES_)
oneshot = True
else:
state = GRASS
oneshot = True
self.terrain_poly = []
for i in xrange(TERRAIN_LENGTH-1):
poly = [
(self.terrain_x[i], self.terrain_y[i]),
(self.terrain_x[i+1], self.terrain_y[i+1])
]
t = self.world.CreateStaticBody(
fixtures = fixtureDef(
shape=edgeShape(vertices=poly),
friction = 0.1,
categoryBits=0x0001,
))
color = (0.3, 1.0 if i%2==0 else 0.8, 0.3)
t.color1 = color
t.color2 = color
self.terrain.append(t)
color = (0.4, 0.6, 0.3)
poly += [ (poly[1][0], 0), (poly[0][0], 0) ]
self.terrain_poly.append( (poly, color) )
self.terrain.reverse()
def _generate_clouds(self):
# Sorry for the clouds, couldn't resist
self.cloud_poly = []
for i in xrange(TERRAIN_LENGTH/20):
x = np.random.uniform(0, TERRAIN_LENGTH)*TERRAIN_STEP
y = VIEWPORT_H/SCALE*3/4
poly = [
(x+15*TERRAIN_STEP*math.sin(3.14*2*a/5)+np.random.uniform(0,5*TERRAIN_STEP),
y+ 5*TERRAIN_STEP*math.cos(3.14*2*a/5)+np.random.uniform(0,5*TERRAIN_STEP) )
for a in xrange(5) ]
x1 = min( [p[0] for p in poly] )
x2 = max( [p[0] for p in poly] )
self.cloud_poly.append( (poly,x1,x2) )
def _reset(self):
self._destroy()
self.game_over = False
self.prev_shaping = None
self.scroll = 0.0
W = VIEWPORT_W/SCALE
H = VIEWPORT_H/SCALE
self._generate_terrain(self.hardcore)
self._generate_clouds()
init_x = TERRAIN_STEP*TERRAIN_STARTPAD/2
init_y = TERRAIN_HEIGHT+2*LEG_H
self.hull = self.world.CreateDynamicBody(
position = (init_x, init_y),
fixtures = fixtureDef(
shape=polygonShape(vertices=[ (x/SCALE,y/SCALE) for x,y in HULL_POLY ]),
density=5.0,
friction=0.1,
categoryBits=0x0020,
maskBits=0x001, # collide only with ground
restitution=0.0) # 0.99 bouncy
)
self.hull.color1 = (0.5,0.4,0.9)
self.hull.color2 = (0.3,0.3,0.5)
self.hull.ApplyForceToCenter((np.random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), 0), True)
self.legs = []
self.joints = []
for i in [-1,+1]:
leg = self.world.CreateDynamicBody(
position = (init_x, init_y - LEG_H/2 - LEG_DOWN),
angle = (i*0.05),
fixtures = fixtureDef(
shape=polygonShape(box=(LEG_W/2, LEG_H/2)),
density=1.0,
restitution=0.0,
categoryBits=0x0020,
maskBits=0x001)
)
leg.color1 = (0.6-i/10., 0.3-i/10., 0.5-i/10.)
leg.color2 = (0.4-i/10., 0.2-i/10., 0.3-i/10.)
rjd = revoluteJointDef(
bodyA=self.hull,
bodyB=leg,
localAnchorA=(0, LEG_DOWN),
localAnchorB=(0, LEG_H/2),
enableMotor=True,
enableLimit=True,
maxMotorTorque=MOTORS_TORQUE,
motorSpeed = i,
lowerAngle = -0.8,
upperAngle = 1.1,
)
self.legs.append(leg)
self.joints.append(self.world.CreateJoint(rjd))
lower = self.world.CreateDynamicBody(
position = (init_x, init_y - LEG_H*3/2 - LEG_DOWN),
angle = (i*0.05),
fixtures = fixtureDef(
shape=polygonShape(box=(0.8*LEG_W/2, LEG_H/2)),
density=1.0,
restitution=0.0,
categoryBits=0x0020,
maskBits=0x001)
)
lower.color1 = (0.6-i/10., 0.3-i/10., 0.5-i/10.)
lower.color2 = (0.4-i/10., 0.2-i/10., 0.3-i/10.)
rjd = revoluteJointDef(
bodyA=leg,
bodyB=lower,
localAnchorA=(0, -LEG_H/2),
localAnchorB=(0, LEG_H/2),
enableMotor=True,
enableLimit=True,
maxMotorTorque=MOTORS_TORQUE,
motorSpeed = 1,
lowerAngle = -1.6,
upperAngle = -0.1,
)
self.legs.append(lower)
self.joints.append(self.world.CreateJoint(rjd))
self.drawlist = self.terrain + self.legs + [self.hull]
return self._step(np.array([0,0,0,0]))[0]
def _step(self, action):
#self.hull.ApplyForceToCenter((0, 20), True) -- Uncomment this to receive a bit of stability help
control_speed = False # Should be easier as well
if control_speed:
self.joints[0].motorSpeed = SPEED_HIP * np.clip(-1, 1, action[0])
self.joints[1].motorSpeed = SPEED_KNEE * np.clip(-1, 1, action[1])
self.joints[2].motorSpeed = SPEED_HIP * np.clip(-1, 1, action[2])
self.joints[2].motorSpeed = SPEED_KNEE * np.clip(-1, 1, action[3])
else:
self.joints[0].motorSpeed = SPEED_HIP * np.sign(action[0])
self.joints[0].maxMotorTorque = MOTORS_TORQUE * np.clip(0, 1, np.abs(action[0]))
self.joints[1].motorSpeed = SPEED_KNEE * np.sign(action[1])
self.joints[1].maxMotorTorque = MOTORS_TORQUE * np.clip(0, 1, np.abs(action[1]))
self.joints[2].motorSpeed = SPEED_HIP * np.sign(action[2])
self.joints[2].maxMotorTorque = MOTORS_TORQUE * np.clip(0, 1, np.abs(action[2]))
self.joints[3].motorSpeed = SPEED_KNEE * np.sign(action[3])
self.joints[3].maxMotorTorque = MOTORS_TORQUE * np.clip(0, 1, np.abs(action[3]))
self.world.Step(1.0/FPS, 6*30, 2*30)
pos = self.hull.position
vel = self.hull.linearVelocity
class LidarCallback(Box2D.b2.rayCastCallback):
def ReportFixture(self, fixture, point, normal, fraction):
if (fixture.filterData.categoryBits & 1) == 0:
return 1
self.p2 = point
self.fraction = fraction
return 0
self.lidar = [LidarCallback() for _ in xrange(10)]
for i in xrange(10):
self.lidar[i].fraction = 1.0
self.lidar[i].p1 = pos
self.lidar[i].p2 = (
pos[0] + math.sin(1.5*i/10.0)*LIDAR_RANGE,
pos[1] - math.cos(1.5*i/10.0)*LIDAR_RANGE)
self.world.RayCast(self.lidar[i], self.lidar[i].p1, self.lidar[i].p2)
state = [
self.hull.angle, # Normal angles up to 0.5 here, but sure more is possible.
0.2*self.hull.angularVelocity,
vel.x*(VIEWPORT_W/SCALE)/FPS,
vel.y*(VIEWPORT_H/SCALE)/FPS,
self.joints[0].angle, # This will give 1.1 on high up, but it's still OK (and there should be spikes on hiting the ground, that's normal too)
self.joints[0].speed / SPEED_HIP,
self.joints[1].angle + 1.0,
self.joints[1].speed / SPEED_KNEE,
self.joints[2].angle,
self.joints[2].speed / SPEED_HIP,
self.joints[3].angle + 1.0,
self.joints[3].speed / SPEED_KNEE
]
state += [l.fraction for l in self.lidar]
#print " ".join( ["%+0.2f" % x for x in state] )
self.scroll = pos.x - VIEWPORT_W/SCALE/5
shaping = pos[0]/SCALE # moving forward is a way to receive reward (up to 2.0 on 1000 rollout time)
shaping -= 0.1*abs(state[0]) # keep head straight, other than that and falling, any behavior is unpunished
#print "shaping", shaping
reward = 0
if self.prev_shaping is not None:
reward = shaping - self.prev_shaping
self.prev_shaping = shaping
done = False
if self.game_over or pos[0] < 0:
done = True
reward = -1
return np.array(state), reward, done, {}
def _render(self, mode='human', close=False):
if close:
if self.viewer is not None:
self.viewer.close()
return
from gym.envs.classic_control import rendering
if self.viewer is None:
self.viewer = rendering.Viewer(VIEWPORT_W, VIEWPORT_H)
self.viewer.set_bounds(self.scroll, VIEWPORT_W/SCALE + self.scroll, 0, VIEWPORT_H/SCALE)
self.viewer.draw_polygon( [
(self.scroll, 0),
(self.scroll+VIEWPORT_W/SCALE, 0),
(self.scroll+VIEWPORT_W/SCALE, VIEWPORT_H/SCALE),
(self.scroll, VIEWPORT_H/SCALE),
], color=(0.9, 0.9, 1.0) )
for poly,x1,x2 in self.cloud_poly:
if x2 < self.scroll/2: continue
if x1 > self.scroll/2 + VIEWPORT_W/SCALE: continue
self.viewer.draw_polygon( [(p[0]+self.scroll/2, p[1]) for p in poly], color=(1,1,1))
for poly, color in self.terrain_poly:
if poly[1][0] < self.scroll: continue
if poly[0][0] > self.scroll + VIEWPORT_W/SCALE: continue
self.viewer.draw_polygon(poly, color=color)
if np.random.random() > 0.5:
l = np.random.choice(self.lidar)
self.viewer.draw_polyline( [l.p1, l.p2], color=(1,0,0), linewidth=1 )
for obj in self.drawlist:
for f in obj.fixtures:
trans = f.body.transform
if type(f.shape) is circleShape:
t = rendering.Transform(translation=trans*f.shape.pos)
self.viewer.draw_circle(f.shape.radius, 30, color=obj.color1).add_attr(t)
self.viewer.draw_circle(f.shape.radius, 30, color=obj.color2, filled=False, linewidth=2).add_attr(t)
else:
path = [trans*v for v in f.shape.vertices]
self.viewer.draw_polygon(path, color=obj.color1)
path.append(path[0])
self.viewer.draw_polyline(path, color=obj.color2, linewidth=2)
flagy1 = TERRAIN_HEIGHT
flagy2 = flagy1 + 50/SCALE
x = TERRAIN_STEP*3
self.viewer.draw_polyline( [(x, flagy1), (x, flagy2)], color=(0,0,0), linewidth=2 )
f = [(x, flagy2), (x, flagy2-10/SCALE), (x+25/SCALE, flagy2-5/SCALE)]
self.viewer.draw_polygon(f, color=(0.9,0.2,0) )
self.viewer.draw_polyline(f + [f[0]], color=(0,0,0), linewidth=2 )
self.viewer.render()
if mode == 'rgb_array':
return self.viewer.get_array()
elif mode is 'human':
pass
else:
return super(BipedalWalker, self).render(mode=mode)
class BipedalWalkerHardcore(BipedalWalker):
hardcore = True