import sys import numpy as np from six.moves import xrange import gym import math import Box2D from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener) from gym import spaces import numpy as np FPS = 50 SCALE = 30.0 # affects how fast-paced the game is, forces should be adjusted as well MOTORS_TORQUE = 40 SPEED_HIP = 2 SPEED_KNEE = 3 LIDAR_RANGE = 140/SCALE INITIAL_RANDOM = 5 HULL_POLY =[ (-30,+9), (+6,+9), (+34,+1), (+34,-8), (-30,-8) ] LEG_DOWN = -8/SCALE LEG_W, LEG_H = 8/SCALE, 34/SCALE VIEWPORT_W = 600 VIEWPORT_H = 400 TERRAIN_STEP = 14/SCALE TERRAIN_LENGTH = 200 # in steps TERRAIN_HEIGHT = VIEWPORT_H/SCALE/4 TERRAIN_GRASS = 10 # low long are grass spots, in steps TERRAIN_STARTPAD = 20 # in steps class ContactDetector(contactListener): def __init__(self, env): contactListener.__init__(self) self.env = env def BeginContact(self, contact): if self.env.hull==contact.fixtureA.body or self.env.hull==contact.fixtureB.body: self.env.game_over = True class BipedalWalker(gym.Env): metadata = { 'render.modes': ['human', 'rgb_array'], 'video.frames_per_second' : FPS } hardcore = False def __init__(self): self.viewer = None high = np.array([np.inf]*22) self.action_space = spaces.Box( np.array([-1,-1,-1,-1]), np.array([+1,+1,+1,+1]) ) self.observation_space = spaces.Box(-high, high) self.world = Box2D.b2World(contactListener=ContactDetector(self)) self.terrain = None self.hull = None self.prev_shaping = None self._reset() def _destroy(self): if not self.terrain: return for t in self.terrain: self.world.DestroyBody(t) self.terrain = [] self.world.DestroyBody(self.hull) self.hull = None for leg in self.legs: self.world.DestroyBody(leg) self.legs = [] self.joints = [] def _generate_terrain(self, hardcore): GRASS, STUMP, STAIRS, PIT, _STATES_ = xrange(5) state = GRASS velocity = 0.0 y = TERRAIN_HEIGHT counter = TERRAIN_STARTPAD oneshot = False self.terrain = [] self.terrain_x = [] self.terrain_y = [] for i in xrange(TERRAIN_LENGTH): x = i*TERRAIN_STEP self.terrain_x.append(x) if state==GRASS and not oneshot: velocity = 0.8*velocity + 0.01*np.sign(TERRAIN_HEIGHT - y) if i > TERRAIN_STARTPAD: velocity += np.random.uniform(-1, 1)/SCALE #1 y += velocity elif state==PIT and oneshot: counter = np.random.randint(3, 5) poly = [ (x, y), (x+TERRAIN_STEP, y), (x+TERRAIN_STEP, y-4*TERRAIN_STEP), (x, y-4*TERRAIN_STEP), ] t = self.world.CreateStaticBody( fixtures = fixtureDef( shape=polygonShape(vertices=poly), friction = 0.1 )) t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6) self.terrain.append(t) t = self.world.CreateStaticBody( fixtures = fixtureDef( shape=polygonShape(vertices=[(p[0]+TERRAIN_STEP*counter,p[1]) for p in poly]), friction = 0.1 )) t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6) self.terrain.append(t) counter += 2 original_y = y elif state==PIT and not oneshot: y = original_y if counter > 1: y -= 4*TERRAIN_STEP elif state==STUMP and oneshot: counter = np.random.randint(1, 3) poly = [ (x, y), (x+counter*TERRAIN_STEP, y), (x+counter*TERRAIN_STEP, y+counter*TERRAIN_STEP), (x, y+counter*TERRAIN_STEP), ] t = self.world.CreateStaticBody( fixtures = fixtureDef( shape=polygonShape(vertices=poly), friction = 0.1 )) t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6) self.terrain.append(t) elif state==STAIRS and oneshot: stair_height = +1 if np.random.ranf() > 0.5 else -1 stair_width = np.random.randint(4, 5) stair_steps = np.random.randint(3, 5) original_y = y for s in xrange(stair_steps): poly = [ (x+( s*stair_width)*TERRAIN_STEP, y+( s*stair_height)*TERRAIN_STEP), (x+((1+s)*stair_width)*TERRAIN_STEP, y+( s*stair_height)*TERRAIN_STEP), (x+((1+s)*stair_width)*TERRAIN_STEP, y+(-1+s*stair_height)*TERRAIN_STEP), (x+( s*stair_width)*TERRAIN_STEP, y+(-1+s*stair_height)*TERRAIN_STEP), ] t = self.world.CreateStaticBody( fixtures = fixtureDef( shape=polygonShape(vertices=poly), friction = 0.1 )) t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6) self.terrain.append(t) counter = stair_steps*stair_width elif state==STAIRS and not oneshot: s = stair_steps*stair_width - counter - stair_height n = s/stair_width y = original_y + (n*stair_height)*TERRAIN_STEP oneshot = False self.terrain_y.append(y) counter -= 1 if counter==0: counter = np.random.randint(TERRAIN_GRASS/2, TERRAIN_GRASS) if state==GRASS and hardcore: state = np.random.randint(1, _STATES_) oneshot = True else: state = GRASS oneshot = True self.terrain_poly = [] for i in xrange(TERRAIN_LENGTH-1): poly = [ (self.terrain_x[i], self.terrain_y[i]), (self.terrain_x[i+1], self.terrain_y[i+1]) ] t = self.world.CreateStaticBody( fixtures = fixtureDef( shape=edgeShape(vertices=poly), friction = 0.1, categoryBits=0x0001, )) color = (0.3, 1.0 if i%2==0 else 0.8, 0.3) t.color1 = color t.color2 = color self.terrain.append(t) color = (0.4, 0.6, 0.3) poly += [ (poly[1][0], 0), (poly[0][0], 0) ] self.terrain_poly.append( (poly, color) ) self.terrain.reverse() def _generate_clouds(self): # Sorry for the clouds, couldn't resist self.cloud_poly = [] for i in xrange(TERRAIN_LENGTH//20): x = np.random.uniform(0, TERRAIN_LENGTH)*TERRAIN_STEP y = VIEWPORT_H/SCALE*3/4 poly = [ (x+15*TERRAIN_STEP*math.sin(3.14*2*a/5)+np.random.uniform(0,5*TERRAIN_STEP), y+ 5*TERRAIN_STEP*math.cos(3.14*2*a/5)+np.random.uniform(0,5*TERRAIN_STEP) ) for a in xrange(5) ] x1 = min( [p[0] for p in poly] ) x2 = max( [p[0] for p in poly] ) self.cloud_poly.append( (poly,x1,x2) ) def _reset(self): self._destroy() self.game_over = False self.prev_shaping = None self.scroll = 0.0 W = VIEWPORT_W/SCALE H = VIEWPORT_H/SCALE self._generate_terrain(self.hardcore) self._generate_clouds() init_x = TERRAIN_STEP*TERRAIN_STARTPAD/2 init_y = TERRAIN_HEIGHT+2*LEG_H self.hull = self.world.CreateDynamicBody( position = (init_x, init_y), fixtures = fixtureDef( shape=polygonShape(vertices=[ (x/SCALE,y/SCALE) for x,y in HULL_POLY ]), density=5.0, friction=0.1, categoryBits=0x0020, maskBits=0x001, # collide only with ground restitution=0.0) # 0.99 bouncy ) self.hull.color1 = (0.5,0.4,0.9) self.hull.color2 = (0.3,0.3,0.5) self.hull.ApplyForceToCenter((np.random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), 0), True) self.legs = [] self.joints = [] for i in [-1,+1]: leg = self.world.CreateDynamicBody( position = (init_x, init_y - LEG_H/2 - LEG_DOWN), angle = (i*0.05), fixtures = fixtureDef( shape=polygonShape(box=(LEG_W/2, LEG_H/2)), density=1.0, restitution=0.0, categoryBits=0x0020, maskBits=0x001) ) leg.color1 = (0.6-i/10., 0.3-i/10., 0.5-i/10.) leg.color2 = (0.4-i/10., 0.2-i/10., 0.3-i/10.) rjd = revoluteJointDef( bodyA=self.hull, bodyB=leg, localAnchorA=(0, LEG_DOWN), localAnchorB=(0, LEG_H/2), enableMotor=True, enableLimit=True, maxMotorTorque=MOTORS_TORQUE, motorSpeed = i, lowerAngle = -0.8, upperAngle = 1.1, ) self.legs.append(leg) self.joints.append(self.world.CreateJoint(rjd)) lower = self.world.CreateDynamicBody( position = (init_x, init_y - LEG_H*3/2 - LEG_DOWN), angle = (i*0.05), fixtures = fixtureDef( shape=polygonShape(box=(0.8*LEG_W/2, LEG_H/2)), density=1.0, restitution=0.0, categoryBits=0x0020, maskBits=0x001) ) lower.color1 = (0.6-i/10., 0.3-i/10., 0.5-i/10.) lower.color2 = (0.4-i/10., 0.2-i/10., 0.3-i/10.) rjd = revoluteJointDef( bodyA=leg, bodyB=lower, localAnchorA=(0, -LEG_H/2), localAnchorB=(0, LEG_H/2), enableMotor=True, enableLimit=True, maxMotorTorque=MOTORS_TORQUE, motorSpeed = 1, lowerAngle = -1.6, upperAngle = -0.1, ) self.legs.append(lower) self.joints.append(self.world.CreateJoint(rjd)) self.drawlist = self.terrain + self.legs + [self.hull] return self._step(np.array([0,0,0,0]))[0] def _step(self, action): #self.hull.ApplyForceToCenter((0, 20), True) -- Uncomment this to receive a bit of stability help control_speed = False # Should be easier as well if control_speed: self.joints[0].motorSpeed = float(SPEED_HIP * np.clip(-1, 1, action[0])) self.joints[1].motorSpeed = float(SPEED_KNEE * np.clip(-1, 1, action[1])) self.joints[2].motorSpeed = float(SPEED_HIP * np.clip(-1, 1, action[2])) self.joints[3].motorSpeed = float(SPEED_KNEE * np.clip(-1, 1, action[3])) else: self.joints[0].motorSpeed = float(SPEED_HIP * np.sign(action[0])) self.joints[0].maxMotorTorque = float(MOTORS_TORQUE * np.clip(np.abs(action[0]), 0, 1)) self.joints[1].motorSpeed = float(SPEED_KNEE * np.sign(action[1])) self.joints[1].maxMotorTorque = float(MOTORS_TORQUE * np.clip(np.abs(action[1]), 0, 1)) self.joints[2].motorSpeed = float(SPEED_HIP * np.sign(action[2])) self.joints[2].maxMotorTorque = float(MOTORS_TORQUE * np.clip(np.abs(action[2]), 0, 1)) self.joints[3].motorSpeed = float(SPEED_KNEE * np.sign(action[3])) self.joints[3].maxMotorTorque = float(MOTORS_TORQUE * np.clip(np.abs(action[3]), 0, 1)) self.world.Step(1.0/FPS, 6*30, 2*30) pos = self.hull.position vel = self.hull.linearVelocity class LidarCallback(Box2D.b2.rayCastCallback): def ReportFixture(self, fixture, point, normal, fraction): if (fixture.filterData.categoryBits & 1) == 0: return 1 self.p2 = point self.fraction = fraction return 0 self.lidar = [LidarCallback() for _ in xrange(10)] for i in xrange(10): self.lidar[i].fraction = 1.0 self.lidar[i].p1 = pos self.lidar[i].p2 = ( pos[0] + math.sin(1.5*i/10.0)*LIDAR_RANGE, pos[1] - math.cos(1.5*i/10.0)*LIDAR_RANGE) self.world.RayCast(self.lidar[i], self.lidar[i].p1, self.lidar[i].p2) state = [ self.hull.angle, # Normal angles up to 0.5 here, but sure more is possible. 0.2*self.hull.angularVelocity, vel.x*(VIEWPORT_W/SCALE)/FPS, vel.y*(VIEWPORT_H/SCALE)/FPS, self.joints[0].angle, # This will give 1.1 on high up, but it's still OK (and there should be spikes on hiting the ground, that's normal too) self.joints[0].speed / SPEED_HIP, self.joints[1].angle + 1.0, self.joints[1].speed / SPEED_KNEE, self.joints[2].angle, self.joints[2].speed / SPEED_HIP, self.joints[3].angle + 1.0, self.joints[3].speed / SPEED_KNEE ] state += [l.fraction for l in self.lidar] #print " ".join( ["%+0.2f" % x for x in state] ) self.scroll = pos.x - VIEWPORT_W/SCALE/5 shaping = pos[0]/SCALE # moving forward is a way to receive reward (up to 2.0 on 1000 rollout time) shaping -= 0.1*abs(state[0]) # keep head straight, other than that and falling, any behavior is unpunished #print "shaping", shaping reward = 0 if self.prev_shaping is not None: reward = shaping - self.prev_shaping self.prev_shaping = shaping done = False if self.game_over or pos[0] < 0: done = True reward = -1 return np.array(state), reward, done, {} def _render(self, mode='human', close=False): if close: if self.viewer is not None: self.viewer.close() self.viewer = None return from gym.envs.classic_control import rendering if self.viewer is None: self.viewer = rendering.Viewer(VIEWPORT_W, VIEWPORT_H) self.viewer.set_bounds(self.scroll, VIEWPORT_W/SCALE + self.scroll, 0, VIEWPORT_H/SCALE) self.viewer.draw_polygon( [ (self.scroll, 0), (self.scroll+VIEWPORT_W/SCALE, 0), (self.scroll+VIEWPORT_W/SCALE, VIEWPORT_H/SCALE), (self.scroll, VIEWPORT_H/SCALE), ], color=(0.9, 0.9, 1.0) ) for poly,x1,x2 in self.cloud_poly: if x2 < self.scroll/2: continue if x1 > self.scroll/2 + VIEWPORT_W/SCALE: continue self.viewer.draw_polygon( [(p[0]+self.scroll/2, p[1]) for p in poly], color=(1,1,1)) for poly, color in self.terrain_poly: if poly[1][0] < self.scroll: continue if poly[0][0] > self.scroll + VIEWPORT_W/SCALE: continue self.viewer.draw_polygon(poly, color=color) if np.random.random() > 0.5: l = np.random.choice(self.lidar) self.viewer.draw_polyline( [l.p1, l.p2], color=(1,0,0), linewidth=1 ) for obj in self.drawlist: for f in obj.fixtures: trans = f.body.transform if type(f.shape) is circleShape: t = rendering.Transform(translation=trans*f.shape.pos) self.viewer.draw_circle(f.shape.radius, 30, color=obj.color1).add_attr(t) self.viewer.draw_circle(f.shape.radius, 30, color=obj.color2, filled=False, linewidth=2).add_attr(t) else: path = [trans*v for v in f.shape.vertices] self.viewer.draw_polygon(path, color=obj.color1) path.append(path[0]) self.viewer.draw_polyline(path, color=obj.color2, linewidth=2) flagy1 = TERRAIN_HEIGHT flagy2 = flagy1 + 50/SCALE x = TERRAIN_STEP*3 self.viewer.draw_polyline( [(x, flagy1), (x, flagy2)], color=(0,0,0), linewidth=2 ) f = [(x, flagy2), (x, flagy2-10/SCALE), (x+25/SCALE, flagy2-5/SCALE)] self.viewer.draw_polygon(f, color=(0.9,0.2,0) ) self.viewer.draw_polyline(f + [f[0]], color=(0,0,0), linewidth=2 ) self.viewer.render() if mode == 'rgb_array': return self.viewer.get_array() elif mode is 'human': pass else: return super(BipedalWalker, self).render(mode=mode) class BipedalWalkerHardcore(BipedalWalker): hardcore = True