Refactor Lunar Lander and Bipedal Walker to use Pygame (#2552)

* Refactor lunar lander to use pygame

* Fix minor rendering inconsistency

* Refactor bipedal walker to use pygame

* Reformat with black

* Remove viewer

* Fix color for obstacles

* Update dependencies for box2d

* Optimize screen initialization

This commit is contained in:
Andrew Tan Jin Shen
2022-02-05 22:42:45 +08:00
committed by GitHub
parent 11760cd0b0
commit 2e36cde12e
4 changed files with 196 additions and 99 deletions

View File

@@ -5,6 +5,9 @@ import math
from typing import Optional from typing import Optional
import numpy as np import numpy as np
import pygame
from pygame import gfxdraw
import Box2D import Box2D
from Box2D.b2 import ( from Box2D.b2 import (
edgeShape, edgeShape,
@@ -159,7 +162,8 @@ class BipedalWalker(gym.Env, EzPickle):
def __init__(self, hardcore: bool = False): def __init__(self, hardcore: bool = False):
EzPickle.__init__(self) EzPickle.__init__(self)
self.viewer = None self.screen = None
self.isopen = True
self.world = Box2D.b2World() self.world = Box2D.b2World()
self.terrain = None self.terrain = None
@@ -231,14 +235,14 @@ class BipedalWalker(gym.Env, EzPickle):
] ]
self.fd_polygon.shape.vertices = poly self.fd_polygon.shape.vertices = poly
t = self.world.CreateStaticBody(fixtures=self.fd_polygon) t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
t.color1, t.color2 = (1, 1, 1), (0.6, 0.6, 0.6) t.color1, t.color2 = (255, 255, 255), (153, 153, 153)
self.terrain.append(t) self.terrain.append(t)
self.fd_polygon.shape.vertices = [ self.fd_polygon.shape.vertices = [
(p[0] + TERRAIN_STEP * counter, p[1]) for p in poly (p[0] + TERRAIN_STEP * counter, p[1]) for p in poly
] ]
t = self.world.CreateStaticBody(fixtures=self.fd_polygon) t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
t.color1, t.color2 = (1, 1, 1), (0.6, 0.6, 0.6) t.color1, t.color2 = (255, 255, 255), (153, 153, 153)
self.terrain.append(t) self.terrain.append(t)
counter += 2 counter += 2
original_y = y original_y = y
@@ -258,7 +262,7 @@ class BipedalWalker(gym.Env, EzPickle):
] ]
self.fd_polygon.shape.vertices = poly self.fd_polygon.shape.vertices = poly
t = self.world.CreateStaticBody(fixtures=self.fd_polygon) t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
t.color1, t.color2 = (1, 1, 1), (0.6, 0.6, 0.6) t.color1, t.color2 = (255, 255, 255), (153, 153, 153)
self.terrain.append(t) self.terrain.append(t)
elif state == STAIRS and oneshot: elif state == STAIRS and oneshot:
@@ -287,7 +291,7 @@ class BipedalWalker(gym.Env, EzPickle):
] ]
self.fd_polygon.shape.vertices = poly self.fd_polygon.shape.vertices = poly
t = self.world.CreateStaticBody(fixtures=self.fd_polygon) t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
t.color1, t.color2 = (1, 1, 1), (0.6, 0.6, 0.6) t.color1, t.color2 = (255, 255, 255), (153, 153, 153)
self.terrain.append(t) self.terrain.append(t)
counter = stair_steps * stair_width counter = stair_steps * stair_width
@@ -316,11 +320,11 @@ class BipedalWalker(gym.Env, EzPickle):
] ]
self.fd_edge.shape.vertices = poly self.fd_edge.shape.vertices = poly
t = self.world.CreateStaticBody(fixtures=self.fd_edge) t = self.world.CreateStaticBody(fixtures=self.fd_edge)
color = (0.3, 1.0 if i % 2 == 0 else 0.8, 0.3) color = (76, 255 if i % 2 == 0 else 204, 76)
t.color1 = color t.color1 = color
t.color2 = color t.color2 = color
self.terrain.append(t) self.terrain.append(t)
color = (0.4, 0.6, 0.3) color = (102, 153, 76)
poly += [(poly[1][0], 0), (poly[0][0], 0)] poly += [(poly[1][0], 0), (poly[0][0], 0)]
self.terrain_poly.append((poly, color)) self.terrain_poly.append((poly, color))
self.terrain.reverse() self.terrain.reverse()
@@ -367,8 +371,8 @@ class BipedalWalker(gym.Env, EzPickle):
self.hull = self.world.CreateDynamicBody( self.hull = self.world.CreateDynamicBody(
position=(init_x, init_y), fixtures=HULL_FD position=(init_x, init_y), fixtures=HULL_FD
) )
self.hull.color1 = (0.5, 0.4, 0.9) self.hull.color1 = (127, 51, 229)
self.hull.color2 = (0.3, 0.3, 0.5) self.hull.color2 = (76, 76, 127)
self.hull.ApplyForceToCenter( self.hull.ApplyForceToCenter(
(self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), 0), True (self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), 0), True
) )
@@ -381,8 +385,8 @@ class BipedalWalker(gym.Env, EzPickle):
angle=(i * 0.05), angle=(i * 0.05),
fixtures=LEG_FD, fixtures=LEG_FD,
) )
leg.color1 = (0.6 - i / 10.0, 0.3 - i / 10.0, 0.5 - i / 10.0) leg.color1 = (153 - i * 25, 76 - i * 25, 127 - i * 25)
leg.color2 = (0.4 - i / 10.0, 0.2 - i / 10.0, 0.3 - i / 10.0) leg.color2 = (102 - i * 25, 51 - i * 25, 76 - i * 25)
rjd = revoluteJointDef( rjd = revoluteJointDef(
bodyA=self.hull, bodyA=self.hull,
bodyB=leg, bodyB=leg,
@@ -403,8 +407,8 @@ class BipedalWalker(gym.Env, EzPickle):
angle=(i * 0.05), angle=(i * 0.05),
fixtures=LOWER_FD, fixtures=LOWER_FD,
) )
lower.color1 = (0.6 - i / 10.0, 0.3 - i / 10.0, 0.5 - i / 10.0) lower.color1 = (153 - i * 25, 76 - i * 25, 127 - i * 25)
lower.color2 = (0.4 - i / 10.0, 0.2 - i / 10.0, 0.3 - i / 10.0) lower.color2 = (102 - i * 25, 51 - i * 25, 76 - i * 25)
rjd = revoluteJointDef( rjd = revoluteJointDef(
bodyA=leg, bodyA=leg,
bodyB=lower, bodyB=lower,
@@ -523,37 +527,53 @@ class BipedalWalker(gym.Env, EzPickle):
return np.array(state, dtype=np.float32), reward, done, {} return np.array(state, dtype=np.float32), reward, done, {}
def render(self, mode="human"): def render(self, mode="human"):
from gym.utils import pyglet_rendering
if self.viewer is None: if self.screen is None:
self.viewer = pyglet_rendering.Viewer(VIEWPORT_W, VIEWPORT_H) pygame.init()
self.viewer.set_bounds( self.screen = pygame.display.set_mode((VIEWPORT_W, VIEWPORT_H))
self.scroll, VIEWPORT_W / SCALE + self.scroll, 0, VIEWPORT_H / SCALE
)
self.viewer.draw_polygon( self.surf = pygame.Surface((VIEWPORT_W + self.scroll * SCALE, VIEWPORT_H))
[
(self.scroll, 0), pygame.transform.scale(self.surf, (SCALE, SCALE))
(self.scroll + VIEWPORT_W / SCALE, 0),
(self.scroll + VIEWPORT_W / SCALE, VIEWPORT_H / SCALE), pygame.draw.polygon(
(self.scroll, VIEWPORT_H / SCALE), self.surf,
color=(215, 215, 255),
points=[
(self.scroll * SCALE, 0),
(self.scroll * SCALE + VIEWPORT_W, 0),
(self.scroll * SCALE + VIEWPORT_W, VIEWPORT_H),
(self.scroll * SCALE, VIEWPORT_H),
], ],
color=(0.9, 0.9, 1.0),
) )
for poly, x1, x2 in self.cloud_poly: for poly, x1, x2 in self.cloud_poly:
if x2 < self.scroll / 2: if x2 < self.scroll / 2:
continue continue
if x1 > self.scroll / 2 + VIEWPORT_W / SCALE: if x1 > self.scroll / 2 + VIEWPORT_W / SCALE:
continue continue
self.viewer.draw_polygon( pygame.draw.polygon(
[(p[0] + self.scroll / 2, p[1]) for p in poly], color=(1, 1, 1) self.surf,
color=(255, 255, 255),
points=[
(p[0] * SCALE + self.scroll * SCALE / 2, p[1] * SCALE) for p in poly
],
)
gfxdraw.aapolygon(
self.surf,
[(p[0] * SCALE + self.scroll * SCALE / 2, p[1] * SCALE) for p in poly],
(255, 255, 255),
) )
for poly, color in self.terrain_poly: for poly, color in self.terrain_poly:
if poly[1][0] < self.scroll: if poly[1][0] < self.scroll:
continue continue
if poly[0][0] > self.scroll + VIEWPORT_W / SCALE: if poly[0][0] > self.scroll + VIEWPORT_W / SCALE:
continue continue
self.viewer.draw_polygon(poly, color=color) scaled_poly = []
for coord in poly:
scaled_poly.append(([coord[0] * SCALE, coord[1] * SCALE]))
pygame.draw.polygon(self.surf, color=color, points=scaled_poly)
gfxdraw.aapolygon(self.surf, scaled_poly, color)
self.lidar_render = (self.lidar_render + 1) % 100 self.lidar_render = (self.lidar_render + 1) % 100
i = self.lidar_render i = self.lidar_render
@@ -563,45 +583,80 @@ class BipedalWalker(gym.Env, EzPickle):
if i < len(self.lidar) if i < len(self.lidar)
else self.lidar[len(self.lidar) - i - 1] else self.lidar[len(self.lidar) - i - 1]
) )
self.viewer.draw_polyline([l.p1, l.p2], color=(1, 0, 0), linewidth=1) pygame.draw.line(
self.surf,
color=(255, 0, 0),
start_pos=(l.p1[0] * SCALE, l.p1[1] * SCALE),
end_pos=(l.p2[0] * SCALE, l.p2[1] * SCALE),
width=1,
)
for obj in self.drawlist: for obj in self.drawlist:
for f in obj.fixtures: for f in obj.fixtures:
trans = f.body.transform trans = f.body.transform
if type(f.shape) is circleShape: if type(f.shape) is circleShape:
t = pyglet_rendering.Transform(translation=trans * f.shape.pos) pygame.draw.circle(
self.viewer.draw_circle( self.surf,
f.shape.radius, 30, color=obj.color1 color=obj.color1,
).add_attr(t) center=trans * f.shape.pos * SCALE,
self.viewer.draw_circle( radius=f.shape.radius * SCALE,
f.shape.radius, 30, color=obj.color2, filled=False, linewidth=2 )
).add_attr(t) pygame.draw.circle(
self.surf,
color=obj.color2,
center=trans * f.shape.pos * SCALE,
radius=f.shape.radius * SCALE,
)
else: else:
path = [trans * v for v in f.shape.vertices] path = [trans * v * SCALE for v in f.shape.vertices]
self.viewer.draw_polygon(path, color=obj.color1) if len(path) > 2:
path.append(path[0]) pygame.draw.polygon(self.surf, color=obj.color1, points=path)
self.viewer.draw_polyline(path, color=obj.color2, linewidth=2) gfxdraw.aapolygon(self.surf, path, obj.color1)
path.append(path[0])
pygame.draw.polygon(
self.surf, color=obj.color2, points=path, width=1
)
gfxdraw.aapolygon(self.surf, path, obj.color2)
else:
pygame.draw.aaline(
self.surf,
start_pos=path[0],
end_pos=path[1],
color=obj.color1,
)
flagy1 = TERRAIN_HEIGHT flagy1 = TERRAIN_HEIGHT * SCALE
flagy2 = flagy1 + 50 / SCALE flagy2 = flagy1 + 50
x = TERRAIN_STEP * 3 x = TERRAIN_STEP * 3 * SCALE
self.viewer.draw_polyline( pygame.draw.aaline(
[(x, flagy1), (x, flagy2)], color=(0, 0, 0), linewidth=2 self.surf, color=(0, 0, 0), start_pos=(x, flagy1), end_pos=(x, flagy2)
) )
f = [ f = [
(x, flagy2), (x, flagy2),
(x, flagy2 - 10 / SCALE), (x, flagy2 - 10),
(x + 25 / SCALE, flagy2 - 5 / SCALE), (x + 25, flagy2 - 5),
] ]
self.viewer.draw_polygon(f, color=(0.9, 0.2, 0)) pygame.draw.polygon(self.surf, color=(230, 51, 0), points=f)
self.viewer.draw_polyline(f + [f[0]], color=(0, 0, 0), linewidth=2) pygame.draw.lines(
self.surf, color=(0, 0, 0), points=f + [f[0]], width=1, closed=False
)
return self.viewer.render(return_rgb_array=mode == "rgb_array") self.surf = pygame.transform.flip(self.surf, False, True)
self.screen.blit(self.surf, (-self.scroll * SCALE, 0))
if mode == "human":
pygame.display.flip()
if mode == "rgb_array":
return np.transpose(
np.array(pygame.surfarray.pixels3d(self.screen)), axes=(1, 0, 2)
)
else:
return self.isopen
def close(self): def close(self):
if self.viewer is not None: if self.screen is not None:
self.viewer.close() pygame.quit()
self.viewer = None self.isopen = False
class BipedalWalkerHardcore: class BipedalWalkerHardcore:

View File

@@ -5,6 +5,8 @@ import sys
from typing import Optional from typing import Optional
import numpy as np import numpy as np
import pygame
from pygame import gfxdraw
import Box2D import Box2D
from Box2D.b2 import ( from Box2D.b2 import (
@@ -144,8 +146,8 @@ class LunarLander(gym.Env, EzPickle):
def __init__(self, continuous: bool = False): def __init__(self, continuous: bool = False):
EzPickle.__init__(self) EzPickle.__init__(self)
self.viewer = None self.screen = None
self.isopen = True
self.world = Box2D.b2World() self.world = Box2D.b2World()
self.moon = None self.moon = None
self.lander = None self.lander = None
@@ -237,8 +239,8 @@ class LunarLander(gym.Env, EzPickle):
restitution=0.0, restitution=0.0,
), # 0.99 bouncy ), # 0.99 bouncy
) )
self.lander.color1 = (0.5, 0.4, 0.9) self.lander.color1 = (128, 102, 230)
self.lander.color2 = (0.3, 0.3, 0.5) self.lander.color2 = (77, 77, 128)
self.lander.ApplyForceToCenter( self.lander.ApplyForceToCenter(
( (
self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM),
@@ -261,8 +263,8 @@ class LunarLander(gym.Env, EzPickle):
), ),
) )
leg.ground_contact = False leg.ground_contact = False
leg.color1 = (0.5, 0.4, 0.9) leg.color1 = (128, 102, 230)
leg.color2 = (0.3, 0.3, 0.5) leg.color2 = (77, 77, 128)
rjd = revoluteJointDef( rjd = revoluteJointDef(
bodyA=self.lander, bodyA=self.lander,
bodyB=leg, bodyB=leg,
@@ -433,66 +435,105 @@ class LunarLander(gym.Env, EzPickle):
return np.array(state, dtype=np.float32), reward, done, {} return np.array(state, dtype=np.float32), reward, done, {}
def render(self, mode="human"): def render(self, mode="human"):
from gym.utils import pyglet_rendering if self.screen is None:
pygame.init()
self.screen = pygame.display.set_mode((VIEWPORT_W, VIEWPORT_H))
if self.viewer is None: self.surf = pygame.Surface(self.screen.get_size())
self.viewer = pyglet_rendering.Viewer(VIEWPORT_W, VIEWPORT_H)
self.viewer.set_bounds(0, VIEWPORT_W / SCALE, 0, VIEWPORT_H / SCALE) pygame.transform.scale(self.surf, (SCALE, SCALE))
pygame.draw.rect(self.surf, (255, 255, 255), self.surf.get_rect())
for obj in self.particles: for obj in self.particles:
obj.ttl -= 0.15 obj.ttl -= 0.15
obj.color1 = ( obj.color1 = (
max(0.2, 0.2 + obj.ttl), int(max(0.2, 0.15 + obj.ttl) * 255),
max(0.2, 0.5 * obj.ttl), int(max(0.2, 0.5 * obj.ttl) * 255),
max(0.2, 0.5 * obj.ttl), int(max(0.2, 0.5 * obj.ttl) * 255),
) )
obj.color2 = ( obj.color2 = (
max(0.2, 0.2 + obj.ttl), int(max(0.2, 0.15 + obj.ttl) * 255),
max(0.2, 0.5 * obj.ttl), int(max(0.2, 0.5 * obj.ttl) * 255),
max(0.2, 0.5 * obj.ttl), int(max(0.2, 0.5 * obj.ttl) * 255),
) )
self._clean_particles(False) self._clean_particles(False)
for p in self.sky_polys: for p in self.sky_polys:
self.viewer.draw_polygon(p, color=(0, 0, 0)) scaled_poly = []
for coord in p:
scaled_poly.append((coord[0] * SCALE, coord[1] * SCALE))
pygame.draw.polygon(self.surf, (0, 0, 0), scaled_poly)
gfxdraw.aapolygon(self.surf, scaled_poly, (0, 0, 0))
for obj in self.particles + self.drawlist: for obj in self.particles + self.drawlist:
for f in obj.fixtures: for f in obj.fixtures:
trans = f.body.transform trans = f.body.transform
if type(f.shape) is circleShape: if type(f.shape) is circleShape:
t = pyglet_rendering.Transform(translation=trans * f.shape.pos) pygame.draw.circle(
self.viewer.draw_circle( self.surf,
f.shape.radius, 20, color=obj.color1 color=obj.color1,
).add_attr(t) center=trans * f.shape.pos * SCALE,
self.viewer.draw_circle( radius=f.shape.radius * SCALE,
f.shape.radius, 20, color=obj.color2, filled=False, linewidth=2 )
).add_attr(t) pygame.draw.circle(
self.surf,
color=obj.color2,
center=trans * f.shape.pos * SCALE,
radius=f.shape.radius * SCALE,
)
else: else:
path = [trans * v for v in f.shape.vertices] path = [trans * v * SCALE for v in f.shape.vertices]
self.viewer.draw_polygon(path, color=obj.color1) pygame.draw.polygon(self.surf, color=obj.color1, points=path)
path.append(path[0]) gfxdraw.aapolygon(self.surf, path, obj.color1)
self.viewer.draw_polyline(path, color=obj.color2, linewidth=2) pygame.draw.aalines(
self.surf, color=obj.color2, points=path, closed=True
)
for x in [self.helipad_x1, self.helipad_x2]: for x in [self.helipad_x1, self.helipad_x2]:
flagy1 = self.helipad_y x = x * SCALE
flagy2 = flagy1 + 50 / SCALE flagy1 = self.helipad_y * SCALE
self.viewer.draw_polyline([(x, flagy1), (x, flagy2)], color=(1, 1, 1)) flagy2 = flagy1 + 50
self.viewer.draw_polygon( pygame.draw.line(
[ self.surf,
(x, flagy2), color=(255, 255, 255),
(x, flagy2 - 10 / SCALE), start_pos=(x, flagy1),
(x + 25 / SCALE, flagy2 - 5 / SCALE), end_pos=(x, flagy2),
], width=1,
color=(0.8, 0.8, 0), )
pygame.draw.polygon(
self.surf,
color=(204, 204, 0),
points=[
(x, flagy2),
(x, flagy2 - 10),
(x + 25, flagy2 - 5),
],
)
gfxdraw.aapolygon(
self.surf,
[(x, flagy2), (x, flagy2 - 10), (x + 25, flagy2 - 5)],
(204, 204, 0),
)
self.surf = pygame.transform.flip(self.surf, False, True)
self.screen.blit(self.surf, (0, 0))
if mode == "human":
pygame.display.flip()
if mode == "rgb_array":
return np.transpose(
np.array(pygame.surfarray.pixels3d(self.surf)), axes=(1, 0, 2)
) )
else:
return self.viewer.render(return_rgb_array=mode == "rgb_array") return self.isopen
def close(self): def close(self):
if self.viewer is not None: if self.screen is not None:
self.viewer.close() pygame.quit()
self.viewer = None self.isopen = False
def heuristic(env, s): def heuristic(env, s):

View File

@@ -8,3 +8,4 @@ pyglet>=1.4.0
 pygame==2.1.0
 cloudpickle>=1.2.0
 lz4>=3.1.0
+pygame==2.1.0

View File

@@ -12,7 +12,7 @@ from version import VERSION
 extras = {
 "atari": ["ale-py~=0.7.1"],
 "accept-rom-license": ["autorom[accept-rom-license]~=0.4.2"],
-"box2d": ["box2d-py==2.3.5", "pyglet>=1.4.0"],
+"box2d": ["box2d-py==2.3.5", "pygame==2.1.0"],
 "classic_control": ["pyglet>=1.4.0"],
 "mujoco": ["mujoco_py>=1.50, <2.0"],
 "toy_text": ["pygame==2.1.0", "scipy>=1.4.1"],