mirror of
https://github.com/Farama-Foundation/Gymnasium.git
synced 2025-09-03 19:03:10 +00:00
Add mild domain randomization to Car Racing Env (#2749)
* first commit domain randomize * black * update doc * add some type hints and internalized some functions * we were told, the black bear is innocent; but I should not like to trust myself with him * Don't need two color conventions * don't multiply twice * hardcore -> domain_randomize & register * remove rogue decorator
This commit is contained in:
@@ -89,6 +89,14 @@ register(
|
|||||||
reward_threshold=900,
|
reward_threshold=900,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
register(
|
||||||
|
id="CarRacingDomainRandomize-v1",
|
||||||
|
entry_point="gym.envs.box2d:CarRacing",
|
||||||
|
kwargs={"domain_randomize": True},
|
||||||
|
max_episode_steps=1000,
|
||||||
|
reward_threshold=900,
|
||||||
|
)
|
||||||
|
|
||||||
# Toy Text
|
# Toy Text
|
||||||
# ----------------------------------------
|
# ----------------------------------------
|
||||||
|
|
||||||
|
@@ -34,8 +34,6 @@ TRACK_WIDTH = 40 / SCALE
|
|||||||
BORDER = 8 / SCALE
|
BORDER = 8 / SCALE
|
||||||
BORDER_MIN_COUNT = 4
|
BORDER_MIN_COUNT = 4
|
||||||
|
|
||||||
ROAD_COLOR = [0.4, 0.4, 0.4]
|
|
||||||
|
|
||||||
|
|
||||||
class FrictionDetector(contactListener):
|
class FrictionDetector(contactListener):
|
||||||
def __init__(self, env, lap_complete_percent):
|
def __init__(self, env, lap_complete_percent):
|
||||||
@@ -63,9 +61,8 @@ class FrictionDetector(contactListener):
|
|||||||
if not tile:
|
if not tile:
|
||||||
return
|
return
|
||||||
|
|
||||||
tile.color[0] = ROAD_COLOR[0]
|
# inherit tile color from env
|
||||||
tile.color[1] = ROAD_COLOR[1]
|
tile.color = self.env.norm_road_color / 255
|
||||||
tile.color[2] = ROAD_COLOR[2]
|
|
||||||
if not obj or "tiles" not in obj.__dict__:
|
if not obj or "tiles" not in obj.__dict__:
|
||||||
return
|
return
|
||||||
if begin:
|
if begin:
|
||||||
@@ -128,10 +125,15 @@ class CarRacing(gym.Env, EzPickle):
|
|||||||
receive -100 reward and die.
|
receive -100 reward and die.
|
||||||
|
|
||||||
### Arguments
|
### Arguments
|
||||||
There are no arguments supported in constructing the environment.
|
`lap_complete_percent` dictates the percentage of tiles that must be visited by
|
||||||
|
the agent before a lap is considered complete.
|
||||||
|
|
||||||
|
Passing `domain_randomize=True` enables the domain randomized variant of the environment.
|
||||||
|
In this scenario, the background and track colours are different on every reset.
|
||||||
|
|
||||||
### Version History
|
### Version History
|
||||||
- v0: Current version
|
- v1: Current version (0.24.0)
|
||||||
|
- v0: Original version
|
||||||
|
|
||||||
### References
|
### References
|
||||||
- Chris Campbell (2014), http://www.iforce2d.net/b2dtut/top-down-car.
|
- Chris Campbell (2014), http://www.iforce2d.net/b2dtut/top-down-car.
|
||||||
@@ -145,8 +147,16 @@ class CarRacing(gym.Env, EzPickle):
|
|||||||
"render_fps": FPS,
|
"render_fps": FPS,
|
||||||
}
|
}
|
||||||
|
|
||||||
def __init__(self, verbose=1, lap_complete_percent=0.95):
|
def __init__(
|
||||||
|
self,
|
||||||
|
verbose: bool = True,
|
||||||
|
lap_complete_percent: float = 0.95,
|
||||||
|
domain_randomize: bool = False,
|
||||||
|
):
|
||||||
EzPickle.__init__(self)
|
EzPickle.__init__(self)
|
||||||
|
self.domain_randomize = domain_randomize
|
||||||
|
self._init_colors()
|
||||||
|
|
||||||
self.contactListener_keepref = FrictionDetector(self, lap_complete_percent)
|
self.contactListener_keepref = FrictionDetector(self, lap_complete_percent)
|
||||||
self.world = Box2D.b2World((0, 0), contactListener=self.contactListener_keepref)
|
self.world = Box2D.b2World((0, 0), contactListener=self.contactListener_keepref)
|
||||||
self.screen = None
|
self.screen = None
|
||||||
@@ -183,6 +193,22 @@ class CarRacing(gym.Env, EzPickle):
|
|||||||
self.road = []
|
self.road = []
|
||||||
self.car.destroy()
|
self.car.destroy()
|
||||||
|
|
||||||
|
def _init_colors(self):
|
||||||
|
if self.domain_randomize:
|
||||||
|
# domain randomize the road, bg and grass colour
|
||||||
|
self.norm_road_color = self.np_random.uniform(0, 210, size=3)
|
||||||
|
|
||||||
|
self.bg_color = self.np_random.uniform(0, 210, size=3)
|
||||||
|
|
||||||
|
self.grass_color = np.copy(self.bg_color)
|
||||||
|
idx = self.np_random.integers(3)
|
||||||
|
self.grass_color[idx] += 20
|
||||||
|
else:
|
||||||
|
# default colours
|
||||||
|
self.norm_road_color = np.array([102, 102, 102])
|
||||||
|
self.bg_color = np.array([102, 204, 102])
|
||||||
|
self.grass_color = np.array([102, 230, 102])
|
||||||
|
|
||||||
def _create_track(self):
|
def _create_track(self):
|
||||||
CHECKPOINTS = 12
|
CHECKPOINTS = 12
|
||||||
|
|
||||||
@@ -280,7 +306,7 @@ class CarRacing(gym.Env, EzPickle):
|
|||||||
elif pass_through_start and i1 == -1:
|
elif pass_through_start and i1 == -1:
|
||||||
i1 = i
|
i1 = i
|
||||||
break
|
break
|
||||||
if self.verbose == 1:
|
if self.verbose:
|
||||||
print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2 - i1))
|
print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2 - i1))
|
||||||
assert i1 != -1
|
assert i1 != -1
|
||||||
assert i2 != -1
|
assert i2 != -1
|
||||||
@@ -338,8 +364,8 @@ class CarRacing(gym.Env, EzPickle):
|
|||||||
self.fd_tile.shape.vertices = vertices
|
self.fd_tile.shape.vertices = vertices
|
||||||
t = self.world.CreateStaticBody(fixtures=self.fd_tile)
|
t = self.world.CreateStaticBody(fixtures=self.fd_tile)
|
||||||
t.userData = t
|
t.userData = t
|
||||||
c = 0.01 * (i % 3)
|
c = 0.01 * (i % 3) * 255
|
||||||
t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
|
t.color = self.norm_road_color + c
|
||||||
t.road_visited = False
|
t.road_visited = False
|
||||||
t.road_friction = 1.0
|
t.road_friction = 1.0
|
||||||
t.idx = i
|
t.idx = i
|
||||||
@@ -385,12 +411,13 @@ class CarRacing(gym.Env, EzPickle):
|
|||||||
self.t = 0.0
|
self.t = 0.0
|
||||||
self.new_lap = False
|
self.new_lap = False
|
||||||
self.road_poly = []
|
self.road_poly = []
|
||||||
|
self._init_colors()
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
success = self._create_track()
|
success = self._create_track()
|
||||||
if success:
|
if success:
|
||||||
break
|
break
|
||||||
if self.verbose == 1:
|
if self.verbose:
|
||||||
print(
|
print(
|
||||||
"retry to generate track (normal if there are not many"
|
"retry to generate track (normal if there are not many"
|
||||||
"instances of this message)"
|
"instances of this message)"
|
||||||
@@ -402,7 +429,7 @@ class CarRacing(gym.Env, EzPickle):
|
|||||||
else:
|
else:
|
||||||
return self.step(None)[0], {}
|
return self.step(None)[0], {}
|
||||||
|
|
||||||
def step(self, action):
|
def step(self, action: np.ndarray):
|
||||||
if action is not None:
|
if action is not None:
|
||||||
self.car.steer(-action[0])
|
self.car.steer(-action[0])
|
||||||
self.car.gas(action[1])
|
self.car.gas(action[1])
|
||||||
@@ -432,7 +459,7 @@ class CarRacing(gym.Env, EzPickle):
|
|||||||
|
|
||||||
return self.state, step_reward, done, {}
|
return self.state, step_reward, done, {}
|
||||||
|
|
||||||
def render(self, mode="human"):
|
def render(self, mode: str = "human"):
|
||||||
import pygame
|
import pygame
|
||||||
|
|
||||||
pygame.font.init()
|
pygame.font.init()
|
||||||
@@ -459,13 +486,13 @@ class CarRacing(gym.Env, EzPickle):
|
|||||||
trans = pygame.math.Vector2((scroll_x, scroll_y)).rotate_rad(angle)
|
trans = pygame.math.Vector2((scroll_x, scroll_y)).rotate_rad(angle)
|
||||||
trans = (WINDOW_W / 2 + trans[0], WINDOW_H / 4 + trans[1])
|
trans = (WINDOW_W / 2 + trans[0], WINDOW_H / 4 + trans[1])
|
||||||
|
|
||||||
self.render_road(zoom, trans, angle)
|
self._render_road(zoom, trans, angle)
|
||||||
self.car.draw(self.surf, zoom, trans, angle, mode != "state_pixels")
|
self.car.draw(self.surf, zoom, trans, angle, mode != "state_pixels")
|
||||||
|
|
||||||
self.surf = pygame.transform.flip(self.surf, False, True)
|
self.surf = pygame.transform.flip(self.surf, False, True)
|
||||||
|
|
||||||
# showing stats
|
# showing stats
|
||||||
self.render_indicators(WINDOW_W, WINDOW_H)
|
self._render_indicators(WINDOW_W, WINDOW_H)
|
||||||
|
|
||||||
font = pygame.font.Font(pygame.font.get_default_font(), 42)
|
font = pygame.font.Font(pygame.font.get_default_font(), 42)
|
||||||
text = font.render("%04i" % self.reward, True, (255, 255, 255), (0, 0, 0))
|
text = font.render("%04i" % self.reward, True, (255, 255, 255), (0, 0, 0))
|
||||||
@@ -487,7 +514,7 @@ class CarRacing(gym.Env, EzPickle):
|
|||||||
else:
|
else:
|
||||||
return self.isopen
|
return self.isopen
|
||||||
|
|
||||||
def render_road(self, zoom, translation, angle):
|
def _render_road(self, zoom, translation, angle):
|
||||||
bounds = PLAYFIELD
|
bounds = PLAYFIELD
|
||||||
field = [
|
field = [
|
||||||
(2 * bounds, 2 * bounds),
|
(2 * bounds, 2 * bounds),
|
||||||
@@ -495,11 +522,13 @@ class CarRacing(gym.Env, EzPickle):
|
|||||||
(0, 0),
|
(0, 0),
|
||||||
(0, 2 * bounds),
|
(0, 2 * bounds),
|
||||||
]
|
]
|
||||||
trans_field = []
|
|
||||||
self.draw_colored_polygon(
|
# draw background
|
||||||
self.surf, field, (102, 204, 102), zoom, translation, angle
|
self._draw_colored_polygon(
|
||||||
|
self.surf, field, self.bg_color, zoom, translation, angle
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# draw grass patches
|
||||||
k = bounds / (20.0)
|
k = bounds / (20.0)
|
||||||
grass = []
|
grass = []
|
||||||
for x in range(0, 40, 2):
|
for x in range(0, 40, 2):
|
||||||
@@ -513,17 +542,18 @@ class CarRacing(gym.Env, EzPickle):
|
|||||||
]
|
]
|
||||||
)
|
)
|
||||||
for poly in grass:
|
for poly in grass:
|
||||||
self.draw_colored_polygon(
|
self._draw_colored_polygon(
|
||||||
self.surf, poly, (102, 230, 102), zoom, translation, angle
|
self.surf, poly, self.grass_color, zoom, translation, angle
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# draw road
|
||||||
for poly, color in self.road_poly:
|
for poly, color in self.road_poly:
|
||||||
# converting to pixel coordinates
|
# converting to pixel coordinates
|
||||||
poly = [(p[0] + PLAYFIELD, p[1] + PLAYFIELD) for p in poly]
|
poly = [(p[0] + PLAYFIELD, p[1] + PLAYFIELD) for p in poly]
|
||||||
color = [int(c * 255) for c in color]
|
color = [int(c) for c in color]
|
||||||
self.draw_colored_polygon(self.surf, poly, color, zoom, translation, angle)
|
self._draw_colored_polygon(self.surf, poly, color, zoom, translation, angle)
|
||||||
|
|
||||||
def render_indicators(self, W, H):
|
def _render_indicators(self, W, H):
|
||||||
import pygame
|
import pygame
|
||||||
|
|
||||||
s = W / 40.0
|
s = W / 40.0
|
||||||
@@ -592,7 +622,7 @@ class CarRacing(gym.Env, EzPickle):
|
|||||||
(255, 0, 0),
|
(255, 0, 0),
|
||||||
)
|
)
|
||||||
|
|
||||||
def draw_colored_polygon(self, surface, poly, color, zoom, translation, angle):
|
def _draw_colored_polygon(self, surface, poly, color, zoom, translation, angle):
|
||||||
import pygame
|
import pygame
|
||||||
from pygame import gfxdraw
|
from pygame import gfxdraw
|
||||||
|
|
||||||
|
@@ -649,7 +649,7 @@ def make(id: Literal["LunarLander-v2", "LunarLanderContinuous-v2"], **kwargs) ->
|
|||||||
@overload
|
@overload
|
||||||
def make(id: Literal["BipedalWalker-v3", "BipedalWalkerHardcore-v3"], **kwargs) -> Env[np.ndarray, np.ndarray | Sequence[SupportsFloat]]: ...
|
def make(id: Literal["BipedalWalker-v3", "BipedalWalkerHardcore-v3"], **kwargs) -> Env[np.ndarray, np.ndarray | Sequence[SupportsFloat]]: ...
|
||||||
@overload
|
@overload
|
||||||
def make(id: Literal["CarRacing-v0"], **kwargs) -> Env[np.ndarray, np.ndarray | Sequence[SupportsFloat]]: ...
|
def make(id: Literal["CarRacing-v1", "CarRacingDomainRandomize-v1"], **kwargs) -> Env[np.ndarray, np.ndarray | Sequence[SupportsFloat]]: ...
|
||||||
|
|
||||||
# Toy Text
|
# Toy Text
|
||||||
# ----------------------------------------
|
# ----------------------------------------
|
||||||
|
Reference in New Issue
Block a user