From c6deb81ad3b6da7adbbffd4a2df2002a53861d8f Mon Sep 17 00:00:00 2001 From: Jet <38184875+jjshoots@users.noreply.github.com> Date: Fri, 15 Apr 2022 16:04:24 +0100 Subject: [PATCH] Add mild domain randomization to Car Racing Env (#2749) * first commit domain randomize * black * update doc * add some type hints and internalized some functions * we were told, the black bear is innocent; but I should not like to trust myself wit him * Don't need two color conventions * don't multiply twice * hardcore -> domain_randomize & register * remove rogue decorator --- gym/envs/__init__.py | 8 ++++ gym/envs/box2d/car_racing.py | 82 ++++++++++++++++++++++++------------ gym/envs/registration.py | 2 +- 3 files changed, 65 insertions(+), 27 deletions(-) diff --git a/gym/envs/__init__.py b/gym/envs/__init__.py index 78fe08c68..163b3019b 100644 --- a/gym/envs/__init__.py +++ b/gym/envs/__init__.py @@ -89,6 +89,14 @@ register( reward_threshold=900, ) +register( + id="CarRacingDomainRandomize-v1", + entry_point="gym.envs.box2d:CarRacing", + kwargs={"domain_randomize": True}, + max_episode_steps=1000, + reward_threshold=900, +) + # Toy Text # ---------------------------------------- diff --git a/gym/envs/box2d/car_racing.py b/gym/envs/box2d/car_racing.py index b95c2cde0..e5f874818 100644 --- a/gym/envs/box2d/car_racing.py +++ b/gym/envs/box2d/car_racing.py @@ -34,8 +34,6 @@ TRACK_WIDTH = 40 / SCALE BORDER = 8 / SCALE BORDER_MIN_COUNT = 4 -ROAD_COLOR = [0.4, 0.4, 0.4] - class FrictionDetector(contactListener): def __init__(self, env, lap_complete_percent): @@ -63,9 +61,8 @@ class FrictionDetector(contactListener): if not tile: return - tile.color[0] = ROAD_COLOR[0] - tile.color[1] = ROAD_COLOR[1] - tile.color[2] = ROAD_COLOR[2] + # inherit tile color from env + tile.color = self.env.norm_road_color / 255 if not obj or "tiles" not in obj.__dict__: return if begin: @@ -128,10 +125,15 @@ class CarRacing(gym.Env, EzPickle): receive -100 reward and die. 
### Arguments - There are no arguments supported in constructing the environment. + `lap_complete_percent` dictates the percentage of tiles that must be visited by + the agent before a lap is considered complete. + + Passing `domain_randomize=True` enables the domain randomized variant of the environment. + In this scenario, the background and track colours are different on every reset. ### Version History - - v0: Current version + - v1: Current version (0.24.0) + - v0: Original version ### References - Chris Campbell (2014), http://www.iforce2d.net/b2dtut/top-down-car. @@ -145,8 +147,16 @@ class CarRacing(gym.Env, EzPickle): "render_fps": FPS, } - def __init__(self, verbose=1, lap_complete_percent=0.95): + def __init__( + self, + verbose: bool = True, + lap_complete_percent: float = 0.95, + domain_randomize: bool = False, + ): EzPickle.__init__(self) + self.domain_randomize = domain_randomize + self._init_colors() + self.contactListener_keepref = FrictionDetector(self, lap_complete_percent) self.world = Box2D.b2World((0, 0), contactListener=self.contactListener_keepref) self.screen = None @@ -183,6 +193,22 @@ class CarRacing(gym.Env, EzPickle): self.road = [] self.car.destroy() + def _init_colors(self): + if self.domain_randomize: + # domain randomize the road, bg and grass colour + self.norm_road_color = self.np_random.uniform(0, 210, size=3) + + self.bg_color = self.np_random.uniform(0, 210, size=3) + + self.grass_color = np.copy(self.bg_color) + idx = self.np_random.integers(3) + self.grass_color[idx] += 20 + else: + # default colours + self.norm_road_color = np.array([102, 102, 102]) + self.bg_color = np.array([102, 204, 102]) + self.grass_color = np.array([102, 230, 102]) + def _create_track(self): CHECKPOINTS = 12 @@ -280,7 +306,7 @@ class CarRacing(gym.Env, EzPickle): elif pass_through_start and i1 == -1: i1 = i break - if self.verbose == 1: + if self.verbose: print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2 - i1)) assert i1 != -1 assert i2 != -1 
@@ -338,8 +364,8 @@ class CarRacing(gym.Env, EzPickle): self.fd_tile.shape.vertices = vertices t = self.world.CreateStaticBody(fixtures=self.fd_tile) t.userData = t - c = 0.01 * (i % 3) - t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c] + c = 0.01 * (i % 3) * 255 + t.color = self.norm_road_color + c t.road_visited = False t.road_friction = 1.0 t.idx = i @@ -385,12 +411,13 @@ class CarRacing(gym.Env, EzPickle): self.t = 0.0 self.new_lap = False self.road_poly = [] + self._init_colors() while True: success = self._create_track() if success: break - if self.verbose == 1: + if self.verbose: print( "retry to generate track (normal if there are not many" "instances of this message)" @@ -402,7 +429,7 @@ class CarRacing(gym.Env, EzPickle): else: return self.step(None)[0], {} - def step(self, action): + def step(self, action: np.ndarray): if action is not None: self.car.steer(-action[0]) self.car.gas(action[1]) @@ -432,7 +459,7 @@ class CarRacing(gym.Env, EzPickle): return self.state, step_reward, done, {} - def render(self, mode="human"): + def render(self, mode: str = "human"): import pygame pygame.font.init() @@ -459,13 +486,13 @@ class CarRacing(gym.Env, EzPickle): trans = pygame.math.Vector2((scroll_x, scroll_y)).rotate_rad(angle) trans = (WINDOW_W / 2 + trans[0], WINDOW_H / 4 + trans[1]) - self.render_road(zoom, trans, angle) + self._render_road(zoom, trans, angle) self.car.draw(self.surf, zoom, trans, angle, mode != "state_pixels") self.surf = pygame.transform.flip(self.surf, False, True) # showing stats - self.render_indicators(WINDOW_W, WINDOW_H) + self._render_indicators(WINDOW_W, WINDOW_H) font = pygame.font.Font(pygame.font.get_default_font(), 42) text = font.render("%04i" % self.reward, True, (255, 255, 255), (0, 0, 0)) @@ -487,7 +514,7 @@ class CarRacing(gym.Env, EzPickle): else: return self.isopen - def render_road(self, zoom, translation, angle): + def _render_road(self, zoom, translation, angle): bounds = PLAYFIELD field = [ (2 * bounds, 
2 * bounds), @@ -495,11 +522,13 @@ class CarRacing(gym.Env, EzPickle): (0, 0), (0, 2 * bounds), ] - trans_field = [] - self.draw_colored_polygon( - self.surf, field, (102, 204, 102), zoom, translation, angle + + # draw background + self._draw_colored_polygon( + self.surf, field, self.bg_color, zoom, translation, angle ) + # draw grass patches k = bounds / (20.0) grass = [] for x in range(0, 40, 2): @@ -513,17 +542,18 @@ class CarRacing(gym.Env, EzPickle): ] ) for poly in grass: - self.draw_colored_polygon( - self.surf, poly, (102, 230, 102), zoom, translation, angle + self._draw_colored_polygon( + self.surf, poly, self.grass_color, zoom, translation, angle ) + # draw road for poly, color in self.road_poly: # converting to pixel coordinates poly = [(p[0] + PLAYFIELD, p[1] + PLAYFIELD) for p in poly] - color = [int(c * 255) for c in color] - self.draw_colored_polygon(self.surf, poly, color, zoom, translation, angle) + color = [int(c) for c in color] + self._draw_colored_polygon(self.surf, poly, color, zoom, translation, angle) - def render_indicators(self, W, H): + def _render_indicators(self, W, H): import pygame s = W / 40.0 @@ -592,7 +622,7 @@ class CarRacing(gym.Env, EzPickle): (255, 0, 0), ) - def draw_colored_polygon(self, surface, poly, color, zoom, translation, angle): + def _draw_colored_polygon(self, surface, poly, color, zoom, translation, angle): import pygame from pygame import gfxdraw diff --git a/gym/envs/registration.py b/gym/envs/registration.py index 70f0b9345..09471c62d 100644 --- a/gym/envs/registration.py +++ b/gym/envs/registration.py @@ -649,7 +649,7 @@ def make(id: Literal["LunarLander-v2", "LunarLanderContinuous-v2"], **kwargs) -> @overload def make(id: Literal["BipedalWalker-v3", "BipedalWalkerHardcore-v3"], **kwargs) -> Env[np.ndarray, np.ndarray | Sequence[SupportsFloat]]: ... @overload -def make(id: Literal["CarRacing-v0"], **kwargs) -> Env[np.ndarray, np.ndarray | Sequence[SupportsFloat]]: ... 
+def make(id: Literal["CarRacing-v1", "CarRacingDomainRandomize-v1"], **kwargs) -> Env[np.ndarray, np.ndarray | Sequence[SupportsFloat]]: ... # Toy Text # ----------------------------------------