Change end-of-episode in CarRacing to termination as opposed to truncation (#813)

This commit is contained in:
Ariel Kwiatkowski
2024-08-09 11:30:38 +02:00
committed by GitHub
parent 1a92702f17
commit c9e2957435
12 changed files with 32 additions and 30 deletions

View File

@@ -127,7 +127,7 @@ In order to wrap an environment, you must first initialize a base environment. T
```python ```python
>>> import gymnasium as gym >>> import gymnasium as gym
>>> from gymnasium.wrappers import FlattenObservation >>> from gymnasium.wrappers import FlattenObservation
>>> env = gym.make("CarRacing-v2") >>> env = gym.make("CarRacing-v3")
>>> env.observation_space.shape >>> env.observation_space.shape
(96, 96, 3) (96, 96, 3)
>>> wrapped_env = FlattenObservation(env) >>> wrapped_env = FlattenObservation(env)
@@ -156,7 +156,7 @@ If you have a wrapped environment, and you want to get the unwrapped environment
```python ```python
>>> wrapped_env >>> wrapped_env
<FlattenObservation<TimeLimit<OrderEnforcing<PassiveEnvChecker<CarRacing<CarRacing-v2>>>>>> <FlattenObservation<TimeLimit<OrderEnforcing<PassiveEnvChecker<CarRacing<CarRacing-v3>>>>>>
>>> wrapped_env.unwrapped >>> wrapped_env.unwrapped
<gymnasium.envs.box2d.car_racing.CarRacing object at 0x7f04efcb8850> <gymnasium.envs.box2d.car_racing.CarRacing object at 0x7f04efcb8850>
``` ```

View File

@@ -115,7 +115,7 @@ register(
) )
register( register(
id="CarRacing-v2", id="CarRacing-v3",
entry_point="gymnasium.envs.box2d.car_racing:CarRacing", entry_point="gymnasium.envs.box2d.car_racing:CarRacing",
max_episode_steps=1000, max_episode_steps=1000,
reward_threshold=900, reward_threshold=900,

View File

@@ -153,9 +153,9 @@ class CarRacing(gym.Env, EzPickle):
```python ```python
>>> import gymnasium as gym >>> import gymnasium as gym
>>> env = gym.make("CarRacing-v2", render_mode="rgb_array", lap_complete_percent=0.95, domain_randomize=False, continuous=False) >>> env = gym.make("CarRacing-v3", render_mode="rgb_array", lap_complete_percent=0.95, domain_randomize=False, continuous=False)
>>> env >>> env
<TimeLimit<OrderEnforcing<PassiveEnvChecker<CarRacing<CarRacing-v2>>>>> <TimeLimit<OrderEnforcing<PassiveEnvChecker<CarRacing<CarRacing-v3>>>>>
``` ```
@@ -176,7 +176,7 @@ class CarRacing(gym.Env, EzPickle):
```python ```python
>>> import gymnasium as gym >>> import gymnasium as gym
>>> env = gym.make("CarRacing-v2", domain_randomize=True) >>> env = gym.make("CarRacing-v3", domain_randomize=True)
# normal reset, this changes the colour scheme by default # normal reset, this changes the colour scheme by default
>>> obs, _ = env.reset() >>> obs, _ = env.reset()
@@ -190,6 +190,7 @@ class CarRacing(gym.Env, EzPickle):
``` ```
## Version History ## Version History
- v3: Change truncation to termination when finishing the lap (1.0.0)
- v1: Change track completion logic and add domain randomization (0.24.0) - v1: Change track completion logic and add domain randomization (0.24.0)
- v0: Original version - v0: Original version
@@ -564,6 +565,7 @@ class CarRacing(gym.Env, EzPickle):
step_reward = 0 step_reward = 0
terminated = False terminated = False
truncated = False truncated = False
info = {}
if action is not None: # First step without action, called from reset() if action is not None: # First step without action, called from reset()
self.reward -= 0.1 self.reward -= 0.1
# We actually don't want to count fuel spent, we want car to be faster. # We actually don't want to count fuel spent, we want car to be faster.
@@ -572,18 +574,18 @@ class CarRacing(gym.Env, EzPickle):
step_reward = self.reward - self.prev_reward step_reward = self.reward - self.prev_reward
self.prev_reward = self.reward self.prev_reward = self.reward
if self.tile_visited_count == len(self.track) or self.new_lap: if self.tile_visited_count == len(self.track) or self.new_lap:
# Truncation due to finishing lap # Termination due to finishing lap
# This should not be treated as a failure terminated = True
# but like a timeout info["lap_finished"] = True
truncated = True
x, y = self.car.hull.position x, y = self.car.hull.position
if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD: if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
terminated = True terminated = True
info["lap_finished"] = False
step_reward = -100 step_reward = -100
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
return self.state, step_reward, terminated, truncated, {} return self.state, step_reward, terminated, truncated, info
def render(self): def render(self):
if self.render_mode is None: if self.render_mode is None:

View File

@@ -212,7 +212,7 @@ def play(
>>> import gymnasium as gym >>> import gymnasium as gym
>>> import numpy as np >>> import numpy as np
>>> from gymnasium.utils.play import play >>> from gymnasium.utils.play import play
>>> play(gym.make("CarRacing-v2", render_mode="rgb_array"), # doctest: +SKIP >>> play(gym.make("CarRacing-v3", render_mode="rgb_array"), # doctest: +SKIP
... keys_to_action={ ... keys_to_action={
... "w": np.array([0, 0.7, 0], dtype=np.float32), ... "w": np.array([0, 0.7, 0], dtype=np.float32),
... "a": np.array([-1, 0, 0], dtype=np.float32), ... "a": np.array([-1, 0, 0], dtype=np.float32),

View File

@@ -309,7 +309,7 @@ class FrameStackObservation(
Example: Example:
>>> import gymnasium as gym >>> import gymnasium as gym
>>> from gymnasium.wrappers import FrameStackObservation >>> from gymnasium.wrappers import FrameStackObservation
>>> env = gym.make("CarRacing-v2") >>> env = gym.make("CarRacing-v3")
>>> env = FrameStackObservation(env, stack_size=4) >>> env = FrameStackObservation(env, stack_size=4)
>>> env.observation_space >>> env.observation_space
Box(0, 255, (4, 96, 96, 3), uint8) Box(0, 255, (4, 96, 96, 3), uint8)

View File

@@ -225,7 +225,7 @@ class FlattenObservation(
Example: Example:
>>> import gymnasium as gym >>> import gymnasium as gym
>>> from gymnasium.wrappers import FlattenObservation >>> from gymnasium.wrappers import FlattenObservation
>>> env = gym.make("CarRacing-v2") >>> env = gym.make("CarRacing-v3")
>>> env.observation_space.shape >>> env.observation_space.shape
(96, 96, 3) (96, 96, 3)
>>> env = FlattenObservation(env) >>> env = FlattenObservation(env)
@@ -267,7 +267,7 @@ class GrayscaleObservation(
Example: Example:
>>> import gymnasium as gym >>> import gymnasium as gym
>>> from gymnasium.wrappers import GrayscaleObservation >>> from gymnasium.wrappers import GrayscaleObservation
>>> env = gym.make("CarRacing-v2") >>> env = gym.make("CarRacing-v3")
>>> env.observation_space.shape >>> env.observation_space.shape
(96, 96, 3) (96, 96, 3)
>>> grayscale_env = GrayscaleObservation(env) >>> grayscale_env = GrayscaleObservation(env)
@@ -345,7 +345,7 @@ class ResizeObservation(
Example: Example:
>>> import gymnasium as gym >>> import gymnasium as gym
>>> from gymnasium.wrappers import ResizeObservation >>> from gymnasium.wrappers import ResizeObservation
>>> env = gym.make("CarRacing-v2") >>> env = gym.make("CarRacing-v3")
>>> env.observation_space.shape >>> env.observation_space.shape
(96, 96, 3) (96, 96, 3)
>>> resized_env = ResizeObservation(env, (32, 32)) >>> resized_env = ResizeObservation(env, (32, 32))
@@ -416,7 +416,7 @@ class ReshapeObservation(
Example: Example:
>>> import gymnasium as gym >>> import gymnasium as gym
>>> from gymnasium.wrappers import ReshapeObservation >>> from gymnasium.wrappers import ReshapeObservation
>>> env = gym.make("CarRacing-v2") >>> env = gym.make("CarRacing-v3")
>>> env.observation_space.shape >>> env.observation_space.shape
(96, 96, 3) (96, 96, 3)
>>> reshape_env = ReshapeObservation(env, (24, 4, 96, 1, 3)) >>> reshape_env = ReshapeObservation(env, (24, 4, 96, 1, 3))

View File

@@ -213,7 +213,7 @@ class FlattenObservation(VectorizeTransformObservation):
Example: Example:
>>> import gymnasium as gym >>> import gymnasium as gym
>>> envs = gym.make_vec("CarRacing-v2", num_envs=3, vectorization_mode="sync") >>> envs = gym.make_vec("CarRacing-v3", num_envs=3, vectorization_mode="sync")
>>> obs, info = envs.reset(seed=123) >>> obs, info = envs.reset(seed=123)
>>> obs.shape >>> obs.shape
(3, 96, 96, 3) (3, 96, 96, 3)
@@ -238,7 +238,7 @@ class GrayscaleObservation(VectorizeTransformObservation):
Example: Example:
>>> import gymnasium as gym >>> import gymnasium as gym
>>> envs = gym.make_vec("CarRacing-v2", num_envs=3, vectorization_mode="sync") >>> envs = gym.make_vec("CarRacing-v3", num_envs=3, vectorization_mode="sync")
>>> obs, info = envs.reset(seed=123) >>> obs, info = envs.reset(seed=123)
>>> obs.shape >>> obs.shape
(3, 96, 96, 3) (3, 96, 96, 3)
@@ -266,7 +266,7 @@ class ResizeObservation(VectorizeTransformObservation):
Example: Example:
>>> import gymnasium as gym >>> import gymnasium as gym
>>> envs = gym.make_vec("CarRacing-v2", num_envs=3, vectorization_mode="sync") >>> envs = gym.make_vec("CarRacing-v3", num_envs=3, vectorization_mode="sync")
>>> obs, info = envs.reset(seed=123) >>> obs, info = envs.reset(seed=123)
>>> obs.shape >>> obs.shape
(3, 96, 96, 3) (3, 96, 96, 3)
@@ -292,7 +292,7 @@ class ReshapeObservation(VectorizeTransformObservation):
Example: Example:
>>> import gymnasium as gym >>> import gymnasium as gym
>>> envs = gym.make_vec("CarRacing-v2", num_envs=3, vectorization_mode="sync") >>> envs = gym.make_vec("CarRacing-v3", num_envs=3, vectorization_mode="sync")
>>> obs, info = envs.reset(seed=123) >>> obs, info = envs.reset(seed=123)
>>> obs.shape >>> obs.shape
(3, 96, 96, 3) (3, 96, 96, 3)

View File

@@ -242,7 +242,7 @@ def test_make_human_rendering(register_rendering_testing_envs):
TypeError, TypeError,
match=re.escape("got an unexpected keyword argument 'render'"), match=re.escape("got an unexpected keyword argument 'render'"),
): ):
gym.make("CarRacing-v2", render="human") gym.make("CarRacing-v3", render="human")
# This test checks that a user can create an environment without the metadata including the render mode # This test checks that a user can create an environment without the metadata including the render mode
with pytest.warns( with pytest.warns(

View File

@@ -54,7 +54,7 @@ def test_carracing_domain_randomize():
CarRacing DomainRandomize should have different colours at every reset. CarRacing DomainRandomize should have different colours at every reset.
However, it should have same colours when `options={"randomize": False}` is given to reset. However, it should have same colours when `options={"randomize": False}` is given to reset.
""" """
env: CarRacing = gym.make("CarRacing-v2", domain_randomize=True).unwrapped env: CarRacing = gym.make("CarRacing-v3", domain_randomize=True).unwrapped
road_color = env.road_color road_color = env.road_color
bg_color = env.bg_color bg_color = env.bg_color

View File

@@ -68,7 +68,7 @@ def test_update_running_mean_property():
def test_normalize_obs_with_vector(): def test_normalize_obs_with_vector():
def thunk(): def thunk():
env = gym.make("CarRacing-v2") env = gym.make("CarRacing-v3")
env = gym.wrappers.GrayscaleObservation(env) env = gym.wrappers.GrayscaleObservation(env)
env = gym.wrappers.NormalizeObservation(env) env = gym.wrappers.NormalizeObservation(env)
return env return env

View File

@@ -47,7 +47,7 @@ def test_resize_observation_wrapper(env):
@pytest.mark.parametrize("shape", ((10, 10), (20, 20), (60, 60), (100, 100))) @pytest.mark.parametrize("shape", ((10, 10), (20, 20), (60, 60), (100, 100)))
def test_resize_shapes(shape: tuple[int, int]): def test_resize_shapes(shape: tuple[int, int]):
env = ResizeObservation(gym.make("CarRacing-v2"), shape) env = ResizeObservation(gym.make("CarRacing-v3"), shape)
assert env.observation_space == Box( assert env.observation_space == Box(
low=0, high=255, shape=shape + (3,), dtype=np.uint8 low=0, high=255, shape=shape + (3,), dtype=np.uint8
) )
@@ -59,7 +59,7 @@ def test_resize_shapes(shape: tuple[int, int]):
def test_invalid_input(): def test_invalid_input():
env = gym.make("CarRacing-v2") env = gym.make("CarRacing-v3")
with pytest.raises(AssertionError): with pytest.raises(AssertionError):
ResizeObservation(env, ()) ResizeObservation(env, ())

View File

@@ -42,9 +42,9 @@ def custom_environments():
( (
("CustomDictEnv-v0", "FilterObservation", {"filter_keys": ["a"]}), ("CustomDictEnv-v0", "FilterObservation", {"filter_keys": ["a"]}),
("CartPole-v1", "FlattenObservation", {}), ("CartPole-v1", "FlattenObservation", {}),
("CarRacing-v2", "GrayscaleObservation", {}), ("CarRacing-v3", "GrayscaleObservation", {}),
("CarRacing-v2", "ResizeObservation", {"shape": (35, 45)}), ("CarRacing-v3", "ResizeObservation", {"shape": (35, 45)}),
("CarRacing-v2", "ReshapeObservation", {"shape": (96, 48, 6)}), ("CarRacing-v3", "ReshapeObservation", {"shape": (96, 48, 6)}),
( (
"CartPole-v1", "CartPole-v1",
"RescaleObservation", "RescaleObservation",
@@ -53,7 +53,7 @@ def custom_environments():
"max_obs": np.array([1, np.inf, 1, np.inf]), "max_obs": np.array([1, np.inf, 1, np.inf]),
}, },
), ),
("CarRacing-v2", "DtypeObservation", {"dtype": np.int32}), ("CarRacing-v3", "DtypeObservation", {"dtype": np.int32}),
# ("CartPole-v1", "RenderObservation", {}), # not implemented # ("CartPole-v1", "RenderObservation", {}), # not implemented
# ("CartPole-v1", "TimeAwareObservation", {}), # not implemented # ("CartPole-v1", "TimeAwareObservation", {}), # not implemented
# ("CartPole-v1", "FrameStackObservation", {}), # not implemented # ("CartPole-v1", "FrameStackObservation", {}), # not implemented