diff --git a/.github/workflows/docs-build-dev.yml b/.github/workflows/docs-build-dev.yml index 43621d991..24f6367b1 100644 --- a/.github/workflows/docs-build-dev.yml +++ b/.github/workflows/docs-build-dev.yml @@ -19,7 +19,7 @@ jobs: - uses: actions/setup-python@v5 with: - python-version: '3.10' + python-version: '3.12' - name: Install dependencies run: pip install -r docs/requirements.txt diff --git a/.github/workflows/docs-build-release.yml b/.github/workflows/docs-build-release.yml index 6d674da67..c35db945a 100644 --- a/.github/workflows/docs-build-release.yml +++ b/.github/workflows/docs-build-release.yml @@ -19,7 +19,7 @@ jobs: - uses: actions/setup-python@v5 with: - python-version: '3.10' + python-version: '3.12' - name: Install dependencies run: pip install -r docs/requirements.txt diff --git a/.github/workflows/docs-manual-build.yml b/.github/workflows/docs-manual-build.yml index 923d85979..1082ee0d9 100644 --- a/.github/workflows/docs-manual-build.yml +++ b/.github/workflows/docs-manual-build.yml @@ -33,7 +33,7 @@ jobs: - uses: actions/setup-python@v5 with: - python-version: '3.10' + python-version: '3.12' - name: Install dependencies run: pip install -r docs/requirements.txt diff --git a/.github/workflows/run-pre-commit.yml b/.github/workflows/run-pre-commit.yml index c3bd7583d..28a28b6ec 100644 --- a/.github/workflows/run-pre-commit.yml +++ b/.github/workflows/run-pre-commit.yml @@ -15,6 +15,8 @@ jobs: steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 + with: + python-version: '3.12' - run: pipx install pre-commit - run: pre-commit --version - run: pre-commit run --all-files diff --git a/.github/workflows/run-pytest.yml b/.github/workflows/run-pytest.yml index 8d3535b75..3e2e22aca 100644 --- a/.github/workflows/run-pytest.yml +++ b/.github/workflows/run-pytest.yml @@ -10,7 +10,7 @@ jobs: strategy: fail-fast: true matrix: - python-version: ['3.10', '3.11', '3.12'] + python-version: ['3.10', '3.11', '3.12', '3.13'] numpy-version: 
['>=1.21,<2.0', '>=2.1'] steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/run-tutorial.yml b/.github/workflows/run-tutorial.yml index 4f5c1712c..a2aa1b2ce 100644 --- a/.github/workflows/run-tutorial.yml +++ b/.github/workflows/run-tutorial.yml @@ -20,7 +20,7 @@ jobs: strategy: fail-fast: false # This ensures all matrix combinations run even if one fails matrix: - python-version: ["3.10"] + python-version: ["3.12"] tutorial-group: - gymnasium_basics - training_agents diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c02a0632b..95f042cf2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,13 +18,13 @@ repos: - id: detect-private-key - id: debug-statements - repo: https://github.com/codespell-project/codespell - rev: v2.3.0 + rev: v2.4.1 hooks: - id: codespell args: - --ignore-words-list=reacher,ure,referenc,wile,ontact,manuel - repo: https://github.com/PyCQA/flake8 - rev: 7.1.1 + rev: 7.2.0 hooks: - id: flake8 args: @@ -35,16 +35,16 @@ repos: - --show-source - --statistics - repo: https://github.com/asottile/pyupgrade - rev: v3.19.1 + rev: v3.20.0 hooks: - id: pyupgrade - args: ["--py38-plus"] + args: ["--py310-plus"] - repo: https://github.com/PyCQA/isort - rev: 5.13.2 + rev: 6.0.1 hooks: - id: isort - repo: https://github.com/python/black - rev: 24.10.0 + rev: 25.1.0 hooks: - id: black - repo: https://github.com/pycqa/pydocstyle diff --git a/README.md b/README.md index 89a4090db..ff231a305 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ To install the base Gymnasium library, use `pip install gymnasium` This does not include dependencies for all families of environments (there's a massive number, and some can be problematic to install on certain systems). You can install these dependencies for one family like `pip install "gymnasium[atari]"` or use `pip install "gymnasium[all]"` to install all dependencies. -We support and test for Python 3.10, 3.11 and 3.12 on Linux and macOS. 
We will accept PRs related to Windows, but do not officially support it. +We support and test for Python 3.10, 3.11, 3.12 and 3.13 on Linux and macOS. We will accept PRs related to Windows, but do not officially support it. ## API diff --git a/gymnasium/envs/__init__.py b/gymnasium/envs/__init__.py index d9f594752..ed6760d73 100644 --- a/gymnasium/envs/__init__.py +++ b/gymnasium/envs/__init__.py @@ -184,8 +184,18 @@ register( # Mujoco # ---------------------------------------- + +def _raise_mujoco_py_error(*args: Any, **kwargs: Any): + raise ImportError( + "The mujoco v2 and v3 based environments have been moved to the gymnasium-robotics project (https://github.com/Farama-Foundation/gymnasium-robotics)." + ) + + # manipulation + +register(id="Reacher-v2", entry_point=_raise_mujoco_py_error) + register( id="Reacher-v4", entry_point="gymnasium.envs.mujoco.reacher_v4:ReacherEnv", @@ -200,6 +210,8 @@ register( reward_threshold=-3.75, ) +register(id="Pusher-v2", entry_point=_raise_mujoco_py_error) + register( id="Pusher-v4", entry_point="gymnasium.envs.mujoco.pusher_v4:PusherEnv", @@ -216,6 +228,8 @@ register( # balance +register(id="InvertedPendulum-v2", entry_point=_raise_mujoco_py_error) + register( id="InvertedPendulum-v4", entry_point="gymnasium.envs.mujoco.inverted_pendulum_v4:InvertedPendulumEnv", @@ -230,6 +244,8 @@ register( reward_threshold=950.0, ) +register(id="InvertedDoublePendulum-v2", entry_point=_raise_mujoco_py_error) + register( id="InvertedDoublePendulum-v4", entry_point="gymnasium.envs.mujoco.inverted_double_pendulum_v4:InvertedDoublePendulumEnv", @@ -246,6 +262,10 @@ register( # runners +register(id="HalfCheetah-v2", entry_point=_raise_mujoco_py_error) + +register(id="HalfCheetah-v3", entry_point=_raise_mujoco_py_error) + register( id="HalfCheetah-v4", entry_point="gymnasium.envs.mujoco.half_cheetah_v4:HalfCheetahEnv", @@ -260,6 +280,10 @@ register( reward_threshold=4800.0, ) +register(id="Hopper-v2", entry_point=_raise_mujoco_py_error) + 
+register(id="Hopper-v3", entry_point=_raise_mujoco_py_error) + register( id="Hopper-v4", entry_point="gymnasium.envs.mujoco.hopper_v4:HopperEnv", @@ -274,6 +298,10 @@ register( reward_threshold=3800.0, ) +register(id="Swimmer-v2", entry_point=_raise_mujoco_py_error) + +register(id="Swimmer-v3", entry_point=_raise_mujoco_py_error) + register( id="Swimmer-v4", entry_point="gymnasium.envs.mujoco.swimmer_v4:SwimmerEnv", @@ -288,18 +316,26 @@ register( reward_threshold=360.0, ) +register(id="Walker2d-v2", entry_point=_raise_mujoco_py_error) + +register(id="Walker2d-v3", entry_point=_raise_mujoco_py_error) + register( id="Walker2d-v4", - max_episode_steps=1000, entry_point="gymnasium.envs.mujoco.walker2d_v4:Walker2dEnv", + max_episode_steps=1000, ) register( id="Walker2d-v5", - max_episode_steps=1000, entry_point="gymnasium.envs.mujoco.walker2d_v5:Walker2dEnv", + max_episode_steps=1000, ) +register(id="Ant-v2", entry_point=_raise_mujoco_py_error) + +register(id="Ant-v3", entry_point=_raise_mujoco_py_error) + register( id="Ant-v4", entry_point="gymnasium.envs.mujoco.ant_v4:AntEnv", @@ -314,6 +350,10 @@ register( reward_threshold=6000.0, ) +register(id="Humanoid-v2", entry_point=_raise_mujoco_py_error) + +register(id="Humanoid-v3", entry_point=_raise_mujoco_py_error) + register( id="Humanoid-v4", entry_point="gymnasium.envs.mujoco.humanoid_v4:HumanoidEnv", @@ -326,6 +366,8 @@ register( max_episode_steps=1000, ) +register(id="HumanoidStandup-v2", entry_point=_raise_mujoco_py_error) + register( id="HumanoidStandup-v4", entry_point="gymnasium.envs.mujoco.humanoidstandup_v4:HumanoidStandupEnv", diff --git a/gymnasium/envs/box2d/bipedal_walker.py b/gymnasium/envs/box2d/bipedal_walker.py index 94dda8bfc..05519a429 100644 --- a/gymnasium/envs/box2d/bipedal_walker.py +++ b/gymnasium/envs/box2d/bipedal_walker.py @@ -1,7 +1,7 @@ __credits__ = ["Andrea PIERRÉ"] import math -from typing import TYPE_CHECKING, List, Optional +from typing import TYPE_CHECKING import numpy as np @@ 
-174,13 +174,13 @@ class BipedalWalker(gym.Env, EzPickle): "render_fps": FPS, } - def __init__(self, render_mode: Optional[str] = None, hardcore: bool = False): + def __init__(self, render_mode: str | None = None, hardcore: bool = False): EzPickle.__init__(self, render_mode, hardcore) self.isopen = True self.world = Box2D.b2World() - self.terrain: List[Box2D.b2Body] = [] - self.hull: Optional[Box2D.b2Body] = None + self.terrain: list[Box2D.b2Body] = [] + self.hull: Box2D.b2Body | None = None self.prev_shaping = None @@ -264,7 +264,7 @@ class BipedalWalker(gym.Env, EzPickle): # state += [l.fraction for l in self.lidar] self.render_mode = render_mode - self.screen: Optional[pygame.Surface] = None + self.screen: pygame.Surface | None = None self.clock = None def _destroy(self): @@ -432,8 +432,8 @@ class BipedalWalker(gym.Env, EzPickle): def reset( self, *, - seed: Optional[int] = None, - options: Optional[dict] = None, + seed: int | None = None, + options: dict | None = None, ): super().reset(seed=seed) self._destroy() @@ -458,8 +458,8 @@ class BipedalWalker(gym.Env, EzPickle): (self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), 0), True ) - self.legs: List[Box2D.b2Body] = [] - self.joints: List[Box2D.b2RevoluteJoint] = [] + self.legs: list[Box2D.b2Body] = [] + self.joints: list[Box2D.b2RevoluteJoint] = [] for i in [-1, +1]: leg = self.world.CreateDynamicBody( position=(init_x, init_y - LEG_H / 2 - LEG_DOWN), diff --git a/gymnasium/envs/box2d/car_racing.py b/gymnasium/envs/box2d/car_racing.py index b0b1b3b25..cda77fd66 100644 --- a/gymnasium/envs/box2d/car_racing.py +++ b/gymnasium/envs/box2d/car_racing.py @@ -1,7 +1,6 @@ __credits__ = ["Andrea PIERRÉ"] import math -from typing import Optional, Union import numpy as np @@ -212,7 +211,7 @@ class CarRacing(gym.Env, EzPickle): def __init__( self, - render_mode: Optional[str] = None, + render_mode: str | None = None, verbose: bool = False, lap_complete_percent: float = 0.95, domain_randomize: bool = False, @@ 
-233,14 +232,14 @@ class CarRacing(gym.Env, EzPickle): self.contactListener_keepref = FrictionDetector(self, self.lap_complete_percent) self.world = Box2D.b2World((0, 0), contactListener=self.contactListener_keepref) - self.screen: Optional[pygame.Surface] = None + self.screen: pygame.Surface | None = None self.surf = None self.clock = None self.isopen = True self.invisible_state_window = None self.invisible_video_window = None self.road = None - self.car: Optional[Car] = None + self.car: Car | None = None self.reward = 0.0 self.prev_reward = 0.0 self.verbose = verbose @@ -499,8 +498,8 @@ class CarRacing(gym.Env, EzPickle): def reset( self, *, - seed: Optional[int] = None, - options: Optional[dict] = None, + seed: int | None = None, + options: dict | None = None, ): super().reset(seed=seed) self._destroy() @@ -538,7 +537,7 @@ class CarRacing(gym.Env, EzPickle): self.render() return self.step(None)[0], {} - def step(self, action: Union[np.ndarray, int]): + def step(self, action: np.ndarray | int): assert self.car is not None if action is not None: if self.continuous: diff --git a/gymnasium/envs/box2d/lunar_lander.py b/gymnasium/envs/box2d/lunar_lander.py index 63ebae816..6a34694f8 100644 --- a/gymnasium/envs/box2d/lunar_lander.py +++ b/gymnasium/envs/box2d/lunar_lander.py @@ -1,7 +1,7 @@ __credits__ = ["Andrea PIERRÉ"] import math -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING import numpy as np @@ -213,7 +213,7 @@ class LunarLander(gym.Env, EzPickle): def __init__( self, - render_mode: Optional[str] = None, + render_mode: str | None = None, continuous: bool = False, gravity: float = -10.0, enable_wind: bool = False, @@ -254,7 +254,7 @@ class LunarLander(gym.Env, EzPickle): self.isopen = True self.world = Box2D.b2World(gravity=(0, gravity)) self.moon = None - self.lander: Optional[Box2D.b2Body] = None + self.lander: Box2D.b2Body | None = None self.particles = [] self.prev_reward = None @@ -323,8 +323,8 @@ class LunarLander(gym.Env, 
EzPickle): def reset( self, *, - seed: Optional[int] = None, - options: Optional[dict] = None, + seed: int | None = None, + options: dict | None = None, ): super().reset(seed=seed) self._destroy() diff --git a/gymnasium/envs/classic_control/acrobot.py b/gymnasium/envs/classic_control/acrobot.py index 80ef05e6c..28c440af2 100644 --- a/gymnasium/envs/classic_control/acrobot.py +++ b/gymnasium/envs/classic_control/acrobot.py @@ -1,7 +1,5 @@ """classic Acrobot task""" -from typing import Optional - import numpy as np from numpy import cos, pi, sin @@ -172,7 +170,7 @@ class AcrobotEnv(Env): domain_fig = None actions_num = 3 - def __init__(self, render_mode: Optional[str] = None): + def __init__(self, render_mode: str | None = None): self.render_mode = render_mode self.screen = None self.clock = None @@ -185,7 +183,7 @@ class AcrobotEnv(Env): self.action_space = spaces.Discrete(3) self.state = None - def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None): + def reset(self, *, seed: int | None = None, options: dict | None = None): super().reset(seed=seed) # Note that if you use custom reset bounds, it may lead to out-of-bound # state/observations. 
diff --git a/gymnasium/envs/classic_control/cartpole.py b/gymnasium/envs/classic_control/cartpole.py index 9bd08a015..4cb6daad7 100644 --- a/gymnasium/envs/classic_control/cartpole.py +++ b/gymnasium/envs/classic_control/cartpole.py @@ -5,7 +5,7 @@ permalink: https://perma.cc/C9ZM-652R """ import math -from typing import Optional, Tuple, Union +from typing import Union import numpy as np @@ -117,7 +117,7 @@ class CartPoleEnv(gym.Env[np.ndarray, Union[int, np.ndarray]]): } def __init__( - self, sutton_barto_reward: bool = False, render_mode: Optional[str] = None + self, sutton_barto_reward: bool = False, render_mode: str | None = None ): self._sutton_barto_reward = sutton_barto_reward @@ -228,8 +228,8 @@ class CartPoleEnv(gym.Env[np.ndarray, Union[int, np.ndarray]]): def reset( self, *, - seed: Optional[int] = None, - options: Optional[dict] = None, + seed: int | None = None, + options: dict | None = None, ): super().reset(seed=seed) # Note that if you use custom reset bounds, it may lead to out-of-bound @@ -362,7 +362,7 @@ class CartPoleVectorEnv(VectorEnv): self, num_envs: int = 1, max_episode_steps: int = 500, - render_mode: Optional[str] = None, + render_mode: str | None = None, sutton_barto_reward: bool = False, ): self._sutton_barto_reward = sutton_barto_reward @@ -419,7 +419,7 @@ class CartPoleVectorEnv(VectorEnv): def step( self, action: np.ndarray - ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, dict]: + ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, dict]: assert self.action_space.contains( action ), f"{action!r} ({type(action)}) invalid" @@ -486,8 +486,8 @@ class CartPoleVectorEnv(VectorEnv): def reset( self, *, - seed: Optional[int] = None, - options: Optional[dict] = None, + seed: int | None = None, + options: dict | None = None, ): super().reset(seed=seed) # Note that if you use custom reset bounds, it may lead to out-of-bound diff --git a/gymnasium/envs/classic_control/continuous_mountain_car.py 
b/gymnasium/envs/classic_control/continuous_mountain_car.py index eb72b9f99..05398f8cf 100644 --- a/gymnasium/envs/classic_control/continuous_mountain_car.py +++ b/gymnasium/envs/classic_control/continuous_mountain_car.py @@ -14,7 +14,6 @@ permalink: https://perma.cc/6Z2N-PFWC """ import math -from typing import Optional import numpy as np @@ -114,7 +113,7 @@ class Continuous_MountainCarEnv(gym.Env): "render_fps": 30, } - def __init__(self, render_mode: Optional[str] = None, goal_velocity=0): + def __init__(self, render_mode: str | None = None, goal_velocity=0): self.min_action = -1.0 self.max_action = 1.0 self.min_position = -1.2 @@ -183,7 +182,7 @@ class Continuous_MountainCarEnv(gym.Env): # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return self.state, reward, terminated, False, {} - def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None): + def reset(self, *, seed: int | None = None, options: dict | None = None): super().reset(seed=seed) # Note that if you use custom reset bounds, it may lead to out-of-bound # state/observations. 
diff --git a/gymnasium/envs/classic_control/mountain_car.py b/gymnasium/envs/classic_control/mountain_car.py index 5e7e11518..6f6ac258c 100644 --- a/gymnasium/envs/classic_control/mountain_car.py +++ b/gymnasium/envs/classic_control/mountain_car.py @@ -4,7 +4,6 @@ permalink: https://perma.cc/6Z2N-PFWC """ import math -from typing import Optional import numpy as np @@ -106,7 +105,7 @@ class MountainCarEnv(gym.Env): "render_fps": 30, } - def __init__(self, render_mode: Optional[str] = None, goal_velocity=0): + def __init__(self, render_mode: str | None = None, goal_velocity=0): self.min_position = -1.2 self.max_position = 0.6 self.max_speed = 0.07 @@ -157,8 +156,8 @@ class MountainCarEnv(gym.Env): def reset( self, *, - seed: Optional[int] = None, - options: Optional[dict] = None, + seed: int | None = None, + options: dict | None = None, ): super().reset(seed=seed) # Note that if you use custom reset bounds, it may lead to out-of-bound diff --git a/gymnasium/envs/classic_control/pendulum.py b/gymnasium/envs/classic_control/pendulum.py index a908b1139..30255b0b7 100644 --- a/gymnasium/envs/classic_control/pendulum.py +++ b/gymnasium/envs/classic_control/pendulum.py @@ -1,7 +1,6 @@ __credits__ = ["Carlos Luis"] from os import path -from typing import Optional import numpy as np @@ -101,7 +100,7 @@ class PendulumEnv(gym.Env): "render_fps": 30, } - def __init__(self, render_mode: Optional[str] = None, g=10.0): + def __init__(self, render_mode: str | None = None, g=10.0): self.max_speed = 8 self.max_torque = 2.0 self.dt = 0.05 @@ -148,7 +147,7 @@ class PendulumEnv(gym.Env): # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return self._get_obs(), -costs, False, False, {} - def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None): + def reset(self, *, seed: int | None = None, options: dict | None = None): super().reset(seed=seed) if options is None: high = np.array([DEFAULT_X, DEFAULT_Y]) diff --git 
a/gymnasium/envs/classic_control/utils.py b/gymnasium/envs/classic_control/utils.py index 930588e1d..57349cec5 100644 --- a/gymnasium/envs/classic_control/utils.py +++ b/gymnasium/envs/classic_control/utils.py @@ -2,7 +2,7 @@ Utility functions used for classic control environments. """ -from typing import Optional, SupportsFloat, Tuple +from typing import SupportsFloat def verify_number_and_cast(x: SupportsFloat) -> float: @@ -15,8 +15,8 @@ def verify_number_and_cast(x: SupportsFloat) -> float: def maybe_parse_reset_bounds( - options: Optional[dict], default_low: float, default_high: float -) -> Tuple[float, float]: + options: dict | None, default_low: float, default_high: float +) -> tuple[float, float]: """ This function can be called during a reset() to customize the sampling ranges for setting the initial state distributions. diff --git a/gymnasium/envs/mujoco/ant_v5.py b/gymnasium/envs/mujoco/ant_v5.py index 52e0c3edc..7faea5cc8 100644 --- a/gymnasium/envs/mujoco/ant_v5.py +++ b/gymnasium/envs/mujoco/ant_v5.py @@ -1,7 +1,5 @@ __credits__ = ["Kallinteris-Andreas"] -from typing import Dict, Tuple, Union - import numpy as np from gymnasium import utils @@ -213,8 +211,8 @@ class AntEnv(MujocoEnv, utils.EzPickle): - Fixed `info["x_position"]` & `info["y_position"]` & `info["distance_from_origin"]` giving `xpos` instead of `qpos` observations (`xpos` observations are behind 1 `mj_step()` more [here](https://github.com/deepmind/mujoco/issues/889#issuecomment-1568896388)) (related [GitHub issue #1](https://github.com/Farama-Foundation/Gymnasium/issues/521) & [GitHub issue #2](https://github.com/Farama-Foundation/Gymnasium/issues/539)). - Removed `info["forward_reward"]` as it is equivalent to `info["reward_forward"]`. * v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3, also removed contact forces from the default observation space (new variable `use_contact_forces=True` can restore them). 
- * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen). - * v2: All continuous control environments now use mujoco-py >= 1.50. + * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen). Moved to the [gymnasium-robotics repo](https://github.com/Farama-Foundation/gymnasium-robotics). + * v2: All continuous control environments now use mujoco-py >= 1.50. Moved to the [gymnasium-robotics repo](https://github.com/Farama-Foundation/gymnasium-robotics). * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments. * v0: Initial versions release """ @@ -232,15 +230,15 @@ class AntEnv(MujocoEnv, utils.EzPickle): self, xml_file: str = "ant.xml", frame_skip: int = 5, - default_camera_config: Dict[str, Union[float, int]] = DEFAULT_CAMERA_CONFIG, + default_camera_config: dict[str, float | int] = DEFAULT_CAMERA_CONFIG, forward_reward_weight: float = 1, ctrl_cost_weight: float = 0.5, contact_cost_weight: float = 5e-4, healthy_reward: float = 1.0, - main_body: Union[int, str] = 1, + main_body: int | str = 1, terminate_when_unhealthy: bool = True, - healthy_z_range: Tuple[float, float] = (0.2, 1.0), - contact_force_range: Tuple[float, float] = (-1.0, 1.0), + healthy_z_range: tuple[float, float] = (0.2, 1.0), + contact_force_range: tuple[float, float] = (-1.0, 1.0), reset_noise_scale: float = 0.1, exclude_current_positions_from_observation: bool = True, include_cfrc_ext_in_observation: bool = True, diff --git a/gymnasium/envs/mujoco/half_cheetah_v5.py b/gymnasium/envs/mujoco/half_cheetah_v5.py index 6ef6b5b1a..1bfe5b87f 100644 --- a/gymnasium/envs/mujoco/half_cheetah_v5.py +++ b/gymnasium/envs/mujoco/half_cheetah_v5.py @@ -1,7 +1,5 @@ __credits__ = ["Kallinteris-Andreas", "Rushiv 
Arora"] -from typing import Dict, Union - import numpy as np from gymnasium import utils @@ -138,8 +136,8 @@ class HalfCheetahEnv(MujocoEnv, utils.EzPickle): - Restored the `xml_file` argument (was removed in `v4`). - Renamed `info["reward_run"]` to `info["reward_forward"]` to be consistent with the other environments. * v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3. - * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen). - * v2: All continuous control environments now use mujoco-py >= 1.50. + * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen). Moved to the [gymnasium-robotics repo](https://github.com/Farama-Foundation/gymnasium-robotics). + * v2: All continuous control environments now use mujoco-py >= 1.50. Moved to the [gymnasium-robotics repo](https://github.com/Farama-Foundation/gymnasium-robotics). * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments. * v0: Initial versions release. 
""" @@ -157,7 +155,7 @@ class HalfCheetahEnv(MujocoEnv, utils.EzPickle): self, xml_file: str = "half_cheetah.xml", frame_skip: int = 5, - default_camera_config: Dict[str, Union[float, int]] = DEFAULT_CAMERA_CONFIG, + default_camera_config: dict[str, float | int] = DEFAULT_CAMERA_CONFIG, forward_reward_weight: float = 1.0, ctrl_cost_weight: float = 0.1, reset_noise_scale: float = 0.1, diff --git a/gymnasium/envs/mujoco/hopper_v5.py b/gymnasium/envs/mujoco/hopper_v5.py index ed541cdd8..c5683ec64 100644 --- a/gymnasium/envs/mujoco/hopper_v5.py +++ b/gymnasium/envs/mujoco/hopper_v5.py @@ -1,7 +1,5 @@ __credits__ = ["Kallinteris-Andreas"] -from typing import Dict, Tuple, Union - import numpy as np from gymnasium import utils @@ -148,8 +146,8 @@ class HopperEnv(MujocoEnv, utils.EzPickle): - Added individual reward terms in `info` (`info["reward_forward"]`, `info["reward_ctrl"]`, `info["reward_survive"]`). - Added `info["z_distance_from_origin"]` which is equal to the vertical distance of the "torso" body from its initial position. * v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3. - * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen) - * v2: All continuous control environments now use mujoco-py >= 1.50. + * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen). Moved to the [gymnasium-robotics repo](https://github.com/Farama-Foundation/gymnasium-robotics). + * v2: All continuous control environments now use mujoco-py >= 1.50. Moved to the [gymnasium-robotics repo](https://github.com/Farama-Foundation/gymnasium-robotics). * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments. * v0: Initial versions release. 
""" @@ -167,14 +165,14 @@ class HopperEnv(MujocoEnv, utils.EzPickle): self, xml_file: str = "hopper.xml", frame_skip: int = 4, - default_camera_config: Dict[str, Union[float, int]] = DEFAULT_CAMERA_CONFIG, + default_camera_config: dict[str, float | int] = DEFAULT_CAMERA_CONFIG, forward_reward_weight: float = 1.0, ctrl_cost_weight: float = 1e-3, healthy_reward: float = 1.0, terminate_when_unhealthy: bool = True, - healthy_state_range: Tuple[float, float] = (-100.0, 100.0), - healthy_z_range: Tuple[float, float] = (0.7, float("inf")), - healthy_angle_range: Tuple[float, float] = (-0.2, 0.2), + healthy_state_range: tuple[float, float] = (-100.0, 100.0), + healthy_z_range: tuple[float, float] = (0.7, float("inf")), + healthy_angle_range: tuple[float, float] = (-0.2, 0.2), reset_noise_scale: float = 5e-3, exclude_current_positions_from_observation: bool = True, **kwargs, diff --git a/gymnasium/envs/mujoco/humanoid_v5.py b/gymnasium/envs/mujoco/humanoid_v5.py index a03465056..8e4411a49 100644 --- a/gymnasium/envs/mujoco/humanoid_v5.py +++ b/gymnasium/envs/mujoco/humanoid_v5.py @@ -1,7 +1,5 @@ __credits__ = ["Kallinteris-Andreas"] -from typing import Dict, Tuple, Union - import numpy as np from gymnasium import utils @@ -289,9 +287,9 @@ class HumanoidEnv(MujocoEnv, utils.EzPickle): - Renamed `info["reward_quadctrl"]` to `info["reward_ctrl"]` to be consistent with the other environments. - Removed `info["forward_reward"]` as it is equivalent to `info["reward_forward"]`. * v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3 - * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen) + * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen). 
Moved to the [gymnasium-robotics repo](https://github.com/Farama-Foundation/gymnasium-robotics). - Note: the environment robot model was slightly changed at `gym==0.21.0` and training results are not comparable with `gym<0.21` and `gym>=0.21` (related [GitHub PR](https://github.com/openai/gym/pull/932/files)) - * v2: All continuous control environments now use mujoco-py >= 1.50 + * v2: All continuous control environments now use mujoco-py >= 1.50. Moved to the [gymnasium-robotics repo](https://github.com/Farama-Foundation/gymnasium-robotics). - Note: the environment robot model was slightly changed at `gym==0.21.0` and training results are not comparable with `gym<0.21` and `gym>=0.21` (related [GitHub PR](https://github.com/openai/gym/pull/932/files)) * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments. * v0: Initial versions release @@ -310,14 +308,14 @@ class HumanoidEnv(MujocoEnv, utils.EzPickle): self, xml_file: str = "humanoid.xml", frame_skip: int = 5, - default_camera_config: Dict[str, Union[float, int]] = DEFAULT_CAMERA_CONFIG, + default_camera_config: dict[str, float | int] = DEFAULT_CAMERA_CONFIG, forward_reward_weight: float = 1.25, ctrl_cost_weight: float = 0.1, contact_cost_weight: float = 5e-7, - contact_cost_range: Tuple[float, float] = (-np.inf, 10.0), + contact_cost_range: tuple[float, float] = (-np.inf, 10.0), healthy_reward: float = 5.0, terminate_when_unhealthy: bool = True, - healthy_z_range: Tuple[float, float] = (1.0, 2.0), + healthy_z_range: tuple[float, float] = (1.0, 2.0), reset_noise_scale: float = 1e-2, exclude_current_positions_from_observation: bool = True, include_cinert_in_observation: bool = True, diff --git a/gymnasium/envs/mujoco/humanoidstandup_v5.py b/gymnasium/envs/mujoco/humanoidstandup_v5.py index 072128817..8c80c8d53 100644 --- a/gymnasium/envs/mujoco/humanoidstandup_v5.py +++ b/gymnasium/envs/mujoco/humanoidstandup_v5.py @@ -1,7 +1,5 @@ __credits__ = ["Kallinteris-Andreas"] 
-from typing import Dict, Tuple, Union - import numpy as np from gymnasium import utils @@ -271,8 +269,8 @@ class HumanoidStandupEnv(MujocoEnv, utils.EzPickle): - Added `info["x_position"]` & `info["y_position"]` which contain the observations excluded when `exclude_current_positions_from_observation == True`. - Added `info["z_distance_from_origin"]` which is the vertical distance of the "torso" body from its initial position. * v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3. * v3: This environment does not have a v3 release. - * v2: All continuous control environments now use mujoco-py >= 1.50. + * v2: All continuous control environments now use mujoco-py >= 1.50. Moved to the [gymnasium-robotics repo](https://github.com/Farama-Foundation/gymnasium-robotics). * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments. * v0: Initial versions release. 
""" @@ -290,11 +288,11 @@ class HumanoidStandupEnv(MujocoEnv, utils.EzPickle): self, xml_file: str = "humanoidstandup.xml", frame_skip: int = 5, - default_camera_config: Dict[str, Union[float, int]] = DEFAULT_CAMERA_CONFIG, + default_camera_config: dict[str, float | int] = DEFAULT_CAMERA_CONFIG, uph_cost_weight: float = 1, ctrl_cost_weight: float = 0.1, impact_cost_weight: float = 0.5e-6, - impact_cost_range: Tuple[float, float] = (-np.inf, 10.0), + impact_cost_range: tuple[float, float] = (-np.inf, 10.0), reset_noise_scale: float = 1e-2, exclude_current_positions_from_observation: bool = True, include_cinert_in_observation: bool = True, diff --git a/gymnasium/envs/mujoco/inverted_double_pendulum_v5.py b/gymnasium/envs/mujoco/inverted_double_pendulum_v5.py index 24f545184..d2b5097c8 100644 --- a/gymnasium/envs/mujoco/inverted_double_pendulum_v5.py +++ b/gymnasium/envs/mujoco/inverted_double_pendulum_v5.py @@ -1,7 +1,5 @@ __credits__ = ["Kallinteris-Andreas"] -from typing import Dict, Union - import numpy as np from gymnasium import utils @@ -130,8 +128,8 @@ class InvertedDoublePendulumEnv(MujocoEnv, utils.EzPickle): - Added `healthy_reward` argument to configure the reward function (defaults are effectively the same as in `v4`). - Added individual reward terms in `info` (`info["reward_survive"]`, `info["distance_penalty"]`, `info["velocity_penalty"]`). * v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3. * v3: This environment does not have a v3 release. - * v2: All continuous control environments now use mujoco-py >= 1.50. + * v2: All continuous control environments now use mujoco-py >= 1.50. Moved to the [gymnasium-robotics repo](https://github.com/Farama-Foundation/gymnasium-robotics). * v1: max_time_steps raised to 1000 for robot based tasks (including inverted pendulum). 
* v0: Initial versions release. """ @@ -149,7 +147,7 @@ class InvertedDoublePendulumEnv(MujocoEnv, utils.EzPickle): self, xml_file: str = "inverted_double_pendulum.xml", frame_skip: int = 5, - default_camera_config: Dict[str, Union[float, int]] = {}, + default_camera_config: dict[str, float | int] = {}, healthy_reward: float = 10.0, reset_noise_scale: float = 0.1, **kwargs, diff --git a/gymnasium/envs/mujoco/inverted_pendulum_v5.py b/gymnasium/envs/mujoco/inverted_pendulum_v5.py index 0f33db175..0df60733e 100644 --- a/gymnasium/envs/mujoco/inverted_pendulum_v5.py +++ b/gymnasium/envs/mujoco/inverted_pendulum_v5.py @@ -1,7 +1,5 @@ __credits__ = ["Kallinteris-Andreas"] -from typing import Dict, Union - import numpy as np from gymnasium import utils @@ -105,8 +103,8 @@ class InvertedPendulumEnv(MujocoEnv, utils.EzPickle): - Added `reset_noise_scale` argument to set the range of initial states. - Added `info["reward_survive"]` which contains the reward. * v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3. - * v3: This environment does not have a v3 release. - * v2: All continuous control environments now use mujoco-py >= 1.5. + * v3: This environment does not have a v3 release. Moved to the [gymnasium-robotics repo](https://github.com/Farama-Foundation/gymnasium-robotics). + * v2: All continuous control environments now use mujoco-py >= 1.5. Moved to the [gymnasium-robotics repo](https://github.com/Farama-Foundation/gymnasium-robotics). * v1: max_time_steps raised to 1000 for robot based tasks (including inverted pendulum). * v0: Initial versions release. 
""" @@ -124,7 +122,7 @@ class InvertedPendulumEnv(MujocoEnv, utils.EzPickle): self, xml_file: str = "inverted_pendulum.xml", frame_skip: int = 2, - default_camera_config: Dict[str, Union[float, int]] = DEFAULT_CAMERA_CONFIG, + default_camera_config: dict[str, float | int] = DEFAULT_CAMERA_CONFIG, reset_noise_scale: float = 0.01, **kwargs, ): diff --git a/gymnasium/envs/mujoco/mujoco_env.py b/gymnasium/envs/mujoco/mujoco_env.py index 5b7a95c61..672ccb0f5 100644 --- a/gymnasium/envs/mujoco/mujoco_env.py +++ b/gymnasium/envs/mujoco/mujoco_env.py @@ -1,5 +1,4 @@ from os import path -from typing import Dict, Optional, Tuple, Union import numpy as np from numpy.typing import NDArray @@ -41,15 +40,15 @@ class MujocoEnv(gym.Env): self, model_path: str, frame_skip: int, - observation_space: Optional[Space], - render_mode: Optional[str] = None, + observation_space: Space | None, + render_mode: str | None = None, width: int = DEFAULT_SIZE, height: int = DEFAULT_SIZE, - camera_id: Optional[int] = None, - camera_name: Optional[str] = None, - default_camera_config: Optional[Dict[str, Union[float, int]]] = None, + camera_id: int | None = None, + camera_name: str | None = None, + default_camera_config: dict[str, float | int] | None = None, max_geom: int = 1000, - visual_options: Dict[int, bool] = {}, + visual_options: dict[int, bool] = {}, ): """Base abstract class for mujoco based environments. @@ -116,7 +115,7 @@ class MujocoEnv(gym.Env): def _initialize_simulation( self, - ) -> Tuple["mujoco.MjModel", "mujoco.MjData"]: + ) -> tuple["mujoco.MjModel", "mujoco.MjData"]: """ Initialize MuJoCo simulation data structures `mjModel` and `mjData`. 
""" @@ -170,8 +169,8 @@ class MujocoEnv(gym.Env): def reset( self, *, - seed: Optional[int] = None, - options: Optional[dict] = None, + seed: int | None = None, + options: dict | None = None, ): super().reset(seed=seed) @@ -210,7 +209,7 @@ class MujocoEnv(gym.Env): # ---------------------------- def step( self, action: NDArray[np.float32] - ) -> Tuple[NDArray[np.float64], np.float64, bool, bool, Dict[str, np.float64]]: + ) -> tuple[NDArray[np.float64], np.float64, bool, bool, dict[str, np.float64]]: raise NotImplementedError def reset_model(self) -> NDArray[np.float64]: @@ -220,7 +219,7 @@ class MujocoEnv(gym.Env): """ raise NotImplementedError - def _get_reset_info(self) -> Dict[str, float]: + def _get_reset_info(self) -> dict[str, float]: """Function that generates the `info` that is returned during a `reset()`.""" return {} diff --git a/gymnasium/envs/mujoco/mujoco_py_env.py b/gymnasium/envs/mujoco/mujoco_py_env.py new file mode 100644 index 000000000..e69de29bb diff --git a/gymnasium/envs/mujoco/mujoco_rendering.py b/gymnasium/envs/mujoco/mujoco_rendering.py index 82fdc6bee..4ecfcd7c3 100644 --- a/gymnasium/envs/mujoco/mujoco_rendering.py +++ b/gymnasium/envs/mujoco/mujoco_rendering.py @@ -1,6 +1,5 @@ import os import time -from typing import Dict, Optional import glfw import imageio @@ -49,7 +48,7 @@ class BaseRender: width: int, height: int, max_geom: int = 1000, - visual_options: Dict[int, bool] = {}, + visual_options: dict[int, bool] = {}, ): """Render context superclass for offscreen and window rendering.""" self.model = model @@ -182,7 +181,7 @@ class OffScreenViewer(BaseRender): width: int, height: int, max_geom: int = 1000, - visual_options: Dict[int, bool] = {}, + visual_options: dict[int, bool] = {}, ): # We must make GLContext before MjrContext self._get_opengl_backend(width, height) @@ -205,9 +204,7 @@ class OffScreenViewer(BaseRender): self.opengl_context = _ALL_RENDERERS[self.backend](width, height) except KeyError as e: raise RuntimeError( - 
"Environment variable {} must be one of {!r}: got {!r}.".format( - "MUJOCO_GL", _ALL_RENDERERS.keys(), self.backend - ) + f"Environment variable {'MUJOCO_GL'} must be one of {_ALL_RENDERERS.keys()!r}: got {self.backend!r}." ) from e else: @@ -238,8 +235,8 @@ class OffScreenViewer(BaseRender): def render( self, - render_mode: Optional[str], - camera_id: Optional[int] = None, + render_mode: str | None, + camera_id: int | None = None, segmentation: bool = False, ): if camera_id is not None: @@ -339,10 +336,10 @@ class WindowViewer(BaseRender): self, model: "mujoco.MjModel", data: "mujoco.MjData", - width: Optional[int] = None, - height: Optional[int] = None, + width: int | None = None, + height: int | None = None, max_geom: int = 1000, - visual_options: Dict[int, bool] = {}, + visual_options: dict[int, bool] = {}, ): glfw.init() @@ -694,13 +691,13 @@ class MujocoRenderer: self, model: "mujoco.MjModel", data: "mujoco.MjData", - default_cam_config: Optional[dict] = None, - width: Optional[int] = None, - height: Optional[int] = None, + default_cam_config: dict | None = None, + width: int | None = None, + height: int | None = None, max_geom: int = 1000, - camera_id: Optional[int] = None, - camera_name: Optional[str] = None, - visual_options: Dict[int, bool] = {}, + camera_id: int | None = None, + camera_name: str | None = None, + visual_options: dict[int, bool] = {}, ): """A wrapper for clipping continuous actions within the valid bound. @@ -746,7 +743,7 @@ class MujocoRenderer: def render( self, - render_mode: Optional[str], + render_mode: str | None, ): """Renders a frame of the simulation in a specific format and camera view. 
@@ -768,7 +765,7 @@ class MujocoRenderer: elif render_mode == "human": return viewer.render() - def _get_viewer(self, render_mode: Optional[str]): + def _get_viewer(self, render_mode: str | None): """Initializes and returns a viewer class depending on the render_mode - `WindowViewer` class for "human" render mode - `OffScreenViewer` class for "rgb_array", "depth_array", or "rgbd_tuple" render mode diff --git a/gymnasium/envs/mujoco/pusher_v5.py b/gymnasium/envs/mujoco/pusher_v5.py index 6a12a0ecd..ac83d9853 100644 --- a/gymnasium/envs/mujoco/pusher_v5.py +++ b/gymnasium/envs/mujoco/pusher_v5.py @@ -1,7 +1,5 @@ __credits__ = ["Kallinteris-Andreas"] -from typing import Dict, Union - import numpy as np from gymnasium import utils @@ -153,8 +151,8 @@ class PusherEnv(MujocoEnv, utils.EzPickle): - Added `info["reward_near"]` which is equal to the reward term `reward_near`. * v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3. - Warning: This version of the environment is not compatible with `mujoco>=3.0.0` (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium/issues/950)). - * v3: This environment does not have a v3 release. - * v2: All continuous control environments now use mujoco-py >= 1.50. + * v3: This environment does not have a v3 release. Moved to the [gymnasium-robotics repo](https://github.com/Farama-Foundation/gymnasium-robotics). + * v2: All continuous control environments now use mujoco-py >= 1.50. Moved to the [gymnasium-robotics repo](https://github.com/Farama-Foundation/gymnasium-robotics). * v1: max_time_steps raised to 1000 for robot based tasks (not including pusher, which has a max_time_steps of 100). Added reward_threshold to environments. * v0: Initial versions release. 
""" @@ -172,7 +170,7 @@ class PusherEnv(MujocoEnv, utils.EzPickle): self, xml_file: str = "pusher_v5.xml", frame_skip: int = 5, - default_camera_config: Dict[str, Union[float, int]] = DEFAULT_CAMERA_CONFIG, + default_camera_config: dict[str, float | int] = DEFAULT_CAMERA_CONFIG, reward_near_weight: float = 0.5, reward_dist_weight: float = 1, reward_control_weight: float = 0.1, diff --git a/gymnasium/envs/mujoco/reacher_v5.py b/gymnasium/envs/mujoco/reacher_v5.py index 10237a719..f87e5e029 100644 --- a/gymnasium/envs/mujoco/reacher_v5.py +++ b/gymnasium/envs/mujoco/reacher_v5.py @@ -1,7 +1,5 @@ __credits__ = ["Kallinteris-Andreas"] -from typing import Dict, Union - import numpy as np from gymnasium import utils @@ -130,8 +128,8 @@ class ReacherEnv(MujocoEnv, utils.EzPickle): - Added `reward_dist_weight`, `reward_control_weight` arguments to configure the reward function (defaults are effectively the same as in `v4`). - Fixed `info["reward_ctrl"]` not being multiplied by the reward weight. * v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3 - * v3: This environment does not have a v3 release. - * v2: All continuous control environments now use mujoco-py >= 1.50 + * v3: This environment does not have a v3 release. Moved to the [gymnasium-robotics repo](https://github.com/Farama-Foundation/gymnasium-robotics). + * v2: All continuous control environments now use mujoco-py >= 1.50. Moved to the [gymnasium-robotics repo](https://github.com/Farama-Foundation/gymnasium-robotics). * v1: max_time_steps raised to 1000 for robot based tasks (not including reacher, which has a max_time_steps of 50). Added reward_threshold to environments. 
* v0: Initial versions release """ @@ -149,7 +147,7 @@ class ReacherEnv(MujocoEnv, utils.EzPickle): self, xml_file: str = "reacher.xml", frame_skip: int = 2, - default_camera_config: Dict[str, Union[float, int]] = DEFAULT_CAMERA_CONFIG, + default_camera_config: dict[str, float | int] = DEFAULT_CAMERA_CONFIG, reward_dist_weight: float = 1, reward_control_weight: float = 1, **kwargs, diff --git a/gymnasium/envs/mujoco/swimmer_v5.py b/gymnasium/envs/mujoco/swimmer_v5.py index 47147d448..a178076a4 100644 --- a/gymnasium/envs/mujoco/swimmer_v5.py +++ b/gymnasium/envs/mujoco/swimmer_v5.py @@ -1,7 +1,5 @@ __credits__ = ["Kallinteris-Andreas", "Rushiv Arora"] -from typing import Dict, Union - import numpy as np from gymnasium import utils @@ -137,8 +135,8 @@ class SwimmerEnv(MujocoEnv, utils.EzPickle): - Added `exclude_current_positions_from_observation` argument. - Replaced `info["reward_fwd"]` and `info["forward_reward"]` with `info["reward_forward"]` to be consistent with the other environments. * v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3. - * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen). - * v2: All continuous control environments now use mujoco-py >= 1.50. + * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen). Moved to the [gymnasium-robotics repo](https://github.com/Farama-Foundation/gymnasium-robotics). + * v2: All continuous control environments now use mujoco-py >= 1.50. Moved to the [gymnasium-robotics repo](https://github.com/Farama-Foundation/gymnasium-robotics). * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments. * v0: Initial versions release. 
""" @@ -156,7 +154,7 @@ class SwimmerEnv(MujocoEnv, utils.EzPickle): self, xml_file: str = "swimmer.xml", frame_skip: int = 4, - default_camera_config: Dict[str, Union[float, int]] = {}, + default_camera_config: dict[str, float | int] = {}, forward_reward_weight: float = 1.0, ctrl_cost_weight: float = 1e-4, reset_noise_scale: float = 0.1, diff --git a/gymnasium/envs/mujoco/walker2d_v5.py b/gymnasium/envs/mujoco/walker2d_v5.py index 0d2016e1c..b3fc12290 100644 --- a/gymnasium/envs/mujoco/walker2d_v5.py +++ b/gymnasium/envs/mujoco/walker2d_v5.py @@ -1,7 +1,5 @@ __credits__ = ["Kallinteris-Andreas"] -from typing import Dict, Tuple, Union - import numpy as np from gymnasium import utils @@ -157,8 +155,8 @@ class Walker2dEnv(MujocoEnv, utils.EzPickle): - Added individual reward terms in `info` (`info["reward_forward"]`, `info["reward_ctrl"]`, `info["reward_survive"]`). - Added `info["z_distance_from_origin"]` which is equal to the vertical distance of the "torso" body from its initial position. * v4: All MuJoCo environments now use the MuJoCo bindings in mujoco >= 2.1.3 - * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen) - * v2: All continuous control environments now use mujoco-py >= 1.50 + * v3: Support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen). Moved to the [gymnasium-robotics repo](https://github.com/Farama-Foundation/gymnasium-robotics). + * v2: All continuous control environments now use mujoco-py >= 1.50. Moved to the [gymnasium-robotics repo](https://github.com/Farama-Foundation/gymnasium-robotics). * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments. 
* v0: Initial versions release """ @@ -176,13 +174,13 @@ class Walker2dEnv(MujocoEnv, utils.EzPickle): self, xml_file: str = "walker2d_v5.xml", frame_skip: int = 4, - default_camera_config: Dict[str, Union[float, int]] = DEFAULT_CAMERA_CONFIG, + default_camera_config: dict[str, float | int] = DEFAULT_CAMERA_CONFIG, forward_reward_weight: float = 1.0, ctrl_cost_weight: float = 1e-3, healthy_reward: float = 1.0, terminate_when_unhealthy: bool = True, - healthy_z_range: Tuple[float, float] = (0.8, 2.0), - healthy_angle_range: Tuple[float, float] = (-1.0, 1.0), + healthy_z_range: tuple[float, float] = (0.8, 2.0), + healthy_angle_range: tuple[float, float] = (-1.0, 1.0), reset_noise_scale: float = 5e-3, exclude_current_positions_from_observation: bool = True, **kwargs, diff --git a/gymnasium/envs/phys2d/cartpole.py b/gymnasium/envs/phys2d/cartpole.py index 5c6f6cb3a..da3d6a8a8 100644 --- a/gymnasium/envs/phys2d/cartpole.py +++ b/gymnasium/envs/phys2d/cartpole.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import Any, Tuple, TypeAlias +from typing import Any, TypeAlias import jax import jax.numpy as jnp @@ -19,7 +19,7 @@ from gymnasium.vector import AutoresetMode PRNGKeyType: TypeAlias = jax.Array StateType: TypeAlias = jax.Array -RenderStateType = Tuple["pygame.Surface", "pygame.time.Clock"] # type: ignore # noqa: F821 +RenderStateType = tuple["pygame.Surface", "pygame.time.Clock"] # type: ignore # noqa: F821 @struct.dataclass diff --git a/gymnasium/envs/phys2d/pendulum.py b/gymnasium/envs/phys2d/pendulum.py index f908e48b9..e9e08d951 100644 --- a/gymnasium/envs/phys2d/pendulum.py +++ b/gymnasium/envs/phys2d/pendulum.py @@ -3,7 +3,7 @@ from __future__ import annotations from os import path -from typing import Any, Optional, Tuple, TypeAlias +from typing import Any, Optional, TypeAlias import jax import jax.numpy as jnp @@ -20,7 +20,7 @@ from gymnasium.vector import AutoresetMode PRNGKeyType: TypeAlias = jax.Array StateType: TypeAlias = jax.Array 
-RenderStateType = Tuple["pygame.Surface", "pygame.time.Clock", Optional[float]] # type: ignore # noqa: F821 +RenderStateType = tuple["pygame.Surface", "pygame.time.Clock", Optional[float]] # type: ignore # noqa: F821 @struct.dataclass diff --git a/gymnasium/envs/registration.py b/gymnasium/envs/registration.py index 58bfb0894..c89ced923 100644 --- a/gymnasium/envs/registration.py +++ b/gymnasium/envs/registration.py @@ -7,15 +7,16 @@ import copy import dataclasses import difflib import importlib +import importlib.metadata as metadata import importlib.util import json import re -import sys from collections import defaultdict +from collections.abc import Callable, Iterable, Sequence from dataclasses import dataclass, field from enum import Enum from types import ModuleType -from typing import Any, Callable, Iterable, Sequence +from typing import Any, Protocol import gymnasium as gym from gymnasium import Env, Wrapper, error, logger @@ -23,14 +24,6 @@ from gymnasium.logger import warn from gymnasium.vector import AutoresetMode -if sys.version_info < (3, 10): - import importlib_metadata as metadata # type: ignore -else: - import importlib.metadata as metadata - -from typing import Protocol - - ENV_ID_RE = re.compile( r"^(?:(?P[\w:-]+)\/)?(?:(?P[\w:.-]+?))(?:-v(?P\d+))?$" ) @@ -606,7 +599,6 @@ def register( assert ( entry_point is not None or vector_entry_point is not None ), "Either `entry_point` or `vector_entry_point` (or both) must be provided" - global registry, current_namespace ns, name, version = parse_env_id(id) if kwargs is None: diff --git a/gymnasium/envs/tabular/blackjack.py b/gymnasium/envs/tabular/blackjack.py index 2bfdeeaa0..8248eb862 100644 --- a/gymnasium/envs/tabular/blackjack.py +++ b/gymnasium/envs/tabular/blackjack.py @@ -2,7 +2,7 @@ import math import os -from typing import NamedTuple, Optional, Tuple, TypeAlias, Union +from typing import NamedTuple, TypeAlias import jax import jax.numpy as jnp @@ -20,7 +20,7 @@ from gymnasium.wrappers import 
HumanRendering PRNGKeyType: TypeAlias = jax.Array -RenderStateType = Tuple["pygame.Surface", str, int] # type: ignore # noqa: F821 +RenderStateType = tuple["pygame.Surface", str, int] # type: ignore # noqa: F821 deck = jnp.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 10, 10]) @@ -246,7 +246,7 @@ class BlackjackFunctional( def transition( self, state: EnvState, - action: Union[int, jax.Array], + action: int | jax.Array, key: PRNGKeyType, params: BlackJackParams = BlackJackParams, ) -> EnvState: @@ -381,7 +381,7 @@ class BlackjackFunctional( state: StateType, render_state: RenderStateType, params: BlackJackParams = BlackJackParams, - ) -> Tuple[RenderStateType, np.ndarray]: + ) -> tuple[RenderStateType, np.ndarray]: """Renders an image from a state.""" try: import pygame @@ -510,7 +510,7 @@ class BlackJackJaxEnv(FunctionalJaxEnv, EzPickle): metadata = {"render_modes": ["rgb_array"], "render_fps": 50, "jax": True} - def __init__(self, render_mode: Optional[str] = None, **kwargs): + def __init__(self, render_mode: str | None = None, **kwargs): """Initializes Gym wrapper for blackjack functional env.""" EzPickle.__init__(self, render_mode=render_mode, **kwargs) env = BlackjackFunctional(**kwargs) diff --git a/gymnasium/envs/toy_text/blackjack.py b/gymnasium/envs/toy_text/blackjack.py index 80fc7b5e0..e813dee4c 100644 --- a/gymnasium/envs/toy_text/blackjack.py +++ b/gymnasium/envs/toy_text/blackjack.py @@ -1,5 +1,4 @@ import os -from typing import Optional import numpy as np @@ -149,7 +148,7 @@ class BlackjackEnv(gym.Env): "render_fps": 4, } - def __init__(self, render_mode: Optional[str] = None, natural=False, sab=False): + def __init__(self, render_mode: str | None = None, natural=False, sab=False): self.action_space = spaces.Discrete(2) self.observation_space = spaces.Tuple( (spaces.Discrete(32), spaces.Discrete(11), spaces.Discrete(2)) @@ -201,8 +200,8 @@ class BlackjackEnv(gym.Env): def reset( self, - seed: Optional[int] = None, - options: Optional[dict] = None, + 
seed: int | None = None, + options: dict | None = None, ): super().reset(seed=seed) self.dealer = draw_hand(self.np_random) diff --git a/gymnasium/envs/toy_text/cliffwalking.py b/gymnasium/envs/toy_text/cliffwalking.py index e8d0b14c5..30e36f53e 100644 --- a/gymnasium/envs/toy_text/cliffwalking.py +++ b/gymnasium/envs/toy_text/cliffwalking.py @@ -1,7 +1,7 @@ from contextlib import closing from io import StringIO from os import path -from typing import Any, List, Optional, Tuple, Union +from typing import Any import numpy as np @@ -101,7 +101,7 @@ class CliffWalkingEnv(Env): "render_fps": 4, } - def __init__(self, render_mode: Optional[str] = None, is_slippery: bool = False): + def __init__(self, render_mode: str | None = None, is_slippery: bool = False): self.shape = (4, 12) self.start_state_index = np.ravel_multi_index((3, 0), self.shape) @@ -159,8 +159,8 @@ class CliffWalkingEnv(Env): return coord def _calculate_transition_prob( - self, current: Union[List[int], np.ndarray], move: int - ) -> List[Tuple[float, Any, int, bool]]: + self, current: list[int] | np.ndarray, move: int + ) -> list[tuple[float, Any, int, bool]]: """Determine the outcome for an action. Transition Prob is always 1.0. 
Args: @@ -203,7 +203,7 @@ class CliffWalkingEnv(Env): # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return int(s), r, t, False, {"prob": p} - def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None): + def reset(self, *, seed: int | None = None, options: dict | None = None): super().reset(seed=seed) self.s = categorical_sample(self.initial_state_distrib, self.np_random) self.lastaction = None diff --git a/gymnasium/envs/toy_text/frozen_lake.py b/gymnasium/envs/toy_text/frozen_lake.py index 9b9c1bfa4..34bb93615 100644 --- a/gymnasium/envs/toy_text/frozen_lake.py +++ b/gymnasium/envs/toy_text/frozen_lake.py @@ -1,7 +1,6 @@ from contextlib import closing from io import StringIO from os import path -from typing import List, Optional import numpy as np @@ -33,7 +32,7 @@ MAPS = { # DFS to check that it's a valid path. -def is_valid(board: List[List[str]], max_size: int) -> bool: +def is_valid(board: list[list[str]], max_size: int) -> bool: frontier, discovered = [], set() frontier.append((0, 0)) while frontier: @@ -54,8 +53,8 @@ def is_valid(board: List[List[str]], max_size: int) -> bool: def generate_random_map( - size: int = 8, p: float = 0.8, seed: Optional[int] = None -) -> List[str]: + size: int = 8, p: float = 0.8, seed: int | None = None +) -> list[str]: """Generates a random valid map (one that has a path from start to goal) Args: @@ -222,7 +221,7 @@ class FrozenLakeEnv(Env): def __init__( self, - render_mode: Optional[str] = None, + render_mode: str | None = None, desc=None, map_name="4x4", is_slippery=True, @@ -317,8 +316,8 @@ class FrozenLakeEnv(Env): def reset( self, *, - seed: Optional[int] = None, - options: Optional[dict] = None, + seed: int | None = None, + options: dict | None = None, ): super().reset(seed=seed) self.s = categorical_sample(self.initial_state_distrib, self.np_random) diff --git a/gymnasium/envs/toy_text/taxi.py b/gymnasium/envs/toy_text/taxi.py index 
bac6977bb..9d809951a 100644 --- a/gymnasium/envs/toy_text/taxi.py +++ b/gymnasium/envs/toy_text/taxi.py @@ -1,7 +1,6 @@ from contextlib import closing from io import StringIO from os import path -from typing import Optional import numpy as np @@ -279,7 +278,7 @@ class TaxiEnv(Env): def __init__( self, - render_mode: Optional[str] = None, + render_mode: str | None = None, is_rainy: bool = False, fickle_passenger: bool = False, ): @@ -425,8 +424,8 @@ class TaxiEnv(Env): def reset( self, *, - seed: Optional[int] = None, - options: Optional[dict] = None, + seed: int | None = None, + options: dict | None = None, ): super().reset(seed=seed) self.s = categorical_sample(self.initial_state_distrib, self.np_random) diff --git a/gymnasium/experimental/functional.py b/gymnasium/experimental/functional.py index bdcbb40d7..51fa23156 100644 --- a/gymnasium/experimental/functional.py +++ b/gymnasium/experimental/functional.py @@ -2,7 +2,8 @@ from __future__ import annotations -from typing import Any, Callable, Generic, TypeVar +from collections.abc import Callable +from typing import Any, Generic, TypeVar import numpy as np diff --git a/gymnasium/logger.py b/gymnasium/logger.py index 5c999aaed..dc70bace5 100644 --- a/gymnasium/logger.py +++ b/gymnasium/logger.py @@ -1,7 +1,6 @@ """Set of functions for logging messages.""" import warnings -from typing import Optional, Type from gymnasium.utils import colorize @@ -19,7 +18,7 @@ warnings.filterwarnings("once", "", DeprecationWarning, module=r"^gymnasium\.") def warn( msg: str, *args: object, - category: Optional[Type[Warning]] = None, + category: type[Warning] | None = None, stacklevel: int = 1, ): """Raises a warning to the user if the min_level <= WARN. 
diff --git a/gymnasium/spaces/box.py b/gymnasium/spaces/box.py index 96af30551..4d1fe6130 100644 --- a/gymnasium/spaces/box.py +++ b/gymnasium/spaces/box.py @@ -2,7 +2,8 @@ from __future__ import annotations -from typing import Any, Iterable, Mapping, Sequence, SupportsFloat +from collections.abc import Iterable, Mapping, Sequence +from typing import Any, SupportsFloat import numpy as np from numpy.typing import NDArray diff --git a/gymnasium/spaces/dict.py b/gymnasium/spaces/dict.py index b25af0ff7..0a241a966 100644 --- a/gymnasium/spaces/dict.py +++ b/gymnasium/spaces/dict.py @@ -5,14 +5,15 @@ from __future__ import annotations import collections.abc import typing from collections import OrderedDict -from typing import Any, KeysView, Sequence +from collections.abc import KeysView, Sequence +from typing import Any import numpy as np from gymnasium.spaces.space import Space -class Dict(Space[typing.Dict[str, Any]], typing.Mapping[str, Space[Any]]): +class Dict(Space[dict[str, Any]], typing.Mapping[str, Space[Any]]): """A dictionary of :class:`Space` instances. Elements of this space are (ordered) dictionaries of elements from the constituent spaces. 
diff --git a/gymnasium/spaces/discrete.py b/gymnasium/spaces/discrete.py index 36d0d33f4..38b96a3f6 100644 --- a/gymnasium/spaces/discrete.py +++ b/gymnasium/spaces/discrete.py @@ -2,7 +2,8 @@ from __future__ import annotations -from typing import Any, Iterable, Mapping, Sequence +from collections.abc import Iterable, Mapping, Sequence +from typing import Any import numpy as np diff --git a/gymnasium/spaces/graph.py b/gymnasium/spaces/graph.py index ec6938db9..eee30a523 100644 --- a/gymnasium/spaces/graph.py +++ b/gymnasium/spaces/graph.py @@ -2,7 +2,8 @@ from __future__ import annotations -from typing import Any, NamedTuple, Sequence +from collections.abc import Sequence +from typing import Any, NamedTuple import numpy as np from numpy.typing import NDArray diff --git a/gymnasium/spaces/multi_binary.py b/gymnasium/spaces/multi_binary.py index 952adf95a..229102cc3 100644 --- a/gymnasium/spaces/multi_binary.py +++ b/gymnasium/spaces/multi_binary.py @@ -2,7 +2,8 @@ from __future__ import annotations -from typing import Any, Sequence +from collections.abc import Sequence +from typing import Any import numpy as np from numpy.typing import NDArray diff --git a/gymnasium/spaces/multi_discrete.py b/gymnasium/spaces/multi_discrete.py index ceabe6134..9f4c876e8 100644 --- a/gymnasium/spaces/multi_discrete.py +++ b/gymnasium/spaces/multi_discrete.py @@ -2,7 +2,8 @@ from __future__ import annotations -from typing import Any, Iterable, Mapping, Sequence +from collections.abc import Iterable, Mapping, Sequence +from typing import Any import numpy as np from numpy.typing import NDArray diff --git a/gymnasium/spaces/oneof.py b/gymnasium/spaces/oneof.py index 24323038a..754c4e912 100644 --- a/gymnasium/spaces/oneof.py +++ b/gymnasium/spaces/oneof.py @@ -3,7 +3,8 @@ from __future__ import annotations import typing -from typing import Any, Iterable +from collections.abc import Iterable +from typing import Any import numpy as np diff --git a/gymnasium/spaces/sequence.py 
b/gymnasium/spaces/sequence.py index b33caeb62..4e7faa9fb 100644 --- a/gymnasium/spaces/sequence.py +++ b/gymnasium/spaces/sequence.py @@ -12,7 +12,7 @@ import gymnasium as gym from gymnasium.spaces.space import Space -class Sequence(Space[Union[typing.Tuple[Any, ...], Any]]): +class Sequence(Space[Union[tuple[Any, ...], Any]]): r"""This space represent sets of finite-length sequences. This space represents the set of tuples of the form :math:`(a_0, \dots, a_n)` where the :math:`a_i` belong diff --git a/gymnasium/spaces/space.py b/gymnasium/spaces/space.py index 39bf1ddc9..c45711444 100644 --- a/gymnasium/spaces/space.py +++ b/gymnasium/spaces/space.py @@ -2,7 +2,8 @@ from __future__ import annotations -from typing import Any, Generic, Iterable, Mapping, Sequence, TypeAlias, TypeVar +from collections.abc import Iterable, Mapping, Sequence +from typing import Any, Generic, TypeAlias, TypeVar import numpy as np import numpy.typing as npt diff --git a/gymnasium/spaces/tuple.py b/gymnasium/spaces/tuple.py index ade879d4d..da92656ae 100644 --- a/gymnasium/spaces/tuple.py +++ b/gymnasium/spaces/tuple.py @@ -3,14 +3,15 @@ from __future__ import annotations import typing -from typing import Any, Iterable +from collections.abc import Iterable +from typing import Any import numpy as np from gymnasium.spaces.space import Space -class Tuple(Space[typing.Tuple[Any, ...]], typing.Sequence[Any]): +class Tuple(Space[tuple[Any, ...]], typing.Sequence[Any]): """A tuple (more precisely: the cartesian product) of :class:`Space` instances. Elements of this space are tuples of elements of the constituent spaces. 
diff --git a/gymnasium/spaces/utils.py b/gymnasium/spaces/utils.py index 63ae192b7..aa28973c7 100644 --- a/gymnasium/spaces/utils.py +++ b/gymnasium/spaces/utils.py @@ -7,7 +7,6 @@ These functions mostly take care of flattening and unflattening elements of spac from __future__ import annotations import operator as op -import typing from functools import reduce, singledispatch from typing import Any, TypeVar, Union @@ -111,9 +110,7 @@ def _flatdim_oneof(space: OneOf) -> int: T = TypeVar("T") -FlatType = Union[ - NDArray[Any], typing.Dict[str, Any], typing.Tuple[Any, ...], GraphInstance -] +FlatType = Union[NDArray[Any], dict[str, Any], tuple[Any, ...], GraphInstance] @singledispatch diff --git a/gymnasium/utils/passive_env_checker.py b/gymnasium/utils/passive_env_checker.py index 0b08fc516..0a4d2ce32 100644 --- a/gymnasium/utils/passive_env_checker.py +++ b/gymnasium/utils/passive_env_checker.py @@ -1,8 +1,8 @@ """A set of functions for passively checking environment implementations.""" import inspect +from collections.abc import Callable from functools import partial -from typing import Callable import numpy as np diff --git a/gymnasium/utils/performance.py b/gymnasium/utils/performance.py index 54d4da7b8..dd7ea7849 100644 --- a/gymnasium/utils/performance.py +++ b/gymnasium/utils/performance.py @@ -1,7 +1,7 @@ """A collection of runtime performance bencharks, useful for debugging performance related issues.""" import time -from typing import Callable +from collections.abc import Callable import gymnasium diff --git a/gymnasium/utils/play.py b/gymnasium/utils/play.py index 4f842e2e3..f22eed278 100644 --- a/gymnasium/utils/play.py +++ b/gymnasium/utils/play.py @@ -3,7 +3,8 @@ from __future__ import annotations from collections import deque -from typing import TYPE_CHECKING, Callable, List +from collections.abc import Callable +from typing import TYPE_CHECKING import numpy as np @@ -89,7 +90,7 @@ class PlayableGame: def _get_video_size(self, zoom: float | None = 
None) -> tuple[int, int]: rendered = self.env.render() - if isinstance(rendered, List): + if isinstance(rendered, list): rendered = rendered[-1] assert rendered is not None and isinstance(rendered, np.ndarray) video_size = (rendered.shape[1], rendered.shape[0]) @@ -299,7 +300,7 @@ def play( callback(prev_obs, obs, action, rew, terminated, truncated, info) if obs is not None: rendered = env.render() - if isinstance(rendered, List): + if isinstance(rendered, list): rendered = rendered[-1] assert rendered is not None and isinstance(rendered, np.ndarray) display_arr( diff --git a/gymnasium/utils/save_video.py b/gymnasium/utils/save_video.py index 9185af180..2c97ca20a 100644 --- a/gymnasium/utils/save_video.py +++ b/gymnasium/utils/save_video.py @@ -3,7 +3,7 @@ from __future__ import annotations import os -from typing import Callable +from collections.abc import Callable import gymnasium as gym from gymnasium import logger diff --git a/gymnasium/utils/step_api_compatibility.py b/gymnasium/utils/step_api_compatibility.py index cacb727f6..539b51383 100644 --- a/gymnasium/utils/step_api_compatibility.py +++ b/gymnasium/utils/step_api_compatibility.py @@ -2,21 +2,21 @@ from __future__ import annotations -from typing import SupportsFloat, Tuple, Union +from typing import SupportsFloat, Union import numpy as np from gymnasium.core import ObsType -DoneStepType = Tuple[ +DoneStepType = tuple[ Union[ObsType, np.ndarray], Union[SupportsFloat, np.ndarray], Union[bool, np.ndarray], Union[dict, list], ] -TerminatedTruncatedStepType = Tuple[ +TerminatedTruncatedStepType = tuple[ Union[ObsType, np.ndarray], Union[SupportsFloat, np.ndarray], Union[bool, np.ndarray], diff --git a/gymnasium/vector/async_vector_env.py b/gymnasium/vector/async_vector_env.py index 75dc9f937..78584636d 100644 --- a/gymnasium/vector/async_vector_env.py +++ b/gymnasium/vector/async_vector_env.py @@ -6,12 +6,13 @@ import multiprocessing import sys import time import traceback +from collections.abc import 
Callable, Sequence from copy import deepcopy from enum import Enum from multiprocessing import Queue from multiprocessing.connection import Connection from multiprocessing.sharedctypes import SynchronizedArray -from typing import Any, Callable, Sequence +from typing import Any import numpy as np diff --git a/gymnasium/vector/sync_vector_env.py b/gymnasium/vector/sync_vector_env.py index 26e61ffba..cb1870d96 100644 --- a/gymnasium/vector/sync_vector_env.py +++ b/gymnasium/vector/sync_vector_env.py @@ -2,8 +2,9 @@ from __future__ import annotations +from collections.abc import Callable, Iterator, Sequence from copy import deepcopy -from typing import Any, Callable, Iterator, Sequence +from typing import Any import numpy as np diff --git a/gymnasium/vector/utils/space_utils.py b/gymnasium/vector/utils/space_utils.py index b0b74d538..a49855e1c 100644 --- a/gymnasium/vector/utils/space_utils.py +++ b/gymnasium/vector/utils/space_utils.py @@ -10,10 +10,10 @@ from __future__ import annotations import typing -from collections.abc import Callable +from collections.abc import Callable, Iterable, Iterator from copy import deepcopy from functools import singledispatch -from typing import Any, Iterable, Iterator +from typing import Any import numpy as np diff --git a/gymnasium/wrappers/array_conversion.py b/gymnasium/wrappers/array_conversion.py index b974423aa..5416facf3 100644 --- a/gymnasium/wrappers/array_conversion.py +++ b/gymnasium/wrappers/array_conversion.py @@ -20,8 +20,9 @@ import functools import importlib import numbers from collections import abc +from collections.abc import Iterable, Mapping from types import ModuleType, NoneType -from typing import Any, Iterable, Mapping, SupportsFloat +from typing import Any, SupportsFloat import numpy as np from packaging.version import Version diff --git a/gymnasium/wrappers/rendering.py b/gymnasium/wrappers/rendering.py index bc1069f4c..12142911e 100644 --- a/gymnasium/wrappers/rendering.py +++ 
b/gymnasium/wrappers/rendering.py @@ -11,8 +11,9 @@ from __future__ import annotations import gc import os +from collections.abc import Callable from copy import deepcopy -from typing import Any, Callable, Generic, List, SupportsFloat +from typing import Any, Generic, SupportsFloat import numpy as np @@ -322,7 +323,7 @@ class RecordVideo( assert self.recording, "Cannot capture a frame, recording wasn't started." frame = self.env.render() - if isinstance(frame, List): + if isinstance(frame, list): if len(frame) == 0: # render was called return self.render_history += frame @@ -375,7 +376,7 @@ class RecordVideo( def render(self) -> RenderFrame | list[RenderFrame]: """Compute the render frames as specified by render_mode attribute during initialization of the environment.""" render_out = super().render() - if self.recording and isinstance(render_out, List): + if self.recording and isinstance(render_out, list): self.recorded_frames += render_out if len(self.render_history) > 0: diff --git a/gymnasium/wrappers/transform_action.py b/gymnasium/wrappers/transform_action.py index a069ab04f..69039e9a3 100644 --- a/gymnasium/wrappers/transform_action.py +++ b/gymnasium/wrappers/transform_action.py @@ -7,7 +7,7 @@ from __future__ import annotations -from typing import Callable +from collections.abc import Callable import numpy as np diff --git a/gymnasium/wrappers/transform_observation.py b/gymnasium/wrappers/transform_observation.py index 824a401c3..5db1ef097 100644 --- a/gymnasium/wrappers/transform_observation.py +++ b/gymnasium/wrappers/transform_observation.py @@ -13,7 +13,8 @@ from __future__ import annotations -from typing import Any, Callable, Final, Sequence +from collections.abc import Callable, Sequence +from typing import Any, Final import numpy as np diff --git a/gymnasium/wrappers/transform_reward.py b/gymnasium/wrappers/transform_reward.py index b17308c25..673a0c03c 100644 --- a/gymnasium/wrappers/transform_reward.py +++ b/gymnasium/wrappers/transform_reward.py 
@@ -6,7 +6,8 @@ from __future__ import annotations -from typing import Callable, SupportsFloat +from collections.abc import Callable +from typing import SupportsFloat import numpy as np diff --git a/gymnasium/wrappers/utils.py b/gymnasium/wrappers/utils.py index 5fc896c8f..ea6a0ebe1 100644 --- a/gymnasium/wrappers/utils.py +++ b/gymnasium/wrappers/utils.py @@ -2,8 +2,8 @@ from __future__ import annotations +from collections.abc import Callable from functools import singledispatch -from typing import Callable import numpy as np diff --git a/gymnasium/wrappers/vector/vectorize_action.py b/gymnasium/wrappers/vector/vectorize_action.py index 8fc607107..6375eebe7 100644 --- a/gymnasium/wrappers/vector/vectorize_action.py +++ b/gymnasium/wrappers/vector/vectorize_action.py @@ -2,8 +2,9 @@ from __future__ import annotations +from collections.abc import Callable from copy import deepcopy -from typing import Any, Callable +from typing import Any import numpy as np diff --git a/gymnasium/wrappers/vector/vectorize_observation.py b/gymnasium/wrappers/vector/vectorize_observation.py index 5ca07a1e4..d3b840a44 100644 --- a/gymnasium/wrappers/vector/vectorize_observation.py +++ b/gymnasium/wrappers/vector/vectorize_observation.py @@ -2,8 +2,9 @@ from __future__ import annotations +from collections.abc import Callable, Sequence from copy import deepcopy -from typing import Any, Callable, Sequence +from typing import Any import numpy as np diff --git a/gymnasium/wrappers/vector/vectorize_reward.py b/gymnasium/wrappers/vector/vectorize_reward.py index 5f80192c4..51c2b3f16 100644 --- a/gymnasium/wrappers/vector/vectorize_reward.py +++ b/gymnasium/wrappers/vector/vectorize_reward.py @@ -2,7 +2,8 @@ from __future__ import annotations -from typing import Any, Callable +from collections.abc import Callable +from typing import Any import numpy as np diff --git a/pyproject.toml b/pyproject.toml index 01f70d0e9..0ef4d494b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,6 +19,7 @@ 
classifiers = [ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", 'Intended Audience :: Science/Research', 'Topic :: Scientific/Engineering :: Artificial Intelligence', ] @@ -35,7 +36,7 @@ dynamic = ["version"] atari = ["ale_py >=0.9"] box2d = ["box2d-py ==2.3.5", "pygame >=2.1.3", "swig ==4.*"] classic-control = ["pygame >=2.1.3"] -classic_control = ["pygame >=2.1.3"] # kept for backward compatibility +classic_control = ["pygame >=2.1.3"] # kept for backward compatibility mujoco = ["mujoco >=2.1.5", "imageio >=2.14.1", "packaging >=23.0"] toy-text = ["pygame >=2.1.3"] toy_text = ["pygame >=2.1.3"] # kept for backward compatibility @@ -163,6 +164,5 @@ reportOperatorIssue = "none" # TODO fix one by one reportOptionalMemberAccess = "none" # TODO fix one by one reportAssignmentType = "none" # TODO fix one by one - [tool.pytest.ini_options] filterwarnings = ["ignore::DeprecationWarning:gymnasium.*:"] diff --git a/tests/envs/functional/test_core.py b/tests/envs/functional/test_core.py index 3b7349e04..8cea2b909 100644 --- a/tests/envs/functional/test_core.py +++ b/tests/envs/functional/test_core.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, Optional +from typing import Any import numpy as np @@ -6,7 +6,7 @@ from gymnasium.experimental.functional import FuncEnv class BasicTestEnv(FuncEnv): - def __init__(self, options: Optional[Dict[str, Any]] = None): + def __init__(self, options: dict[str, Any] | None = None): super().__init__(options) def initial(self, rng: Any) -> np.ndarray: diff --git a/tests/envs/registration/test_register.py b/tests/envs/registration/test_register.py index 4fd946a0b..7edebc212 100644 --- a/tests/envs/registration/test_register.py +++ b/tests/envs/registration/test_register.py @@ -1,7 +1,6 @@ """Tests that `gymnasium.register` works as expected.""" import re -from typing import Optional import pytest @@ -61,9 +60,7 @@ def 
register_registration_testing_envs(): ("MyAwesomeEnv-v", None, "MyAwesomeEnv-v", None), ], ) -def test_register( - env_id: str, namespace: Optional[str], name: str, version: Optional[int] -): +def test_register(env_id: str, namespace: str | None, name: str, version: int | None): gym.register(env_id, "no-entry-point") assert gym.spec(env_id).id == env_id diff --git a/tests/envs/test_env_implementation.py b/tests/envs/test_env_implementation.py index 3d9006b6e..f8f3c1ff4 100644 --- a/tests/envs/test_env_implementation.py +++ b/tests/envs/test_env_implementation.py @@ -1,5 +1,3 @@ -from typing import Optional - import numpy as np import pytest @@ -293,7 +291,7 @@ def test_taxi_fickle_passenger(): @pytest.mark.parametrize( "low_high", [None, (-0.4, 0.4), (np.array(-0.4), np.array(0.4))] ) -def test_customizable_resets(env_name: str, low_high: Optional[list]): +def test_customizable_resets(env_name: str, low_high: list | None): env = gym.make(env_name) env.action_space.seed(0) # First ensure we can do a reset. @@ -316,7 +314,7 @@ def test_customizable_resets(env_name: str, low_high: Optional[list]): (np.array(1.2), np.array(1.0)), ], ) -def test_customizable_pendulum_resets(low_high: Optional[list]): +def test_customizable_pendulum_resets(low_high: list | None): env = gym.make("Pendulum-v1") env.action_space.seed(0) # First ensure we can do a reset and the values are within expected ranges. diff --git a/tests/envs/utils.py b/tests/envs/utils.py index 415e9fd5a..bb49d82b8 100644 --- a/tests/envs/utils.py +++ b/tests/envs/utils.py @@ -1,13 +1,11 @@ """Finds all the specs that we can test with""" -from typing import List, Optional - import gymnasium as gym from gymnasium import logger from gymnasium.envs.registration import EnvSpec -def try_make_env(env_spec: EnvSpec) -> Optional[gym.Env]: +def try_make_env(env_spec: EnvSpec) -> gym.Env | None: """Tries to make the environment showing if it is possible. 
Warning the environments have no wrappers, including time limit and order enforcing. @@ -30,23 +28,23 @@ def try_make_env(env_spec: EnvSpec) -> Optional[gym.Env]: # Tries to make all environment to test with -all_testing_initialised_envs: List[Optional[gym.Env]] = [ +all_testing_initialised_envs: list[gym.Env | None] = [ try_make_env(env_spec) for env_spec in gym.envs.registry.values() ] -all_testing_initialised_envs: List[gym.Env] = [ +all_testing_initialised_envs: list[gym.Env] = [ env for env in all_testing_initialised_envs if env is not None ] # All testing, mujoco and gymnasium environment specs -all_testing_env_specs: List[EnvSpec] = [ +all_testing_env_specs: list[EnvSpec] = [ env.spec for env in all_testing_initialised_envs ] -mujoco_testing_env_specs: List[EnvSpec] = [ +mujoco_testing_env_specs: list[EnvSpec] = [ env_spec for env_spec in all_testing_env_specs if "gymnasium.envs.mujoco" in env_spec.entry_point ] -gym_testing_env_specs: List[EnvSpec] = [ +gym_testing_env_specs: list[EnvSpec] = [ env_spec for env_spec in all_testing_env_specs if any( diff --git a/tests/spaces/test_spaces.py b/tests/spaces/test_spaces.py index 6df5963dc..c2512b568 100644 --- a/tests/spaces/test_spaces.py +++ b/tests/spaces/test_spaces.py @@ -3,7 +3,7 @@ import itertools import json # note: ujson fails this test due to float equality import pickle import tempfile -from typing import Callable, List, Union +from collections.abc import Callable import numpy as np import pytest @@ -400,9 +400,7 @@ def test_space_sample_mask(space: Space, mask, n_trials: int = 100): assert np.all(variance < scipy.stats.chi2.isf(ALPHA, df=1)) elif isinstance(space, MultiDiscrete): # Due to the multi-axis capability of MultiDiscrete, these functions need to be recursive and that the expected / observed numpy are of non-regular shapes - def _generate_frequency( - _dim: Union[np.ndarray, int], _mask, func: Callable - ) -> List: + def _generate_frequency(_dim: np.ndarray | int, _mask, func: Callable) -> 
list: if isinstance(_dim, np.ndarray): return [ _generate_frequency(sub_dim, sub_mask, func) diff --git a/tests/spaces/test_utils.py b/tests/spaces/test_utils.py index 66030bf47..6e4b48543 100644 --- a/tests/spaces/test_utils.py +++ b/tests/spaces/test_utils.py @@ -1,5 +1,4 @@ from itertools import zip_longest -from typing import Optional import numpy as np import pytest @@ -77,7 +76,7 @@ assert len(TESTING_SPACES) == len(TESTING_SPACES_EXPECTED_FLATDIMS) zip_longest(TESTING_SPACES, TESTING_SPACES_EXPECTED_FLATDIMS), ids=TESTING_SPACES_IDS, ) -def test_flatdim(space: gym.spaces.Space, flatdim: Optional[int]): +def test_flatdim(space: gym.spaces.Space, flatdim: int | None): """Checks that the flattened dims of the space is equal to an expected value.""" if space.is_np_flattenable: dim = utils.flatdim(space) diff --git a/tests/spaces/utils.py b/tests/spaces/utils.py index 4f1b4990a..b1849a4f8 100644 --- a/tests/spaces/utils.py +++ b/tests/spaces/utils.py @@ -1,5 +1,3 @@ -from typing import List - import numpy as np from gymnasium.spaces import ( @@ -116,7 +114,7 @@ TESTING_COMPOSITE_SPACES = [ ] TESTING_COMPOSITE_SPACES_IDS = [f"{space}" for space in TESTING_COMPOSITE_SPACES] -TESTING_SPACES: List[Space] = TESTING_FUNDAMENTAL_SPACES + TESTING_COMPOSITE_SPACES +TESTING_SPACES: list[Space] = TESTING_FUNDAMENTAL_SPACES + TESTING_COMPOSITE_SPACES TESTING_SPACES_IDS = TESTING_FUNDAMENTAL_SPACES_IDS + TESTING_COMPOSITE_SPACES_IDS diff --git a/tests/utils/test_env_checker.py b/tests/utils/test_env_checker.py index e365c4fa7..45cefd77f 100644 --- a/tests/utils/test_env_checker.py +++ b/tests/utils/test_env_checker.py @@ -2,7 +2,7 @@ import re import warnings -from typing import Callable, Tuple, Union +from collections.abc import Callable import numpy as np import pytest @@ -137,7 +137,7 @@ def test_check_reset_seed_determinism(test, func: Callable, message: str): def _deprecated_return_info( self, return_info: bool = False -) -> Union[Tuple[ObsType, dict], ObsType]: +) -> 
tuple[ObsType, dict] | ObsType: """function to simulate the signature and behavior of a `reset` function with the deprecated `return_info` optional argument""" if return_info: return self.observation_space.sample(), {} diff --git a/tests/utils/test_passive_env_checker.py b/tests/utils/test_passive_env_checker.py index 38ba95b48..77e29ee31 100644 --- a/tests/utils/test_passive_env_checker.py +++ b/tests/utils/test_passive_env_checker.py @@ -1,6 +1,6 @@ import re import warnings -from typing import Callable, Dict, Union +from collections.abc import Callable import numpy as np import pytest @@ -143,7 +143,7 @@ def test_check_observation_space(test, space, message: str): ], ) def test_check_action_space( - test: Union[UserWarning, type], space: spaces.Space, message: str + test: UserWarning | type, space: spaces.Space, message: str ): """Tests the check action space function.""" if test is UserWarning: @@ -267,7 +267,7 @@ def _make_reset_results(results): ], ], ) -def test_passive_env_reset_checker(test, func: Callable, message: str, kwargs: Dict): +def test_passive_env_reset_checker(test, func: Callable, message: str, kwargs: dict): """Tests the passive env reset check""" if test is UserWarning: with pytest.warns( @@ -352,7 +352,7 @@ def _modified_step( ], ) def test_passive_env_step_checker( - test: Union[UserWarning, type], func: Callable, message: str + test: UserWarning | type, func: Callable, message: str ): """Tests the passive env step checker.""" if test is UserWarning: diff --git a/tests/utils/test_play.py b/tests/utils/test_play.py index b3941243c..ff31e90f6 100644 --- a/tests/utils/test_play.py +++ b/tests/utils/test_play.py @@ -1,6 +1,6 @@ +from collections.abc import Callable from functools import partial from itertools import product -from typing import Callable import numpy as np import pygame diff --git a/tests/vector/testing_utils.py b/tests/vector/testing_utils.py index 7407dba47..8a94ef1e6 100644 --- a/tests/vector/testing_utils.py +++ 
b/tests/vector/testing_utils.py @@ -1,7 +1,6 @@ """Testing utilitys for `gymnasium.vector`.""" import time -from typing import Optional import numpy as np @@ -67,7 +66,7 @@ class SlowEnv(gym.Env): ) self.action_space = Box(low=0.0, high=1.0, shape=(), dtype=np.float32) - def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None): + def reset(self, *, seed: int | None = None, options: dict | None = None): """Resets the environment with a time sleep.""" super().reset(seed=seed) if self.slow_reset > 0: @@ -113,7 +112,7 @@ class CustomSpaceEnv(gym.Env): self.observation_space = CustomSpace() self.action_space = CustomSpace() - def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None): + def reset(self, *, seed: int | None = None, options: dict | None = None): """Resets the environment.""" super().reset(seed=seed) return "reset", {} diff --git a/tests/vector/utils/test_space_utils.py b/tests/vector/utils/test_space_utils.py index c03df1269..064f593fe 100644 --- a/tests/vector/utils/test_space_utils.py +++ b/tests/vector/utils/test_space_utils.py @@ -2,7 +2,7 @@ import copy import re -from typing import Iterable +from collections.abc import Iterable import numpy as np import pytest diff --git a/tests/wrappers/test_record_video.py b/tests/wrappers/test_record_video.py index b7bdcb1f2..6dab9ca24 100644 --- a/tests/wrappers/test_record_video.py +++ b/tests/wrappers/test_record_video.py @@ -2,7 +2,6 @@ import os import shutil -from typing import List import numpy as np import pytest @@ -151,13 +150,13 @@ def test_with_rgb_array_list(n_steps: int = 10): env.step(env.action_space.sample()) render_out = env.render() - assert isinstance(render_out, List) + assert isinstance(render_out, list) assert len(render_out) == n_steps + 1 assert all(isinstance(render, np.ndarray) for render in render_out) assert all(render.ndim == 3 for render in render_out) render_out = env.render() - assert isinstance(render_out, List) + assert 
isinstance(render_out, list) assert len(render_out) == 0 env.close() @@ -173,13 +172,13 @@ def test_with_rgb_array_list(n_steps: int = 10): env.step(env.action_space.sample()) render_out = env.render() - assert isinstance(render_out, List) + assert isinstance(render_out, list) assert len(render_out) == n_steps + 1 assert all(isinstance(render, np.ndarray) for render in render_out) assert all(render.ndim == 3 for render in render_out) render_out = env.render() - assert isinstance(render_out, List) + assert isinstance(render_out, list) assert len(render_out) == 0 env.close() diff --git a/tests/wrappers/vector/test_normalize_reward.py b/tests/wrappers/vector/test_normalize_reward.py index 3c7451fa5..ec649d45f 100644 --- a/tests/wrappers/vector/test_normalize_reward.py +++ b/tests/wrappers/vector/test_normalize_reward.py @@ -1,7 +1,5 @@ """Test suite for vector NormalizeReward wrapper.""" -from typing import Optional - import numpy as np from gymnasium import wrappers @@ -10,7 +8,7 @@ from gymnasium.vector import SyncVectorEnv from tests.testing_env import GenericTestEnv -def reset_func(self, seed: Optional[int] = None, options: Optional[dict] = None): +def reset_func(self, seed: int | None = None, options: dict | None = None): self.step_id = 0 return self.observation_space.sample(), {}