Updated gymnasium to be equivalent to gym v0.26.2 (#36)

2025-08-01 22:11:25 +00:00 · 2022-10-05 17:53:45 +01:00
parent dc60cdc4af
commit f6489c38b3
21 changed files with 190 additions and 13 deletions
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -1,6 +1,9 @@
 name: build
 on: [pull_request, push]
 permissions:
  contents: read # to fetch code (actions/checkout)
 jobs:
  build:
    runs-on: ubuntu-latest
--- a/.github/workflows/pre-commit.yml
+++ b/.github/workflows/pre-commit.yml
@@ -5,6 +5,10 @@ on:
  pull_request:
  push:
    branches: [master]
 permissions:
  contents: read # to fetch code (actions/checkout)
 jobs:
  pre-commit:
    runs-on: ubuntu-latest
--- a/gymnasium/envs/box2d/bipedal_walker.py
+++ b/gymnasium/envs/box2d/bipedal_walker.py
@@ -608,6 +608,14 @@ class BipedalWalker(gym.Env, EzPickle):
        return np.array(state, dtype=np.float32), reward, terminated, False, {}
    def render(self):
        if self.render_mode is None:
            gym.logger.warn(
                "You are calling render method without specifying any render mode. "
                "You can specify the render_mode at initialization, "
                f'e.g. gym("{self.spec.id}", render_mode="rgb_array")'
            )
            return
        try:
            import pygame
            from pygame import gfxdraw
--- a/gymnasium/envs/box2d/car_racing.py
+++ b/gymnasium/envs/box2d/car_racing.py
@@ -569,7 +569,15 @@ class CarRacing(gym.Env, EzPickle):
        return self.state, step_reward, terminated, truncated, {}
    def render(self):
-        return self._render(self.render_mode)
+        if self.render_mode is None:
            gym.logger.warn(
                "You are calling render method without specifying any render mode. "
                "You can specify the render_mode at initialization, "
                f'e.g. gym("{self.spec.id}", render_mode="rgb_array")'
            )
            return
        else:
            return self._render(self.render_mode)
    def _render(self, mode: str):
        assert mode in self.metadata["render_modes"]
--- a/gymnasium/envs/box2d/lunar_lander.py
+++ b/gymnasium/envs/box2d/lunar_lander.py
@@ -602,6 +602,14 @@ class LunarLander(gym.Env, EzPickle):
        return np.array(state, dtype=np.float32), reward, terminated, False, {}
    def render(self):
        if self.render_mode is None:
            gym.logger.warn(
                "You are calling render method without specifying any render mode. "
                "You can specify the render_mode at initialization, "
                f'e.g. gym("{self.spec.id}", render_mode="rgb_array")'
            )
            return
        try:
            import pygame
            from pygame import gfxdraw
--- a/gymnasium/envs/classic_control/acrobot.py
+++ b/gymnasium/envs/classic_control/acrobot.py
@@ -4,7 +4,9 @@ from typing import Optional
 import numpy as np
 from numpy import cos, pi, sin
 import gymnasium as gym
 from gymnasium import Env, spaces
 from gymnasium.envs.classic_control import utils
 from gymnasium.error import DependencyNotInstalled
 __copyright__ = "Copyright 2013, RLPy http://acl.mit.edu/RLPy"
@@ -20,7 +22,6 @@ __author__ = "Christoph Dann <cdann@cdann.de>"
 # SOURCE:
 # https://github.com/rlpy/rlpy/blob/master/rlpy/Domains/Acrobot.py
 from gymnasium.envs.classic_control import utils
 class AcrobotEnv(Env):
@@ -280,6 +281,14 @@ class AcrobotEnv(Env):
        return dtheta1, dtheta2, ddtheta1, ddtheta2, 0.0
    def render(self):
        if self.render_mode is None:
            gym.logger.warn(
                "You are calling render method without specifying any render mode. "
                "You can specify the render_mode at initialization, "
                f'e.g. gym("{self.spec.id}", render_mode="rgb_array")'
            )
            return
        try:
            import pygame
            from pygame import gfxdraw
--- a/gymnasium/envs/classic_control/cartpole.py
+++ b/gymnasium/envs/classic_control/cartpole.py
@@ -209,6 +209,14 @@ class CartPoleEnv(gym.Env[np.ndarray, Union[int, np.ndarray]]):
        return np.array(self.state, dtype=np.float32), {}
    def render(self):
        if self.render_mode is None:
            gym.logger.warn(
                "You are calling render method without specifying any render mode. "
                "You can specify the render_mode at initialization, "
                f'e.g. gym("{self.spec.id}", render_mode="rgb_array")'
            )
            return
        try:
            import pygame
            from pygame import gfxdraw
--- a/gymnasium/envs/classic_control/continuous_mountain_car.py
+++ b/gymnasium/envs/classic_control/continuous_mountain_car.py
@@ -193,6 +193,14 @@ class Continuous_MountainCarEnv(gym.Env):
        return np.sin(3 * xs) * 0.45 + 0.55
    def render(self):
        if self.render_mode is None:
            gym.logger.warn(
                "You are calling render method without specifying any render mode. "
                "You can specify the render_mode at initialization, "
                f'e.g. gym("{self.spec.id}", render_mode="rgb_array")'
            )
            return
        try:
            import pygame
            from pygame import gfxdraw
--- a/gymnasium/envs/classic_control/mountain_car.py
+++ b/gymnasium/envs/classic_control/mountain_car.py
@@ -170,6 +170,14 @@ class MountainCarEnv(gym.Env):
        return np.sin(3 * xs) * 0.45 + 0.55
    def render(self):
        if self.render_mode is None:
            gym.logger.warn(
                "You are calling render method without specifying any render mode. "
                "You can specify the render_mode at initialization, "
                f'e.g. gym("{self.spec.id}", render_mode="rgb_array")'
            )
            return
        try:
            import pygame
            from pygame import gfxdraw
--- a/gymnasium/envs/classic_control/pendulum.py
+++ b/gymnasium/envs/classic_control/pendulum.py
@@ -167,6 +167,14 @@ class PendulumEnv(gym.Env):
        return np.array([np.cos(theta), np.sin(theta), thetadot], dtype=np.float32)
    def render(self):
        if self.render_mode is None:
            gym.logger.warn(
                "You are calling render method without specifying any render mode. "
                "You can specify the render_mode at initialization, "
                f'e.g. gym("{self.spec.id}", render_mode="rgb_array")'
            )
            return
        try:
            import pygame
            from pygame import gfxdraw
--- a/gymnasium/envs/toy_text/blackjack.py
+++ b/gymnasium/envs/toy_text/blackjack.py
@@ -191,6 +191,14 @@ class BlackjackEnv(gym.Env):
        return self._get_obs(), {}
    def render(self):
        if self.render_mode is None:
            gym.logger.warn(
                "You are calling render method without specifying any render mode. "
                "You can specify the render_mode at initialization, "
                f'e.g. gym("{self.spec.id}", render_mode="rgb_array")'
            )
            return
        try:
            import pygame
        except ImportError:
--- a/gymnasium/envs/toy_text/cliffwalking.py
+++ b/gymnasium/envs/toy_text/cliffwalking.py
@@ -5,6 +5,7 @@ from typing import Optional
 import numpy as np
 import gymnasium as gym
 from gymnasium import Env, spaces
 from gymnasium.envs.toy_text.utils import categorical_sample
 from gymnasium.error import DependencyNotInstalled
@@ -163,6 +164,14 @@ class CliffWalkingEnv(Env):
        return int(self.s), {"prob": 1}
    def render(self):
        if self.render_mode is None:
            gym.logger.warn(
                "You are calling render method without specifying any render mode. "
                "You can specify the render_mode at initialization, "
                f'e.g. gym("{self.spec.id}", render_mode="rgb_array")'
            )
            return
        if self.render_mode == "ansi":
            return self._render_text()
        else:
--- a/gymnasium/envs/toy_text/frozen_lake.py
+++ b/gymnasium/envs/toy_text/frozen_lake.py
@@ -5,6 +5,7 @@ from typing import List, Optional
 import numpy as np
 import gymnasium as gym
 from gymnasium import Env, spaces, utils
 from gymnasium.envs.toy_text.utils import categorical_sample
 from gymnasium.error import DependencyNotInstalled
@@ -268,6 +269,14 @@ class FrozenLakeEnv(Env):
        return int(self.s), {"prob": 1}
    def render(self):
        if self.render_mode is None:
            gym.logger.warn(
                "You are calling render method without specifying any render mode. "
                "You can specify the render_mode at initialization, "
                f'e.g. gym("{self.spec.id}", render_mode="rgb_array")'
            )
            return
        if self.render_mode == "ansi":
            return self._render_text()
        else:  # self.render_mode in {"human", "rgb_array"}:
--- a/gymnasium/envs/toy_text/taxi.py
+++ b/gymnasium/envs/toy_text/taxi.py
@@ -5,6 +5,7 @@ from typing import Optional
 import numpy as np
 import gymnasium as gym
 from gymnasium import Env, spaces, utils
 from gymnasium.envs.toy_text.utils import categorical_sample
 from gymnasium.error import DependencyNotInstalled
@@ -279,7 +280,14 @@ class TaxiEnv(Env):
        return int(self.s), {"prob": 1.0, "action_mask": self.action_mask(self.s)}
    def render(self):
-        if self.render_mode == "ansi":
+        if self.render_mode is None:
            gym.logger.warn(
                "You are calling render method without specifying any render mode. "
                "You can specify the render_mode at initialization, "
                f'e.g. gym("{self.spec.id}", render_mode="rgb_array")'
            )
            return
        elif self.render_mode == "ansi":
            return self._render_text()
        else:  # self.render_mode in {"human", "rgb_array"}:
            return self._render_gui(self.render_mode)
--- a/gymnasium/spaces/graph.py
+++ b/gymnasium/spaces/graph.py
@@ -14,8 +14,8 @@ class GraphInstance(NamedTuple):
    """A Graph space instance.
    * nodes (np.ndarray): an (n x ...) sized array representing the features for n nodes, (...) must adhere to the shape of the node space.
-    * edges (Optional[np.ndarray]): an (m x ...) sized array representing the features for m nodes, (...) must adhere to the shape of the edge space.
+    * edges (Optional[np.ndarray]): an (m x ...) sized array representing the features for m edges, (...) must adhere to the shape of the edge space.
-    * edge_links (Optional[np.ndarray]): an (m x 2) sized array of ints representing the two nodes that each edge connects.
+    * edge_links (Optional[np.ndarray]): an (m x 2) sized array of ints representing the indices of the two nodes that each edge connects.
    """
    nodes: np.ndarray
--- a/gymnasium/vector/async_vector_env.py
+++ b/gymnasium/vector/async_vector_env.py
@@ -566,9 +566,10 @@ def _worker(index, env_fn, pipe, parent_pipe, shared_memory, error_queue):
                    info,
                ) = env.step(data)
                if terminated or truncated:
-                    old_observation = observation
+                    old_observation, old_info = observation, info
                    observation, info = env.reset()
                    info["final_observation"] = old_observation
                    info["final_info"] = old_info
                pipe.send(((observation, reward, terminated, truncated, info), True))
            elif command == "seed":
                env.seed(data)
@@ -636,10 +637,10 @@ def _worker_shared_memory(index, env_fn, pipe, parent_pipe, shared_memory, error
                    info,
                ) = env.step(data)
                if terminated or truncated:
-                    old_observation = observation
+                    old_observation, old_info = observation, info
                    observation, info = env.reset()
                    info["final_observation"] = old_observation
-
+                    info["final_info"] = old_info
                write_to_shared_memory(
                    observation_space, index, observation, shared_memory
                )
--- a/gymnasium/vector/sync_vector_env.py
+++ b/gymnasium/vector/sync_vector_env.py
@@ -150,9 +150,10 @@ class SyncVectorEnv(VectorEnv):
            ) = env.step(action)
            if self._terminateds[i] or self._truncateds[i]:
-                old_observation = observation
+                old_observation, old_info = observation, info
                observation, info = env.reset()
                info["final_observation"] = old_observation
                info["final_info"] = old_info
            observations.append(observation)
            infos = self._add_info(infos, info, i)
        self.observations = concatenate(
--- a/gymnasium/version.py
+++ b/gymnasium/version.py
@@ -1 +1 @@
-VERSION = "0.26.1"
+VERSION = "0.26.2"
--- a/gymnasium/wrappers/atari_preprocessing.py
+++ b/gymnasium/wrappers/atari_preprocessing.py
@@ -98,7 +98,6 @@ class AtariPreprocessing(gym.Wrapper):
                np.empty(env.observation_space.shape, dtype=np.uint8),
            ]
        self.ale = env.unwrapped.ale
        self.lives = 0
        self.game_over = False
@@ -112,6 +111,11 @@ class AtariPreprocessing(gym.Wrapper):
            low=_low, high=_high, shape=_shape, dtype=_obs_dtype
        )
    @property
    def ale(self):
        """Make ale as a class property to avoid serialization error."""
        return self.env.unwrapped.ale
    def step(self, action):
        """Applies the preprocessing for an :meth:`env.step`."""
        total_reward, terminated, truncated, info = 0.0, False, False, {}
--- a/setup.py
+++ b/setup.py
@@ -18,7 +18,7 @@ extras = {
    "box2d": ["box2d-py==2.3.5", "pygame==2.1.0", "swig==4.*"],
    "classic_control": ["pygame==2.1.0"],
    "mujoco_py": ["mujoco_py<2.2,>=2.1"],
-    "mujoco": ["mujoco==2.2.0", "imageio>=2.14.1"],
+    "mujoco": ["mujoco==2.2", "imageio>=2.14.1"],
    "toy_text": ["pygame==2.1.0"],
    "other": ["lz4>=3.1.0", "opencv-python>=3.0", "matplotlib>=3.0", "moviepy>=1.0.0"],
 }
--- a/tests/vector/test_vector_env.py
+++ b/tests/vector/test_vector_env.py
@@ -1,10 +1,13 @@
 from functools import partial
 import numpy as np
 import pytest
-from gymnasium.spaces import Tuple
+from gymnasium.spaces import Discrete, Tuple
 from gymnasium.vector.async_vector_env import AsyncVectorEnv
 from gymnasium.vector.sync_vector_env import SyncVectorEnv
 from gymnasium.vector.vector_env import VectorEnv
 from tests.testing_env import GenericTestEnv
 from tests.vector.utils import CustomSpace, make_env
@@ -58,3 +61,65 @@ def test_custom_space_vector_env():
    assert isinstance(env.single_action_space, CustomSpace)
    assert isinstance(env.action_space, Tuple)
@pytest.mark.parametrize(
    "vectoriser",
    (
        SyncVectorEnv,
        partial(AsyncVectorEnv, shared_memory=True),
        partial(AsyncVectorEnv, shared_memory=False),
    ),
    ids=["Sync", "Async with shared memory", "Async without shared memory"],
 )
 def test_final_obs_info(vectoriser):
    """Tests that the vector environments correctly return the final observation and info."""
    def reset_fn(self, seed=None, options=None):
        return 0, {"reset": True}
    def thunk():
        return GenericTestEnv(
            action_space=Discrete(4),
            observation_space=Discrete(4),
            reset_fn=reset_fn,
            step_fn=lambda self, action: (
                action if action < 3 else 0,
                0,
                action >= 3,
                False,
                {"action": action},
            ),
        )
    env = vectoriser([thunk])
    obs, info = env.reset()
    assert obs == np.array([0]) and info == {
        "reset": np.array([True]),
        "_reset": np.array([True]),
    }
    obs, _, termination, _, info = env.step([1])
    assert (
        obs == np.array([1])
        and termination == np.array([False])
        and info == {"action": np.array([1]), "_action": np.array([True])}
    )
    obs, _, termination, _, info = env.step([2])
    assert (
        obs == np.array([2])
        and termination == np.array([False])
        and info == {"action": np.array([2]), "_action": np.array([True])}
    )
    obs, _, termination, _, info = env.step([3])
    assert (
        obs == np.array([0])
        and termination == np.array([True])
        and info["reset"] == np.array([True])
    )
    assert "final_observation" in info and "final_info" in info
    assert info["final_observation"] == np.array([0]) and info["final_info"] == {
        "action": 3
    }