Updated gymnasium to be equivalent to gym v0.26.2 (#36)

This commit is contained in:
Mark Towers
2022-10-05 17:53:45 +01:00
committed by GitHub
parent dc60cdc4af
commit f6489c38b3
21 changed files with 190 additions and 13 deletions

View File

@@ -1,6 +1,9 @@
name: build name: build
on: [pull_request, push] on: [pull_request, push]
permissions:
contents: read # to fetch code (actions/checkout)
jobs: jobs:
build: build:
runs-on: ubuntu-latest runs-on: ubuntu-latest

View File

@@ -5,6 +5,10 @@ on:
pull_request: pull_request:
push: push:
branches: [master] branches: [master]
permissions:
contents: read # to fetch code (actions/checkout)
jobs: jobs:
pre-commit: pre-commit:
runs-on: ubuntu-latest runs-on: ubuntu-latest

View File

@@ -608,6 +608,14 @@ class BipedalWalker(gym.Env, EzPickle):
return np.array(state, dtype=np.float32), reward, terminated, False, {} return np.array(state, dtype=np.float32), reward, terminated, False, {}
def render(self): def render(self):
if self.render_mode is None:
gym.logger.warn(
"You are calling render method without specifying any render mode. "
"You can specify the render_mode at initialization, "
f'e.g. gym("{self.spec.id}", render_mode="rgb_array")'
)
return
try: try:
import pygame import pygame
from pygame import gfxdraw from pygame import gfxdraw

View File

@@ -569,7 +569,15 @@ class CarRacing(gym.Env, EzPickle):
return self.state, step_reward, terminated, truncated, {} return self.state, step_reward, terminated, truncated, {}
def render(self):
    """Render the environment using the mode chosen at construction time.

    Returns:
        Whatever ``self._render`` returns for ``self.render_mode``, or
        ``None`` (after logging a warning) when no render mode was set.
    """
    if self.render_mode is None:
        # Rendering without a mode is a usage error, not a crash: warn and
        # point the user at the correct constructor call (``gym.make``; the
        # ``gym`` module itself is not callable).
        gym.logger.warn(
            "You are calling render method without specifying any render mode. "
            "You can specify the render_mode at initialization, "
            f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
        )
        return

    return self._render(self.render_mode)
def _render(self, mode: str): def _render(self, mode: str):
assert mode in self.metadata["render_modes"] assert mode in self.metadata["render_modes"]

View File

@@ -602,6 +602,14 @@ class LunarLander(gym.Env, EzPickle):
return np.array(state, dtype=np.float32), reward, terminated, False, {} return np.array(state, dtype=np.float32), reward, terminated, False, {}
def render(self): def render(self):
if self.render_mode is None:
gym.logger.warn(
"You are calling render method without specifying any render mode. "
"You can specify the render_mode at initialization, "
f'e.g. gym("{self.spec.id}", render_mode="rgb_array")'
)
return
try: try:
import pygame import pygame
from pygame import gfxdraw from pygame import gfxdraw

View File

@@ -4,7 +4,9 @@ from typing import Optional
import numpy as np import numpy as np
from numpy import cos, pi, sin from numpy import cos, pi, sin
import gymnasium as gym
from gymnasium import Env, spaces from gymnasium import Env, spaces
from gymnasium.envs.classic_control import utils
from gymnasium.error import DependencyNotInstalled from gymnasium.error import DependencyNotInstalled
__copyright__ = "Copyright 2013, RLPy http://acl.mit.edu/RLPy" __copyright__ = "Copyright 2013, RLPy http://acl.mit.edu/RLPy"
@@ -20,7 +22,6 @@ __author__ = "Christoph Dann <cdann@cdann.de>"
# SOURCE: # SOURCE:
# https://github.com/rlpy/rlpy/blob/master/rlpy/Domains/Acrobot.py # https://github.com/rlpy/rlpy/blob/master/rlpy/Domains/Acrobot.py
from gymnasium.envs.classic_control import utils
class AcrobotEnv(Env): class AcrobotEnv(Env):
@@ -280,6 +281,14 @@ class AcrobotEnv(Env):
return dtheta1, dtheta2, ddtheta1, ddtheta2, 0.0 return dtheta1, dtheta2, ddtheta1, ddtheta2, 0.0
def render(self): def render(self):
if self.render_mode is None:
gym.logger.warn(
"You are calling render method without specifying any render mode. "
"You can specify the render_mode at initialization, "
f'e.g. gym("{self.spec.id}", render_mode="rgb_array")'
)
return
try: try:
import pygame import pygame
from pygame import gfxdraw from pygame import gfxdraw

View File

@@ -209,6 +209,14 @@ class CartPoleEnv(gym.Env[np.ndarray, Union[int, np.ndarray]]):
return np.array(self.state, dtype=np.float32), {} return np.array(self.state, dtype=np.float32), {}
def render(self): def render(self):
if self.render_mode is None:
gym.logger.warn(
"You are calling render method without specifying any render mode. "
"You can specify the render_mode at initialization, "
f'e.g. gym("{self.spec.id}", render_mode="rgb_array")'
)
return
try: try:
import pygame import pygame
from pygame import gfxdraw from pygame import gfxdraw

View File

@@ -193,6 +193,14 @@ class Continuous_MountainCarEnv(gym.Env):
return np.sin(3 * xs) * 0.45 + 0.55 return np.sin(3 * xs) * 0.45 + 0.55
def render(self): def render(self):
if self.render_mode is None:
gym.logger.warn(
"You are calling render method without specifying any render mode. "
"You can specify the render_mode at initialization, "
f'e.g. gym("{self.spec.id}", render_mode="rgb_array")'
)
return
try: try:
import pygame import pygame
from pygame import gfxdraw from pygame import gfxdraw

View File

@@ -170,6 +170,14 @@ class MountainCarEnv(gym.Env):
return np.sin(3 * xs) * 0.45 + 0.55 return np.sin(3 * xs) * 0.45 + 0.55
def render(self): def render(self):
if self.render_mode is None:
gym.logger.warn(
"You are calling render method without specifying any render mode. "
"You can specify the render_mode at initialization, "
f'e.g. gym("{self.spec.id}", render_mode="rgb_array")'
)
return
try: try:
import pygame import pygame
from pygame import gfxdraw from pygame import gfxdraw

View File

@@ -167,6 +167,14 @@ class PendulumEnv(gym.Env):
return np.array([np.cos(theta), np.sin(theta), thetadot], dtype=np.float32) return np.array([np.cos(theta), np.sin(theta), thetadot], dtype=np.float32)
def render(self): def render(self):
if self.render_mode is None:
gym.logger.warn(
"You are calling render method without specifying any render mode. "
"You can specify the render_mode at initialization, "
f'e.g. gym("{self.spec.id}", render_mode="rgb_array")'
)
return
try: try:
import pygame import pygame
from pygame import gfxdraw from pygame import gfxdraw

View File

@@ -191,6 +191,14 @@ class BlackjackEnv(gym.Env):
return self._get_obs(), {} return self._get_obs(), {}
def render(self): def render(self):
if self.render_mode is None:
gym.logger.warn(
"You are calling render method without specifying any render mode. "
"You can specify the render_mode at initialization, "
f'e.g. gym("{self.spec.id}", render_mode="rgb_array")'
)
return
try: try:
import pygame import pygame
except ImportError: except ImportError:

View File

@@ -5,6 +5,7 @@ from typing import Optional
import numpy as np import numpy as np
import gymnasium as gym
from gymnasium import Env, spaces from gymnasium import Env, spaces
from gymnasium.envs.toy_text.utils import categorical_sample from gymnasium.envs.toy_text.utils import categorical_sample
from gymnasium.error import DependencyNotInstalled from gymnasium.error import DependencyNotInstalled
@@ -163,6 +164,14 @@ class CliffWalkingEnv(Env):
return int(self.s), {"prob": 1} return int(self.s), {"prob": 1}
def render(self): def render(self):
if self.render_mode is None:
gym.logger.warn(
"You are calling render method without specifying any render mode. "
"You can specify the render_mode at initialization, "
f'e.g. gym("{self.spec.id}", render_mode="rgb_array")'
)
return
if self.render_mode == "ansi": if self.render_mode == "ansi":
return self._render_text() return self._render_text()
else: else:

View File

@@ -5,6 +5,7 @@ from typing import List, Optional
import numpy as np import numpy as np
import gymnasium as gym
from gymnasium import Env, spaces, utils from gymnasium import Env, spaces, utils
from gymnasium.envs.toy_text.utils import categorical_sample from gymnasium.envs.toy_text.utils import categorical_sample
from gymnasium.error import DependencyNotInstalled from gymnasium.error import DependencyNotInstalled
@@ -268,6 +269,14 @@ class FrozenLakeEnv(Env):
return int(self.s), {"prob": 1} return int(self.s), {"prob": 1}
def render(self): def render(self):
if self.render_mode is None:
gym.logger.warn(
"You are calling render method without specifying any render mode. "
"You can specify the render_mode at initialization, "
f'e.g. gym("{self.spec.id}", render_mode="rgb_array")'
)
return
if self.render_mode == "ansi": if self.render_mode == "ansi":
return self._render_text() return self._render_text()
else: # self.render_mode in {"human", "rgb_array"}: else: # self.render_mode in {"human", "rgb_array"}:

View File

@@ -5,6 +5,7 @@ from typing import Optional
import numpy as np import numpy as np
import gymnasium as gym
from gymnasium import Env, spaces, utils from gymnasium import Env, spaces, utils
from gymnasium.envs.toy_text.utils import categorical_sample from gymnasium.envs.toy_text.utils import categorical_sample
from gymnasium.error import DependencyNotInstalled from gymnasium.error import DependencyNotInstalled
@@ -279,7 +280,14 @@ class TaxiEnv(Env):
return int(self.s), {"prob": 1.0, "action_mask": self.action_mask(self.s)} return int(self.s), {"prob": 1.0, "action_mask": self.action_mask(self.s)}
def render(self):
    """Render the environment using the mode chosen at construction time.

    Returns:
        The result of ``self._render_text()`` for ``"ansi"``, the result of
        ``self._render_gui(self.render_mode)`` for any other mode, or
        ``None`` (after logging a warning) when no render mode was set.
    """
    if self.render_mode is None:
        # Warn instead of failing, and show the correct constructor call
        # (``gym.make``; the ``gym`` module itself is not callable).
        gym.logger.warn(
            "You are calling render method without specifying any render mode. "
            "You can specify the render_mode at initialization, "
            f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
        )
        return

    if self.render_mode == "ansi":
        return self._render_text()

    # self.render_mode in {"human", "rgb_array"}
    return self._render_gui(self.render_mode)

View File

@@ -14,8 +14,8 @@ class GraphInstance(NamedTuple):
"""A Graph space instance. """A Graph space instance.
* nodes (np.ndarray): an (n x ...) sized array representing the features for n nodes, (...) must adhere to the shape of the node space. * nodes (np.ndarray): an (n x ...) sized array representing the features for n nodes, (...) must adhere to the shape of the node space.
* edges (Optional[np.ndarray]): an (m x ...) sized array representing the features for m nodes, (...) must adhere to the shape of the edge space. * edges (Optional[np.ndarray]): an (m x ...) sized array representing the features for m edges, (...) must adhere to the shape of the edge space.
* edge_links (Optional[np.ndarray]): an (m x 2) sized array of ints representing the two nodes that each edge connects. * edge_links (Optional[np.ndarray]): an (m x 2) sized array of ints representing the indices of the two nodes that each edge connects.
""" """
nodes: np.ndarray nodes: np.ndarray

View File

@@ -566,9 +566,10 @@ def _worker(index, env_fn, pipe, parent_pipe, shared_memory, error_queue):
info, info,
) = env.step(data) ) = env.step(data)
if terminated or truncated: if terminated or truncated:
old_observation = observation old_observation, old_info = observation, info
observation, info = env.reset() observation, info = env.reset()
info["final_observation"] = old_observation info["final_observation"] = old_observation
info["final_info"] = old_info
pipe.send(((observation, reward, terminated, truncated, info), True)) pipe.send(((observation, reward, terminated, truncated, info), True))
elif command == "seed": elif command == "seed":
env.seed(data) env.seed(data)
@@ -636,10 +637,10 @@ def _worker_shared_memory(index, env_fn, pipe, parent_pipe, shared_memory, error
info, info,
) = env.step(data) ) = env.step(data)
if terminated or truncated: if terminated or truncated:
old_observation = observation old_observation, old_info = observation, info
observation, info = env.reset() observation, info = env.reset()
info["final_observation"] = old_observation info["final_observation"] = old_observation
info["final_info"] = old_info
write_to_shared_memory( write_to_shared_memory(
observation_space, index, observation, shared_memory observation_space, index, observation, shared_memory
) )

View File

@@ -150,9 +150,10 @@ class SyncVectorEnv(VectorEnv):
) = env.step(action) ) = env.step(action)
if self._terminateds[i] or self._truncateds[i]: if self._terminateds[i] or self._truncateds[i]:
old_observation = observation old_observation, old_info = observation, info
observation, info = env.reset() observation, info = env.reset()
info["final_observation"] = old_observation info["final_observation"] = old_observation
info["final_info"] = old_info
observations.append(observation) observations.append(observation)
infos = self._add_info(infos, info, i) infos = self._add_info(infos, info, i)
self.observations = concatenate( self.observations = concatenate(

View File

@@ -1 +1 @@
VERSION = "0.26.1" VERSION = "0.26.2"

View File

@@ -98,7 +98,6 @@ class AtariPreprocessing(gym.Wrapper):
np.empty(env.observation_space.shape, dtype=np.uint8), np.empty(env.observation_space.shape, dtype=np.uint8),
] ]
self.ale = env.unwrapped.ale
self.lives = 0 self.lives = 0
self.game_over = False self.game_over = False
@@ -112,6 +111,11 @@ class AtariPreprocessing(gym.Wrapper):
low=_low, high=_high, shape=_shape, dtype=_obs_dtype low=_low, high=_high, shape=_shape, dtype=_obs_dtype
) )
@property
def ale(self):
    """Return the ``ale`` attribute of the unwrapped base environment.

    The value is looked up on every access rather than stored on the
    wrapper; per the original note this avoids a serialization error.
    """
    base_env = self.env.unwrapped
    return base_env.ale
def step(self, action): def step(self, action):
"""Applies the preprocessing for an :meth:`env.step`.""" """Applies the preprocessing for an :meth:`env.step`."""
total_reward, terminated, truncated, info = 0.0, False, False, {} total_reward, terminated, truncated, info = 0.0, False, False, {}

View File

@@ -18,7 +18,7 @@ extras = {
"box2d": ["box2d-py==2.3.5", "pygame==2.1.0", "swig==4.*"], "box2d": ["box2d-py==2.3.5", "pygame==2.1.0", "swig==4.*"],
"classic_control": ["pygame==2.1.0"], "classic_control": ["pygame==2.1.0"],
"mujoco_py": ["mujoco_py<2.2,>=2.1"], "mujoco_py": ["mujoco_py<2.2,>=2.1"],
"mujoco": ["mujoco==2.2.0", "imageio>=2.14.1"], "mujoco": ["mujoco==2.2", "imageio>=2.14.1"],
"toy_text": ["pygame==2.1.0"], "toy_text": ["pygame==2.1.0"],
"other": ["lz4>=3.1.0", "opencv-python>=3.0", "matplotlib>=3.0", "moviepy>=1.0.0"], "other": ["lz4>=3.1.0", "opencv-python>=3.0", "matplotlib>=3.0", "moviepy>=1.0.0"],
} }

View File

@@ -1,10 +1,13 @@
from functools import partial
import numpy as np import numpy as np
import pytest import pytest
from gymnasium.spaces import Tuple from gymnasium.spaces import Discrete, Tuple
from gymnasium.vector.async_vector_env import AsyncVectorEnv from gymnasium.vector.async_vector_env import AsyncVectorEnv
from gymnasium.vector.sync_vector_env import SyncVectorEnv from gymnasium.vector.sync_vector_env import SyncVectorEnv
from gymnasium.vector.vector_env import VectorEnv from gymnasium.vector.vector_env import VectorEnv
from tests.testing_env import GenericTestEnv
from tests.vector.utils import CustomSpace, make_env from tests.vector.utils import CustomSpace, make_env
@@ -58,3 +61,65 @@ def test_custom_space_vector_env():
assert isinstance(env.single_action_space, CustomSpace) assert isinstance(env.single_action_space, CustomSpace)
assert isinstance(env.action_space, Tuple) assert isinstance(env.action_space, Tuple)
@pytest.mark.parametrize(
    "vectoriser",
    (
        SyncVectorEnv,
        partial(AsyncVectorEnv, shared_memory=True),
        partial(AsyncVectorEnv, shared_memory=False),
    ),
    ids=["Sync", "Async with shared memory", "Async without shared memory"],
)
def test_final_obs_info(vectoriser):
    """Tests that the vector environments correctly return the final observation and info.

    The single sub-environment echoes actions 0-2 back as the observation and
    terminates on action 3. After the terminating step the returned
    observation/info must come from the automatic reset, while the last
    pre-reset observation and info are exposed under ``"final_observation"``
    and ``"final_info"``.
    """

    def reset_fn(self, seed=None, options=None):
        return 0, {"reset": True}

    def thunk():
        return GenericTestEnv(
            action_space=Discrete(4),
            observation_space=Discrete(4),
            reset_fn=reset_fn,
            # (observation, reward, terminated, truncated, info)
            step_fn=lambda self, action: (
                action if action < 3 else 0,
                0,
                action >= 3,
                False,
                {"action": action},
            ),
        )

    env = vectoriser([thunk])
    try:
        obs, info = env.reset()
        assert obs == np.array([0]) and info == {
            "reset": np.array([True]),
            "_reset": np.array([True]),
        }

        obs, _, termination, _, info = env.step([1])
        assert (
            obs == np.array([1])
            and termination == np.array([False])
            and info == {"action": np.array([1]), "_action": np.array([True])}
        )

        obs, _, termination, _, info = env.step([2])
        assert (
            obs == np.array([2])
            and termination == np.array([False])
            and info == {"action": np.array([2]), "_action": np.array([True])}
        )

        # Action 3 terminates the episode: obs/info now come from the reset.
        obs, _, termination, _, info = env.step([3])
        assert (
            obs == np.array([0])
            and termination == np.array([True])
            and info["reset"] == np.array([True])
        )
        assert "final_observation" in info and "final_info" in info
        assert info["final_observation"] == np.array([0]) and info["final_info"] == {
            "action": 3
        }
    finally:
        # Always release the vector env (worker processes for the async
        # variants), even when an assertion above fails.
        env.close()