mirror of
https://github.com/Farama-Foundation/Gymnasium.git
synced 2025-08-28 09:17:18 +00:00
Add compatibility with numpy 2.0 (#1094)
Co-authored-by: Mark Towers <mark.m.towers@gmail.com>
This commit is contained in:
committed by
GitHub
parent
3b7f398508
commit
8161d7d0a0
12
.github/workflows/build.yml
vendored
12
.github/workflows/build.yml
vendored
@@ -10,15 +10,21 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
|
python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
|
||||||
|
numpy-version: ['>=1.21,<2.0', '>=2.0']
|
||||||
|
exclude:
|
||||||
|
- python-version: '3.8' # numpy>=2.0 requires Python>=3.9
|
||||||
|
numpy-version: '>=2.0'
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
- run: |
|
- run: |
|
||||||
docker build -f bin/all-py.Dockerfile \
|
docker build -f bin/all-py.Dockerfile \
|
||||||
--build-arg PYTHON_VERSION=${{ matrix.python-version }} \
|
--build-arg PYTHON_VERSION="${{ matrix.python-version }}" \
|
||||||
--tag gymnasium-all-docker .
|
--build-arg NUMPY_VERSION="${{ matrix.numpy-version }}" \
|
||||||
|
--tag gymnasium-all-docker .
|
||||||
- name: Run tests
|
- name: Run tests
|
||||||
run: docker run gymnasium-all-docker pytest tests/*
|
run: docker run gymnasium-all-docker pytest tests/*
|
||||||
- name: Run doctests
|
- name: Run doctests
|
||||||
|
if: ${{ matrix.numpy-version == '>=2.0' }}
|
||||||
run: docker run gymnasium-all-docker pytest --doctest-modules gymnasium/
|
run: docker run gymnasium-all-docker pytest --doctest-modules gymnasium/
|
||||||
|
|
||||||
build-necessary:
|
build-necessary:
|
||||||
|
@@ -1,5 +1,6 @@
|
|||||||
# A Dockerfile that sets up a full Gymnasium install with test dependencies
|
# A Dockerfile that sets up a full Gymnasium install with test dependencies
|
||||||
ARG PYTHON_VERSION
|
ARG PYTHON_VERSION
|
||||||
|
ARG NUMPY_VERSION=">=1.21,<2.0"
|
||||||
FROM python:$PYTHON_VERSION
|
FROM python:$PYTHON_VERSION
|
||||||
|
|
||||||
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
|
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
|
||||||
@@ -27,6 +28,9 @@ RUN git clone https://github.com/openai/mujoco-py.git\
|
|||||||
COPY . /usr/local/gymnasium/
|
COPY . /usr/local/gymnasium/
|
||||||
WORKDIR /usr/local/gymnasium/
|
WORKDIR /usr/local/gymnasium/
|
||||||
|
|
||||||
|
# Specify the numpy version to cover both 1.x and 2.x
|
||||||
|
RUN pip install --upgrade "numpy$NUMPY_VERSION"
|
||||||
|
|
||||||
# Test with PyTorch CPU build, since CUDA is not available in CI anyway
|
# Test with PyTorch CPU build, since CUDA is not available in CI anyway
|
||||||
RUN pip install .[all,testing] --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu
|
RUN pip install .[all,testing] --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu
|
||||||
|
|
||||||
|
@@ -540,6 +540,7 @@ class CarRacing(gym.Env, EzPickle):
|
|||||||
def step(self, action: Union[np.ndarray, int]):
|
def step(self, action: Union[np.ndarray, int]):
|
||||||
assert self.car is not None
|
assert self.car is not None
|
||||||
if action is not None:
|
if action is not None:
|
||||||
|
action = action.astype(np.float64)
|
||||||
if self.continuous:
|
if self.continuous:
|
||||||
self.car.steer(-action[0])
|
self.car.steer(-action[0])
|
||||||
self.car.gas(action[1])
|
self.car.gas(action[1])
|
||||||
|
@@ -509,7 +509,7 @@ class LunarLander(gym.Env, EzPickle):
|
|||||||
)
|
)
|
||||||
|
|
||||||
if self.continuous:
|
if self.continuous:
|
||||||
action = np.clip(action, -1, +1).astype(np.float32)
|
action = np.clip(action, -1, +1).astype(np.float64)
|
||||||
else:
|
else:
|
||||||
assert self.action_space.contains(
|
assert self.action_space.contains(
|
||||||
action
|
action
|
||||||
|
@@ -447,9 +447,9 @@ def rk4(derivs, y0, t):
|
|||||||
try:
|
try:
|
||||||
Ny = len(y0)
|
Ny = len(y0)
|
||||||
except TypeError:
|
except TypeError:
|
||||||
yout = np.zeros((len(t),), np.float_)
|
yout = np.zeros((len(t),), np.float64)
|
||||||
else:
|
else:
|
||||||
yout = np.zeros((len(t), Ny), np.float_)
|
yout = np.zeros((len(t), Ny), np.float64)
|
||||||
|
|
||||||
yout[0] = y0
|
yout[0] = y0
|
||||||
|
|
||||||
|
@@ -241,7 +241,10 @@ class PendulumEnv(gym.Env):
|
|||||||
if self.last_u is not None:
|
if self.last_u is not None:
|
||||||
scale_img = pygame.transform.smoothscale(
|
scale_img = pygame.transform.smoothscale(
|
||||||
img,
|
img,
|
||||||
(scale * np.abs(self.last_u) / 2, scale * np.abs(self.last_u) / 2),
|
(
|
||||||
|
float(scale * np.abs(self.last_u) / 2),
|
||||||
|
float(scale * np.abs(self.last_u) / 2),
|
||||||
|
),
|
||||||
)
|
)
|
||||||
is_flip = bool(self.last_u > 0)
|
is_flip = bool(self.last_u > 0)
|
||||||
scale_img = pygame.transform.flip(scale_img, is_flip, True)
|
scale_img = pygame.transform.flip(scale_img, is_flip, True)
|
||||||
|
@@ -20,7 +20,7 @@ class Dict(Space[typing.Dict[str, Any]], typing.Mapping[str, Space[Any]]):
|
|||||||
>>> from gymnasium.spaces import Dict, Box, Discrete
|
>>> from gymnasium.spaces import Dict, Box, Discrete
|
||||||
>>> observation_space = Dict({"position": Box(-1, 1, shape=(2,)), "color": Discrete(3)}, seed=42)
|
>>> observation_space = Dict({"position": Box(-1, 1, shape=(2,)), "color": Discrete(3)}, seed=42)
|
||||||
>>> observation_space.sample()
|
>>> observation_space.sample()
|
||||||
{'color': 0, 'position': array([-0.3991573 , 0.21649833], dtype=float32)}
|
{'color': np.int64(0), 'position': array([-0.3991573 , 0.21649833], dtype=float32)}
|
||||||
|
|
||||||
With a nested dict:
|
With a nested dict:
|
||||||
|
|
||||||
|
@@ -18,10 +18,10 @@ class Discrete(Space[np.int64]):
|
|||||||
>>> from gymnasium.spaces import Discrete
|
>>> from gymnasium.spaces import Discrete
|
||||||
>>> observation_space = Discrete(2, seed=42) # {0, 1}
|
>>> observation_space = Discrete(2, seed=42) # {0, 1}
|
||||||
>>> observation_space.sample()
|
>>> observation_space.sample()
|
||||||
0
|
np.int64(0)
|
||||||
>>> observation_space = Discrete(3, start=-1, seed=42) # {-1, 0, 1}
|
>>> observation_space = Discrete(3, start=-1, seed=42) # {-1, 0, 1}
|
||||||
>>> observation_space.sample()
|
>>> observation_space.sample()
|
||||||
-1
|
np.int64(-1)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
@@ -19,9 +19,9 @@ class OneOf(Space[Any]):
|
|||||||
>>> from gymnasium.spaces import OneOf, Box, Discrete
|
>>> from gymnasium.spaces import OneOf, Box, Discrete
|
||||||
>>> observation_space = OneOf((Discrete(2), Box(-1, 1, shape=(2,))), seed=123)
|
>>> observation_space = OneOf((Discrete(2), Box(-1, 1, shape=(2,))), seed=123)
|
||||||
>>> observation_space.sample() # the first element is the space index (Discrete in this case) and the second element is the sample from Discrete
|
>>> observation_space.sample() # the first element is the space index (Discrete in this case) and the second element is the sample from Discrete
|
||||||
(0, 0)
|
(np.int64(0), np.int64(0))
|
||||||
>>> observation_space.sample() # this time the Box space was sampled as index=1
|
>>> observation_space.sample() # this time the Box space was sampled as index=1
|
||||||
(1, array([-0.00711833, -0.7257502 ], dtype=float32))
|
(np.int64(1), array([-0.00711833, -0.7257502 ], dtype=float32))
|
||||||
>>> observation_space[0]
|
>>> observation_space[0]
|
||||||
Discrete(2)
|
Discrete(2)
|
||||||
>>> observation_space[1]
|
>>> observation_space[1]
|
||||||
|
@@ -19,7 +19,7 @@ class Tuple(Space[typing.Tuple[Any, ...]], typing.Sequence[Any]):
|
|||||||
>>> from gymnasium.spaces import Tuple, Box, Discrete
|
>>> from gymnasium.spaces import Tuple, Box, Discrete
|
||||||
>>> observation_space = Tuple((Discrete(2), Box(-1, 1, shape=(2,))), seed=42)
|
>>> observation_space = Tuple((Discrete(2), Box(-1, 1, shape=(2,))), seed=42)
|
||||||
>>> observation_space.sample()
|
>>> observation_space.sample()
|
||||||
(0, array([-0.3991573 , 0.21649833], dtype=float32))
|
(np.int64(0), array([-0.3991573 , 0.21649833], dtype=float32))
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
@@ -32,15 +32,9 @@ def _check_box_observation_space(observation_space: spaces.Box):
|
|||||||
), f"The Box observation space shape and high shape have have different shapes, high shape: {observation_space.high.shape}, box shape: {observation_space.shape}"
|
), f"The Box observation space shape and high shape have have different shapes, high shape: {observation_space.high.shape}, box shape: {observation_space.shape}"
|
||||||
|
|
||||||
if np.any(observation_space.low == observation_space.high):
|
if np.any(observation_space.low == observation_space.high):
|
||||||
logger.warn(
|
logger.warn("A Box observation space maximum and minimum values are equal.")
|
||||||
"A Box observation space maximum and minimum values are equal. "
|
|
||||||
f"Actual equal coordinates: {[x for x in zip(*np.where(observation_space.low == observation_space.high))]}"
|
|
||||||
)
|
|
||||||
elif np.any(observation_space.high < observation_space.low):
|
elif np.any(observation_space.high < observation_space.low):
|
||||||
logger.warn(
|
logger.warn("A Box observation space low value is greater than a high value.")
|
||||||
"A Box observation space low value is greater than a high value. "
|
|
||||||
f"Actual less than coordinates: {[x for x in zip(*np.where(observation_space.high < observation_space.low))]}"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _check_box_action_space(action_space: spaces.Box):
|
def _check_box_action_space(action_space: spaces.Box):
|
||||||
@@ -57,10 +51,7 @@ def _check_box_action_space(action_space: spaces.Box):
|
|||||||
), f"The Box action space shape and high shape have different shapes, high shape: {action_space.high.shape}, box shape: {action_space.shape}"
|
), f"The Box action space shape and high shape have different shapes, high shape: {action_space.high.shape}, box shape: {action_space.shape}"
|
||||||
|
|
||||||
if np.any(action_space.low == action_space.high):
|
if np.any(action_space.low == action_space.high):
|
||||||
logger.warn(
|
logger.warn("A Box action space maximum and minimum values are equal.")
|
||||||
"A Box action space maximum and minimum values are equal. "
|
|
||||||
f"Actual equal coordinates: {[x for x in zip(*np.where(action_space.low == action_space.high))]}"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def check_space(
|
def check_space(
|
||||||
|
@@ -37,8 +37,14 @@ class AtariPreprocessing(gym.Wrapper, gym.utils.RecordConstructorArgs):
|
|||||||
|
|
||||||
Example:
|
Example:
|
||||||
>>> import gymnasium as gym # doctest: +SKIP
|
>>> import gymnasium as gym # doctest: +SKIP
|
||||||
>>> env = gym.make("ALE/Adventure-v5") # doctest: +SKIP
|
>>> import ale_py # doctest: +SKIP
|
||||||
>>> env = AtariPreprocessing(env, noop_max=10, frame_skip=0, screen_size=84, terminal_on_life_loss=True, grayscale_obs=False, grayscale_newaxis=False) # doctest: +SKIP
|
>>> gym.register_envs(ale_py) # doctest: +SKIP
|
||||||
|
>>> env = gym.make("ALE/Pong-v5", frameskip=1) # doctest: +SKIP
|
||||||
|
>>> env = AtariPreprocessing( # doctest: +SKIP
|
||||||
|
... env,
|
||||||
|
... noop_max=10, frame_skip=4, terminal_on_life_loss=True,
|
||||||
|
... screen_size=84, grayscale_obs=False, grayscale_newaxis=False
|
||||||
|
... )
|
||||||
|
|
||||||
Change logs:
|
Change logs:
|
||||||
* Added in gym v0.12.2 (gym #1455)
|
* Added in gym v0.12.2 (gym #1455)
|
||||||
|
@@ -559,9 +559,9 @@ class MaxAndSkipObservation(
|
|||||||
>>> wrapped_obs0, *_ = wrapped_env.reset(seed=123)
|
>>> wrapped_obs0, *_ = wrapped_env.reset(seed=123)
|
||||||
>>> wrapped_obs1, *_ = wrapped_env.step(1)
|
>>> wrapped_obs1, *_ = wrapped_env.step(1)
|
||||||
>>> np.all(obs0 == wrapped_obs0)
|
>>> np.all(obs0 == wrapped_obs0)
|
||||||
True
|
np.True_
|
||||||
>>> np.all(wrapped_obs1 == skip_and_max_obs)
|
>>> np.all(wrapped_obs1 == skip_and_max_obs)
|
||||||
True
|
np.True_
|
||||||
|
|
||||||
Change logs:
|
Change logs:
|
||||||
* v1.0.0 - Initially added
|
* v1.0.0 - Initially added
|
||||||
|
@@ -52,7 +52,7 @@ class NormalizeReward(
|
|||||||
...
|
...
|
||||||
>>> env.close()
|
>>> env.close()
|
||||||
>>> np.var(episode_rewards)
|
>>> np.var(episode_rewards)
|
||||||
0.0008876301247721108
|
np.float64(0.0008876301247721108)
|
||||||
|
|
||||||
Example with the normalize reward wrapper:
|
Example with the normalize reward wrapper:
|
||||||
>>> import numpy as np
|
>>> import numpy as np
|
||||||
@@ -70,7 +70,7 @@ class NormalizeReward(
|
|||||||
>>> env.close()
|
>>> env.close()
|
||||||
>>> # will approach 0.99 with more episodes
|
>>> # will approach 0.99 with more episodes
|
||||||
>>> np.var(episode_rewards)
|
>>> np.var(episode_rewards)
|
||||||
0.010162116476634746
|
np.float64(0.010162116476634746)
|
||||||
|
|
||||||
Change logs:
|
Change logs:
|
||||||
* v0.21.0 - Initially added
|
* v0.21.0 - Initially added
|
||||||
|
@@ -144,7 +144,7 @@ class RescaleAction(
|
|||||||
>>> wrapped_env = RescaleAction(env, min_action=min_action, max_action=max_action)
|
>>> wrapped_env = RescaleAction(env, min_action=min_action, max_action=max_action)
|
||||||
>>> wrapped_env_obs, _, _, _, _ = wrapped_env.step(max_action)
|
>>> wrapped_env_obs, _, _, _, _ = wrapped_env.step(max_action)
|
||||||
>>> np.all(obs == wrapped_env_obs)
|
>>> np.all(obs == wrapped_env_obs)
|
||||||
True
|
np.True_
|
||||||
|
|
||||||
Change logs:
|
Change logs:
|
||||||
* v0.15.4 - Initially added
|
* v0.15.4 - Initially added
|
||||||
|
@@ -632,11 +632,11 @@ class AddRenderObservation(
|
|||||||
>>> obs, _ = env.reset(seed=123)
|
>>> obs, _ = env.reset(seed=123)
|
||||||
>>> image = env.render()
|
>>> image = env.render()
|
||||||
>>> np.all(obs == image)
|
>>> np.all(obs == image)
|
||||||
True
|
np.True_
|
||||||
>>> obs, *_ = env.step(env.action_space.sample())
|
>>> obs, *_ = env.step(env.action_space.sample())
|
||||||
>>> image = env.render()
|
>>> image = env.render()
|
||||||
>>> np.all(obs == image)
|
>>> np.all(obs == image)
|
||||||
True
|
np.True_
|
||||||
|
|
||||||
Example - Add the rendered image to the original observation as a dictionary item:
|
Example - Add the rendered image to the original observation as a dictionary item:
|
||||||
>>> env = gym.make("CartPole-v1", render_mode="rgb_array")
|
>>> env = gym.make("CartPole-v1", render_mode="rgb_array")
|
||||||
@@ -649,11 +649,11 @@ class AddRenderObservation(
|
|||||||
>>> obs["state"]
|
>>> obs["state"]
|
||||||
array([ 0.01823519, -0.0446179 , -0.02796401, -0.03156282], dtype=float32)
|
array([ 0.01823519, -0.0446179 , -0.02796401, -0.03156282], dtype=float32)
|
||||||
>>> np.all(obs["pixels"] == env.render())
|
>>> np.all(obs["pixels"] == env.render())
|
||||||
True
|
np.True_
|
||||||
>>> obs, reward, terminates, truncates, info = env.step(env.action_space.sample())
|
>>> obs, reward, terminates, truncates, info = env.step(env.action_space.sample())
|
||||||
>>> image = env.render()
|
>>> image = env.render()
|
||||||
>>> np.all(obs["pixels"] == image)
|
>>> np.all(obs["pixels"] == image)
|
||||||
True
|
np.True_
|
||||||
|
|
||||||
Change logs:
|
Change logs:
|
||||||
* v0.15.0 - Initially added as ``PixelObservationWrapper``
|
* v0.15.0 - Initially added as ``PixelObservationWrapper``
|
||||||
|
@@ -77,7 +77,7 @@ class ClipReward(TransformReward[ObsType, ActType], gym.utils.RecordConstructorA
|
|||||||
>>> _ = env.reset()
|
>>> _ = env.reset()
|
||||||
>>> _, rew, _, _, _ = env.step(1)
|
>>> _, rew, _, _, _ = env.step(1)
|
||||||
>>> rew
|
>>> rew
|
||||||
0.5
|
np.float64(0.5)
|
||||||
|
|
||||||
Change logs:
|
Change logs:
|
||||||
* v1.0.0 - Initially added
|
* v1.0.0 - Initially added
|
||||||
|
@@ -50,18 +50,18 @@ class DictInfoToList(VectorWrapper):
|
|||||||
Another example for vector environments:
|
Another example for vector environments:
|
||||||
>>> import numpy as np
|
>>> import numpy as np
|
||||||
>>> import gymnasium as gym
|
>>> import gymnasium as gym
|
||||||
>>> envs = gym.make_vec("HalfCheetah-v4", num_envs=3)
|
>>> envs = gym.make_vec("HalfCheetah-v4", num_envs=2)
|
||||||
>>> _ = envs.reset(seed=123)
|
>>> _ = envs.reset(seed=123)
|
||||||
>>> _ = envs.action_space.seed(123)
|
>>> _ = envs.action_space.seed(123)
|
||||||
>>> _, _, _, _, infos = envs.step(envs.action_space.sample())
|
>>> _, _, _, _, infos = envs.step(envs.action_space.sample())
|
||||||
>>> infos
|
>>> infos
|
||||||
{'x_position': array([0.03332211, 0.10172355, 0.08920531]), '_x_position': array([ True, True, True]), 'x_velocity': array([-0.06296527, 0.89345848, 0.37710836]), '_x_velocity': array([ True, True, True]), 'reward_run': array([-0.06296527, 0.89345848, 0.37710836]), '_reward_run': array([ True, True, True]), 'reward_ctrl': array([-0.24503503, -0.21944423, -0.20672209]), '_reward_ctrl': array([ True, True, True])}
|
{'x_position': array([0.03332211, 0.10172355]), '_x_position': array([ True, True]), 'x_velocity': array([-0.06296527, 0.89345848]), '_x_velocity': array([ True, True]), 'reward_run': array([-0.06296527, 0.89345848]), '_reward_run': array([ True, True]), 'reward_ctrl': array([-0.24503504, -0.21944423], dtype=float32), '_reward_ctrl': array([ True, True])}
|
||||||
>>> envs = DictInfoToList(envs)
|
>>> envs = DictInfoToList(envs)
|
||||||
>>> _ = envs.reset(seed=123)
|
>>> _ = envs.reset(seed=123)
|
||||||
>>> _ = envs.action_space.seed(123)
|
>>> _ = envs.action_space.seed(123)
|
||||||
>>> _, _, _, _, infos = envs.step(envs.action_space.sample())
|
>>> _, _, _, _, infos = envs.step(envs.action_space.sample())
|
||||||
>>> infos
|
>>> infos
|
||||||
[{'x_position': 0.03332210900362942, 'x_velocity': -0.06296527291998533, 'reward_run': -0.06296527291998533, 'reward_ctrl': -0.2450350284576416}, {'x_position': 0.10172354684460168, 'x_velocity': 0.8934584807363618, 'reward_run': 0.8934584807363618, 'reward_ctrl': -0.21944422721862794}, {'x_position': 0.08920531470057845, 'x_velocity': 0.3771083596080768, 'reward_run': 0.3771083596080768, 'reward_ctrl': -0.20672209262847902}]
|
[{'x_position': np.float64(0.03332210900362942), 'x_velocity': np.float64(-0.06296527291998533), 'reward_run': np.float64(-0.06296527291998533), 'reward_ctrl': np.float32(-0.24503504)}, {'x_position': np.float64(0.10172354684460168), 'x_velocity': np.float64(0.8934584807363618), 'reward_run': np.float64(0.8934584807363618), 'reward_ctrl': np.float32(-0.21944423)}]
|
||||||
|
|
||||||
Change logs:
|
Change logs:
|
||||||
* v0.24.0 - Initially added as ``VectorListInfo``
|
* v0.24.0 - Initially added as ``VectorListInfo``
|
||||||
|
@@ -35,9 +35,9 @@ class NormalizeObservation(VectorObservationWrapper, gym.utils.RecordConstructor
|
|||||||
>>> for _ in range(100):
|
>>> for _ in range(100):
|
||||||
... obs, *_ = envs.step(envs.action_space.sample())
|
... obs, *_ = envs.step(envs.action_space.sample())
|
||||||
>>> np.mean(obs)
|
>>> np.mean(obs)
|
||||||
0.024251968
|
np.float32(0.024251968)
|
||||||
>>> np.std(obs)
|
>>> np.std(obs)
|
||||||
0.62259156
|
np.float32(0.62259156)
|
||||||
>>> envs.close()
|
>>> envs.close()
|
||||||
|
|
||||||
Example with the normalize observation wrapper:
|
Example with the normalize observation wrapper:
|
||||||
@@ -49,9 +49,9 @@ class NormalizeObservation(VectorObservationWrapper, gym.utils.RecordConstructor
|
|||||||
>>> for _ in range(100):
|
>>> for _ in range(100):
|
||||||
... obs, *_ = envs.step(envs.action_space.sample())
|
... obs, *_ = envs.step(envs.action_space.sample())
|
||||||
>>> np.mean(obs)
|
>>> np.mean(obs)
|
||||||
-0.2359734
|
np.float32(-0.2359734)
|
||||||
>>> np.std(obs)
|
>>> np.std(obs)
|
||||||
1.1938739
|
np.float32(1.1938739)
|
||||||
>>> envs.close()
|
>>> envs.close()
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@@ -44,9 +44,9 @@ class NormalizeReward(VectorWrapper, gym.utils.RecordConstructorArgs):
|
|||||||
...
|
...
|
||||||
>>> envs.close()
|
>>> envs.close()
|
||||||
>>> np.mean(episode_rewards)
|
>>> np.mean(episode_rewards)
|
||||||
-0.03359492141887935
|
np.float64(-0.03359492141887935)
|
||||||
>>> np.std(episode_rewards)
|
>>> np.std(episode_rewards)
|
||||||
0.029028230434438706
|
np.float64(0.029028230434438706)
|
||||||
|
|
||||||
Example with the normalize reward wrapper:
|
Example with the normalize reward wrapper:
|
||||||
>>> import gymnasium as gym
|
>>> import gymnasium as gym
|
||||||
@@ -62,9 +62,9 @@ class NormalizeReward(VectorWrapper, gym.utils.RecordConstructorArgs):
|
|||||||
...
|
...
|
||||||
>>> envs.close()
|
>>> envs.close()
|
||||||
>>> np.mean(episode_rewards)
|
>>> np.mean(episode_rewards)
|
||||||
-0.1598639586606745
|
np.float64(-0.1598639586606745)
|
||||||
>>> np.std(episode_rewards)
|
>>> np.std(episode_rewards)
|
||||||
0.27800309628058434
|
np.float64(0.27800309628058434)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
@@ -33,7 +33,7 @@ class TransformAction(VectorActionWrapper):
|
|||||||
>>> obs
|
>>> obs
|
||||||
array([[-0.46553135, -0.00142543],
|
array([[-0.46553135, -0.00142543],
|
||||||
[-0.498371 , -0.00715587],
|
[-0.498371 , -0.00715587],
|
||||||
[-0.4651575 , -0.00624371]], dtype=float32)
|
[-0.46515748, -0.00624371]], dtype=float32)
|
||||||
|
|
||||||
Example - With action transformation:
|
Example - With action transformation:
|
||||||
>>> import gymnasium as gym
|
>>> import gymnasium as gym
|
||||||
|
@@ -321,15 +321,15 @@ class RescaleObservation(VectorizeTransformObservation):
|
|||||||
>>> envs = gym.make_vec("CartPole-v1", num_envs=3, vectorization_mode="sync")
|
>>> envs = gym.make_vec("CartPole-v1", num_envs=3, vectorization_mode="sync")
|
||||||
>>> obs, info = envs.reset(seed=123)
|
>>> obs, info = envs.reset(seed=123)
|
||||||
>>> obs.min()
|
>>> obs.min()
|
||||||
-0.0446179
|
np.float32(-0.0446179)
|
||||||
>>> obs.max()
|
>>> obs.max()
|
||||||
0.0469136
|
np.float32(0.0469136)
|
||||||
>>> envs = RescaleObservation(envs, min_obs=-5.0, max_obs=5.0)
|
>>> envs = RescaleObservation(envs, min_obs=-5.0, max_obs=5.0)
|
||||||
>>> obs, info = envs.reset(seed=123)
|
>>> obs, info = envs.reset(seed=123)
|
||||||
>>> obs.min()
|
>>> obs.min()
|
||||||
-0.33379582
|
np.float32(-0.33379582)
|
||||||
>>> obs.max()
|
>>> obs.max()
|
||||||
0.55998987
|
np.float32(0.55998987)
|
||||||
>>> envs.close()
|
>>> envs.close()
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@@ -108,7 +108,6 @@ gymnasium = [
|
|||||||
# Linters and Test tools #######################################################
|
# Linters and Test tools #######################################################
|
||||||
|
|
||||||
[tool.black]
|
[tool.black]
|
||||||
safe = true
|
|
||||||
|
|
||||||
[tool.isort]
|
[tool.isort]
|
||||||
atomic = true
|
atomic = true
|
||||||
|
@@ -127,7 +127,7 @@ def test_box_actions_out_of_bound(env: gym.Env):
|
|||||||
if is_upper_bound:
|
if is_upper_bound:
|
||||||
obs, _, _, _, _ = env.step(upper_bounds)
|
obs, _, _, _, _ = env.step(upper_bounds)
|
||||||
oob_action = upper_bounds.copy()
|
oob_action = upper_bounds.copy()
|
||||||
oob_action[i] += np.cast[dtype](OOB_VALUE)
|
oob_action[i] += np.asarray(OOB_VALUE, dtype=dtype)
|
||||||
|
|
||||||
assert oob_action[i] > upper_bounds[i]
|
assert oob_action[i] > upper_bounds[i]
|
||||||
oob_obs, _, _, _, _ = oob_env.step(oob_action)
|
oob_obs, _, _, _, _ = oob_env.step(oob_action)
|
||||||
@@ -135,11 +135,9 @@ def test_box_actions_out_of_bound(env: gym.Env):
|
|||||||
assert np.all(obs == oob_obs)
|
assert np.all(obs == oob_obs)
|
||||||
|
|
||||||
if is_lower_bound:
|
if is_lower_bound:
|
||||||
obs, _, _, _, _ = env.step(
|
obs, _, _, _, _ = env.step(lower_bounds)
|
||||||
lower_bounds
|
|
||||||
) # `env` is unwrapped, and in new step API
|
|
||||||
oob_action = lower_bounds.copy()
|
oob_action = lower_bounds.copy()
|
||||||
oob_action[i] -= np.cast[dtype](OOB_VALUE)
|
oob_action[i] -= np.asarray(OOB_VALUE, dtype=dtype)
|
||||||
|
|
||||||
assert oob_action[i] < lower_bounds[i]
|
assert oob_action[i] < lower_bounds[i]
|
||||||
oob_obs, _, _, _, _ = oob_env.step(oob_action)
|
oob_obs, _, _, _, _ = oob_env.step(oob_action)
|
||||||
|
@@ -239,7 +239,7 @@ def chi2_test(sample, low, high, bounded_below, bounded_above):
|
|||||||
|
|
||||||
if bounded_below and bounded_above:
|
if bounded_below and bounded_above:
|
||||||
# X ~ U(low, high)
|
# X ~ U(low, high)
|
||||||
degrees_of_freedom = high - low + 1
|
degrees_of_freedom = int(high) - int(low) + 1
|
||||||
observed_frequency = np.bincount(sample - low, minlength=degrees_of_freedom)
|
observed_frequency = np.bincount(sample - low, minlength=degrees_of_freedom)
|
||||||
assert observed_frequency.shape == (degrees_of_freedom,)
|
assert observed_frequency.shape == (degrees_of_freedom,)
|
||||||
expected_frequency = np.ones(degrees_of_freedom) * n_trials / degrees_of_freedom
|
expected_frequency = np.ones(degrees_of_freedom) * n_trials / degrees_of_freedom
|
||||||
|
@@ -35,7 +35,7 @@ def _modify_space(space: spaces.Space, attribute: str, value):
|
|||||||
[
|
[
|
||||||
UserWarning,
|
UserWarning,
|
||||||
spaces.Box(np.zeros(5), np.zeros(5)),
|
spaces.Box(np.zeros(5), np.zeros(5)),
|
||||||
"A Box observation space maximum and minimum values are equal. Actual equal coordinates: [(0,), (1,), (2,), (3,), (4,)]",
|
"A Box observation space maximum and minimum values are equal.",
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
AssertionError,
|
AssertionError,
|
||||||
@@ -106,7 +106,7 @@ def test_check_observation_space(test, space, message: str):
|
|||||||
[
|
[
|
||||||
UserWarning,
|
UserWarning,
|
||||||
spaces.Box(np.zeros(5), np.zeros(5)),
|
spaces.Box(np.zeros(5), np.zeros(5)),
|
||||||
"A Box action space maximum and minimum values are equal. Actual equal coordinates: [(0,), (1,), (2,), (3,), (4,)]",
|
"A Box action space maximum and minimum values are equal.",
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
AssertionError,
|
AssertionError,
|
||||||
|
Reference in New Issue
Block a user