From 3b856dec9617e83fa9440397c8ab1c7a8b18b5fc Mon Sep 17 00:00:00 2001 From: Mark Towers Date: Fri, 22 Dec 2023 14:48:22 +0000 Subject: [PATCH] Update vector wrapper docs and add Env generic to doc (#847) --- docs/api/vector.md | 16 +++-- docs/api/vector/wrappers.md | 60 ++++++++++++++++++- docs/api/wrappers/table.md | 1 - docs/api/wrappers/vector_wrappers.md | 19 ------ gymnasium/core.py | 35 ++++++++--- gymnasium/envs/box2d/bipedal_walker.py | 1 + gymnasium/envs/box2d/lunar_lander.py | 1 + gymnasium/envs/classic_control/acrobot.py | 3 +- gymnasium/envs/classic_control/cartpole.py | 1 + .../continuous_mountain_car.py | 1 + .../envs/classic_control/mountain_car.py | 1 + gymnasium/envs/classic_control/pendulum.py | 1 + gymnasium/envs/mujoco/ant.py | 1 + gymnasium/envs/mujoco/ant_v3.py | 1 + gymnasium/envs/mujoco/ant_v4.py | 1 + gymnasium/envs/mujoco/ant_v5.py | 1 + gymnasium/envs/mujoco/half_cheetah.py | 1 + gymnasium/envs/mujoco/half_cheetah_v3.py | 1 + gymnasium/envs/mujoco/half_cheetah_v4.py | 1 + gymnasium/envs/mujoco/half_cheetah_v5.py | 1 + gymnasium/envs/mujoco/hopper.py | 1 + gymnasium/envs/mujoco/hopper_v3.py | 1 + gymnasium/envs/mujoco/hopper_v4.py | 1 + gymnasium/envs/mujoco/hopper_v5.py | 1 + gymnasium/envs/mujoco/humanoid.py | 1 + gymnasium/envs/mujoco/humanoid_v3.py | 1 + gymnasium/envs/mujoco/humanoid_v4.py | 1 + gymnasium/envs/mujoco/humanoid_v5.py | 1 + gymnasium/envs/mujoco/humanoidstandup.py | 1 + gymnasium/envs/mujoco/humanoidstandup_v4.py | 1 + gymnasium/envs/mujoco/humanoidstandup_v5.py | 1 + .../envs/mujoco/inverted_double_pendulum.py | 1 + .../mujoco/inverted_double_pendulum_v4.py | 1 + .../mujoco/inverted_double_pendulum_v5.py | 1 + gymnasium/envs/mujoco/inverted_pendulum.py | 1 + gymnasium/envs/mujoco/inverted_pendulum_v4.py | 1 + gymnasium/envs/mujoco/inverted_pendulum_v5.py | 1 + gymnasium/envs/mujoco/pusher.py | 1 + gymnasium/envs/mujoco/pusher_v4.py | 1 + gymnasium/envs/mujoco/pusher_v5.py | 3 +- gymnasium/envs/mujoco/reacher.py | 1 + gymnasium/envs/mujoco/reacher_v4.py | 1 + gymnasium/envs/mujoco/reacher_v5.py | 1 + gymnasium/envs/mujoco/swimmer.py | 1 + gymnasium/envs/mujoco/swimmer_v3.py | 2 +- gymnasium/envs/mujoco/swimmer_v4.py | 2 +- gymnasium/envs/mujoco/swimmer_v5.py | 2 +- gymnasium/envs/mujoco/walker2d.py | 2 +- gymnasium/envs/mujoco/walker2d_v3.py | 2 +- gymnasium/envs/mujoco/walker2d_v4.py | 2 +- gymnasium/envs/mujoco/walker2d_v5.py | 2 +- gymnasium/envs/toy_text/blackjack.py | 1 + gymnasium/envs/toy_text/cliffwalking.py | 3 +- gymnasium/envs/toy_text/frozen_lake.py | 3 +- gymnasium/envs/toy_text/taxi.py | 3 +- gymnasium/vector/vector_env.py | 8 +-- gymnasium/wrappers/common.py | 2 +- .../wrappers/vector/stateful_observation.py | 2 +- .../wrappers/vector/vectorize_observation.py | 4 +- 59 files changed, 159 insertions(+), 55 deletions(-) delete mode 100644 docs/api/wrappers/vector_wrappers.md diff --git a/docs/api/vector.md b/docs/api/vector.md index e0db3e8ae..0d1887397 100644 --- a/docs/api/vector.md +++ b/docs/api/vector.md @@ -2,7 +2,7 @@ title: Vector --- -# Vector environments +# Vectorize ```{toctree} :hidden: @@ -19,10 +19,10 @@ vector/utils ``` ### Methods - ```{eval-rst} -.. automethod:: gymnasium.vector.VectorEnv.reset .. automethod:: gymnasium.vector.VectorEnv.step +.. automethod:: gymnasium.vector.VectorEnv.reset +.. automethod:: gymnasium.vector.VectorEnv.render .. automethod:: gymnasium.vector.VectorEnv.close ``` @@ -47,11 +47,19 @@ vector/utils .. autoattribute:: gymnasium.vector.VectorEnv.single_observation_space - The observation space of an environment copy. + The observation space of a sub-environment. .. autoattribute:: gymnasium.vector.VectorEnv.spec The ``EnvSpec`` of the environment normally set during :py:meth:`gymnasium.make_vec` + +.. autoattribute:: gymnasium.vector.VectorEnv.render_mode + + The render mode of the environment which should follow similar specifications to `Env.render_mode`. + +.. autoattribute:: gymnasium.vector.VectorEnv.closed + + If the vector environment has been closed already. ``` ### Additional Methods diff --git a/docs/api/vector/wrappers.md b/docs/api/vector/wrappers.md index 2e3272c74..183cd7e86 100644 --- a/docs/api/vector/wrappers.md +++ b/docs/api/vector/wrappers.md @@ -2,19 +2,19 @@ title: Vector Wrappers --- -# Vector Wrappers +# Wrappers ```{eval-rst} .. autoclass:: gymnasium.vector.VectorWrapper .. automethod:: gymnasium.vector.VectorWrapper.step .. automethod:: gymnasium.vector.VectorWrapper.reset + .. automethod:: gymnasium.vector.VectorWrapper.render .. automethod:: gymnasium.vector.VectorWrapper.close .. autoclass:: gymnasium.vector.VectorObservationWrapper - .. automethod:: gymnasium.vector.VectorObservationWrapper.vector_observation - .. automethod:: gymnasium.vector.VectorObservationWrapper.single_observation + .. automethod:: gymnasium.vector.VectorObservationWrapper.observations .. autoclass:: gymnasium.vector.VectorActionWrapper @@ -24,3 +24,57 @@ title: Vector Wrappers .. automethod:: gymnasium.vector.VectorRewardWrapper.rewards ``` + +## Vector Only wrappers + +```{eval-rst} +.. autoclass:: gymnasium.wrappers.vector.DictInfoToList + +.. autoclass:: gymnasium.wrappers.vector.VectorizeTransformObservation +.. autoclass:: gymnasium.wrappers.vector.VectorizeTransformAction +.. autoclass:: gymnasium.wrappers.vector.VectorizeTransformReward +``` + +## Vectorized Common wrappers + +```{eval-rst} +.. autoclass:: gymnasium.wrappers.vector.RecordEpisodeStatistics +``` + +## Implemented Observation wrappers + +```{eval-rst} +.. autoclass:: gymnasium.wrappers.vector.TransformObservation +.. autoclass:: gymnasium.wrappers.vector.FilterObservation +.. autoclass:: gymnasium.wrappers.vector.FlattenObservation +.. autoclass:: gymnasium.wrappers.vector.GrayscaleObservation +.. autoclass:: gymnasium.wrappers.vector.ResizeObservation +.. autoclass:: gymnasium.wrappers.vector.ReshapeObservation +.. autoclass:: gymnasium.wrappers.vector.RescaleObservation +.. autoclass:: gymnasium.wrappers.vector.DtypeObservation +.. autoclass:: gymnasium.wrappers.vector.NormalizeObservation +``` + +## Implemented Action wrappers + +```{eval-rst} +.. autoclass:: gymnasium.wrappers.vector.TransformAction +.. autoclass:: gymnasium.wrappers.vector.ClipAction +.. autoclass:: gymnasium.wrappers.vector.RescaleAction +``` + +## Implemented Reward wrappers + +```{eval-rst} +.. autoclass:: gymnasium.wrappers.vector.TransformReward +.. autoclass:: gymnasium.wrappers.vector.ClipReward +.. autoclass:: gymnasium.wrappers.vector.NormalizeReward +``` + +## Implemented Data Conversion wrappers + +```{eval-rst} +.. autoclass:: gymnasium.wrappers.vector.JaxToNumpy +.. autoclass:: gymnasium.wrappers.vector.JaxToTorch +.. autoclass:: gymnasium.wrappers.vector.NumpyToTorch +``` diff --git a/docs/api/wrappers/table.md b/docs/api/wrappers/table.md index 35de64fc9..240b9f1f9 100644 --- a/docs/api/wrappers/table.md +++ b/docs/api/wrappers/table.md @@ -78,7 +78,6 @@ wrapper in the page on the wrapper type - Applies a function to the ``observation`` received from the environment's ``reset`` and ``step`` that is passed back to the user. * - :class:`TransformReward` - Applies a function to the ``reward`` received from the environment's ``step``. - ``` ## Vector only Wrappers diff --git a/docs/api/wrappers/vector_wrappers.md b/docs/api/wrappers/vector_wrappers.md deleted file mode 100644 index e2636f642..000000000 --- a/docs/api/wrappers/vector_wrappers.md +++ /dev/null @@ -1,19 +0,0 @@ ---- -title: Vector Wrappers ---- - -# Vector wrappers - -## Vector only wrappers - -```{eval-rst} -.. autoclass:: gymnasium.wrappers.vector.DictInfoToList -``` - -## Vectorize Transform Wrappers to Vector Wrappers - -```{eval-rst} -.. autoclass:: gymnasium.wrappers.vector.VectorizeTransformObservation -.. autoclass:: gymnasium.wrappers.vector.VectorizeTransformAction -.. autoclass:: gymnasium.wrappers.vector.VectorizeTransformReward -``` diff --git a/gymnasium/core.py b/gymnasium/core.py index bf8e87dc6..e7bea4aca 100644 --- a/gymnasium/core.py +++ b/gymnasium/core.py @@ -42,10 +42,16 @@ class Env(Generic[ObsType, ActType]): - :attr:`np_random` - The random number generator for the environment. This is automatically assigned during ``super().reset(seed=seed)`` and when assessing :attr:`np_random`. - .. seealso:: For modifying or extending environments use the :py:class:`gymnasium.Wrapper` class + .. seealso:: For modifying or extending environments use the :class:`gymnasium.Wrapper` class Note: To get reproducible sampling of actions, a seed can be set with ``env.action_space.seed(123)``. + + Note: + For strict type checking (e.g., mypy or pyright), :class:`Env` is a generic class with two parameterized types: ``ObsType`` and ``ActType``. + The ``ObsType`` and ``ActType`` are the expected types of the observations and actions used in :meth:`reset` and :meth:`step`. + The environment's :attr:`observation_space` and :attr:`action_space` should have type ``Space[ObsType]`` and ``Space[ActType]``, + see a space's implementation to find its parameterized type. """ # Set this in SOME subclasses @@ -473,7 +479,11 @@ class ObservationWrapper(Wrapper[WrapperObsType, ActType, ObsType, ActType]): """ def __init__(self, env: Env[ObsType, ActType]): - """Constructor for the observation wrapper.""" + """Constructor for the observation wrapper. + + Args: + env: Environment to be wrapped. + """ Wrapper.__init__(self, env) def reset( @@ -513,7 +523,11 @@ class RewardWrapper(Wrapper[ObsType, ActType, ObsType, ActType]): """ def __init__(self, env: Env[ObsType, ActType]): - """Constructor for the Reward wrapper.""" + """Constructor for the Reward wrapper. + + Args: + env: Environment to be wrapped. + """ Wrapper.__init__(self, env) def step( @@ -536,20 +550,25 @@ class RewardWrapper(Wrapper[ObsType, ActType, ObsType, ActType]): class ActionWrapper(Wrapper[ObsType, WrapperActType, ObsType, ActType]): - """Superclass of wrappers that can modify the action before :meth:`env.step`. + """Superclass of wrappers that can modify the action before :meth:`step`. If you would like to apply a function to the action before passing it to the base environment, you can simply inherit from :class:`ActionWrapper` and overwrite the method :meth:`action` to implement that transformation. The transformation defined in that method must take values in the base environment’s action space. However, its domain might differ from the original action space. - In that case, you need to specify the new action space of the wrapper by setting :attr:`self.action_space` in + In that case, you need to specify the new action space of the wrapper by setting :attr:`action_space` in the :meth:`__init__` method of your wrapper. - Among others, Gymnasium provides the action wrappers :class:`ClipAction` and :class:`RescaleAction` for clipping and rescaling actions. + Among others, Gymnasium provides the action wrappers :class:`gymnasium.wrappers.ClipAction` and + :class:`gymnasium.wrappers.RescaleAction` for clipping and rescaling actions. """ def __init__(self, env: Env[ObsType, ActType]): - """Constructor for the action wrapper.""" + """Constructor for the action wrapper. + + Args: + env: Environment to be wrapped. + """ Wrapper.__init__(self, env) def step( @@ -559,7 +578,7 @@ class ActionWrapper(Wrapper[ObsType, WrapperActType, ObsType, ActType]): return self.env.step(self.action(action)) def action(self, action: WrapperActType) -> ActType: - """Returns a modified action before :meth:`env.step` is called. + """Returns a modified action before :meth:`step` is called. Args: action: The original :meth:`step` actions diff --git a/gymnasium/envs/box2d/bipedal_walker.py b/gymnasium/envs/box2d/bipedal_walker.py index b4f69bab5..14ad83164 100644 --- a/gymnasium/envs/box2d/bipedal_walker.py +++ b/gymnasium/envs/box2d/bipedal_walker.py @@ -606,6 +606,7 @@ class BipedalWalker(gym.Env, EzPickle): if self.render_mode == "human": self.render() + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return np.array(state, dtype=np.float32), reward, terminated, False, {} def render(self): diff --git a/gymnasium/envs/box2d/lunar_lander.py b/gymnasium/envs/box2d/lunar_lander.py index 545861bbb..0d6682443 100644 --- a/gymnasium/envs/box2d/lunar_lander.py +++ b/gymnasium/envs/box2d/lunar_lander.py @@ -673,6 +673,7 @@ class LunarLander(gym.Env, EzPickle): if self.render_mode == "human": self.render() + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return np.array(state, dtype=np.float32), reward, terminated, False, {} def render(self): diff --git a/gymnasium/envs/classic_control/acrobot.py b/gymnasium/envs/classic_control/acrobot.py index 076ff3354..75f9cd465 100644 --- a/gymnasium/envs/classic_control/acrobot.py +++ b/gymnasium/envs/classic_control/acrobot.py @@ -225,7 +225,8 @@ class AcrobotEnv(Env): if self.render_mode == "human": self.render() - return (self._get_ob(), reward, terminated, False, {}) + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` + return self._get_ob(), reward, terminated, False, {} def _get_ob(self): s = self.state diff --git a/gymnasium/envs/classic_control/cartpole.py b/gymnasium/envs/classic_control/cartpole.py index daebccf63..6ef96efad 100644 --- a/gymnasium/envs/classic_control/cartpole.py +++ b/gymnasium/envs/classic_control/cartpole.py @@ -204,6 +204,7 @@ class CartPoleEnv(gym.Env[np.ndarray, Union[int, np.ndarray]]): if self.render_mode == "human": self.render() + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return np.array(self.state, dtype=np.float32), reward, terminated, False, {} def reset( diff --git a/gymnasium/envs/classic_control/continuous_mountain_car.py b/gymnasium/envs/classic_control/continuous_mountain_car.py index 59fa3321a..6397f7e97 100644 --- a/gymnasium/envs/classic_control/continuous_mountain_car.py +++ b/gymnasium/envs/classic_control/continuous_mountain_car.py @@ -175,6 +175,7 @@ class Continuous_MountainCarEnv(gym.Env): if self.render_mode == "human": self.render() + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return self.state, reward, terminated, False, {} def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None): diff --git a/gymnasium/envs/classic_control/mountain_car.py b/gymnasium/envs/classic_control/mountain_car.py index a7de427ac..dfc06070a 100644 --- a/gymnasium/envs/classic_control/mountain_car.py +++ b/gymnasium/envs/classic_control/mountain_car.py @@ -146,6 +146,7 @@ class MountainCarEnv(gym.Env): self.state = (position, velocity) if self.render_mode == "human": self.render() + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return np.array(self.state, dtype=np.float32), reward, terminated, False, {} def reset( diff --git a/gymnasium/envs/classic_control/pendulum.py b/gymnasium/envs/classic_control/pendulum.py index 9b477aa39..0c1516680 100644 --- a/gymnasium/envs/classic_control/pendulum.py +++ b/gymnasium/envs/classic_control/pendulum.py @@ -141,6 +141,7 @@ class PendulumEnv(gym.Env): if self.render_mode == "human": self.render() + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return self._get_obs(), -costs, False, False, {} def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None): diff --git a/gymnasium/envs/mujoco/ant.py b/gymnasium/envs/mujoco/ant.py index 348512f1b..3cccf8cb2 100644 --- a/gymnasium/envs/mujoco/ant.py +++ b/gymnasium/envs/mujoco/ant.py @@ -45,6 +45,7 @@ class AntEnv(MuJocoPyEnv, utils.EzPickle): if self.render_mode == "human": self.render() + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return ( ob, reward, diff --git a/gymnasium/envs/mujoco/ant_v3.py b/gymnasium/envs/mujoco/ant_v3.py index f11c3e483..ebf5fa000 100644 --- a/gymnasium/envs/mujoco/ant_v3.py +++ b/gymnasium/envs/mujoco/ant_v3.py @@ -147,6 +147,7 @@ class AntEnv(MuJocoPyEnv, utils.EzPickle): if self.render_mode == "human": self.render() + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return observation, reward, terminated, False, info def _get_obs(self): diff --git a/gymnasium/envs/mujoco/ant_v4.py b/gymnasium/envs/mujoco/ant_v4.py index 310c444ce..a277f6de0 100644 --- a/gymnasium/envs/mujoco/ant_v4.py +++ b/gymnasium/envs/mujoco/ant_v4.py @@ -159,6 +159,7 @@ class AntEnv(MujocoEnv, utils.EzPickle): if self.render_mode == "human": self.render() + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return observation, reward, terminated, False, info def _get_obs(self): diff --git a/gymnasium/envs/mujoco/ant_v5.py b/gymnasium/envs/mujoco/ant_v5.py index 8b3ab177e..bbf26e1de 100644 --- a/gymnasium/envs/mujoco/ant_v5.py +++ b/gymnasium/envs/mujoco/ant_v5.py @@ -390,6 +390,7 @@ class AntEnv(MujocoEnv, utils.EzPickle): if self.render_mode == "human": self.render() + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return observation, reward, terminated, False, info def _get_rew(self, x_velocity: float, action): diff --git a/gymnasium/envs/mujoco/half_cheetah.py b/gymnasium/envs/mujoco/half_cheetah.py index f4ae8e791..fd3eca0e1 100644 --- a/gymnasium/envs/mujoco/half_cheetah.py +++ b/gymnasium/envs/mujoco/half_cheetah.py @@ -35,6 +35,7 @@ class HalfCheetahEnv(MuJocoPyEnv, utils.EzPickle): if self.render_mode == "human": self.render() + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return ( ob, reward, diff --git a/gymnasium/envs/mujoco/half_cheetah_v3.py b/gymnasium/envs/mujoco/half_cheetah_v3.py index a8b9a410d..4c5e1d3d8 100644 --- a/gymnasium/envs/mujoco/half_cheetah_v3.py +++ b/gymnasium/envs/mujoco/half_cheetah_v3.py @@ -90,6 +90,7 @@ class HalfCheetahEnv(MuJocoPyEnv, utils.EzPickle): if self.render_mode == "human": self.render() + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return observation, reward, terminated, False, info def _get_obs(self): diff --git a/gymnasium/envs/mujoco/half_cheetah_v4.py b/gymnasium/envs/mujoco/half_cheetah_v4.py index bc835c0ee..4965c66ec 100644 --- a/gymnasium/envs/mujoco/half_cheetah_v4.py +++ b/gymnasium/envs/mujoco/half_cheetah_v4.py @@ -93,6 +93,7 @@ class HalfCheetahEnv(MujocoEnv, utils.EzPickle): if self.render_mode == "human": self.render() + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return observation, reward, terminated, False, info def _get_obs(self): diff --git a/gymnasium/envs/mujoco/half_cheetah_v5.py b/gymnasium/envs/mujoco/half_cheetah_v5.py index 0649f4503..c858a58c7 100644 --- a/gymnasium/envs/mujoco/half_cheetah_v5.py +++ b/gymnasium/envs/mujoco/half_cheetah_v5.py @@ -251,6 +251,7 @@ class HalfCheetahEnv(MujocoEnv, utils.EzPickle): if self.render_mode == "human": self.render() + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return observation, reward, False, False, info def _get_rew(self, x_velocity: float, action): diff --git a/gymnasium/envs/mujoco/hopper.py b/gymnasium/envs/mujoco/hopper.py index 11f4c4080..1385769d0 100644 --- a/gymnasium/envs/mujoco/hopper.py +++ b/gymnasium/envs/mujoco/hopper.py @@ -42,6 +42,7 @@ class HopperEnv(MuJocoPyEnv, utils.EzPickle): if self.render_mode == "human": self.render() + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return ob, reward, terminated, False, {} def _get_obs(self): diff --git a/gymnasium/envs/mujoco/hopper_v3.py b/gymnasium/envs/mujoco/hopper_v3.py index 7d2f94738..05d20c90c 100644 --- a/gymnasium/envs/mujoco/hopper_v3.py +++ b/gymnasium/envs/mujoco/hopper_v3.py @@ -151,6 +151,7 @@ class HopperEnv(MuJocoPyEnv, utils.EzPickle): if self.render_mode == "human": self.render() + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return observation, reward, terminated, False, info def reset_model(self): diff --git a/gymnasium/envs/mujoco/hopper_v4.py b/gymnasium/envs/mujoco/hopper_v4.py index 2d32b9b86..e6819b2f0 100644 --- a/gymnasium/envs/mujoco/hopper_v4.py +++ b/gymnasium/envs/mujoco/hopper_v4.py @@ -152,6 +152,7 @@ class HopperEnv(MujocoEnv, utils.EzPickle): if self.render_mode == "human": self.render() + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return observation, reward, terminated, False, info def reset_model(self): diff --git a/gymnasium/envs/mujoco/hopper_v5.py b/gymnasium/envs/mujoco/hopper_v5.py index 68813d190..fb0eb8a2e 100644 --- a/gymnasium/envs/mujoco/hopper_v5.py +++ b/gymnasium/envs/mujoco/hopper_v5.py @@ -328,6 +328,7 @@ class HopperEnv(MujocoEnv, utils.EzPickle): if self.render_mode == "human": self.render() + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return observation, reward, terminated, False, info def _get_rew(self, x_velocity: float, action): diff --git a/gymnasium/envs/mujoco/humanoid.py b/gymnasium/envs/mujoco/humanoid.py index 59370b211..15be9b636 100644 --- a/gymnasium/envs/mujoco/humanoid.py +++ b/gymnasium/envs/mujoco/humanoid.py @@ -60,6 +60,7 @@ class HumanoidEnv(MuJocoPyEnv, utils.EzPickle): if self.render_mode == "human": self.render() + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return ( self._get_obs(), reward, diff --git a/gymnasium/envs/mujoco/humanoid_v3.py b/gymnasium/envs/mujoco/humanoid_v3.py index 34157b7ae..35e8e28f5 100644 --- a/gymnasium/envs/mujoco/humanoid_v3.py +++ b/gymnasium/envs/mujoco/humanoid_v3.py @@ -174,6 +174,7 @@ class HumanoidEnv(MuJocoPyEnv, utils.EzPickle): if self.render_mode == "human": self.render() + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return observation, reward, terminated, False, info def reset_model(self): diff --git a/gymnasium/envs/mujoco/humanoid_v4.py b/gymnasium/envs/mujoco/humanoid_v4.py index c5a1bc9b7..ee693d3ab 100644 --- a/gymnasium/envs/mujoco/humanoid_v4.py +++ b/gymnasium/envs/mujoco/humanoid_v4.py @@ -161,6 +161,7 @@ class HumanoidEnv(MujocoEnv, utils.EzPickle): if self.render_mode == "human": self.render() + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return observation, reward, terminated, False, info def reset_model(self): diff --git a/gymnasium/envs/mujoco/humanoid_v5.py b/gymnasium/envs/mujoco/humanoid_v5.py index 1834d6d48..6548a20fc 100644 --- a/gymnasium/envs/mujoco/humanoid_v5.py +++ b/gymnasium/envs/mujoco/humanoid_v5.py @@ -513,6 +513,7 @@ class HumanoidEnv(MujocoEnv, utils.EzPickle): if self.render_mode == "human": self.render() + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return observation, reward, terminated, False, info def _get_rew(self, x_velocity: float, action): diff --git a/gymnasium/envs/mujoco/humanoidstandup.py b/gymnasium/envs/mujoco/humanoidstandup.py index 95ff346fa..9a0cd17f9 100644 --- a/gymnasium/envs/mujoco/humanoidstandup.py +++ b/gymnasium/envs/mujoco/humanoidstandup.py @@ -54,6 +54,7 @@ class HumanoidStandupEnv(MuJocoPyEnv, utils.EzPickle): if self.render_mode == "human": self.render() + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return ( self._get_obs(), reward, diff --git a/gymnasium/envs/mujoco/humanoidstandup_v4.py b/gymnasium/envs/mujoco/humanoidstandup_v4.py index fe3cc9f06..727285f48 100644 --- a/gymnasium/envs/mujoco/humanoidstandup_v4.py +++ b/gymnasium/envs/mujoco/humanoidstandup_v4.py @@ -63,6 +63,7 @@ class HumanoidStandupEnv(MujocoEnv, utils.EzPickle): if self.render_mode == "human": self.render() + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return ( self._get_obs(), reward, diff --git a/gymnasium/envs/mujoco/humanoidstandup_v5.py b/gymnasium/envs/mujoco/humanoidstandup_v5.py index 99b35cc50..d8845ae38 100644 --- a/gymnasium/envs/mujoco/humanoidstandup_v5.py +++ b/gymnasium/envs/mujoco/humanoidstandup_v5.py @@ -456,6 +456,7 @@ class HumanoidStandupEnv(MujocoEnv, utils.EzPickle): if self.render_mode == "human": self.render() + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return self._get_obs(), reward, False, False, info def _get_rew(self, pos_after: float, action): diff --git a/gymnasium/envs/mujoco/inverted_double_pendulum.py b/gymnasium/envs/mujoco/inverted_double_pendulum.py index e35a3ae6e..48b058447 100644 --- a/gymnasium/envs/mujoco/inverted_double_pendulum.py +++ b/gymnasium/envs/mujoco/inverted_double_pendulum.py @@ -40,6 +40,7 @@ class InvertedDoublePendulumEnv(MuJocoPyEnv, utils.EzPickle): if self.render_mode == "human": self.render() + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return ob, r, terminated, False, {} def _get_obs(self): diff --git a/gymnasium/envs/mujoco/inverted_double_pendulum_v4.py b/gymnasium/envs/mujoco/inverted_double_pendulum_v4.py index 2a9f9345d..758a0f253 100644 --- a/gymnasium/envs/mujoco/inverted_double_pendulum_v4.py +++ b/gymnasium/envs/mujoco/inverted_double_pendulum_v4.py @@ -46,6 +46,7 @@ class InvertedDoublePendulumEnv(MujocoEnv, utils.EzPickle): terminated = bool(y <= 1) if self.render_mode == "human": self.render() + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return ob, r, terminated, False, {} def _get_obs(self): diff --git a/gymnasium/envs/mujoco/inverted_double_pendulum_v5.py b/gymnasium/envs/mujoco/inverted_double_pendulum_v5.py index bf2835577..336385f80 100644 --- a/gymnasium/envs/mujoco/inverted_double_pendulum_v5.py +++ b/gymnasium/envs/mujoco/inverted_double_pendulum_v5.py @@ -203,6 +203,7 @@ class InvertedDoublePendulumEnv(MujocoEnv, utils.EzPickle): if self.render_mode == "human": self.render() + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return observation, reward, terminated, False, info def _get_rew(self, x, y, terminated): diff --git a/gymnasium/envs/mujoco/inverted_pendulum.py b/gymnasium/envs/mujoco/inverted_pendulum.py index 5ca8b0486..10d309810 100644 --- a/gymnasium/envs/mujoco/inverted_pendulum.py +++ b/gymnasium/envs/mujoco/inverted_pendulum.py @@ -35,6 +35,7 @@ class InvertedPendulumEnv(MuJocoPyEnv, utils.EzPickle): if self.render_mode == "human": self.render() + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return ob, reward, terminated, False, {} def reset_model(self): diff --git a/gymnasium/envs/mujoco/inverted_pendulum_v4.py b/gymnasium/envs/mujoco/inverted_pendulum_v4.py index 72f6a9c88..d3921804f 100644 --- a/gymnasium/envs/mujoco/inverted_pendulum_v4.py +++ b/gymnasium/envs/mujoco/inverted_pendulum_v4.py @@ -40,6 +40,7 @@ class InvertedPendulumEnv(MujocoEnv, utils.EzPickle): terminated = bool(not np.isfinite(ob).all() or (np.abs(ob[1]) > 0.2)) if self.render_mode == "human": self.render() + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return ob, reward, terminated, False, {} def reset_model(self): diff --git a/gymnasium/envs/mujoco/inverted_pendulum_v5.py b/gymnasium/envs/mujoco/inverted_pendulum_v5.py index 20d11d483..6c75137e3 100644 --- a/gymnasium/envs/mujoco/inverted_pendulum_v5.py +++ b/gymnasium/envs/mujoco/inverted_pendulum_v5.py @@ -186,6 +186,7 @@ class InvertedPendulumEnv(MujocoEnv, utils.EzPickle): if self.render_mode == "human": self.render() + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return observation, reward, terminated, False, info def reset_model(self): diff --git a/gymnasium/envs/mujoco/pusher.py b/gymnasium/envs/mujoco/pusher.py index 2c446bdc2..8fcb8f418 100644 --- a/gymnasium/envs/mujoco/pusher.py +++ b/gymnasium/envs/mujoco/pusher.py @@ -36,6 +36,7 @@ class PusherEnv(MuJocoPyEnv, utils.EzPickle): self.render() ob = self._get_obs() + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return ( ob, reward, diff --git a/gymnasium/envs/mujoco/pusher_v4.py b/gymnasium/envs/mujoco/pusher_v4.py index 651c6325c..72d571f29 100644 --- a/gymnasium/envs/mujoco/pusher_v4.py +++ b/gymnasium/envs/mujoco/pusher_v4.py @@ -47,6 +47,7 @@ class PusherEnv(MujocoEnv, utils.EzPickle): self.render() ob = self._get_obs() + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return ( ob, reward, diff --git a/gymnasium/envs/mujoco/pusher_v5.py b/gymnasium/envs/mujoco/pusher_v5.py index 98091e754..5b05f88e3 100644 --- a/gymnasium/envs/mujoco/pusher_v5.py +++ b/gymnasium/envs/mujoco/pusher_v5.py @@ -226,9 +226,10 @@ class PusherEnv(MujocoEnv, utils.EzPickle): observation = self._get_obs() reward, reward_info = self._get_rew(action) info = reward_info + if self.render_mode == "human": self.render() - + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return observation, reward, False, False, info def _get_rew(self, action): diff --git a/gymnasium/envs/mujoco/reacher.py b/gymnasium/envs/mujoco/reacher.py index 342a82a29..73034789a 100644 --- a/gymnasium/envs/mujoco/reacher.py +++ b/gymnasium/envs/mujoco/reacher.py @@ -33,6 +33,7 @@ class ReacherEnv(MuJocoPyEnv, utils.EzPickle): self.render() ob = self._get_obs() + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return ( ob, reward, diff --git a/gymnasium/envs/mujoco/reacher_v4.py b/gymnasium/envs/mujoco/reacher_v4.py index 9bc233bc3..aa2dc204c 100644 --- a/gymnasium/envs/mujoco/reacher_v4.py +++ b/gymnasium/envs/mujoco/reacher_v4.py @@ -41,6 +41,7 @@ class ReacherEnv(MujocoEnv, utils.EzPickle): self.render() ob = self._get_obs() + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return ( ob, reward, diff --git a/gymnasium/envs/mujoco/reacher_v5.py b/gymnasium/envs/mujoco/reacher_v5.py index a40a8564e..883b7808c 100644 --- a/gymnasium/envs/mujoco/reacher_v5.py +++ b/gymnasium/envs/mujoco/reacher_v5.py @@ -206,6 +206,7 @@ class ReacherEnv(MujocoEnv, utils.EzPickle): if self.render_mode == "human": self.render() + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return observation, reward, False, False, info def _get_rew(self, action): diff --git a/gymnasium/envs/mujoco/swimmer.py b/gymnasium/envs/mujoco/swimmer.py index a82bab533..19f8f562e 100644 --- a/gymnasium/envs/mujoco/swimmer.py +++ b/gymnasium/envs/mujoco/swimmer.py @@ -36,6 +36,7 @@ class SwimmerEnv(MuJocoPyEnv, utils.EzPickle): if self.render_mode == "human": self.render() + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return ( ob, reward, diff --git a/gymnasium/envs/mujoco/swimmer_v3.py b/gymnasium/envs/mujoco/swimmer_v3.py index b49df33d1..a5f6b7d29 100644 --- a/gymnasium/envs/mujoco/swimmer_v3.py +++ b/gymnasium/envs/mujoco/swimmer_v3.py @@ -91,7 +91,7 @@ class SwimmerEnv(MuJocoPyEnv, utils.EzPickle): if self.render_mode == "human": self.render() - + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return observation, reward, False, False, info def _get_obs(self): diff --git a/gymnasium/envs/mujoco/swimmer_v4.py b/gymnasium/envs/mujoco/swimmer_v4.py index 3f597b86e..639116bf7 100644 --- a/gymnasium/envs/mujoco/swimmer_v4.py +++ b/gymnasium/envs/mujoco/swimmer_v4.py @@ -85,7 +85,7 @@ class SwimmerEnv(MujocoEnv, utils.EzPickle): if self.render_mode == "human": self.render() - + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return observation, reward, False, False, info def _get_obs(self): diff --git a/gymnasium/envs/mujoco/swimmer_v5.py b/gymnasium/envs/mujoco/swimmer_v5.py index a231cc627..c3b76bed9 100644 --- a/gymnasium/envs/mujoco/swimmer_v5.py +++ b/gymnasium/envs/mujoco/swimmer_v5.py @@ -247,7 +247,7 @@ class SwimmerEnv(MujocoEnv, utils.EzPickle): if self.render_mode == "human": self.render() - + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return observation, reward, False, False, info def _get_rew(self, x_velocity: float, action): diff --git a/gymnasium/envs/mujoco/walker2d.py b/gymnasium/envs/mujoco/walker2d.py index 74d0890df..52010981b 100644 --- a/gymnasium/envs/mujoco/walker2d.py +++ b/gymnasium/envs/mujoco/walker2d.py @@ -36,7 +36,7 @@ class Walker2dEnv(MuJocoPyEnv, utils.EzPickle): if self.render_mode == "human": self.render() - + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return ob, reward, terminated, False, {} def _get_obs(self): diff --git a/gymnasium/envs/mujoco/walker2d_v3.py b/gymnasium/envs/mujoco/walker2d_v3.py index c520e4342..13f74beca 100644 --- a/gymnasium/envs/mujoco/walker2d_v3.py +++ b/gymnasium/envs/mujoco/walker2d_v3.py @@ -140,7 +140,7 @@ class Walker2dEnv(MuJocoPyEnv, utils.EzPickle): if self.render_mode == "human": self.render() - + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return observation, reward, terminated, False, info def reset_model(self): diff --git a/gymnasium/envs/mujoco/walker2d_v4.py b/gymnasium/envs/mujoco/walker2d_v4.py index fade5de41..f4c67d529 100644 --- a/gymnasium/envs/mujoco/walker2d_v4.py +++ b/gymnasium/envs/mujoco/walker2d_v4.py @@ -144,7 +144,7 @@ class Walker2dEnv(MujocoEnv, utils.EzPickle): if self.render_mode == "human": self.render() - + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return observation, reward, terminated, False, info def reset_model(self): diff --git a/gymnasium/envs/mujoco/walker2d_v5.py b/gymnasium/envs/mujoco/walker2d_v5.py index 555ca4944..6329a3115 100644 --- a/gymnasium/envs/mujoco/walker2d_v5.py +++ b/gymnasium/envs/mujoco/walker2d_v5.py @@ -322,7 +322,7 @@ class Walker2dEnv(MujocoEnv, utils.EzPickle): if self.render_mode == "human": self.render() - + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return observation, reward, terminated, False, info def _get_rew(self, x_velocity: float, action): diff --git a/gymnasium/envs/toy_text/blackjack.py b/gymnasium/envs/toy_text/blackjack.py index f0ba2c25f..2792bd77f 100644 --- a/gymnasium/envs/toy_text/blackjack.py +++ b/gymnasium/envs/toy_text/blackjack.py @@ -193,6 +193,7 @@ class BlackjackEnv(gym.Env): if self.render_mode == "human": self.render() + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` return self._get_obs(), reward, terminated, False, {} def _get_obs(self): diff --git a/gymnasium/envs/toy_text/cliffwalking.py b/gymnasium/envs/toy_text/cliffwalking.py index e86da0c5a..76655baba 100644 --- a/gymnasium/envs/toy_text/cliffwalking.py +++ b/gymnasium/envs/toy_text/cliffwalking.py @@ -179,7 +179,8 @@ class CliffWalkingEnv(Env): if self.render_mode == "human": self.render() - return (int(s), r, t, False, {"prob": p}) + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` + return int(s), r, t, False, {"prob": p} def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None): super().reset(seed=seed) diff --git a/gymnasium/envs/toy_text/frozen_lake.py b/gymnasium/envs/toy_text/frozen_lake.py index 7058ae11e..5dd129a70 100644 --- a/gymnasium/envs/toy_text/frozen_lake.py +++ b/gymnasium/envs/toy_text/frozen_lake.py @@ -306,7 +306,8 @@ class FrozenLakeEnv(Env): if self.render_mode == "human": self.render() - return (int(s), r, t, False, {"prob": p}) + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` + return int(s), r, t, False, {"prob": p} def reset( self, diff --git a/gymnasium/envs/toy_text/taxi.py b/gymnasium/envs/toy_text/taxi.py index 1cf4f151d..f354b2b8b 100644 --- a/gymnasium/envs/toy_text/taxi.py +++ b/gymnasium/envs/toy_text/taxi.py @@ -291,7 +291,8 @@ class TaxiEnv(Env): if self.render_mode == "human": self.render() - return (int(s), r, t, False, {"prob": p, "action_mask": self.action_mask(s)}) + # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make` + return int(s), r, t, False, {"prob": p, "action_mask": self.action_mask(s)} def reset( self, diff --git a/gymnasium/vector/vector_env.py b/gymnasium/vector/vector_env.py index 63fe95f64..31cc4fcad 100644 --- a/gymnasium/vector/vector_env.py +++ b/gymnasium/vector/vector_env.py @@ -445,7 +445,7 @@ class VectorObservationWrapper(VectorWrapper): ) -> tuple[ObsType, dict[str, Any]]: """Modifies the observation returned from the environment ``reset`` using the :meth:`observation`.""" observations, infos = self.env.reset(seed=seed, options=options) - return self.observation(observations), infos + return self.observations(observations), infos def step( self, actions: ActType @@ -453,18 +453,18 @@ class VectorObservationWrapper(VectorWrapper): """Modifies the observation returned from the environment ``step`` using the :meth:`observation`.""" observations, rewards, terminations, truncations, infos = self.env.step(actions) return ( - self.observation(observations), + self.observations(observations), rewards, terminations, truncations, infos, ) - def observation(self, observation: ObsType) -> ObsType: + def observations(self, observations: ObsType) -> ObsType: """Defines the vector observation transformation. Args: - observation: A vector observation from the environment + observations: A vector observation from the environment Returns: the transformed observation diff --git a/gymnasium/wrappers/common.py b/gymnasium/wrappers/common.py index 92a974fbe..864a0e364 100644 --- a/gymnasium/wrappers/common.py +++ b/gymnasium/wrappers/common.py @@ -462,7 +462,7 @@ class RecordEpisodeStatistics( Change logs: * v0.15.4 - Initially added - * v1.0.0 - Removed vector environment support for `wrappers.vector.RecordEpisodeStatistics` and add attribute ``time_queue`` + * v1.0.0 - Removed vector environment support (see :class:`gymnasium.wrappers.vector.RecordEpisodeStatistics`) and add attribute ``time_queue`` """ def __init__( diff --git a/gymnasium/wrappers/vector/stateful_observation.py b/gymnasium/wrappers/vector/stateful_observation.py index 8b2808cd4..edda8c8ef 100644 --- a/gymnasium/wrappers/vector/stateful_observation.py +++ b/gymnasium/wrappers/vector/stateful_observation.py @@ -81,7 +81,7 @@ class NormalizeObservation(VectorObservationWrapper, gym.utils.RecordConstructor """Sets the property to freeze/continue the running mean calculation of the observation statistics.""" self._update_running_mean = setting - def observation(self, observations: ObsType) -> ObsType: + def observations(self, observations: ObsType) -> ObsType: """Defines the vector observation normalization function. Args: diff --git a/gymnasium/wrappers/vector/vectorize_observation.py b/gymnasium/wrappers/vector/vectorize_observation.py index c0b8f6331..383263bbd 100644 --- a/gymnasium/wrappers/vector/vectorize_observation.py +++ b/gymnasium/wrappers/vector/vectorize_observation.py @@ -69,7 +69,7 @@ class TransformObservation(VectorObservationWrapper): self.func = func - def observation(self, observations: ObsType) -> ObsType: + def observations(self, observations: ObsType) -> ObsType: """Apply function to the vector observation.""" return self.func(observations) @@ -148,7 +148,7 @@ class VectorizeTransformObservation(VectorObservationWrapper): self.same_out = self.observation_space == self.env.observation_space self.out = create_empty_array(self.single_observation_space, self.num_envs) - def observation(self, observations: ObsType) -> ObsType: + def observations(self, observations: ObsType) -> ObsType: """Iterates over the vector observations applying the single-agent wrapper ``observation`` then concatenates the observations together again.""" if self.same_out: return concatenate(