Update vector wrapper docs and add Env generic to doc (#847)

This commit is contained in:
Mark Towers
2023-12-22 14:48:22 +00:00
committed by GitHub
parent d756522eeb
commit 3b856dec96
59 changed files with 159 additions and 55 deletions

View File

@@ -2,7 +2,7 @@
title: Vector title: Vector
--- ---
# Vector environments # Vectorize
```{toctree} ```{toctree}
:hidden: :hidden:
@@ -19,10 +19,10 @@ vector/utils
``` ```
### Methods ### Methods
```{eval-rst} ```{eval-rst}
.. automethod:: gymnasium.vector.VectorEnv.reset
.. automethod:: gymnasium.vector.VectorEnv.step .. automethod:: gymnasium.vector.VectorEnv.step
.. automethod:: gymnasium.vector.VectorEnv.reset
.. automethod:: gymnasium.vector.VectorEnv.render
.. automethod:: gymnasium.vector.VectorEnv.close .. automethod:: gymnasium.vector.VectorEnv.close
``` ```
@@ -47,11 +47,19 @@ vector/utils
.. autoattribute:: gymnasium.vector.VectorEnv.single_observation_space .. autoattribute:: gymnasium.vector.VectorEnv.single_observation_space
The observation space of an environment copy. The observation space of a sub-environment.
.. autoattribute:: gymnasium.vector.VectorEnv.spec .. autoattribute:: gymnasium.vector.VectorEnv.spec
The ``EnvSpec`` of the environment normally set during :py:meth:`gymnasium.make_vec` The ``EnvSpec`` of the environment normally set during :py:meth:`gymnasium.make_vec`
.. autoattribute:: gymnasium.vector.VectorEnv.render_mode
The render mode of the environment which should follow similar specifications to `Env.render_mode`.
.. autoattribute:: gymnasium.vector.VectorEnv.closed
If the vector environment has been closed already.
``` ```
### Additional Methods ### Additional Methods

View File

@@ -2,19 +2,19 @@
title: Vector Wrappers title: Vector Wrappers
--- ---
# Vector Wrappers # Wrappers
```{eval-rst} ```{eval-rst}
.. autoclass:: gymnasium.vector.VectorWrapper .. autoclass:: gymnasium.vector.VectorWrapper
.. automethod:: gymnasium.vector.VectorWrapper.step .. automethod:: gymnasium.vector.VectorWrapper.step
.. automethod:: gymnasium.vector.VectorWrapper.reset .. automethod:: gymnasium.vector.VectorWrapper.reset
.. automethod:: gymnasium.vector.VectorWrapper.render
.. automethod:: gymnasium.vector.VectorWrapper.close .. automethod:: gymnasium.vector.VectorWrapper.close
.. autoclass:: gymnasium.vector.VectorObservationWrapper .. autoclass:: gymnasium.vector.VectorObservationWrapper
.. automethod:: gymnasium.vector.VectorObservationWrapper.vector_observation .. automethod:: gymnasium.vector.VectorObservationWrapper.observations
.. automethod:: gymnasium.vector.VectorObservationWrapper.single_observation
.. autoclass:: gymnasium.vector.VectorActionWrapper .. autoclass:: gymnasium.vector.VectorActionWrapper
@@ -24,3 +24,57 @@ title: Vector Wrappers
.. automethod:: gymnasium.vector.VectorRewardWrapper.rewards .. automethod:: gymnasium.vector.VectorRewardWrapper.rewards
``` ```
## Vector Only wrappers
```{eval-rst}
.. autoclass:: gymnasium.wrappers.vector.DictInfoToList
.. autoclass:: gymnasium.wrappers.vector.VectorizeTransformObservation
.. autoclass:: gymnasium.wrappers.vector.VectorizeTransformAction
.. autoclass:: gymnasium.wrappers.vector.VectorizeTransformReward
```
## Vectorized Common wrappers
```{eval-rst}
.. autoclass:: gymnasium.wrappers.vector.RecordEpisodeStatistics
```
## Implemented Observation wrappers
```{eval-rst}
.. autoclass:: gymnasium.wrappers.vector.TransformObservation
.. autoclass:: gymnasium.wrappers.vector.FilterObservation
.. autoclass:: gymnasium.wrappers.vector.FlattenObservation
.. autoclass:: gymnasium.wrappers.vector.GrayscaleObservation
.. autoclass:: gymnasium.wrappers.vector.ResizeObservation
.. autoclass:: gymnasium.wrappers.vector.ReshapeObservation
.. autoclass:: gymnasium.wrappers.vector.RescaleObservation
.. autoclass:: gymnasium.wrappers.vector.DtypeObservation
.. autoclass:: gymnasium.wrappers.vector.NormalizeObservation
```
## Implemented Action wrappers
```{eval-rst}
.. autoclass:: gymnasium.wrappers.vector.TransformAction
.. autoclass:: gymnasium.wrappers.vector.ClipAction
.. autoclass:: gymnasium.wrappers.vector.RescaleAction
```
## Implemented Reward wrappers
```{eval-rst}
.. autoclass:: gymnasium.wrappers.vector.TransformReward
.. autoclass:: gymnasium.wrappers.vector.ClipReward
.. autoclass:: gymnasium.wrappers.vector.NormalizeReward
```
## Implemented Data Conversion wrappers
```{eval-rst}
.. autoclass:: gymnasium.wrappers.vector.JaxToNumpy
.. autoclass:: gymnasium.wrappers.vector.JaxToTorch
.. autoclass:: gymnasium.wrappers.vector.NumpyToTorch
```

View File

@@ -78,7 +78,6 @@ wrapper in the page on the wrapper type
- Applies a function to the ``observation`` received from the environment's ``reset`` and ``step`` that is passed back to the user. - Applies a function to the ``observation`` received from the environment's ``reset`` and ``step`` that is passed back to the user.
* - :class:`TransformReward` * - :class:`TransformReward`
- Applies a function to the ``reward`` received from the environment's ``step``. - Applies a function to the ``reward`` received from the environment's ``step``.
``` ```
## Vector only Wrappers ## Vector only Wrappers

View File

@@ -1,19 +0,0 @@
---
title: Vector Wrappers
---
# Vector wrappers
## Vector only wrappers
```{eval-rst}
.. autoclass:: gymnasium.wrappers.vector.DictInfoToList
```
## Vectorize Transform Wrappers to Vector Wrappers
```{eval-rst}
.. autoclass:: gymnasium.wrappers.vector.VectorizeTransformObservation
.. autoclass:: gymnasium.wrappers.vector.VectorizeTransformAction
.. autoclass:: gymnasium.wrappers.vector.VectorizeTransformReward
```

View File

@@ -42,10 +42,16 @@ class Env(Generic[ObsType, ActType]):
- :attr:`np_random` - The random number generator for the environment. This is automatically assigned during - :attr:`np_random` - The random number generator for the environment. This is automatically assigned during
``super().reset(seed=seed)`` and when accessing :attr:`np_random`. ``super().reset(seed=seed)`` and when accessing :attr:`np_random`.
.. seealso:: For modifying or extending environments use the :py:class:`gymnasium.Wrapper` class .. seealso:: For modifying or extending environments use the :class:`gymnasium.Wrapper` class
Note: Note:
To get reproducible sampling of actions, a seed can be set with ``env.action_space.seed(123)``. To get reproducible sampling of actions, a seed can be set with ``env.action_space.seed(123)``.
Note:
For strict type checking (e.g., mypy or pyright), :class:`Env` is a generic class with two parameterized types: ``ObsType`` and ``ActType``.
The ``ObsType`` and ``ActType`` are the expected types of the observations and actions used in :meth:`reset` and :meth:`step`.
The environment's :attr:`observation_space` and :attr:`action_space` should have type ``Space[ObsType]`` and ``Space[ActType]``,
see a space's implementation to find its parameterized type.
""" """
# Set this in SOME subclasses # Set this in SOME subclasses
@@ -473,7 +479,11 @@ class ObservationWrapper(Wrapper[WrapperObsType, ActType, ObsType, ActType]):
""" """
def __init__(self, env: Env[ObsType, ActType]): def __init__(self, env: Env[ObsType, ActType]):
"""Constructor for the observation wrapper.""" """Constructor for the observation wrapper.
Args:
env: Environment to be wrapped.
"""
Wrapper.__init__(self, env) Wrapper.__init__(self, env)
def reset( def reset(
@@ -513,7 +523,11 @@ class RewardWrapper(Wrapper[ObsType, ActType, ObsType, ActType]):
""" """
def __init__(self, env: Env[ObsType, ActType]): def __init__(self, env: Env[ObsType, ActType]):
"""Constructor for the Reward wrapper.""" """Constructor for the Reward wrapper.
Args:
env: Environment to be wrapped.
"""
Wrapper.__init__(self, env) Wrapper.__init__(self, env)
def step( def step(
@@ -536,20 +550,25 @@ class RewardWrapper(Wrapper[ObsType, ActType, ObsType, ActType]):
class ActionWrapper(Wrapper[ObsType, WrapperActType, ObsType, ActType]): class ActionWrapper(Wrapper[ObsType, WrapperActType, ObsType, ActType]):
"""Superclass of wrappers that can modify the action before :meth:`env.step`. """Superclass of wrappers that can modify the action before :meth:`step`.
If you would like to apply a function to the action before passing it to the base environment, If you would like to apply a function to the action before passing it to the base environment,
you can simply inherit from :class:`ActionWrapper` and overwrite the method :meth:`action` to implement you can simply inherit from :class:`ActionWrapper` and overwrite the method :meth:`action` to implement
that transformation. The transformation defined in that method must take values in the base environment's that transformation. The transformation defined in that method must take values in the base environment's
action space. However, its domain might differ from the original action space. action space. However, its domain might differ from the original action space.
In that case, you need to specify the new action space of the wrapper by setting :attr:`self.action_space` in In that case, you need to specify the new action space of the wrapper by setting :attr:`action_space` in
the :meth:`__init__` method of your wrapper. the :meth:`__init__` method of your wrapper.
Among others, Gymnasium provides the action wrappers :class:`ClipAction` and :class:`RescaleAction` for clipping and rescaling actions. Among others, Gymnasium provides the action wrappers :class:`gymnasium.wrappers.ClipAction` and
:class:`gymnasium.wrappers.RescaleAction` for clipping and rescaling actions.
""" """
def __init__(self, env: Env[ObsType, ActType]): def __init__(self, env: Env[ObsType, ActType]):
"""Constructor for the action wrapper.""" """Constructor for the action wrapper.
Args:
env: Environment to be wrapped.
"""
Wrapper.__init__(self, env) Wrapper.__init__(self, env)
def step( def step(
@@ -559,7 +578,7 @@ class ActionWrapper(Wrapper[ObsType, WrapperActType, ObsType, ActType]):
return self.env.step(self.action(action)) return self.env.step(self.action(action))
def action(self, action: WrapperActType) -> ActType: def action(self, action: WrapperActType) -> ActType:
"""Returns a modified action before :meth:`env.step` is called. """Returns a modified action before :meth:`step` is called.
Args: Args:
action: The original :meth:`step` actions action: The original :meth:`step` actions

View File

@@ -606,6 +606,7 @@ class BipedalWalker(gym.Env, EzPickle):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return np.array(state, dtype=np.float32), reward, terminated, False, {} return np.array(state, dtype=np.float32), reward, terminated, False, {}
def render(self): def render(self):

View File

@@ -673,6 +673,7 @@ class LunarLander(gym.Env, EzPickle):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return np.array(state, dtype=np.float32), reward, terminated, False, {} return np.array(state, dtype=np.float32), reward, terminated, False, {}
def render(self): def render(self):

View File

@@ -225,7 +225,8 @@ class AcrobotEnv(Env):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
return (self._get_ob(), reward, terminated, False, {}) # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return self._get_ob(), reward, terminated, False, {}
def _get_ob(self): def _get_ob(self):
s = self.state s = self.state

View File

@@ -204,6 +204,7 @@ class CartPoleEnv(gym.Env[np.ndarray, Union[int, np.ndarray]]):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return np.array(self.state, dtype=np.float32), reward, terminated, False, {} return np.array(self.state, dtype=np.float32), reward, terminated, False, {}
def reset( def reset(

View File

@@ -175,6 +175,7 @@ class Continuous_MountainCarEnv(gym.Env):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return self.state, reward, terminated, False, {} return self.state, reward, terminated, False, {}
def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None): def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):

View File

@@ -146,6 +146,7 @@ class MountainCarEnv(gym.Env):
self.state = (position, velocity) self.state = (position, velocity)
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return np.array(self.state, dtype=np.float32), reward, terminated, False, {} return np.array(self.state, dtype=np.float32), reward, terminated, False, {}
def reset( def reset(

View File

@@ -141,6 +141,7 @@ class PendulumEnv(gym.Env):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return self._get_obs(), -costs, False, False, {} return self._get_obs(), -costs, False, False, {}
def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None): def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):

View File

@@ -45,6 +45,7 @@ class AntEnv(MuJocoPyEnv, utils.EzPickle):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return ( return (
ob, ob,
reward, reward,

View File

@@ -147,6 +147,7 @@ class AntEnv(MuJocoPyEnv, utils.EzPickle):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, terminated, False, info return observation, reward, terminated, False, info
def _get_obs(self): def _get_obs(self):

View File

@@ -159,6 +159,7 @@ class AntEnv(MujocoEnv, utils.EzPickle):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, terminated, False, info return observation, reward, terminated, False, info
def _get_obs(self): def _get_obs(self):

View File

@@ -390,6 +390,7 @@ class AntEnv(MujocoEnv, utils.EzPickle):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, terminated, False, info return observation, reward, terminated, False, info
def _get_rew(self, x_velocity: float, action): def _get_rew(self, x_velocity: float, action):

View File

@@ -35,6 +35,7 @@ class HalfCheetahEnv(MuJocoPyEnv, utils.EzPickle):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return ( return (
ob, ob,
reward, reward,

View File

@@ -90,6 +90,7 @@ class HalfCheetahEnv(MuJocoPyEnv, utils.EzPickle):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, terminated, False, info return observation, reward, terminated, False, info
def _get_obs(self): def _get_obs(self):

View File

@@ -93,6 +93,7 @@ class HalfCheetahEnv(MujocoEnv, utils.EzPickle):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, terminated, False, info return observation, reward, terminated, False, info
def _get_obs(self): def _get_obs(self):

View File

@@ -251,6 +251,7 @@ class HalfCheetahEnv(MujocoEnv, utils.EzPickle):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, False, False, info return observation, reward, False, False, info
def _get_rew(self, x_velocity: float, action): def _get_rew(self, x_velocity: float, action):

View File

@@ -42,6 +42,7 @@ class HopperEnv(MuJocoPyEnv, utils.EzPickle):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return ob, reward, terminated, False, {} return ob, reward, terminated, False, {}
def _get_obs(self): def _get_obs(self):

View File

@@ -151,6 +151,7 @@ class HopperEnv(MuJocoPyEnv, utils.EzPickle):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, terminated, False, info return observation, reward, terminated, False, info
def reset_model(self): def reset_model(self):

View File

@@ -152,6 +152,7 @@ class HopperEnv(MujocoEnv, utils.EzPickle):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, terminated, False, info return observation, reward, terminated, False, info
def reset_model(self): def reset_model(self):

View File

@@ -328,6 +328,7 @@ class HopperEnv(MujocoEnv, utils.EzPickle):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, terminated, False, info return observation, reward, terminated, False, info
def _get_rew(self, x_velocity: float, action): def _get_rew(self, x_velocity: float, action):

View File

@@ -60,6 +60,7 @@ class HumanoidEnv(MuJocoPyEnv, utils.EzPickle):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return ( return (
self._get_obs(), self._get_obs(),
reward, reward,

View File

@@ -174,6 +174,7 @@ class HumanoidEnv(MuJocoPyEnv, utils.EzPickle):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, terminated, False, info return observation, reward, terminated, False, info
def reset_model(self): def reset_model(self):

View File

@@ -161,6 +161,7 @@ class HumanoidEnv(MujocoEnv, utils.EzPickle):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, terminated, False, info return observation, reward, terminated, False, info
def reset_model(self): def reset_model(self):

View File

@@ -513,6 +513,7 @@ class HumanoidEnv(MujocoEnv, utils.EzPickle):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, terminated, False, info return observation, reward, terminated, False, info
def _get_rew(self, x_velocity: float, action): def _get_rew(self, x_velocity: float, action):

View File

@@ -54,6 +54,7 @@ class HumanoidStandupEnv(MuJocoPyEnv, utils.EzPickle):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return ( return (
self._get_obs(), self._get_obs(),
reward, reward,

View File

@@ -63,6 +63,7 @@ class HumanoidStandupEnv(MujocoEnv, utils.EzPickle):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return ( return (
self._get_obs(), self._get_obs(),
reward, reward,

View File

@@ -456,6 +456,7 @@ class HumanoidStandupEnv(MujocoEnv, utils.EzPickle):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return self._get_obs(), reward, False, False, info return self._get_obs(), reward, False, False, info
def _get_rew(self, pos_after: float, action): def _get_rew(self, pos_after: float, action):

View File

@@ -40,6 +40,7 @@ class InvertedDoublePendulumEnv(MuJocoPyEnv, utils.EzPickle):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return ob, r, terminated, False, {} return ob, r, terminated, False, {}
def _get_obs(self): def _get_obs(self):

View File

@@ -46,6 +46,7 @@ class InvertedDoublePendulumEnv(MujocoEnv, utils.EzPickle):
terminated = bool(y <= 1) terminated = bool(y <= 1)
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return ob, r, terminated, False, {} return ob, r, terminated, False, {}
def _get_obs(self): def _get_obs(self):

View File

@@ -203,6 +203,7 @@ class InvertedDoublePendulumEnv(MujocoEnv, utils.EzPickle):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, terminated, False, info return observation, reward, terminated, False, info
def _get_rew(self, x, y, terminated): def _get_rew(self, x, y, terminated):

View File

@@ -35,6 +35,7 @@ class InvertedPendulumEnv(MuJocoPyEnv, utils.EzPickle):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return ob, reward, terminated, False, {} return ob, reward, terminated, False, {}
def reset_model(self): def reset_model(self):

View File

@@ -40,6 +40,7 @@ class InvertedPendulumEnv(MujocoEnv, utils.EzPickle):
terminated = bool(not np.isfinite(ob).all() or (np.abs(ob[1]) > 0.2)) terminated = bool(not np.isfinite(ob).all() or (np.abs(ob[1]) > 0.2))
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return ob, reward, terminated, False, {} return ob, reward, terminated, False, {}
def reset_model(self): def reset_model(self):

View File

@@ -186,6 +186,7 @@ class InvertedPendulumEnv(MujocoEnv, utils.EzPickle):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, terminated, False, info return observation, reward, terminated, False, info
def reset_model(self): def reset_model(self):

View File

@@ -36,6 +36,7 @@ class PusherEnv(MuJocoPyEnv, utils.EzPickle):
self.render() self.render()
ob = self._get_obs() ob = self._get_obs()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return ( return (
ob, ob,
reward, reward,

View File

@@ -47,6 +47,7 @@ class PusherEnv(MujocoEnv, utils.EzPickle):
self.render() self.render()
ob = self._get_obs() ob = self._get_obs()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return ( return (
ob, ob,
reward, reward,

View File

@@ -226,9 +226,10 @@ class PusherEnv(MujocoEnv, utils.EzPickle):
observation = self._get_obs() observation = self._get_obs()
reward, reward_info = self._get_rew(action) reward, reward_info = self._get_rew(action)
info = reward_info info = reward_info
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, False, False, info return observation, reward, False, False, info
def _get_rew(self, action): def _get_rew(self, action):

View File

@@ -33,6 +33,7 @@ class ReacherEnv(MuJocoPyEnv, utils.EzPickle):
self.render() self.render()
ob = self._get_obs() ob = self._get_obs()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return ( return (
ob, ob,
reward, reward,

View File

@@ -41,6 +41,7 @@ class ReacherEnv(MujocoEnv, utils.EzPickle):
self.render() self.render()
ob = self._get_obs() ob = self._get_obs()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return ( return (
ob, ob,
reward, reward,

View File

@@ -206,6 +206,7 @@ class ReacherEnv(MujocoEnv, utils.EzPickle):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, False, False, info return observation, reward, False, False, info
def _get_rew(self, action): def _get_rew(self, action):

View File

@@ -36,6 +36,7 @@ class SwimmerEnv(MuJocoPyEnv, utils.EzPickle):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return ( return (
ob, ob,
reward, reward,

View File

@@ -91,7 +91,7 @@ class SwimmerEnv(MuJocoPyEnv, utils.EzPickle):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, False, False, info return observation, reward, False, False, info
def _get_obs(self): def _get_obs(self):

View File

@@ -85,7 +85,7 @@ class SwimmerEnv(MujocoEnv, utils.EzPickle):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, False, False, info return observation, reward, False, False, info
def _get_obs(self): def _get_obs(self):

View File

@@ -247,7 +247,7 @@ class SwimmerEnv(MujocoEnv, utils.EzPickle):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, False, False, info return observation, reward, False, False, info
def _get_rew(self, x_velocity: float, action): def _get_rew(self, x_velocity: float, action):

View File

@@ -36,7 +36,7 @@ class Walker2dEnv(MuJocoPyEnv, utils.EzPickle):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return ob, reward, terminated, False, {} return ob, reward, terminated, False, {}
def _get_obs(self): def _get_obs(self):

View File

@@ -140,7 +140,7 @@ class Walker2dEnv(MuJocoPyEnv, utils.EzPickle):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, terminated, False, info return observation, reward, terminated, False, info
def reset_model(self): def reset_model(self):

View File

@@ -144,7 +144,7 @@ class Walker2dEnv(MujocoEnv, utils.EzPickle):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, terminated, False, info return observation, reward, terminated, False, info
def reset_model(self): def reset_model(self):

View File

@@ -322,7 +322,7 @@ class Walker2dEnv(MujocoEnv, utils.EzPickle):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, terminated, False, info return observation, reward, terminated, False, info
def _get_rew(self, x_velocity: float, action): def _get_rew(self, x_velocity: float, action):

View File

@@ -193,6 +193,7 @@ class BlackjackEnv(gym.Env):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return self._get_obs(), reward, terminated, False, {} return self._get_obs(), reward, terminated, False, {}
def _get_obs(self): def _get_obs(self):

View File

@@ -179,7 +179,8 @@ class CliffWalkingEnv(Env):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
return (int(s), r, t, False, {"prob": p}) # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return int(s), r, t, False, {"prob": p}
def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None): def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
super().reset(seed=seed) super().reset(seed=seed)

View File

@@ -306,7 +306,8 @@ class FrozenLakeEnv(Env):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
return (int(s), r, t, False, {"prob": p}) # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return int(s), r, t, False, {"prob": p}
def reset( def reset(
self, self,

View File

@@ -291,7 +291,8 @@ class TaxiEnv(Env):
if self.render_mode == "human": if self.render_mode == "human":
self.render() self.render()
return (int(s), r, t, False, {"prob": p, "action_mask": self.action_mask(s)}) # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return int(s), r, t, False, {"prob": p, "action_mask": self.action_mask(s)}
def reset( def reset(
self, self,

View File

@@ -445,7 +445,7 @@ class VectorObservationWrapper(VectorWrapper):
) -> tuple[ObsType, dict[str, Any]]: ) -> tuple[ObsType, dict[str, Any]]:
"""Modifies the observation returned from the environment ``reset`` using the :meth:`observation`.""" """Modifies the observation returned from the environment ``reset`` using the :meth:`observation`."""
observations, infos = self.env.reset(seed=seed, options=options) observations, infos = self.env.reset(seed=seed, options=options)
return self.observation(observations), infos return self.observations(observations), infos
def step( def step(
self, actions: ActType self, actions: ActType
@@ -453,18 +453,18 @@ class VectorObservationWrapper(VectorWrapper):
"""Modifies the observation returned from the environment ``step`` using the :meth:`observation`.""" """Modifies the observation returned from the environment ``step`` using the :meth:`observation`."""
observations, rewards, terminations, truncations, infos = self.env.step(actions) observations, rewards, terminations, truncations, infos = self.env.step(actions)
return ( return (
self.observation(observations), self.observations(observations),
rewards, rewards,
terminations, terminations,
truncations, truncations,
infos, infos,
) )
def observation(self, observation: ObsType) -> ObsType: def observations(self, observations: ObsType) -> ObsType:
"""Defines the vector observation transformation. """Defines the vector observation transformation.
Args: Args:
observation: A vector observation from the environment observations: A vector observation from the environment
Returns: Returns:
the transformed observation the transformed observation

View File

@@ -462,7 +462,7 @@ class RecordEpisodeStatistics(
Change logs: Change logs:
* v0.15.4 - Initially added * v0.15.4 - Initially added
* v1.0.0 - Removed vector environment support for `wrappers.vector.RecordEpisodeStatistics` and add attribute ``time_queue`` * v1.0.0 - Removed vector environment support (see :class:`gymnasium.wrappers.vector.RecordEpisodeStatistics`) and add attribute ``time_queue``
""" """
def __init__( def __init__(

View File

@@ -81,7 +81,7 @@ class NormalizeObservation(VectorObservationWrapper, gym.utils.RecordConstructor
"""Sets the property to freeze/continue the running mean calculation of the observation statistics.""" """Sets the property to freeze/continue the running mean calculation of the observation statistics."""
self._update_running_mean = setting self._update_running_mean = setting
def observation(self, observations: ObsType) -> ObsType: def observations(self, observations: ObsType) -> ObsType:
"""Defines the vector observation normalization function. """Defines the vector observation normalization function.
Args: Args:

View File

@@ -69,7 +69,7 @@ class TransformObservation(VectorObservationWrapper):
self.func = func self.func = func
def observation(self, observations: ObsType) -> ObsType: def observations(self, observations: ObsType) -> ObsType:
"""Apply function to the vector observation.""" """Apply function to the vector observation."""
return self.func(observations) return self.func(observations)
@@ -148,7 +148,7 @@ class VectorizeTransformObservation(VectorObservationWrapper):
self.same_out = self.observation_space == self.env.observation_space self.same_out = self.observation_space == self.env.observation_space
self.out = create_empty_array(self.single_observation_space, self.num_envs) self.out = create_empty_array(self.single_observation_space, self.num_envs)
def observation(self, observations: ObsType) -> ObsType: def observations(self, observations: ObsType) -> ObsType:
"""Iterates over the vector observations applying the single-agent wrapper ``observation`` then concatenates the observations together again.""" """Iterates over the vector observations applying the single-agent wrapper ``observation`` then concatenates the observations together again."""
if self.same_out: if self.same_out:
return concatenate( return concatenate(