Update vector wrapper docs and add Env generic to doc (#847)

This commit is contained in:
Mark Towers
2023-12-22 14:48:22 +00:00
committed by GitHub
parent d756522eeb
commit 3b856dec96
59 changed files with 159 additions and 55 deletions

View File

@@ -2,7 +2,7 @@
title: Vector
---
# Vector environments
# Vectorize
```{toctree}
:hidden:
@@ -19,10 +19,10 @@ vector/utils
```
### Methods
```{eval-rst}
.. automethod:: gymnasium.vector.VectorEnv.reset
.. automethod:: gymnasium.vector.VectorEnv.step
.. automethod:: gymnasium.vector.VectorEnv.reset
.. automethod:: gymnasium.vector.VectorEnv.render
.. automethod:: gymnasium.vector.VectorEnv.close
```
@@ -47,11 +47,19 @@ vector/utils
.. autoattribute:: gymnasium.vector.VectorEnv.single_observation_space
The observation space of an environment copy.
The observation space of a sub-environment.
.. autoattribute:: gymnasium.vector.VectorEnv.spec
The ``EnvSpec`` of the environment normally set during :py:meth:`gymnasium.make_vec`
.. autoattribute:: gymnasium.vector.VectorEnv.render_mode
The render mode of the environment which should follow similar specifications to `Env.render_mode`.
.. autoattribute:: gymnasium.vector.VectorEnv.closed
If the vector environment has been closed already.
```
### Additional Methods

View File

@@ -2,19 +2,19 @@
title: Vector Wrappers
---
# Vector Wrappers
# Wrappers
```{eval-rst}
.. autoclass:: gymnasium.vector.VectorWrapper
.. automethod:: gymnasium.vector.VectorWrapper.step
.. automethod:: gymnasium.vector.VectorWrapper.reset
.. automethod:: gymnasium.vector.VectorWrapper.render
.. automethod:: gymnasium.vector.VectorWrapper.close
.. autoclass:: gymnasium.vector.VectorObservationWrapper
.. automethod:: gymnasium.vector.VectorObservationWrapper.vector_observation
.. automethod:: gymnasium.vector.VectorObservationWrapper.single_observation
.. automethod:: gymnasium.vector.VectorObservationWrapper.observations
.. autoclass:: gymnasium.vector.VectorActionWrapper
@@ -24,3 +24,57 @@ title: Vector Wrappers
.. automethod:: gymnasium.vector.VectorRewardWrapper.rewards
```
## Vector Only wrappers
```{eval-rst}
.. autoclass:: gymnasium.wrappers.vector.DictInfoToList
.. autoclass:: gymnasium.wrappers.vector.VectorizeTransformObservation
.. autoclass:: gymnasium.wrappers.vector.VectorizeTransformAction
.. autoclass:: gymnasium.wrappers.vector.VectorizeTransformReward
```
## Vectorized Common wrappers
```{eval-rst}
.. autoclass:: gymnasium.wrappers.vector.RecordEpisodeStatistics
```
## Implemented Observation wrappers
```{eval-rst}
.. autoclass:: gymnasium.wrappers.vector.TransformObservation
.. autoclass:: gymnasium.wrappers.vector.FilterObservation
.. autoclass:: gymnasium.wrappers.vector.FlattenObservation
.. autoclass:: gymnasium.wrappers.vector.GrayscaleObservation
.. autoclass:: gymnasium.wrappers.vector.ResizeObservation
.. autoclass:: gymnasium.wrappers.vector.ReshapeObservation
.. autoclass:: gymnasium.wrappers.vector.RescaleObservation
.. autoclass:: gymnasium.wrappers.vector.DtypeObservation
.. autoclass:: gymnasium.wrappers.vector.NormalizeObservation
```
## Implemented Action wrappers
```{eval-rst}
.. autoclass:: gymnasium.wrappers.vector.TransformAction
.. autoclass:: gymnasium.wrappers.vector.ClipAction
.. autoclass:: gymnasium.wrappers.vector.RescaleAction
```
## Implemented Reward wrappers
```{eval-rst}
.. autoclass:: gymnasium.wrappers.vector.TransformReward
.. autoclass:: gymnasium.wrappers.vector.ClipReward
.. autoclass:: gymnasium.wrappers.vector.NormalizeReward
```
## Implemented Data Conversion wrappers
```{eval-rst}
.. autoclass:: gymnasium.wrappers.vector.JaxToNumpy
.. autoclass:: gymnasium.wrappers.vector.JaxToTorch
.. autoclass:: gymnasium.wrappers.vector.NumpyToTorch
```

View File

@@ -78,7 +78,6 @@ wrapper in the page on the wrapper type
- Applies a function to the ``observation`` received from the environment's ``reset`` and ``step`` that is passed back to the user.
* - :class:`TransformReward`
- Applies a function to the ``reward`` received from the environment's ``step``.
```
## Vector only Wrappers

View File

@@ -1,19 +0,0 @@
---
title: Vector Wrappers
---
# Vector wrappers
## Vector only wrappers
```{eval-rst}
.. autoclass:: gymnasium.wrappers.vector.DictInfoToList
```
## Vectorize Transform Wrappers to Vector Wrappers
```{eval-rst}
.. autoclass:: gymnasium.wrappers.vector.VectorizeTransformObservation
.. autoclass:: gymnasium.wrappers.vector.VectorizeTransformAction
.. autoclass:: gymnasium.wrappers.vector.VectorizeTransformReward
```

View File

@@ -42,10 +42,16 @@ class Env(Generic[ObsType, ActType]):
- :attr:`np_random` - The random number generator for the environment. This is automatically assigned during
``super().reset(seed=seed)`` and when accessing :attr:`np_random`.
.. seealso:: For modifying or extending environments use the :py:class:`gymnasium.Wrapper` class
.. seealso:: For modifying or extending environments use the :class:`gymnasium.Wrapper` class
Note:
To get reproducible sampling of actions, a seed can be set with ``env.action_space.seed(123)``.
Note:
For strict type checking (e.g., mypy or pyright), :class:`Env` is a generic class with two parameterized types: ``ObsType`` and ``ActType``.
The ``ObsType`` and ``ActType`` are the expected types of the observations and actions used in :meth:`reset` and :meth:`step`.
The environment's :attr:`observation_space` and :attr:`action_space` should have type ``Space[ObsType]`` and ``Space[ActType]``,
see a space's implementation to find its parameterized type.
"""
# Set this in SOME subclasses
@@ -473,7 +479,11 @@ class ObservationWrapper(Wrapper[WrapperObsType, ActType, ObsType, ActType]):
"""
def __init__(self, env: Env[ObsType, ActType]):
"""Constructor for the observation wrapper."""
"""Constructor for the observation wrapper.
Args:
env: Environment to be wrapped.
"""
Wrapper.__init__(self, env)
def reset(
@@ -513,7 +523,11 @@ class RewardWrapper(Wrapper[ObsType, ActType, ObsType, ActType]):
"""
def __init__(self, env: Env[ObsType, ActType]):
"""Constructor for the Reward wrapper."""
"""Constructor for the Reward wrapper.
Args:
env: Environment to be wrapped.
"""
Wrapper.__init__(self, env)
def step(
@@ -536,20 +550,25 @@ class RewardWrapper(Wrapper[ObsType, ActType, ObsType, ActType]):
class ActionWrapper(Wrapper[ObsType, WrapperActType, ObsType, ActType]):
"""Superclass of wrappers that can modify the action before :meth:`env.step`.
"""Superclass of wrappers that can modify the action before :meth:`step`.
If you would like to apply a function to the action before passing it to the base environment,
you can simply inherit from :class:`ActionWrapper` and overwrite the method :meth:`action` to implement
that transformation. The transformation defined in that method must take values in the base environment's
action space. However, its domain might differ from the original action space.
In that case, you need to specify the new action space of the wrapper by setting :attr:`self.action_space` in
In that case, you need to specify the new action space of the wrapper by setting :attr:`action_space` in
the :meth:`__init__` method of your wrapper.
Among others, Gymnasium provides the action wrappers :class:`ClipAction` and :class:`RescaleAction` for clipping and rescaling actions.
Among others, Gymnasium provides the action wrappers :class:`gymnasium.wrappers.ClipAction` and
:class:`gymnasium.wrappers.RescaleAction` for clipping and rescaling actions.
"""
def __init__(self, env: Env[ObsType, ActType]):
"""Constructor for the action wrapper."""
"""Constructor for the action wrapper.
Args:
env: Environment to be wrapped.
"""
Wrapper.__init__(self, env)
def step(
@@ -559,7 +578,7 @@ class ActionWrapper(Wrapper[ObsType, WrapperActType, ObsType, ActType]):
return self.env.step(self.action(action))
def action(self, action: WrapperActType) -> ActType:
"""Returns a modified action before :meth:`env.step` is called.
"""Returns a modified action before :meth:`step` is called.
Args:
action: The original :meth:`step` actions

View File

@@ -606,6 +606,7 @@ class BipedalWalker(gym.Env, EzPickle):
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return np.array(state, dtype=np.float32), reward, terminated, False, {}
def render(self):

View File

@@ -673,6 +673,7 @@ class LunarLander(gym.Env, EzPickle):
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return np.array(state, dtype=np.float32), reward, terminated, False, {}
def render(self):

View File

@@ -225,7 +225,8 @@ class AcrobotEnv(Env):
if self.render_mode == "human":
self.render()
return (self._get_ob(), reward, terminated, False, {})
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return self._get_ob(), reward, terminated, False, {}
def _get_ob(self):
s = self.state

View File

@@ -204,6 +204,7 @@ class CartPoleEnv(gym.Env[np.ndarray, Union[int, np.ndarray]]):
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return np.array(self.state, dtype=np.float32), reward, terminated, False, {}
def reset(

View File

@@ -175,6 +175,7 @@ class Continuous_MountainCarEnv(gym.Env):
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return self.state, reward, terminated, False, {}
def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):

View File

@@ -146,6 +146,7 @@ class MountainCarEnv(gym.Env):
self.state = (position, velocity)
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return np.array(self.state, dtype=np.float32), reward, terminated, False, {}
def reset(

View File

@@ -141,6 +141,7 @@ class PendulumEnv(gym.Env):
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return self._get_obs(), -costs, False, False, {}
def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):

View File

@@ -45,6 +45,7 @@ class AntEnv(MuJocoPyEnv, utils.EzPickle):
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return (
ob,
reward,

View File

@@ -147,6 +147,7 @@ class AntEnv(MuJocoPyEnv, utils.EzPickle):
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, terminated, False, info
def _get_obs(self):

View File

@@ -159,6 +159,7 @@ class AntEnv(MujocoEnv, utils.EzPickle):
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, terminated, False, info
def _get_obs(self):

View File

@@ -390,6 +390,7 @@ class AntEnv(MujocoEnv, utils.EzPickle):
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, terminated, False, info
def _get_rew(self, x_velocity: float, action):

View File

@@ -35,6 +35,7 @@ class HalfCheetahEnv(MuJocoPyEnv, utils.EzPickle):
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return (
ob,
reward,

View File

@@ -90,6 +90,7 @@ class HalfCheetahEnv(MuJocoPyEnv, utils.EzPickle):
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, terminated, False, info
def _get_obs(self):

View File

@@ -93,6 +93,7 @@ class HalfCheetahEnv(MujocoEnv, utils.EzPickle):
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, terminated, False, info
def _get_obs(self):

View File

@@ -251,6 +251,7 @@ class HalfCheetahEnv(MujocoEnv, utils.EzPickle):
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, False, False, info
def _get_rew(self, x_velocity: float, action):

View File

@@ -42,6 +42,7 @@ class HopperEnv(MuJocoPyEnv, utils.EzPickle):
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return ob, reward, terminated, False, {}
def _get_obs(self):

View File

@@ -151,6 +151,7 @@ class HopperEnv(MuJocoPyEnv, utils.EzPickle):
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, terminated, False, info
def reset_model(self):

View File

@@ -152,6 +152,7 @@ class HopperEnv(MujocoEnv, utils.EzPickle):
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, terminated, False, info
def reset_model(self):

View File

@@ -328,6 +328,7 @@ class HopperEnv(MujocoEnv, utils.EzPickle):
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, terminated, False, info
def _get_rew(self, x_velocity: float, action):

View File

@@ -60,6 +60,7 @@ class HumanoidEnv(MuJocoPyEnv, utils.EzPickle):
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return (
self._get_obs(),
reward,

View File

@@ -174,6 +174,7 @@ class HumanoidEnv(MuJocoPyEnv, utils.EzPickle):
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, terminated, False, info
def reset_model(self):

View File

@@ -161,6 +161,7 @@ class HumanoidEnv(MujocoEnv, utils.EzPickle):
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, terminated, False, info
def reset_model(self):

View File

@@ -513,6 +513,7 @@ class HumanoidEnv(MujocoEnv, utils.EzPickle):
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, terminated, False, info
def _get_rew(self, x_velocity: float, action):

View File

@@ -54,6 +54,7 @@ class HumanoidStandupEnv(MuJocoPyEnv, utils.EzPickle):
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return (
self._get_obs(),
reward,

View File

@@ -63,6 +63,7 @@ class HumanoidStandupEnv(MujocoEnv, utils.EzPickle):
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return (
self._get_obs(),
reward,

View File

@@ -456,6 +456,7 @@ class HumanoidStandupEnv(MujocoEnv, utils.EzPickle):
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return self._get_obs(), reward, False, False, info
def _get_rew(self, pos_after: float, action):

View File

@@ -40,6 +40,7 @@ class InvertedDoublePendulumEnv(MuJocoPyEnv, utils.EzPickle):
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return ob, r, terminated, False, {}
def _get_obs(self):

View File

@@ -46,6 +46,7 @@ class InvertedDoublePendulumEnv(MujocoEnv, utils.EzPickle):
terminated = bool(y <= 1)
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return ob, r, terminated, False, {}
def _get_obs(self):

View File

@@ -203,6 +203,7 @@ class InvertedDoublePendulumEnv(MujocoEnv, utils.EzPickle):
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, terminated, False, info
def _get_rew(self, x, y, terminated):

View File

@@ -35,6 +35,7 @@ class InvertedPendulumEnv(MuJocoPyEnv, utils.EzPickle):
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return ob, reward, terminated, False, {}
def reset_model(self):

View File

@@ -40,6 +40,7 @@ class InvertedPendulumEnv(MujocoEnv, utils.EzPickle):
terminated = bool(not np.isfinite(ob).all() or (np.abs(ob[1]) > 0.2))
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return ob, reward, terminated, False, {}
def reset_model(self):

View File

@@ -186,6 +186,7 @@ class InvertedPendulumEnv(MujocoEnv, utils.EzPickle):
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, terminated, False, info
def reset_model(self):

View File

@@ -36,6 +36,7 @@ class PusherEnv(MuJocoPyEnv, utils.EzPickle):
self.render()
ob = self._get_obs()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return (
ob,
reward,

View File

@@ -47,6 +47,7 @@ class PusherEnv(MujocoEnv, utils.EzPickle):
self.render()
ob = self._get_obs()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return (
ob,
reward,

View File

@@ -226,9 +226,10 @@ class PusherEnv(MujocoEnv, utils.EzPickle):
observation = self._get_obs()
reward, reward_info = self._get_rew(action)
info = reward_info
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, False, False, info
def _get_rew(self, action):

View File

@@ -33,6 +33,7 @@ class ReacherEnv(MuJocoPyEnv, utils.EzPickle):
self.render()
ob = self._get_obs()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return (
ob,
reward,

View File

@@ -41,6 +41,7 @@ class ReacherEnv(MujocoEnv, utils.EzPickle):
self.render()
ob = self._get_obs()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return (
ob,
reward,

View File

@@ -206,6 +206,7 @@ class ReacherEnv(MujocoEnv, utils.EzPickle):
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, False, False, info
def _get_rew(self, action):

View File

@@ -36,6 +36,7 @@ class SwimmerEnv(MuJocoPyEnv, utils.EzPickle):
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return (
ob,
reward,

View File

@@ -91,7 +91,7 @@ class SwimmerEnv(MuJocoPyEnv, utils.EzPickle):
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, False, False, info
def _get_obs(self):

View File

@@ -85,7 +85,7 @@ class SwimmerEnv(MujocoEnv, utils.EzPickle):
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, False, False, info
def _get_obs(self):

View File

@@ -247,7 +247,7 @@ class SwimmerEnv(MujocoEnv, utils.EzPickle):
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, False, False, info
def _get_rew(self, x_velocity: float, action):

View File

@@ -36,7 +36,7 @@ class Walker2dEnv(MuJocoPyEnv, utils.EzPickle):
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return ob, reward, terminated, False, {}
def _get_obs(self):

View File

@@ -140,7 +140,7 @@ class Walker2dEnv(MuJocoPyEnv, utils.EzPickle):
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, terminated, False, info
def reset_model(self):

View File

@@ -144,7 +144,7 @@ class Walker2dEnv(MujocoEnv, utils.EzPickle):
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, terminated, False, info
def reset_model(self):

View File

@@ -322,7 +322,7 @@ class Walker2dEnv(MujocoEnv, utils.EzPickle):
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, terminated, False, info
def _get_rew(self, x_velocity: float, action):

View File

@@ -193,6 +193,7 @@ class BlackjackEnv(gym.Env):
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return self._get_obs(), reward, terminated, False, {}
def _get_obs(self):

View File

@@ -179,7 +179,8 @@ class CliffWalkingEnv(Env):
if self.render_mode == "human":
self.render()
return (int(s), r, t, False, {"prob": p})
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return int(s), r, t, False, {"prob": p}
def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
super().reset(seed=seed)

View File

@@ -306,7 +306,8 @@ class FrozenLakeEnv(Env):
if self.render_mode == "human":
self.render()
return (int(s), r, t, False, {"prob": p})
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return int(s), r, t, False, {"prob": p}
def reset(
self,

View File

@@ -291,7 +291,8 @@ class TaxiEnv(Env):
if self.render_mode == "human":
self.render()
return (int(s), r, t, False, {"prob": p, "action_mask": self.action_mask(s)})
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return int(s), r, t, False, {"prob": p, "action_mask": self.action_mask(s)}
def reset(
self,

View File

@@ -445,7 +445,7 @@ class VectorObservationWrapper(VectorWrapper):
) -> tuple[ObsType, dict[str, Any]]:
"""Modifies the observation returned from the environment ``reset`` using the :meth:`observation`."""
observations, infos = self.env.reset(seed=seed, options=options)
return self.observation(observations), infos
return self.observations(observations), infos
def step(
self, actions: ActType
@@ -453,18 +453,18 @@ class VectorObservationWrapper(VectorWrapper):
"""Modifies the observation returned from the environment ``step`` using the :meth:`observation`."""
observations, rewards, terminations, truncations, infos = self.env.step(actions)
return (
self.observation(observations),
self.observations(observations),
rewards,
terminations,
truncations,
infos,
)
def observation(self, observation: ObsType) -> ObsType:
def observations(self, observations: ObsType) -> ObsType:
"""Defines the vector observation transformation.
Args:
observation: A vector observation from the environment
observations: A vector observation from the environment
Returns:
the transformed observation

View File

@@ -462,7 +462,7 @@ class RecordEpisodeStatistics(
Change logs:
* v0.15.4 - Initially added
* v1.0.0 - Removed vector environment support for `wrappers.vector.RecordEpisodeStatistics` and add attribute ``time_queue``
* v1.0.0 - Removed vector environment support (see :class:`gymnasium.wrappers.vector.RecordEpisodeStatistics`) and add attribute ``time_queue``
"""
def __init__(

View File

@@ -81,7 +81,7 @@ class NormalizeObservation(VectorObservationWrapper, gym.utils.RecordConstructor
"""Sets the property to freeze/continue the running mean calculation of the observation statistics."""
self._update_running_mean = setting
def observation(self, observations: ObsType) -> ObsType:
def observations(self, observations: ObsType) -> ObsType:
"""Defines the vector observation normalization function.
Args:

View File

@@ -69,7 +69,7 @@ class TransformObservation(VectorObservationWrapper):
self.func = func
def observation(self, observations: ObsType) -> ObsType:
def observations(self, observations: ObsType) -> ObsType:
"""Apply function to the vector observation."""
return self.func(observations)
@@ -148,7 +148,7 @@ class VectorizeTransformObservation(VectorObservationWrapper):
self.same_out = self.observation_space == self.env.observation_space
self.out = create_empty_array(self.single_observation_space, self.num_envs)
def observation(self, observations: ObsType) -> ObsType:
def observations(self, observations: ObsType) -> ObsType:
"""Iterates over the vector observations applying the single-agent wrapper ``observation`` then concatenates the observations together again."""
if self.same_out:
return concatenate(