mirror of https://github.com/Farama-Foundation/Gymnasium.git
synced 2025-07-31 22:04:31 +00:00

Update vector wrapper docs and add Env generic to doc (#847)
@@ -2,7 +2,7 @@
title: Vector
---

-# Vector environments
+# Vectorize

```{toctree}
:hidden:
@@ -19,10 +19,10 @@ vector/utils
```

### Methods

```{eval-rst}
+.. automethod:: gymnasium.vector.VectorEnv.reset
.. automethod:: gymnasium.vector.VectorEnv.step
-.. automethod:: gymnasium.vector.VectorEnv.reset
.. automethod:: gymnasium.vector.VectorEnv.render
.. automethod:: gymnasium.vector.VectorEnv.close
```
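
For orientation, a minimal usage sketch of the `VectorEnv` methods documented above (the environment id and `num_envs` are arbitrary examples, not part of this commit):

```python
import gymnasium as gym

# make_vec builds several sub-environments behind one batched interface.
envs = gym.make_vec("CartPole-v1", num_envs=3)

# Every method operates on batches: one entry per sub-environment.
observations, infos = envs.reset(seed=42)
observations, rewards, terminations, truncations, infos = envs.step(
    envs.action_space.sample()
)
envs.close()
```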

@@ -47,11 +47,19 @@ vector/utils

.. autoattribute:: gymnasium.vector.VectorEnv.single_observation_space

-   The observation space of an environment copy.
+   The observation space of a sub-environment.
+
+.. autoattribute:: gymnasium.vector.VectorEnv.spec
+
+   The ``EnvSpec`` of the environment, normally set during :py:meth:`gymnasium.make_vec`
+
+.. autoattribute:: gymnasium.vector.VectorEnv.render_mode
+
+   The render mode of the environment, which should follow similar specifications to ``Env.render_mode``.
+
+.. autoattribute:: gymnasium.vector.VectorEnv.closed
+
+   If the vector environment has been closed already.
```

### Additional Methods

@@ -2,19 +2,19 @@
title: Vector Wrappers
---

-# Vector Wrappers
+# Wrappers

```{eval-rst}
.. autoclass:: gymnasium.vector.VectorWrapper

    .. automethod:: gymnasium.vector.VectorWrapper.step
    .. automethod:: gymnasium.vector.VectorWrapper.reset
    .. automethod:: gymnasium.vector.VectorWrapper.render
    .. automethod:: gymnasium.vector.VectorWrapper.close

.. autoclass:: gymnasium.vector.VectorObservationWrapper

-    .. automethod:: gymnasium.vector.VectorObservationWrapper.vector_observation
-    .. automethod:: gymnasium.vector.VectorObservationWrapper.single_observation
+    .. automethod:: gymnasium.vector.VectorObservationWrapper.observations

.. autoclass:: gymnasium.vector.VectorActionWrapper

@@ -24,3 +24,57 @@ title: Vector Wrappers

    .. automethod:: gymnasium.vector.VectorRewardWrapper.rewards
```
+
+## Vector Only wrappers
+
+```{eval-rst}
+.. autoclass:: gymnasium.wrappers.vector.DictInfoToList
+
+.. autoclass:: gymnasium.wrappers.vector.VectorizeTransformObservation
+.. autoclass:: gymnasium.wrappers.vector.VectorizeTransformAction
+.. autoclass:: gymnasium.wrappers.vector.VectorizeTransformReward
+```
+
+## Vectorized Common wrappers
+
+```{eval-rst}
+.. autoclass:: gymnasium.wrappers.vector.RecordEpisodeStatistics
+```
+
+## Implemented Observation wrappers
+
+```{eval-rst}
+.. autoclass:: gymnasium.wrappers.vector.TransformObservation
+.. autoclass:: gymnasium.wrappers.vector.FilterObservation
+.. autoclass:: gymnasium.wrappers.vector.FlattenObservation
+.. autoclass:: gymnasium.wrappers.vector.GrayscaleObservation
+.. autoclass:: gymnasium.wrappers.vector.ResizeObservation
+.. autoclass:: gymnasium.wrappers.vector.ReshapeObservation
+.. autoclass:: gymnasium.wrappers.vector.RescaleObservation
+.. autoclass:: gymnasium.wrappers.vector.DtypeObservation
+.. autoclass:: gymnasium.wrappers.vector.NormalizeObservation
+```
+
+## Implemented Action wrappers
+
+```{eval-rst}
+.. autoclass:: gymnasium.wrappers.vector.TransformAction
+.. autoclass:: gymnasium.wrappers.vector.ClipAction
+.. autoclass:: gymnasium.wrappers.vector.RescaleAction
+```
+
+## Implemented Reward wrappers
+
+```{eval-rst}
+.. autoclass:: gymnasium.wrappers.vector.TransformReward
+.. autoclass:: gymnasium.wrappers.vector.ClipReward
+.. autoclass:: gymnasium.wrappers.vector.NormalizeReward
+```
+
+## Implemented Data Conversion wrappers
+
+```{eval-rst}
+.. autoclass:: gymnasium.wrappers.vector.JaxToNumpy
+.. autoclass:: gymnasium.wrappers.vector.JaxToTorch
+.. autoclass:: gymnasium.wrappers.vector.NumpyToTorch
+```
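
The wrappers listed above compose like their single-environment counterparts. A minimal sketch of stacking two of them (the environment id and `num_envs` are illustrative choices):

```python
import gymnasium as gym
from gymnasium.wrappers.vector import ClipAction, RecordEpisodeStatistics

envs = gym.make_vec("Pendulum-v1", num_envs=4)
envs = ClipAction(envs)               # clip batched actions to the action-space bounds
envs = RecordEpisodeStatistics(envs)  # adds per-episode return/length stats to `info`

observations, infos = envs.reset(seed=0)
observations, rewards, terminations, truncations, infos = envs.step(
    envs.action_space.sample()
)
envs.close()
```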

@@ -78,7 +78,6 @@ wrapper in the page on the wrapper type
    - Applies a function to the ``observation`` received from the environment's ``reset`` and ``step`` that is passed back to the user.
  * - :class:`TransformReward`
    - Applies a function to the ``reward`` received from the environment's ``step``.

```

## Vector only Wrappers

@@ -1,19 +0,0 @@
----
-title: Vector Wrappers
----
-
-# Vector wrappers
-
-## Vector only wrappers
-
-```{eval-rst}
-.. autoclass:: gymnasium.wrappers.vector.DictInfoToList
-```
-
-## Vectorize Transform Wrappers to Vector Wrappers
-
-```{eval-rst}
-.. autoclass:: gymnasium.wrappers.vector.VectorizeTransformObservation
-.. autoclass:: gymnasium.wrappers.vector.VectorizeTransformAction
-.. autoclass:: gymnasium.wrappers.vector.VectorizeTransformReward
-```

@@ -42,10 +42,16 @@ class Env(Generic[ObsType, ActType]):
    - :attr:`np_random` - The random number generator for the environment. This is automatically assigned during
      ``super().reset(seed=seed)`` and when accessing :attr:`np_random`.

-    .. seealso:: For modifying or extending environments use the :py:class:`gymnasium.Wrapper` class
+    .. seealso:: For modifying or extending environments use the :class:`gymnasium.Wrapper` class

    Note:
        To get reproducible sampling of actions, a seed can be set with ``env.action_space.seed(123)``.
+
+    Note:
+        For strict type checking (e.g., mypy or pyright), :class:`Env` is a generic class with two parameterized types: ``ObsType`` and ``ActType``.
+        The ``ObsType`` and ``ActType`` are the expected types of the observations and actions used in :meth:`reset` and :meth:`step`.
+        The environment's :attr:`observation_space` and :attr:`action_space` should have type ``Space[ObsType]`` and ``Space[ActType]``,
+        see a space's implementation to find its parameterized type.
    """

    # Set this in SOME subclasses
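
To make the new typing note concrete, a sketch of a checker-friendly environment (a hypothetical toy env, not from this commit; `Discrete` is implemented as ``Space[np.int64]``, hence the type parameters):

```python
from typing import Any

import numpy as np

import gymnasium as gym
from gymnasium import spaces


# Env is parameterized as Env[ObsType, ActType], so mypy/pyright can check
# the reset()/step() return types against the declared spaces.
class GuessEnv(gym.Env[np.int64, np.int64]):
    def __init__(self) -> None:
        self.observation_space = spaces.Discrete(10)
        self.action_space = spaces.Discrete(10)

    def reset(self, *, seed: int | None = None, options: dict[str, Any] | None = None):
        super().reset(seed=seed)
        self._target = self.np_random.integers(10)
        return np.int64(0), {}

    def step(self, action: np.int64):
        reward = float(action == self._target)
        # terminated=True, truncated=False: a single-guess episode
        return np.int64(self._target), reward, True, False, {}
```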

@@ -473,7 +479,11 @@ class ObservationWrapper(Wrapper[WrapperObsType, ActType, ObsType, ActType]):
    """

    def __init__(self, env: Env[ObsType, ActType]):
-        """Constructor for the observation wrapper."""
+        """Constructor for the observation wrapper.
+
+        Args:
+            env: Environment to be wrapped.
+        """
        Wrapper.__init__(self, env)

    def reset(

@@ -513,7 +523,11 @@ class RewardWrapper(Wrapper[ObsType, ActType, ObsType, ActType]):
    """

    def __init__(self, env: Env[ObsType, ActType]):
-        """Constructor for the Reward wrapper."""
+        """Constructor for the Reward wrapper.
+
+        Args:
+            env: Environment to be wrapped.
+        """
        Wrapper.__init__(self, env)

    def step(

@@ -536,20 +550,25 @@ class RewardWrapper(Wrapper[ObsType, ActType, ObsType, ActType]):


class ActionWrapper(Wrapper[ObsType, WrapperActType, ObsType, ActType]):
-    """Superclass of wrappers that can modify the action before :meth:`env.step`.
+    """Superclass of wrappers that can modify the action before :meth:`step`.

    If you would like to apply a function to the action before passing it to the base environment,
    you can simply inherit from :class:`ActionWrapper` and overwrite the method :meth:`action` to implement
    that transformation. The transformation defined in that method must take values in the base environment’s
    action space. However, its domain might differ from the original action space.
-    In that case, you need to specify the new action space of the wrapper by setting :attr:`self.action_space` in
+    In that case, you need to specify the new action space of the wrapper by setting :attr:`action_space` in
    the :meth:`__init__` method of your wrapper.

-    Among others, Gymnasium provides the action wrappers :class:`ClipAction` and :class:`RescaleAction` for clipping and rescaling actions.
+    Among others, Gymnasium provides the action wrappers :class:`gymnasium.wrappers.ClipAction` and
+    :class:`gymnasium.wrappers.RescaleAction` for clipping and rescaling actions.
    """

    def __init__(self, env: Env[ObsType, ActType]):
-        """Constructor for the action wrapper."""
+        """Constructor for the action wrapper.
+
+        Args:
+            env: Environment to be wrapped.
+        """
        Wrapper.__init__(self, env)

    def step(
@@ -559,7 +578,7 @@ class ActionWrapper(Wrapper[ObsType, WrapperActType, ObsType, ActType]):
        return self.env.step(self.action(action))

    def action(self, action: WrapperActType) -> ActType:
-        """Returns a modified action before :meth:`env.step` is called.
+        """Returns a modified action before :meth:`step` is called.

        Args:
            action: The original :meth:`step` actions
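
As a concrete instance of the pattern this docstring describes, a sketch of an action wrapper whose domain differs from the base environment's `Box` space (the binning scheme is invented for illustration):

```python
import gymnasium as gym
from gymnasium import spaces


class DiscreteToContinuous(gym.ActionWrapper):
    """Expose a Discrete action space over a continuous Box environment."""

    def __init__(self, env: gym.Env, n_bins: int = 5):
        super().__init__(env)
        self.n_bins = n_bins
        # The wrapper's domain differs from the base env, so override `action_space`.
        self.action_space = spaces.Discrete(n_bins)

    def action(self, action):
        # Map a bin index onto a value inside the base environment's Box bounds.
        low, high = self.env.action_space.low, self.env.action_space.high
        return low + (high - low) * (action / (self.n_bins - 1))


env = DiscreteToContinuous(gym.make("Pendulum-v1"))
obs, info = env.reset(seed=0)
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
```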

@@ -606,6 +606,7 @@ class BipedalWalker(gym.Env, EzPickle):

        if self.render_mode == "human":
            self.render()
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return np.array(state, dtype=np.float32), reward, terminated, False, {}

    def render(self):
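
The same comment is added across every environment below; the behavior it documents is that `truncated=True` comes from the `TimeLimit` wrapper that `gymnasium.make` applies, never from the environment's own `step`. A minimal sketch (the tiny step budget is arbitrary):

```python
import gymnasium as gym

# `make` wraps the environment in `TimeLimit`; here we force a budget of 5 steps.
env = gym.make("CartPole-v1", max_episode_steps=5)
obs, info = env.reset(seed=0)

terminated = truncated = False
while not (terminated or truncated):
    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())

# If the pole did not fall within 5 steps, `truncated` is True here,
# even though CartPole's own `step` always returns truncation=False.
print(terminated, truncated)
env.close()
```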

@@ -673,6 +673,7 @@ class LunarLander(gym.Env, EzPickle):

        if self.render_mode == "human":
            self.render()
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return np.array(state, dtype=np.float32), reward, terminated, False, {}

    def render(self):

@@ -225,7 +225,8 @@ class AcrobotEnv(Env):

        if self.render_mode == "human":
            self.render()
-        return (self._get_ob(), reward, terminated, False, {})
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
+        return self._get_ob(), reward, terminated, False, {}

    def _get_ob(self):
        s = self.state

@@ -204,6 +204,7 @@ class CartPoleEnv(gym.Env[np.ndarray, Union[int, np.ndarray]]):

        if self.render_mode == "human":
            self.render()
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return np.array(self.state, dtype=np.float32), reward, terminated, False, {}

    def reset(

@@ -175,6 +175,7 @@ class Continuous_MountainCarEnv(gym.Env):

        if self.render_mode == "human":
            self.render()
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return self.state, reward, terminated, False, {}

    def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):

@@ -146,6 +146,7 @@ class MountainCarEnv(gym.Env):
        self.state = (position, velocity)
        if self.render_mode == "human":
            self.render()
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return np.array(self.state, dtype=np.float32), reward, terminated, False, {}

    def reset(

@@ -141,6 +141,7 @@ class PendulumEnv(gym.Env):

        if self.render_mode == "human":
            self.render()
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return self._get_obs(), -costs, False, False, {}

    def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):

@@ -45,6 +45,7 @@ class AntEnv(MuJocoPyEnv, utils.EzPickle):

        if self.render_mode == "human":
            self.render()
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return (
            ob,
            reward,

@@ -147,6 +147,7 @@ class AntEnv(MuJocoPyEnv, utils.EzPickle):

        if self.render_mode == "human":
            self.render()
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return observation, reward, terminated, False, info

    def _get_obs(self):

@@ -159,6 +159,7 @@ class AntEnv(MujocoEnv, utils.EzPickle):

        if self.render_mode == "human":
            self.render()
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return observation, reward, terminated, False, info

    def _get_obs(self):

@@ -390,6 +390,7 @@ class AntEnv(MujocoEnv, utils.EzPickle):

        if self.render_mode == "human":
            self.render()
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return observation, reward, terminated, False, info

    def _get_rew(self, x_velocity: float, action):

@@ -35,6 +35,7 @@ class HalfCheetahEnv(MuJocoPyEnv, utils.EzPickle):

        if self.render_mode == "human":
            self.render()
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return (
            ob,
            reward,

@@ -90,6 +90,7 @@ class HalfCheetahEnv(MuJocoPyEnv, utils.EzPickle):

        if self.render_mode == "human":
            self.render()
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return observation, reward, terminated, False, info

    def _get_obs(self):

@@ -93,6 +93,7 @@ class HalfCheetahEnv(MujocoEnv, utils.EzPickle):

        if self.render_mode == "human":
            self.render()
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return observation, reward, terminated, False, info

    def _get_obs(self):

@@ -251,6 +251,7 @@ class HalfCheetahEnv(MujocoEnv, utils.EzPickle):

        if self.render_mode == "human":
            self.render()
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return observation, reward, False, False, info

    def _get_rew(self, x_velocity: float, action):

@@ -42,6 +42,7 @@ class HopperEnv(MuJocoPyEnv, utils.EzPickle):

        if self.render_mode == "human":
            self.render()
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return ob, reward, terminated, False, {}

    def _get_obs(self):

@@ -151,6 +151,7 @@ class HopperEnv(MuJocoPyEnv, utils.EzPickle):

        if self.render_mode == "human":
            self.render()
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return observation, reward, terminated, False, info

    def reset_model(self):

@@ -152,6 +152,7 @@ class HopperEnv(MujocoEnv, utils.EzPickle):

        if self.render_mode == "human":
            self.render()
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return observation, reward, terminated, False, info

    def reset_model(self):

@@ -328,6 +328,7 @@ class HopperEnv(MujocoEnv, utils.EzPickle):

        if self.render_mode == "human":
            self.render()
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return observation, reward, terminated, False, info

    def _get_rew(self, x_velocity: float, action):

@@ -60,6 +60,7 @@ class HumanoidEnv(MuJocoPyEnv, utils.EzPickle):

        if self.render_mode == "human":
            self.render()
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return (
            self._get_obs(),
            reward,

@@ -174,6 +174,7 @@ class HumanoidEnv(MuJocoPyEnv, utils.EzPickle):

        if self.render_mode == "human":
            self.render()
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return observation, reward, terminated, False, info

    def reset_model(self):

@@ -161,6 +161,7 @@ class HumanoidEnv(MujocoEnv, utils.EzPickle):

        if self.render_mode == "human":
            self.render()
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return observation, reward, terminated, False, info

    def reset_model(self):

@@ -513,6 +513,7 @@ class HumanoidEnv(MujocoEnv, utils.EzPickle):

        if self.render_mode == "human":
            self.render()
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return observation, reward, terminated, False, info

    def _get_rew(self, x_velocity: float, action):

@@ -54,6 +54,7 @@ class HumanoidStandupEnv(MuJocoPyEnv, utils.EzPickle):

        if self.render_mode == "human":
            self.render()
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return (
            self._get_obs(),
            reward,

@@ -63,6 +63,7 @@ class HumanoidStandupEnv(MujocoEnv, utils.EzPickle):

        if self.render_mode == "human":
            self.render()
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return (
            self._get_obs(),
            reward,

@@ -456,6 +456,7 @@ class HumanoidStandupEnv(MujocoEnv, utils.EzPickle):

        if self.render_mode == "human":
            self.render()
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return self._get_obs(), reward, False, False, info

    def _get_rew(self, pos_after: float, action):

@@ -40,6 +40,7 @@ class InvertedDoublePendulumEnv(MuJocoPyEnv, utils.EzPickle):

        if self.render_mode == "human":
            self.render()
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return ob, r, terminated, False, {}

    def _get_obs(self):

@@ -46,6 +46,7 @@ class InvertedDoublePendulumEnv(MujocoEnv, utils.EzPickle):
        terminated = bool(y <= 1)
        if self.render_mode == "human":
            self.render()
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return ob, r, terminated, False, {}

    def _get_obs(self):

@@ -203,6 +203,7 @@ class InvertedDoublePendulumEnv(MujocoEnv, utils.EzPickle):

        if self.render_mode == "human":
            self.render()
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return observation, reward, terminated, False, info

    def _get_rew(self, x, y, terminated):

@@ -35,6 +35,7 @@ class InvertedPendulumEnv(MuJocoPyEnv, utils.EzPickle):

        if self.render_mode == "human":
            self.render()
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return ob, reward, terminated, False, {}

    def reset_model(self):

@@ -40,6 +40,7 @@ class InvertedPendulumEnv(MujocoEnv, utils.EzPickle):
        terminated = bool(not np.isfinite(ob).all() or (np.abs(ob[1]) > 0.2))
        if self.render_mode == "human":
            self.render()
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return ob, reward, terminated, False, {}

    def reset_model(self):

@@ -186,6 +186,7 @@ class InvertedPendulumEnv(MujocoEnv, utils.EzPickle):

        if self.render_mode == "human":
            self.render()
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return observation, reward, terminated, False, info

    def reset_model(self):

@@ -36,6 +36,7 @@ class PusherEnv(MuJocoPyEnv, utils.EzPickle):
            self.render()

        ob = self._get_obs()
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return (
            ob,
            reward,

@@ -47,6 +47,7 @@ class PusherEnv(MujocoEnv, utils.EzPickle):
            self.render()

        ob = self._get_obs()
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return (
            ob,
            reward,

@@ -226,9 +226,10 @@ class PusherEnv(MujocoEnv, utils.EzPickle):
        observation = self._get_obs()
        reward, reward_info = self._get_rew(action)
        info = reward_info

        if self.render_mode == "human":
            self.render()

+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return observation, reward, False, False, info

    def _get_rew(self, action):

@@ -33,6 +33,7 @@ class ReacherEnv(MuJocoPyEnv, utils.EzPickle):
            self.render()

        ob = self._get_obs()
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return (
            ob,
            reward,

@@ -41,6 +41,7 @@ class ReacherEnv(MujocoEnv, utils.EzPickle):
            self.render()

        ob = self._get_obs()
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return (
            ob,
            reward,

@@ -206,6 +206,7 @@ class ReacherEnv(MujocoEnv, utils.EzPickle):

        if self.render_mode == "human":
            self.render()
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return observation, reward, False, False, info

    def _get_rew(self, action):

@@ -36,6 +36,7 @@ class SwimmerEnv(MuJocoPyEnv, utils.EzPickle):
        if self.render_mode == "human":
            self.render()

+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return (
            ob,
            reward,

@@ -91,7 +91,7 @@ class SwimmerEnv(MuJocoPyEnv, utils.EzPickle):

        if self.render_mode == "human":
            self.render()

+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return observation, reward, False, False, info

    def _get_obs(self):

@@ -85,7 +85,7 @@ class SwimmerEnv(MujocoEnv, utils.EzPickle):

        if self.render_mode == "human":
            self.render()

+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return observation, reward, False, False, info

    def _get_obs(self):

@@ -247,7 +247,7 @@ class SwimmerEnv(MujocoEnv, utils.EzPickle):

        if self.render_mode == "human":
            self.render()

+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return observation, reward, False, False, info

    def _get_rew(self, x_velocity: float, action):

@@ -36,7 +36,7 @@ class Walker2dEnv(MuJocoPyEnv, utils.EzPickle):

        if self.render_mode == "human":
            self.render()

+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return ob, reward, terminated, False, {}

    def _get_obs(self):

@@ -140,7 +140,7 @@ class Walker2dEnv(MuJocoPyEnv, utils.EzPickle):

        if self.render_mode == "human":
            self.render()

+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return observation, reward, terminated, False, info

    def reset_model(self):

@@ -144,7 +144,7 @@ class Walker2dEnv(MujocoEnv, utils.EzPickle):

        if self.render_mode == "human":
            self.render()

+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return observation, reward, terminated, False, info

    def reset_model(self):

@@ -322,7 +322,7 @@ class Walker2dEnv(MujocoEnv, utils.EzPickle):

        if self.render_mode == "human":
            self.render()

+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return observation, reward, terminated, False, info

    def _get_rew(self, x_velocity: float, action):

@@ -193,6 +193,7 @@ class BlackjackEnv(gym.Env):

        if self.render_mode == "human":
            self.render()
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return self._get_obs(), reward, terminated, False, {}

    def _get_obs(self):

@@ -179,7 +179,8 @@ class CliffWalkingEnv(Env):

        if self.render_mode == "human":
            self.render()
-        return (int(s), r, t, False, {"prob": p})
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
+        return int(s), r, t, False, {"prob": p}

    def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
        super().reset(seed=seed)

@@ -306,7 +306,8 @@ class FrozenLakeEnv(Env):

        if self.render_mode == "human":
            self.render()
-        return (int(s), r, t, False, {"prob": p})
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
+        return int(s), r, t, False, {"prob": p}

    def reset(
        self,

@@ -291,7 +291,8 @@ class TaxiEnv(Env):

        if self.render_mode == "human":
            self.render()
-        return (int(s), r, t, False, {"prob": p, "action_mask": self.action_mask(s)})
+        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
+        return int(s), r, t, False, {"prob": p, "action_mask": self.action_mask(s)}

    def reset(
        self,

@@ -445,7 +445,7 @@ class VectorObservationWrapper(VectorWrapper):
    ) -> tuple[ObsType, dict[str, Any]]:
        """Modifies the observation returned from the environment ``reset`` using the :meth:`observation`."""
        observations, infos = self.env.reset(seed=seed, options=options)
-        return self.observation(observations), infos
+        return self.observations(observations), infos

    def step(
        self, actions: ActType
@@ -453,18 +453,18 @@ class VectorObservationWrapper(VectorWrapper):
        """Modifies the observation returned from the environment ``step`` using the :meth:`observation`."""
        observations, rewards, terminations, truncations, infos = self.env.step(actions)
        return (
-            self.observation(observations),
+            self.observations(observations),
            rewards,
            terminations,
            truncations,
            infos,
        )

-    def observation(self, observation: ObsType) -> ObsType:
+    def observations(self, observations: ObsType) -> ObsType:
        """Defines the vector observation transformation.

        Args:
-            observation: A vector observation from the environment
+            observations: A vector observation from the environment

        Returns:
            the transformed observation
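
With the rename from `observation` to `observations`, a custom vector observation wrapper now looks like this sketch (the scaling transform is invented for illustration, and the observation-space bounds are left untouched for brevity):

```python
import numpy as np

import gymnasium as gym
from gymnasium.vector import VectorObservationWrapper


class ScaleObservations(VectorObservationWrapper):
    """Rescale the whole batch of observations in one vectorized call."""

    def __init__(self, env: gym.vector.VectorEnv, factor: float = 0.1):
        super().__init__(env)
        self.factor = factor

    def observations(self, observations):
        # Receives the stacked (num_envs, ...) batch and returns a batch of the same shape.
        return np.asarray(observations) * self.factor


envs = ScaleObservations(gym.make_vec("CartPole-v1", num_envs=2))
obs, info = envs.reset(seed=0)
envs.close()
```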

@@ -462,7 +462,7 @@ class RecordEpisodeStatistics(

    Change logs:
     * v0.15.4 - Initially added
-     * v1.0.0 - Removed vector environment support for `wrappers.vector.RecordEpisodeStatistics` and add attribute ``time_queue``
+     * v1.0.0 - Removed vector environment support (see :class:`gymnasium.wrappers.vector.RecordEpisodeStatistics`) and add attribute ``time_queue``
    """

    def __init__(
@@ -81,7 +81,7 @@ class NormalizeObservation(VectorObservationWrapper, gym.utils.RecordConstructor
        """Sets the property to freeze/continue the running mean calculation of the observation statistics."""
        self._update_running_mean = setting

-    def observation(self, observations: ObsType) -> ObsType:
+    def observations(self, observations: ObsType) -> ObsType:
        """Defines the vector observation normalization function.

        Args:

@@ -69,7 +69,7 @@ class TransformObservation(VectorObservationWrapper):

        self.func = func

-    def observation(self, observations: ObsType) -> ObsType:
+    def observations(self, observations: ObsType) -> ObsType:
        """Apply function to the vector observation."""
        return self.func(observations)

@@ -148,7 +148,7 @@ class VectorizeTransformObservation(VectorObservationWrapper):
        self.same_out = self.observation_space == self.env.observation_space
        self.out = create_empty_array(self.single_observation_space, self.num_envs)

-    def observation(self, observations: ObsType) -> ObsType:
+    def observations(self, observations: ObsType) -> ObsType:
        """Iterates over the vector observations applying the single-agent wrapper ``observation`` then concatenates the observations together again."""
        if self.same_out:
            return concatenate(
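
Downstream of these renames, the implemented vector wrappers are used unchanged. A sketch of vector observation normalization, freezing the running statistics for evaluation (the property name follows the `NormalizeObservation` hunk above):

```python
import gymnasium as gym
from gymnasium.wrappers.vector import NormalizeObservation

envs = NormalizeObservation(gym.make_vec("CartPole-v1", num_envs=4))
obs, info = envs.reset(seed=0)

# Freeze the running mean/std (e.g., during evaluation), via the
# `update_running_mean` property whose setter is shown above.
envs.update_running_mean = False
envs.close()
```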