mirror of
https://github.com/Farama-Foundation/Gymnasium.git
synced 2025-08-23 15:04:20 +00:00
Update vector wrapper docs and add Env generic to doc (#847)
This commit is contained in:
@@ -2,7 +2,7 @@
|
|||||||
title: Vector
|
title: Vector
|
||||||
---
|
---
|
||||||
|
|
||||||
# Vector environments
|
# Vectorize
|
||||||
|
|
||||||
```{toctree}
|
```{toctree}
|
||||||
:hidden:
|
:hidden:
|
||||||
@@ -19,10 +19,10 @@ vector/utils
|
|||||||
```
|
```
|
||||||
|
|
||||||
### Methods
|
### Methods
|
||||||
|
|
||||||
```{eval-rst}
|
```{eval-rst}
|
||||||
.. automethod:: gymnasium.vector.VectorEnv.reset
|
|
||||||
.. automethod:: gymnasium.vector.VectorEnv.step
|
.. automethod:: gymnasium.vector.VectorEnv.step
|
||||||
|
.. automethod:: gymnasium.vector.VectorEnv.reset
|
||||||
|
.. automethod:: gymnasium.vector.VectorEnv.render
|
||||||
.. automethod:: gymnasium.vector.VectorEnv.close
|
.. automethod:: gymnasium.vector.VectorEnv.close
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -47,11 +47,19 @@ vector/utils
|
|||||||
|
|
||||||
.. autoattribute:: gymnasium.vector.VectorEnv.single_observation_space
|
.. autoattribute:: gymnasium.vector.VectorEnv.single_observation_space
|
||||||
|
|
||||||
The observation space of an environment copy.
|
The observation space of a sub-environment.
|
||||||
|
|
||||||
.. autoattribute:: gymnasium.vector.VectorEnv.spec
|
.. autoattribute:: gymnasium.vector.VectorEnv.spec
|
||||||
|
|
||||||
The ``EnvSpec`` of the environment normally set during :py:meth:`gymnasium.make_vec`
|
The ``EnvSpec`` of the environment normally set during :py:meth:`gymnasium.make_vec`
|
||||||
|
|
||||||
|
.. autoattribute:: gymnasium.vector.VectorEnv.render_mode
|
||||||
|
|
||||||
|
The render mode of the environment which should follow similar specifications to `Env.render_mode`.
|
||||||
|
|
||||||
|
.. autoattribute:: gymnasium.vector.VectorEnv.closed
|
||||||
|
|
||||||
|
If the vector environment has been closed already.
|
||||||
```
|
```
|
||||||
|
|
||||||
### Additional Methods
|
### Additional Methods
|
||||||
|
@@ -2,19 +2,19 @@
|
|||||||
title: Vector Wrappers
|
title: Vector Wrappers
|
||||||
---
|
---
|
||||||
|
|
||||||
# Vector Wrappers
|
# Wrappers
|
||||||
|
|
||||||
```{eval-rst}
|
```{eval-rst}
|
||||||
.. autoclass:: gymnasium.vector.VectorWrapper
|
.. autoclass:: gymnasium.vector.VectorWrapper
|
||||||
|
|
||||||
.. automethod:: gymnasium.vector.VectorWrapper.step
|
.. automethod:: gymnasium.vector.VectorWrapper.step
|
||||||
.. automethod:: gymnasium.vector.VectorWrapper.reset
|
.. automethod:: gymnasium.vector.VectorWrapper.reset
|
||||||
|
.. automethod:: gymnasium.vector.VectorWrapper.render
|
||||||
.. automethod:: gymnasium.vector.VectorWrapper.close
|
.. automethod:: gymnasium.vector.VectorWrapper.close
|
||||||
|
|
||||||
.. autoclass:: gymnasium.vector.VectorObservationWrapper
|
.. autoclass:: gymnasium.vector.VectorObservationWrapper
|
||||||
|
|
||||||
.. automethod:: gymnasium.vector.VectorObservationWrapper.vector_observation
|
.. automethod:: gymnasium.vector.VectorObservationWrapper.observations
|
||||||
.. automethod:: gymnasium.vector.VectorObservationWrapper.single_observation
|
|
||||||
|
|
||||||
.. autoclass:: gymnasium.vector.VectorActionWrapper
|
.. autoclass:: gymnasium.vector.VectorActionWrapper
|
||||||
|
|
||||||
@@ -24,3 +24,57 @@ title: Vector Wrappers
|
|||||||
|
|
||||||
.. automethod:: gymnasium.vector.VectorRewardWrapper.rewards
|
.. automethod:: gymnasium.vector.VectorRewardWrapper.rewards
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Vector Only wrappers
|
||||||
|
|
||||||
|
```{eval-rst}
|
||||||
|
.. autoclass:: gymnasium.wrappers.vector.DictInfoToList
|
||||||
|
|
||||||
|
.. autoclass:: gymnasium.wrappers.vector.VectorizeTransformObservation
|
||||||
|
.. autoclass:: gymnasium.wrappers.vector.VectorizeTransformAction
|
||||||
|
.. autoclass:: gymnasium.wrappers.vector.VectorizeTransformReward
|
||||||
|
```
|
||||||
|
|
||||||
|
## Vectorized Common wrappers
|
||||||
|
|
||||||
|
```{eval-rst}
|
||||||
|
.. autoclass:: gymnasium.wrappers.vector.RecordEpisodeStatistics
|
||||||
|
```
|
||||||
|
|
||||||
|
## Implemented Observation wrappers
|
||||||
|
|
||||||
|
```{eval-rst}
|
||||||
|
.. autoclass:: gymnasium.wrappers.vector.TransformObservation
|
||||||
|
.. autoclass:: gymnasium.wrappers.vector.FilterObservation
|
||||||
|
.. autoclass:: gymnasium.wrappers.vector.FlattenObservation
|
||||||
|
.. autoclass:: gymnasium.wrappers.vector.GrayscaleObservation
|
||||||
|
.. autoclass:: gymnasium.wrappers.vector.ResizeObservation
|
||||||
|
.. autoclass:: gymnasium.wrappers.vector.ReshapeObservation
|
||||||
|
.. autoclass:: gymnasium.wrappers.vector.RescaleObservation
|
||||||
|
.. autoclass:: gymnasium.wrappers.vector.DtypeObservation
|
||||||
|
.. autoclass:: gymnasium.wrappers.vector.NormalizeObservation
|
||||||
|
```
|
||||||
|
|
||||||
|
## Implemented Action wrappers
|
||||||
|
|
||||||
|
```{eval-rst}
|
||||||
|
.. autoclass:: gymnasium.wrappers.vector.TransformAction
|
||||||
|
.. autoclass:: gymnasium.wrappers.vector.ClipAction
|
||||||
|
.. autoclass:: gymnasium.wrappers.vector.RescaleAction
|
||||||
|
```
|
||||||
|
|
||||||
|
## Implemented Reward wrappers
|
||||||
|
|
||||||
|
```{eval-rst}
|
||||||
|
.. autoclass:: gymnasium.wrappers.vector.TransformReward
|
||||||
|
.. autoclass:: gymnasium.wrappers.vector.ClipReward
|
||||||
|
.. autoclass:: gymnasium.wrappers.vector.NormalizeReward
|
||||||
|
```
|
||||||
|
|
||||||
|
## Implemented Data Conversion wrappers
|
||||||
|
|
||||||
|
```{eval-rst}
|
||||||
|
.. autoclass:: gymnasium.wrappers.vector.JaxToNumpy
|
||||||
|
.. autoclass:: gymnasium.wrappers.vector.JaxToTorch
|
||||||
|
.. autoclass:: gymnasium.wrappers.vector.NumpyToTorch
|
||||||
|
```
|
||||||
|
@@ -78,7 +78,6 @@ wrapper in the page on the wrapper type
|
|||||||
- Applies a function to the ``observation`` received from the environment's ``reset`` and ``step`` that is passed back to the user.
|
- Applies a function to the ``observation`` received from the environment's ``reset`` and ``step`` that is passed back to the user.
|
||||||
* - :class:`TransformReward`
|
* - :class:`TransformReward`
|
||||||
- Applies a function to the ``reward`` received from the environment's ``step``.
|
- Applies a function to the ``reward`` received from the environment's ``step``.
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Vector only Wrappers
|
## Vector only Wrappers
|
||||||
|
@@ -1,19 +0,0 @@
|
|||||||
---
|
|
||||||
title: Vector Wrappers
|
|
||||||
---
|
|
||||||
|
|
||||||
# Vector wrappers
|
|
||||||
|
|
||||||
## Vector only wrappers
|
|
||||||
|
|
||||||
```{eval-rst}
|
|
||||||
.. autoclass:: gymnasium.wrappers.vector.DictInfoToList
|
|
||||||
```
|
|
||||||
|
|
||||||
## Vectorize Transform Wrappers to Vector Wrappers
|
|
||||||
|
|
||||||
```{eval-rst}
|
|
||||||
.. autoclass:: gymnasium.wrappers.vector.VectorizeTransformObservation
|
|
||||||
.. autoclass:: gymnasium.wrappers.vector.VectorizeTransformAction
|
|
||||||
.. autoclass:: gymnasium.wrappers.vector.VectorizeTransformReward
|
|
||||||
```
|
|
@@ -42,10 +42,16 @@ class Env(Generic[ObsType, ActType]):
|
|||||||
- :attr:`np_random` - The random number generator for the environment. This is automatically assigned during
|
- :attr:`np_random` - The random number generator for the environment. This is automatically assigned during
|
||||||
``super().reset(seed=seed)`` and when accessing :attr:`np_random`.
|
``super().reset(seed=seed)`` and when accessing :attr:`np_random`.
|
||||||
|
|
||||||
.. seealso:: For modifying or extending environments use the :py:class:`gymnasium.Wrapper` class
|
.. seealso:: For modifying or extending environments use the :class:`gymnasium.Wrapper` class
|
||||||
|
|
||||||
Note:
|
Note:
|
||||||
To get reproducible sampling of actions, a seed can be set with ``env.action_space.seed(123)``.
|
To get reproducible sampling of actions, a seed can be set with ``env.action_space.seed(123)``.
|
||||||
|
|
||||||
|
Note:
|
||||||
|
For strict type checking (e.g., mypy or pyright), :class:`Env` is a generic class with two parameterized types: ``ObsType`` and ``ActType``.
|
||||||
|
The ``ObsType`` and ``ActType`` are the expected types of the observations and actions used in :meth:`reset` and :meth:`step`.
|
||||||
|
The environment's :attr:`observation_space` and :attr:`action_space` should have type ``Space[ObsType]`` and ``Space[ActType]`` respectively;
|
||||||
|
see a space's implementation to find its parameterized type.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Set this in SOME subclasses
|
# Set this in SOME subclasses
|
||||||
@@ -473,7 +479,11 @@ class ObservationWrapper(Wrapper[WrapperObsType, ActType, ObsType, ActType]):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, env: Env[ObsType, ActType]):
|
def __init__(self, env: Env[ObsType, ActType]):
|
||||||
"""Constructor for the observation wrapper."""
|
"""Constructor for the observation wrapper.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
env: Environment to be wrapped.
|
||||||
|
"""
|
||||||
Wrapper.__init__(self, env)
|
Wrapper.__init__(self, env)
|
||||||
|
|
||||||
def reset(
|
def reset(
|
||||||
@@ -513,7 +523,11 @@ class RewardWrapper(Wrapper[ObsType, ActType, ObsType, ActType]):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, env: Env[ObsType, ActType]):
|
def __init__(self, env: Env[ObsType, ActType]):
|
||||||
"""Constructor for the Reward wrapper."""
|
"""Constructor for the Reward wrapper.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
env: Environment to be wrapped.
|
||||||
|
"""
|
||||||
Wrapper.__init__(self, env)
|
Wrapper.__init__(self, env)
|
||||||
|
|
||||||
def step(
|
def step(
|
||||||
@@ -536,20 +550,25 @@ class RewardWrapper(Wrapper[ObsType, ActType, ObsType, ActType]):
|
|||||||
|
|
||||||
|
|
||||||
class ActionWrapper(Wrapper[ObsType, WrapperActType, ObsType, ActType]):
|
class ActionWrapper(Wrapper[ObsType, WrapperActType, ObsType, ActType]):
|
||||||
"""Superclass of wrappers that can modify the action before :meth:`env.step`.
|
"""Superclass of wrappers that can modify the action before :meth:`step`.
|
||||||
|
|
||||||
If you would like to apply a function to the action before passing it to the base environment,
|
If you would like to apply a function to the action before passing it to the base environment,
|
||||||
you can simply inherit from :class:`ActionWrapper` and overwrite the method :meth:`action` to implement
|
you can simply inherit from :class:`ActionWrapper` and overwrite the method :meth:`action` to implement
|
||||||
that transformation. The transformation defined in that method must take values in the base environment’s
|
that transformation. The transformation defined in that method must take values in the base environment’s
|
||||||
action space. However, its domain might differ from the original action space.
|
action space. However, its domain might differ from the original action space.
|
||||||
In that case, you need to specify the new action space of the wrapper by setting :attr:`self.action_space` in
|
In that case, you need to specify the new action space of the wrapper by setting :attr:`action_space` in
|
||||||
the :meth:`__init__` method of your wrapper.
|
the :meth:`__init__` method of your wrapper.
|
||||||
|
|
||||||
Among others, Gymnasium provides the action wrappers :class:`ClipAction` and :class:`RescaleAction` for clipping and rescaling actions.
|
Among others, Gymnasium provides the action wrappers :class:`gymnasium.wrappers.ClipAction` and
|
||||||
|
:class:`gymnasium.wrappers.RescaleAction` for clipping and rescaling actions.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, env: Env[ObsType, ActType]):
|
def __init__(self, env: Env[ObsType, ActType]):
|
||||||
"""Constructor for the action wrapper."""
|
"""Constructor for the action wrapper.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
env: Environment to be wrapped.
|
||||||
|
"""
|
||||||
Wrapper.__init__(self, env)
|
Wrapper.__init__(self, env)
|
||||||
|
|
||||||
def step(
|
def step(
|
||||||
@@ -559,7 +578,7 @@ class ActionWrapper(Wrapper[ObsType, WrapperActType, ObsType, ActType]):
|
|||||||
return self.env.step(self.action(action))
|
return self.env.step(self.action(action))
|
||||||
|
|
||||||
def action(self, action: WrapperActType) -> ActType:
|
def action(self, action: WrapperActType) -> ActType:
|
||||||
"""Returns a modified action before :meth:`env.step` is called.
|
"""Returns a modified action before :meth:`step` is called.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
action: The original :meth:`step` actions
|
action: The original :meth:`step` actions
|
||||||
|
@@ -606,6 +606,7 @@ class BipedalWalker(gym.Env, EzPickle):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return np.array(state, dtype=np.float32), reward, terminated, False, {}
|
return np.array(state, dtype=np.float32), reward, terminated, False, {}
|
||||||
|
|
||||||
def render(self):
|
def render(self):
|
||||||
|
@@ -673,6 +673,7 @@ class LunarLander(gym.Env, EzPickle):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return np.array(state, dtype=np.float32), reward, terminated, False, {}
|
return np.array(state, dtype=np.float32), reward, terminated, False, {}
|
||||||
|
|
||||||
def render(self):
|
def render(self):
|
||||||
|
@@ -225,7 +225,8 @@ class AcrobotEnv(Env):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
return (self._get_ob(), reward, terminated, False, {})
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
|
return self._get_ob(), reward, terminated, False, {}
|
||||||
|
|
||||||
def _get_ob(self):
|
def _get_ob(self):
|
||||||
s = self.state
|
s = self.state
|
||||||
|
@@ -204,6 +204,7 @@ class CartPoleEnv(gym.Env[np.ndarray, Union[int, np.ndarray]]):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return np.array(self.state, dtype=np.float32), reward, terminated, False, {}
|
return np.array(self.state, dtype=np.float32), reward, terminated, False, {}
|
||||||
|
|
||||||
def reset(
|
def reset(
|
||||||
|
@@ -175,6 +175,7 @@ class Continuous_MountainCarEnv(gym.Env):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return self.state, reward, terminated, False, {}
|
return self.state, reward, terminated, False, {}
|
||||||
|
|
||||||
def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
|
def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
|
||||||
|
@@ -146,6 +146,7 @@ class MountainCarEnv(gym.Env):
|
|||||||
self.state = (position, velocity)
|
self.state = (position, velocity)
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return np.array(self.state, dtype=np.float32), reward, terminated, False, {}
|
return np.array(self.state, dtype=np.float32), reward, terminated, False, {}
|
||||||
|
|
||||||
def reset(
|
def reset(
|
||||||
|
@@ -141,6 +141,7 @@ class PendulumEnv(gym.Env):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return self._get_obs(), -costs, False, False, {}
|
return self._get_obs(), -costs, False, False, {}
|
||||||
|
|
||||||
def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
|
def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
|
||||||
|
@@ -45,6 +45,7 @@ class AntEnv(MuJocoPyEnv, utils.EzPickle):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return (
|
return (
|
||||||
ob,
|
ob,
|
||||||
reward,
|
reward,
|
||||||
|
@@ -147,6 +147,7 @@ class AntEnv(MuJocoPyEnv, utils.EzPickle):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return observation, reward, terminated, False, info
|
return observation, reward, terminated, False, info
|
||||||
|
|
||||||
def _get_obs(self):
|
def _get_obs(self):
|
||||||
|
@@ -159,6 +159,7 @@ class AntEnv(MujocoEnv, utils.EzPickle):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return observation, reward, terminated, False, info
|
return observation, reward, terminated, False, info
|
||||||
|
|
||||||
def _get_obs(self):
|
def _get_obs(self):
|
||||||
|
@@ -390,6 +390,7 @@ class AntEnv(MujocoEnv, utils.EzPickle):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return observation, reward, terminated, False, info
|
return observation, reward, terminated, False, info
|
||||||
|
|
||||||
def _get_rew(self, x_velocity: float, action):
|
def _get_rew(self, x_velocity: float, action):
|
||||||
|
@@ -35,6 +35,7 @@ class HalfCheetahEnv(MuJocoPyEnv, utils.EzPickle):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return (
|
return (
|
||||||
ob,
|
ob,
|
||||||
reward,
|
reward,
|
||||||
|
@@ -90,6 +90,7 @@ class HalfCheetahEnv(MuJocoPyEnv, utils.EzPickle):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return observation, reward, terminated, False, info
|
return observation, reward, terminated, False, info
|
||||||
|
|
||||||
def _get_obs(self):
|
def _get_obs(self):
|
||||||
|
@@ -93,6 +93,7 @@ class HalfCheetahEnv(MujocoEnv, utils.EzPickle):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return observation, reward, terminated, False, info
|
return observation, reward, terminated, False, info
|
||||||
|
|
||||||
def _get_obs(self):
|
def _get_obs(self):
|
||||||
|
@@ -251,6 +251,7 @@ class HalfCheetahEnv(MujocoEnv, utils.EzPickle):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return observation, reward, False, False, info
|
return observation, reward, False, False, info
|
||||||
|
|
||||||
def _get_rew(self, x_velocity: float, action):
|
def _get_rew(self, x_velocity: float, action):
|
||||||
|
@@ -42,6 +42,7 @@ class HopperEnv(MuJocoPyEnv, utils.EzPickle):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return ob, reward, terminated, False, {}
|
return ob, reward, terminated, False, {}
|
||||||
|
|
||||||
def _get_obs(self):
|
def _get_obs(self):
|
||||||
|
@@ -151,6 +151,7 @@ class HopperEnv(MuJocoPyEnv, utils.EzPickle):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return observation, reward, terminated, False, info
|
return observation, reward, terminated, False, info
|
||||||
|
|
||||||
def reset_model(self):
|
def reset_model(self):
|
||||||
|
@@ -152,6 +152,7 @@ class HopperEnv(MujocoEnv, utils.EzPickle):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return observation, reward, terminated, False, info
|
return observation, reward, terminated, False, info
|
||||||
|
|
||||||
def reset_model(self):
|
def reset_model(self):
|
||||||
|
@@ -328,6 +328,7 @@ class HopperEnv(MujocoEnv, utils.EzPickle):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return observation, reward, terminated, False, info
|
return observation, reward, terminated, False, info
|
||||||
|
|
||||||
def _get_rew(self, x_velocity: float, action):
|
def _get_rew(self, x_velocity: float, action):
|
||||||
|
@@ -60,6 +60,7 @@ class HumanoidEnv(MuJocoPyEnv, utils.EzPickle):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return (
|
return (
|
||||||
self._get_obs(),
|
self._get_obs(),
|
||||||
reward,
|
reward,
|
||||||
|
@@ -174,6 +174,7 @@ class HumanoidEnv(MuJocoPyEnv, utils.EzPickle):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return observation, reward, terminated, False, info
|
return observation, reward, terminated, False, info
|
||||||
|
|
||||||
def reset_model(self):
|
def reset_model(self):
|
||||||
|
@@ -161,6 +161,7 @@ class HumanoidEnv(MujocoEnv, utils.EzPickle):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return observation, reward, terminated, False, info
|
return observation, reward, terminated, False, info
|
||||||
|
|
||||||
def reset_model(self):
|
def reset_model(self):
|
||||||
|
@@ -513,6 +513,7 @@ class HumanoidEnv(MujocoEnv, utils.EzPickle):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return observation, reward, terminated, False, info
|
return observation, reward, terminated, False, info
|
||||||
|
|
||||||
def _get_rew(self, x_velocity: float, action):
|
def _get_rew(self, x_velocity: float, action):
|
||||||
|
@@ -54,6 +54,7 @@ class HumanoidStandupEnv(MuJocoPyEnv, utils.EzPickle):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return (
|
return (
|
||||||
self._get_obs(),
|
self._get_obs(),
|
||||||
reward,
|
reward,
|
||||||
|
@@ -63,6 +63,7 @@ class HumanoidStandupEnv(MujocoEnv, utils.EzPickle):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return (
|
return (
|
||||||
self._get_obs(),
|
self._get_obs(),
|
||||||
reward,
|
reward,
|
||||||
|
@@ -456,6 +456,7 @@ class HumanoidStandupEnv(MujocoEnv, utils.EzPickle):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return self._get_obs(), reward, False, False, info
|
return self._get_obs(), reward, False, False, info
|
||||||
|
|
||||||
def _get_rew(self, pos_after: float, action):
|
def _get_rew(self, pos_after: float, action):
|
||||||
|
@@ -40,6 +40,7 @@ class InvertedDoublePendulumEnv(MuJocoPyEnv, utils.EzPickle):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return ob, r, terminated, False, {}
|
return ob, r, terminated, False, {}
|
||||||
|
|
||||||
def _get_obs(self):
|
def _get_obs(self):
|
||||||
|
@@ -46,6 +46,7 @@ class InvertedDoublePendulumEnv(MujocoEnv, utils.EzPickle):
|
|||||||
terminated = bool(y <= 1)
|
terminated = bool(y <= 1)
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return ob, r, terminated, False, {}
|
return ob, r, terminated, False, {}
|
||||||
|
|
||||||
def _get_obs(self):
|
def _get_obs(self):
|
||||||
|
@@ -203,6 +203,7 @@ class InvertedDoublePendulumEnv(MujocoEnv, utils.EzPickle):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return observation, reward, terminated, False, info
|
return observation, reward, terminated, False, info
|
||||||
|
|
||||||
def _get_rew(self, x, y, terminated):
|
def _get_rew(self, x, y, terminated):
|
||||||
|
@@ -35,6 +35,7 @@ class InvertedPendulumEnv(MuJocoPyEnv, utils.EzPickle):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return ob, reward, terminated, False, {}
|
return ob, reward, terminated, False, {}
|
||||||
|
|
||||||
def reset_model(self):
|
def reset_model(self):
|
||||||
|
@@ -40,6 +40,7 @@ class InvertedPendulumEnv(MujocoEnv, utils.EzPickle):
|
|||||||
terminated = bool(not np.isfinite(ob).all() or (np.abs(ob[1]) > 0.2))
|
terminated = bool(not np.isfinite(ob).all() or (np.abs(ob[1]) > 0.2))
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return ob, reward, terminated, False, {}
|
return ob, reward, terminated, False, {}
|
||||||
|
|
||||||
def reset_model(self):
|
def reset_model(self):
|
||||||
|
@@ -186,6 +186,7 @@ class InvertedPendulumEnv(MujocoEnv, utils.EzPickle):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return observation, reward, terminated, False, info
|
return observation, reward, terminated, False, info
|
||||||
|
|
||||||
def reset_model(self):
|
def reset_model(self):
|
||||||
|
@@ -36,6 +36,7 @@ class PusherEnv(MuJocoPyEnv, utils.EzPickle):
|
|||||||
self.render()
|
self.render()
|
||||||
|
|
||||||
ob = self._get_obs()
|
ob = self._get_obs()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return (
|
return (
|
||||||
ob,
|
ob,
|
||||||
reward,
|
reward,
|
||||||
|
@@ -47,6 +47,7 @@ class PusherEnv(MujocoEnv, utils.EzPickle):
|
|||||||
self.render()
|
self.render()
|
||||||
|
|
||||||
ob = self._get_obs()
|
ob = self._get_obs()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return (
|
return (
|
||||||
ob,
|
ob,
|
||||||
reward,
|
reward,
|
||||||
|
@@ -226,9 +226,10 @@ class PusherEnv(MujocoEnv, utils.EzPickle):
|
|||||||
observation = self._get_obs()
|
observation = self._get_obs()
|
||||||
reward, reward_info = self._get_rew(action)
|
reward, reward_info = self._get_rew(action)
|
||||||
info = reward_info
|
info = reward_info
|
||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return observation, reward, False, False, info
|
return observation, reward, False, False, info
|
||||||
|
|
||||||
def _get_rew(self, action):
|
def _get_rew(self, action):
|
||||||
|
@@ -33,6 +33,7 @@ class ReacherEnv(MuJocoPyEnv, utils.EzPickle):
|
|||||||
self.render()
|
self.render()
|
||||||
|
|
||||||
ob = self._get_obs()
|
ob = self._get_obs()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return (
|
return (
|
||||||
ob,
|
ob,
|
||||||
reward,
|
reward,
|
||||||
|
@@ -41,6 +41,7 @@ class ReacherEnv(MujocoEnv, utils.EzPickle):
|
|||||||
self.render()
|
self.render()
|
||||||
|
|
||||||
ob = self._get_obs()
|
ob = self._get_obs()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return (
|
return (
|
||||||
ob,
|
ob,
|
||||||
reward,
|
reward,
|
||||||
|
@@ -206,6 +206,7 @@ class ReacherEnv(MujocoEnv, utils.EzPickle):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return observation, reward, False, False, info
|
return observation, reward, False, False, info
|
||||||
|
|
||||||
def _get_rew(self, action):
|
def _get_rew(self, action):
|
||||||
|
@@ -36,6 +36,7 @@ class SwimmerEnv(MuJocoPyEnv, utils.EzPickle):
|
|||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return (
|
return (
|
||||||
ob,
|
ob,
|
||||||
reward,
|
reward,
|
||||||
|
@@ -91,7 +91,7 @@ class SwimmerEnv(MuJocoPyEnv, utils.EzPickle):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return observation, reward, False, False, info
|
return observation, reward, False, False, info
|
||||||
|
|
||||||
def _get_obs(self):
|
def _get_obs(self):
|
||||||
|
@@ -85,7 +85,7 @@ class SwimmerEnv(MujocoEnv, utils.EzPickle):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return observation, reward, False, False, info
|
return observation, reward, False, False, info
|
||||||
|
|
||||||
def _get_obs(self):
|
def _get_obs(self):
|
||||||
|
@@ -247,7 +247,7 @@ class SwimmerEnv(MujocoEnv, utils.EzPickle):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return observation, reward, False, False, info
|
return observation, reward, False, False, info
|
||||||
|
|
||||||
def _get_rew(self, x_velocity: float, action):
|
def _get_rew(self, x_velocity: float, action):
|
||||||
|
@@ -36,7 +36,7 @@ class Walker2dEnv(MuJocoPyEnv, utils.EzPickle):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return ob, reward, terminated, False, {}
|
return ob, reward, terminated, False, {}
|
||||||
|
|
||||||
def _get_obs(self):
|
def _get_obs(self):
|
||||||
|
@@ -140,7 +140,7 @@ class Walker2dEnv(MuJocoPyEnv, utils.EzPickle):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return observation, reward, terminated, False, info
|
return observation, reward, terminated, False, info
|
||||||
|
|
||||||
def reset_model(self):
|
def reset_model(self):
|
||||||
|
@@ -144,7 +144,7 @@ class Walker2dEnv(MujocoEnv, utils.EzPickle):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return observation, reward, terminated, False, info
|
return observation, reward, terminated, False, info
|
||||||
|
|
||||||
def reset_model(self):
|
def reset_model(self):
|
||||||
|
@@ -322,7 +322,7 @@ class Walker2dEnv(MujocoEnv, utils.EzPickle):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return observation, reward, terminated, False, info
|
return observation, reward, terminated, False, info
|
||||||
|
|
||||||
def _get_rew(self, x_velocity: float, action):
|
def _get_rew(self, x_velocity: float, action):
|
||||||
|
@@ -193,6 +193,7 @@ class BlackjackEnv(gym.Env):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
return self._get_obs(), reward, terminated, False, {}
|
return self._get_obs(), reward, terminated, False, {}
|
||||||
|
|
||||||
def _get_obs(self):
|
def _get_obs(self):
|
||||||
|
@@ -179,7 +179,8 @@ class CliffWalkingEnv(Env):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
return (int(s), r, t, False, {"prob": p})
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
|
return int(s), r, t, False, {"prob": p}
|
||||||
|
|
||||||
def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
|
def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
|
||||||
super().reset(seed=seed)
|
super().reset(seed=seed)
|
||||||
|
@@ -306,7 +306,8 @@ class FrozenLakeEnv(Env):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
return (int(s), r, t, False, {"prob": p})
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
|
return int(s), r, t, False, {"prob": p}
|
||||||
|
|
||||||
def reset(
|
def reset(
|
||||||
self,
|
self,
|
||||||
|
@@ -291,7 +291,8 @@ class TaxiEnv(Env):
|
|||||||
|
|
||||||
if self.render_mode == "human":
|
if self.render_mode == "human":
|
||||||
self.render()
|
self.render()
|
||||||
return (int(s), r, t, False, {"prob": p, "action_mask": self.action_mask(s)})
|
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
|
||||||
|
return int(s), r, t, False, {"prob": p, "action_mask": self.action_mask(s)}
|
||||||
|
|
||||||
def reset(
|
def reset(
|
||||||
self,
|
self,
|
||||||
|
@@ -445,7 +445,7 @@ class VectorObservationWrapper(VectorWrapper):
|
|||||||
) -> tuple[ObsType, dict[str, Any]]:
|
) -> tuple[ObsType, dict[str, Any]]:
|
||||||
"""Modifies the observation returned from the environment ``reset`` using the :meth:`observation`."""
|
"""Modifies the observation returned from the environment ``reset`` using the :meth:`observation`."""
|
||||||
observations, infos = self.env.reset(seed=seed, options=options)
|
observations, infos = self.env.reset(seed=seed, options=options)
|
||||||
return self.observation(observations), infos
|
return self.observations(observations), infos
|
||||||
|
|
||||||
def step(
|
def step(
|
||||||
self, actions: ActType
|
self, actions: ActType
|
||||||
@@ -453,18 +453,18 @@ class VectorObservationWrapper(VectorWrapper):
|
|||||||
"""Modifies the observation returned from the environment ``step`` using the :meth:`observation`."""
|
"""Modifies the observation returned from the environment ``step`` using the :meth:`observation`."""
|
||||||
observations, rewards, terminations, truncations, infos = self.env.step(actions)
|
observations, rewards, terminations, truncations, infos = self.env.step(actions)
|
||||||
return (
|
return (
|
||||||
self.observation(observations),
|
self.observations(observations),
|
||||||
rewards,
|
rewards,
|
||||||
terminations,
|
terminations,
|
||||||
truncations,
|
truncations,
|
||||||
infos,
|
infos,
|
||||||
)
|
)
|
||||||
|
|
||||||
def observation(self, observation: ObsType) -> ObsType:
|
def observations(self, observations: ObsType) -> ObsType:
|
||||||
"""Defines the vector observation transformation.
|
"""Defines the vector observation transformation.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
observation: A vector observation from the environment
|
observations: A vector observation from the environment
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
the transformed observation
|
the transformed observation
|
||||||
|
@@ -462,7 +462,7 @@ class RecordEpisodeStatistics(
|
|||||||
|
|
||||||
Change logs:
|
Change logs:
|
||||||
* v0.15.4 - Initially added
|
* v0.15.4 - Initially added
|
||||||
* v1.0.0 - Removed vector environment support for `wrappers.vector.RecordEpisodeStatistics` and add attribute ``time_queue``
|
* v1.0.0 - Removed vector environment support (see :class:`gymnasium.wrappers.vector.RecordEpisodeStatistics`) and add attribute ``time_queue``
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
@@ -81,7 +81,7 @@ class NormalizeObservation(VectorObservationWrapper, gym.utils.RecordConstructor
|
|||||||
"""Sets the property to freeze/continue the running mean calculation of the observation statistics."""
|
"""Sets the property to freeze/continue the running mean calculation of the observation statistics."""
|
||||||
self._update_running_mean = setting
|
self._update_running_mean = setting
|
||||||
|
|
||||||
def observation(self, observations: ObsType) -> ObsType:
|
def observations(self, observations: ObsType) -> ObsType:
|
||||||
"""Defines the vector observation normalization function.
|
"""Defines the vector observation normalization function.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
@@ -69,7 +69,7 @@ class TransformObservation(VectorObservationWrapper):
|
|||||||
|
|
||||||
self.func = func
|
self.func = func
|
||||||
|
|
||||||
def observation(self, observations: ObsType) -> ObsType:
|
def observations(self, observations: ObsType) -> ObsType:
|
||||||
"""Apply function to the vector observation."""
|
"""Apply function to the vector observation."""
|
||||||
return self.func(observations)
|
return self.func(observations)
|
||||||
|
|
||||||
@@ -148,7 +148,7 @@ class VectorizeTransformObservation(VectorObservationWrapper):
|
|||||||
self.same_out = self.observation_space == self.env.observation_space
|
self.same_out = self.observation_space == self.env.observation_space
|
||||||
self.out = create_empty_array(self.single_observation_space, self.num_envs)
|
self.out = create_empty_array(self.single_observation_space, self.num_envs)
|
||||||
|
|
||||||
def observation(self, observations: ObsType) -> ObsType:
|
def observations(self, observations: ObsType) -> ObsType:
|
||||||
"""Iterates over the vector observations applying the single-agent wrapper ``observation`` then concatenates the observations together again."""
|
"""Iterates over the vector observations applying the single-agent wrapper ``observation`` then concatenates the observations together again."""
|
||||||
if self.same_out:
|
if self.same_out:
|
||||||
return concatenate(
|
return concatenate(
|
||||||
|
Reference in New Issue
Block a user