2022-05-10 15:35:45 +01:00
|
|
|
|
"""Core API for Environment, Wrapper, ActionWrapper, RewardWrapper and ObservationWrapper."""
|
2022-11-12 10:21:24 +00:00
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
from typing import TYPE_CHECKING, Any, Generic, SupportsFloat, TypeVar
|
2021-08-05 16:35:07 +02:00
|
|
|
|
|
2022-08-22 09:20:28 -04:00
|
|
|
|
import numpy as np
|
|
|
|
|
|
2022-09-08 10:10:07 +01:00
|
|
|
|
from gymnasium import spaces
|
|
|
|
|
from gymnasium.utils import seeding
|
2016-05-27 12:16:35 -07:00
|
|
|
|
|
2022-12-04 22:24:02 +08:00
|
|
|
|
|
2022-07-04 18:19:25 +01:00
|
|
|
|
if TYPE_CHECKING:
|
2022-09-08 10:10:07 +01:00
|
|
|
|
from gymnasium.envs.registration import EnvSpec
|
2022-07-04 18:19:25 +01:00
|
|
|
|
|
2021-12-22 19:12:57 +01:00
|
|
|
|
# Type variable for the observation type; first generic parameter of ``Env``.
ObsType = TypeVar("ObsType")
|
|
|
|
|
# Type variable for the action type; second generic parameter of ``Env``.
ActType = TypeVar("ActType")
|
2022-06-08 00:20:56 +02:00
|
|
|
|
# Type variable for a single rendered frame; used in the return annotation of ``render``.
RenderFrame = TypeVar("RenderFrame")
|
|
|
|
|
|
|
|
|
|
|
2022-08-16 23:19:31 +08:00
|
|
|
|
class Env(Generic[ObsType, ActType]):
    r"""The main Gymnasium class for implementing Reinforcement Learning Agents environments.

    The class encapsulates an environment with arbitrary behind-the-scenes dynamics through the :meth:`step` and :meth:`reset` functions.
    An environment can be partially or fully observed by single agents. For multi-agent environments, see PettingZoo.

    The main API methods that users of this class need to know are:

    - :meth:`step` - Updates an environment with actions returning the next agent observation, the reward for taking that action,
      if the environment has terminated or truncated due to the latest action and information from the environment about the step, i.e. metrics, debug info.
    - :meth:`reset` - Resets the environment to an initial state, required before calling step.
      Returns the first agent observation for an episode and information, i.e. metrics, debug info.
    - :meth:`render` - Renders the environments to help visualise what the agent sees, example modes are "human", "rgb_array", "ansi" for text.
    - :meth:`close` - Closes the environment, important when external software is used, i.e. pygame for rendering, databases

    Environments have additional attributes for users to understand the implementation

    - :attr:`action_space` - The Space object corresponding to valid actions, all valid actions should be contained within the space.
    - :attr:`observation_space` - The Space object corresponding to valid observations, all valid observations should be contained within the space.
    - :attr:`reward_range` - A tuple corresponding to the minimum and maximum possible rewards for an agent over an episode.
      The default reward range is set to :math:`(-\infty,+\infty)`.
    - :attr:`spec` - An environment spec that contains the information used to initialize the environment from :meth:`gymnasium.make`
    - :attr:`metadata` - The metadata of the environment, i.e. render modes, render fps
    - :attr:`np_random` - The random number generator for the environment. This is automatically assigned during
      ``super().reset(seed=seed)`` and when accessing ``self.np_random``.

    .. seealso:: For modifying or extending environments use the :py:class:`gymnasium.Wrapper` class
    """

    # Set this in SOME subclasses
    metadata: dict[str, Any] = {"render_modes": []}
    # define render_mode if your environment supports rendering
    render_mode: str | None = None
    reward_range = (-float("inf"), float("inf"))
    spec: EnvSpec | None = None

    # Set these in ALL subclasses
    action_space: spaces.Space[ActType]
    observation_space: spaces.Space[ObsType]

    # Created lazily: by the ``np_random`` property on first access, or by ``reset(seed=...)``
    _np_random: np.random.Generator | None = None

    def step(
        self, action: ActType
    ) -> tuple[ObsType, SupportsFloat, bool, bool, dict[str, Any]]:
        """Run one timestep of the environment's dynamics using the agent actions.

        When the end of an episode is reached (``terminated or truncated``), it is necessary to call :meth:`reset` to
        reset this environment's state for the next episode.

        .. versionchanged:: 0.26

            The Step API was changed removing ``done`` in favor of ``terminated`` and ``truncated`` to make it clearer
            to users when the environment had terminated or truncated which is critical for reinforcement learning
            bootstrapping algorithms.

        Args:
            action (ActType): an action provided by the agent to update the environment state.

        Returns:
            observation (ObsType): An element of the environment's :attr:`observation_space` as the next observation due to the agent actions.
                An example is a numpy array containing the positions and velocities of the pole in CartPole.
            reward (SupportsFloat): The reward as a result of taking the action.
            terminated (bool): Whether the agent reaches the terminal state (as defined under the MDP of the task)
                which can be positive or negative. An example is reaching the goal state or moving into the lava from
                the Sutton and Barto, Gridworld. If true, the user needs to call :meth:`reset`.
            truncated (bool): Whether the truncation condition outside the scope of the MDP is satisfied.
                Typically, this is a timelimit, but could also be used to indicate an agent physically going out of bounds.
                Can be used to end the episode prematurely before a terminal state is reached.
                If true, the user needs to call :meth:`reset`.
            info (dict): Contains auxiliary diagnostic information (helpful for debugging, learning, and logging).
                This might, for instance, contain: metrics that describe the agent's performance state, variables that are
                hidden from observations, or individual reward terms that are combined to produce the total reward.
                In OpenAI Gym <v26, it contains "TimeLimit.truncated" to distinguish truncation and termination,
                however this is deprecated in favour of returning terminated and truncated variables.

        Note:
            ``done`` (bool) is deprecated and is *not* part of the returned tuple. It was a boolean value for if the
            episode has ended, in which case further :meth:`step` calls will return undefined results. It was removed
            in OpenAI Gym v26 in favor of the ``terminated`` and ``truncated`` values.
            A done signal may be emitted for different reasons: Maybe the task underlying the environment was solved successfully,
            a certain timelimit was exceeded, or the physics simulation has entered an invalid state.

        Raises:
            NotImplementedError: Always, in this base class; subclasses must override :meth:`step`.
        """
        raise NotImplementedError

    def reset(
        self,
        *,
        seed: int | None = None,
        options: dict[str, Any] | None = None,
    ) -> tuple[ObsType, dict[str, Any]]:  # type: ignore
        """Resets the environment to an initial internal state, returning an initial observation and info.

        This method generates a new starting state often with some randomness to ensure that the agent explores the
        state space and learns a generalised policy about the environment. This randomness can be controlled
        with the ``seed`` parameter otherwise if the environment already has a random number generator and
        :meth:`reset` is called with ``seed=None``, the RNG is not reset.

        Therefore, :meth:`reset` should (in the typical use case) be called with a seed right after initialization and then never again.

        For Custom environments, the first line of :meth:`reset` should be ``super().reset(seed=seed)`` which implements
        the seeding correctly.

        .. versionchanged:: v0.25

            The ``return_info`` parameter was removed and now info is expected to be returned.

        Note:
            This base implementation only (re)seeds the random number generator and returns ``None``;
            subclasses are responsible for building and returning ``(observation, info)``.

        Args:
            seed (optional int): The seed that is used to initialize the environment's PRNG (`np_random`).
                If the environment does not already have a PRNG and ``seed=None`` (the default option) is passed,
                a seed will be chosen from some source of entropy (e.g. timestamp or /dev/urandom).
                However, if the environment already has a PRNG and ``seed=None`` is passed, the PRNG will *not* be reset.
                If you pass an integer, the PRNG will be reset even if it already exists.
                Usually, you want to pass an integer *right after the environment has been initialized and then never again*.
            options (optional dict): Additional information to specify how the environment is reset (optional,
                depending on the specific environment)

        Returns:
            observation (ObsType): Observation of the initial state. This will be an element of :attr:`observation_space`
                (typically a numpy array) and is analogous to the observation returned by :meth:`step`.
            info (dictionary):  This dictionary contains auxiliary information complementing ``observation``. It should be analogous to
                the ``info`` returned by :meth:`step`.
        """
        # Initialize the RNG if the seed is manually passed
        if seed is not None:
            # The second element returned by ``seeding.np_random`` is not needed here.
            self._np_random, _ = seeding.np_random(seed)

    def render(self) -> RenderFrame | list[RenderFrame] | None:
        """Compute the render frames as specified by :attr:`render_mode` during the initialization of the environment.

        The environment's :attr:`metadata` render modes (`env.metadata["render_modes"]`) should contain the possible
        ways to implement the render modes. In addition, list versions for most render modes is achieved through
        `gymnasium.make` which automatically applies a wrapper to collect rendered frames.

        Note:
            As the :attr:`render_mode` is known during ``__init__``, the objects used to render the environment state
            should be initialised in ``__init__``.

        By convention, if the :attr:`render_mode` is:

        - None (default): no render is computed.
        - "human": The environment is continuously rendered in the current display or terminal, usually for human consumption.
          This rendering should occur during :meth:`step` and :meth:`render` doesn't need to be called. Returns ``None``.
        - "rgb_array": Return a single frame representing the current state of the environment.
          A frame is a ``np.ndarray`` with shape ``(x, y, 3)`` representing RGB values for an x-by-y pixel image.
        - "ansi": Return a strings (``str``) or ``StringIO.StringIO`` containing a terminal-style text representation
          for each time step. The text can include newlines and ANSI escape sequences (e.g. for colors).
        - "rgb_array_list" and "ansi_list": List based version of render modes are possible (except Human) through the
          wrapper, :py:class:`gymnasium.wrappers.RenderCollection` that is automatically applied during ``gymnasium.make(..., render_mode="rgb_array_list")``.
          The frames collected are popped after :meth:`render` is called or :meth:`reset`.

        Note:
            Make sure that your class's :attr:`metadata` ``"render_modes"`` key includes the list of supported modes.

        .. versionchanged:: 0.25.0

            The render function was changed to no longer accept parameters, rather these parameters should be specified
            in the environment initialised, i.e., ``gymnasium.make("CartPole-v1", render_mode="human")``

        Raises:
            NotImplementedError: Always, in this base class; subclasses must override :meth:`render`.
        """
        raise NotImplementedError

    def close(self):
        """After the user has finished using the environment, close contains the code necessary to "clean up" the environment.

        This is critical for closing rendering windows, database or HTTP connections.
        The base implementation does nothing.
        """

    @property
    def unwrapped(self) -> Env[ObsType, ActType]:
        """Returns the base non-wrapped environment.

        Returns:
            Env: The base non-wrapped :class:`gymnasium.Env` instance
        """
        return self

    @property
    def np_random(self) -> np.random.Generator:
        """Returns the environment's internal :attr:`_np_random` that if not set will initialise with a random seed.

        Returns:
            Instances of `np.random.Generator`
        """
        if self._np_random is None:
            # Lazily create an unseeded generator; the second returned element is not needed.
            self._np_random, _ = seeding.np_random()
        return self._np_random

    @np_random.setter
    def np_random(self, value: np.random.Generator):
        """Replaces the environment's internal random number generator with ``value``."""
        self._np_random = value

    def __str__(self):
        """Returns a string of the environment, including the :attr:`spec` id if :attr:`spec` is set.

        Returns:
            A string identifying the environment
        """
        if self.spec is None:
            return f"<{type(self).__name__} instance>"
        else:
            return f"<{type(self).__name__}<{self.spec.id}>>"

    def __enter__(self):
        """Support with-statement for the environment."""
        return self

    def __exit__(self, *args: Any):
        """Support with-statement for the environment and closes the environment."""
        self.close()
        # propagate exception
        return False
|
|
|
|
|
|
2018-02-26 17:35:07 +01:00
|
|
|
|
|
2022-11-12 10:21:24 +00:00
|
|
|
|
# Type variable for the observation type exposed by a ``Wrapper``; its first generic parameter.
WrapperObsType = TypeVar("WrapperObsType")
|
|
|
|
|
# Type variable for the action type accepted by a ``Wrapper``; its second generic parameter.
WrapperActType = TypeVar("WrapperActType")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Wrapper(Env[WrapperObsType, WrapperActType]):
    """Wraps a :class:`gymnasium.Env` to allow a modular transformation of the :meth:`step` and :meth:`reset` methods.

    This class is the base class of all wrappers to change the behavior of the underlying environment.
    Wrappers that inherit from this class can modify the :attr:`action_space`, :attr:`observation_space`,
    :attr:`reward_range` and :attr:`metadata` attributes, without changing the underlying environment's attributes.
    Moreover, the behavior of the :meth:`step` and :meth:`reset` methods can be changed by these wrappers.

    Some attributes (:attr:`spec`, :attr:`render_mode`, :attr:`np_random`) will point back to the wrapper's environment
    (i.e. to the corresponding attributes of :attr:`env`).

    Note:
        If you inherit from :class:`Wrapper`, don't forget to call ``super().__init__(env)``
    """

    def __init__(self, env: Env[ObsType, ActType]):
        """Wraps an environment to allow a modular transformation of the :meth:`step` and :meth:`reset` methods.

        Args:
            env: The environment to wrap
        """
        self.env = env

        # ``None`` means "not overridden": the matching property falls back to ``self.env``.
        self._action_space: spaces.Space[WrapperActType] | None = None
        self._observation_space: spaces.Space[WrapperObsType] | None = None
        self._reward_range: tuple[SupportsFloat, SupportsFloat] | None = None
        self._metadata: dict[str, Any] | None = None

    def __getattr__(self, name: str):
        """Returns an attribute with ``name`` from the wrapped :attr:`env`, unless ``name`` starts with an underscore.

        Python only calls this method after normal attribute lookup has failed.

        Raises:
            AttributeError: If ``name`` is ``"env"`` (the wrapper was never initialised), is ``"_np_random"``,
                starts with an underscore, or is missing on the wrapped environment.
        """
        if name == "env":
            # ``env`` is only looked up here when ``__init__`` never assigned it. Reading ``self.env``
            # below would re-enter ``__getattr__`` and recurse until ``RecursionError``.
            raise AttributeError(
                f"'{type(self).__name__}' object has no attribute 'env', "
                "did you forget to call `super().__init__(env)`?"
            )
        if name == "_np_random":
            raise AttributeError(
                "Can't access `_np_random` of a wrapper, use `self.unwrapped._np_random` or `self.np_random`."
            )
        elif name.startswith("_"):
            raise AttributeError(f"accessing private attribute '{name}' is prohibited")
        return getattr(self.env, name)

    @property
    def spec(self) -> EnvSpec | None:
        """Returns the :attr:`Env` :attr:`spec` attribute."""
        return self.env.spec

    @classmethod
    def class_name(cls) -> str:
        """Returns the class name of the wrapper."""
        return cls.__name__

    @property
    def action_space(
        self,
    ) -> spaces.Space[ActType] | spaces.Space[WrapperActType]:
        """Return the :attr:`Env` :attr:`action_space` unless overwritten then the wrapper :attr:`action_space` is used."""
        if self._action_space is None:
            return self.env.action_space
        return self._action_space

    @action_space.setter
    def action_space(self, space: spaces.Space[WrapperActType]):
        """Overrides the action space reported by this wrapper; the wrapped :attr:`env` is left untouched."""
        self._action_space = space

    @property
    def observation_space(
        self,
    ) -> spaces.Space[ObsType] | spaces.Space[WrapperObsType]:
        """Return the :attr:`Env` :attr:`observation_space` unless overwritten then the wrapper :attr:`observation_space` is used."""
        if self._observation_space is None:
            return self.env.observation_space
        return self._observation_space

    @observation_space.setter
    def observation_space(self, space: spaces.Space[WrapperObsType]):
        """Overrides the observation space reported by this wrapper; the wrapped :attr:`env` is left untouched."""
        self._observation_space = space

    @property
    def reward_range(self) -> tuple[SupportsFloat, SupportsFloat]:
        """Return the :attr:`Env` :attr:`reward_range` unless overwritten then the wrapper :attr:`reward_range` is used."""
        if self._reward_range is None:
            return self.env.reward_range
        return self._reward_range

    @reward_range.setter
    def reward_range(self, value: tuple[SupportsFloat, SupportsFloat]):
        """Overrides the reward range reported by this wrapper; the wrapped :attr:`env` is left untouched."""
        self._reward_range = value

    @property
    def metadata(self) -> dict[str, Any]:
        """Return the :attr:`Env` :attr:`metadata` unless overwritten then the wrapper :attr:`metadata` is used."""
        if self._metadata is None:
            return self.env.metadata
        return self._metadata

    @metadata.setter
    def metadata(self, value: dict[str, Any]):
        """Overrides the metadata reported by this wrapper; the wrapped :attr:`env` is left untouched."""
        self._metadata = value

    @property
    def render_mode(self) -> str | None:
        """Returns the :attr:`Env` :attr:`render_mode`."""
        return self.env.render_mode

    @property
    def np_random(self) -> np.random.Generator:
        """Returns the :attr:`Env` :attr:`np_random` attribute."""
        return self.env.np_random

    @np_random.setter
    def np_random(self, value: np.random.Generator):
        """Sets the :attr:`np_random` of the wrapped :attr:`env`."""
        self.env.np_random = value

    @property
    def _np_random(self):
        """Blocks access to ``_np_random`` on a wrapper.

        This property shadows the ``_np_random`` class attribute inherited from :class:`Env`; without it,
        ``wrapper._np_random`` would resolve to that inherited class-level default instead of failing.

        Accessing ``wrapper._np_random`` runs this getter, which raises ``AttributeError``. Because the
        failure is an ``AttributeError``, Python then falls back to :meth:`__getattr__`, which raises its own
        ``AttributeError`` for ``_np_random``; that second error is the one the caller sees.
        """
        raise AttributeError(
            "Can't access `_np_random` of a wrapper, use `.unwrapped._np_random` or `.np_random`."
        )

    def step(
        self, action: WrapperActType
    ) -> tuple[WrapperObsType, SupportsFloat, bool, bool, dict[str, Any]]:
        """Uses the :meth:`step` of the :attr:`env` that can be overwritten to change the returned data."""
        return self.env.step(action)

    def reset(
        self, *, seed: int | None = None, options: dict[str, Any] | None = None
    ) -> tuple[WrapperObsType, dict[str, Any]]:
        """Uses the :meth:`reset` of the :attr:`env` that can be overwritten to change the returned data."""
        return self.env.reset(seed=seed, options=options)

    def render(self) -> RenderFrame | list[RenderFrame] | None:
        """Uses the :meth:`render` of the :attr:`env` that can be overwritten to change the returned data."""
        return self.env.render()

    def close(self):
        """Closes the wrapper and :attr:`env`."""
        return self.env.close()

    def __str__(self):
        """Returns the wrapper name and the :attr:`env` representation string."""
        return f"<{type(self).__name__}{self.env}>"

    def __repr__(self):
        """Returns the string representation of the wrapper."""
        return str(self)

    @property
    def unwrapped(self) -> Env[ObsType, ActType]:
        """Returns the base environment of the wrapper.

        This will be the bare :class:`gymnasium.Env` environment, underneath all layers of wrappers.
        """
        return self.env.unwrapped
|
2016-09-04 00:38:03 -07:00
|
|
|
|
|
|
|
|
|
|
2022-11-12 10:21:24 +00:00
|
|
|
|
class ObservationWrapper(Wrapper[WrapperObsType, ActType]):
|
2022-05-13 13:58:19 +01:00
|
|
|
|
"""Superclass of wrappers that can modify observations using :meth:`observation` for :meth:`reset` and :meth:`step`.
|
|
|
|
|
|
2022-10-12 15:58:01 +01:00
|
|
|
|
If you would like to apply a function to only the observation before
|
2022-10-10 14:19:17 +02:00
|
|
|
|
passing it to the learning code, you can simply inherit from :class:`ObservationWrapper` and overwrite the method
|
2022-05-13 13:58:19 +01:00
|
|
|
|
:meth:`observation` to implement that transformation. The transformation defined in that method must be
|
2022-10-12 15:58:01 +01:00
|
|
|
|
reflected by the :attr:`env` observation space. Otherwise, you need to specify the new observation space of the
|
|
|
|
|
wrapper by setting :attr:`self.observation_space` in the :meth:`__init__` method of your wrapper.
|
2022-05-13 13:58:19 +01:00
|
|
|
|
|
2022-09-08 10:58:14 +01:00
|
|
|
|
Among others, Gymnasium provides the observation wrapper :class:`TimeAwareObservation`, which adds information about the
|
2022-05-13 13:58:19 +01:00
|
|
|
|
index of the timestep to the observation.
|
|
|
|
|
"""
|
2022-05-10 15:35:45 +01:00
|
|
|
|
|
2022-11-12 10:21:24 +00:00
|
|
|
|
def __init__(self, env: Env[ObsType, ActType]):
|
|
|
|
|
"""Constructor for the observation wrapper."""
|
|
|
|
|
super().__init__(env)
|
|
|
|
|
|
|
|
|
|
def reset(
|
|
|
|
|
self, *, seed: int | None = None, options: dict[str, Any] | None = None
|
|
|
|
|
) -> tuple[WrapperObsType, dict[str, Any]]:
|
2022-10-12 15:58:01 +01:00
|
|
|
|
"""Modifies the :attr:`env` after calling :meth:`reset`, returning a modified observation using :meth:`self.observation`."""
|
2022-11-12 10:21:24 +00:00
|
|
|
|
obs, info = self.env.reset(seed=seed, options=options)
|
2022-08-23 11:09:54 -04:00
|
|
|
|
return self.observation(obs), info
|
Cleanup, removal of unmaintained code (#836)
* add dtype to Box
* remove board_game, debugging, safety, parameter_tuning environments
* massive set of breaking changes
- remove python logging module
- _step, _reset, _seed, _close => non underscored method
- remove benchmark and scoring folder
* Improve render("human"), now resizable, closable window.
* get rid of default step and reset in wrappers, so it doesn’t silently fail for people with underscore methods
* CubeCrash unit test environment
* followup fixes
* MemorizeDigits unit test envrionment
* refactored spaces a bit
fixed indentation
disabled test_env_semantics
* fix unit tests
* fixes
* CubeCrash, MemorizeDigits tested
* gym backwards compatibility patch
* gym backwards compatibility, followup fixes
* changelist, add spaces to main namespaces
* undo_logger_setup for backwards compat
* remove configuration.py
2018-01-25 18:20:14 -08:00
|
|
|
|
|
2022-11-12 10:21:24 +00:00
|
|
|
|
def step(
    self, action: ActType
) -> tuple[WrapperObsType, SupportsFloat, bool, bool, dict[str, Any]]:
    """Steps the wrapped :attr:`env` and passes the resulting observation through :meth:`observation`.

    Args:
        action: The action forwarded unchanged to the wrapped environment's :meth:`step`.

    Returns:
        The five-element step result of the wrapped environment, with only the
        observation replaced by its transformed counterpart.
    """
    step_result = self.env.step(action)
    # Only the observation is rewritten; the other four values pass straight through.
    raw_obs, reward, terminated, truncated, info = step_result
    transformed_obs = self.observation(raw_obs)
    return transformed_obs, reward, terminated, truncated, info
|
2019-05-03 23:53:31 +02:00
|
|
|
|
|
2022-11-12 10:21:24 +00:00
|
|
|
|
def observation(self, observation: ObsType) -> WrapperObsType:
    """Transforms an observation produced by the wrapped :attr:`env`.

    This is the hook that subclasses override; the base implementation
    provides no transformation and always raises.

    Args:
        observation: The observation returned by the wrapped environment.

    Returns:
        The transformed observation.

    Raises:
        NotImplementedError: Always, unless overridden by a subclass.
    """
    raise NotImplementedError
|
2016-09-04 00:38:03 -07:00
|
|
|
|
|
|
|
|
|
|
2022-11-12 10:21:24 +00:00
|
|
|
|
class RewardWrapper(Wrapper[ObsType, ActType]):
    """Base class for wrappers that transform the reward returned by :meth:`step`.

    To apply a function to the reward of the base environment before it reaches
    the learning code, inherit from :class:`RewardWrapper` and override
    :meth:`reward` with that transformation.

    The transformation may change the :attr:`reward_range`; to declare the
    :attr:`reward_range` of your wrapper, set :attr:`self.reward_range` in
    :meth:`__init__`.
    """

    def __init__(self, env: Env[ObsType, ActType]):
        """Wraps ``env`` by delegating construction to the parent wrapper class.

        Args:
            env: The environment to wrap.
        """
        super().__init__(env)

    def step(
        self, action: ActType
    ) -> tuple[ObsType, SupportsFloat, bool, bool, dict[str, Any]]:
        """Steps the wrapped :attr:`env` and passes the resulting reward through :meth:`reward`.

        Args:
            action: The action forwarded unchanged to the wrapped environment's :meth:`step`.

        Returns:
            The five-element step result of the wrapped environment, with only
            the reward replaced by its transformed counterpart.
        """
        step_result = self.env.step(action)
        # Only the reward is rewritten; the other four values pass straight through.
        obs, raw_reward, terminated, truncated, info = step_result
        transformed_reward = self.reward(raw_reward)
        return obs, transformed_reward, terminated, truncated, info

    def reward(self, reward: SupportsFloat) -> SupportsFloat:
        """Transforms a ``reward`` produced by the wrapped environment.

        This is the hook that subclasses override; the base implementation
        provides no transformation and always raises.

        Args:
            reward: The reward returned by the wrapped :attr:`env` :meth:`step`.

        Returns:
            The transformed ``reward``.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError
|
2016-09-04 00:38:03 -07:00
|
|
|
|
|
|
|
|
|
|
2022-11-12 10:21:24 +00:00
|
|
|
|
class ActionWrapper(Wrapper[ObsType, WrapperActType]):
    """Base class for wrappers that transform the action before :meth:`env.step` is called.

    To apply a function to the action before it is passed to the base
    environment, inherit from :class:`ActionWrapper` and override
    :meth:`action` with that transformation. The values produced by that
    method must lie in the base environment's action space, although the
    inputs it accepts may come from a different space. In that case, declare
    the new action space of the wrapper by setting :attr:`self.action_space`
    in the :meth:`__init__` method of your wrapper.

    Among others, Gymnasium provides the action wrappers :class:`ClipAction`
    and :class:`RescaleAction` for clipping and rescaling actions.
    """

    def __init__(self, env: Env[ObsType, ActType]):
        """Wraps ``env`` by delegating construction to the parent wrapper class.

        Args:
            env: The environment to wrap.
        """
        super().__init__(env)

    def step(
        self, action: WrapperActType
    ) -> tuple[ObsType, SupportsFloat, bool, bool, dict[str, Any]]:
        """Transforms ``action`` with :meth:`action` and steps the wrapped :attr:`env` with the result.

        Args:
            action: The action supplied to the wrapper.

        Returns:
            The step result of the wrapped environment, returned unchanged.
        """
        env_action = self.action(action)
        return self.env.step(env_action)

    def action(self, action: WrapperActType) -> ActType:
        """Transforms an action before it is handed to :meth:`env.step`.

        This is the hook that subclasses override; the base implementation
        provides no transformation and always raises.

        Args:
            action: The action originally passed to :meth:`step`.

        Returns:
            The transformed action.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError
|