2022-05-10 15:35:45 +01:00
""" Core API for Environment, Wrapper, ActionWrapper, RewardWrapper and ObservationWrapper. """
2022-11-12 10:21:24 +00:00
from __future__ import annotations
2023-02-24 11:34:20 +00:00
from copy import deepcopy
2022-11-12 10:21:24 +00:00
from typing import TYPE_CHECKING , Any , Generic , SupportsFloat , TypeVar
2021-08-05 16:35:07 +02:00
2022-08-22 09:20:28 -04:00
import numpy as np
2023-11-07 13:27:25 +00:00
from gymnasium import spaces
2023-02-24 11:34:20 +00:00
from gymnasium . utils import RecordConstructorArgs , seeding
2016-05-27 12:16:35 -07:00
2022-12-04 22:24:02 +08:00
2022-07-04 18:19:25 +01:00
if TYPE_CHECKING :
2023-03-08 14:07:09 +00:00
from gymnasium . envs . registration import EnvSpec , WrapperSpec
2022-07-04 18:19:25 +01:00
2021-12-22 19:12:57 +01:00
# Type variables for the observation type and the action type of an environment.
# `Env` is generic over both, so `step`/`reset` signatures can be typed per environment.
ObsType = TypeVar ( " ObsType " )
ActType = TypeVar ( " ActType " )
2022-06-08 00:20:56 +02:00
# Type variable for a single rendered frame; used in the return annotation of `render`.
RenderFrame = TypeVar ( " RenderFrame " )
2022-08-16 23:19:31 +08:00
class Env(Generic[ObsType, ActType]):
    r"""The main Gymnasium class for implementing Reinforcement Learning Agents environments.

    The class encapsulates an environment with arbitrary behind-the-scenes dynamics through the :meth:`step` and :meth:`reset` functions.
    An environment can be partially or fully observed by single agents. For multi-agent environments, see PettingZoo.

    The main API methods that users of this class need to know are:

    - :meth:`step` - Updates an environment with actions returning the next agent observation, the reward for taking that action,
      if the environment has terminated or truncated due to the latest action and information from the environment about the step, i.e. metrics, debug info.
    - :meth:`reset` - Resets the environment to an initial state, required before calling step.
      Returns the first agent observation for an episode and information, i.e. metrics, debug info.
    - :meth:`render` - Renders the environment to help visualise what the agent sees, example modes are "human", "rgb_array", "ansi" for text.
    - :meth:`close` - Closes the environment, important when external software is used, i.e. pygame for rendering, databases

    Environments have additional attributes for users to understand the implementation

    - :attr:`action_space` - The Space object corresponding to valid actions, all valid actions should be contained within the space.
    - :attr:`observation_space` - The Space object corresponding to valid observations, all valid observations should be contained within the space.
    - :attr:`spec` - An environment spec that contains the information used to initialize the environment from :meth:`gymnasium.make`
    - :attr:`metadata` - The metadata of the environment, i.e. render modes, render fps
    - :attr:`np_random` - The random number generator for the environment. This is automatically assigned during
      ``super().reset(seed=seed)`` and when accessing :attr:`np_random`.

    .. seealso:: For modifying or extending environments use the :py:class:`gymnasium.Wrapper` class

    Note:
        To get reproducible sampling of actions, a seed can be set with ``env.action_space.seed(123)``.
    """

    # Set this in SOME subclasses
    metadata: dict[str, Any] = {"render_modes": []}
    # define render_mode if your environment supports rendering
    render_mode: str | None = None
    spec: EnvSpec | None = None

    # Set these in ALL subclasses
    action_space: spaces.Space[ActType]
    observation_space: spaces.Space[ObsType]

    # Created lazily: populated by `reset(seed=...)` or on first access of `np_random`
    _np_random: np.random.Generator | None = None

    def step(
        self, action: ActType
    ) -> tuple[ObsType, SupportsFloat, bool, bool, dict[str, Any]]:
        """Run one timestep of the environment's dynamics using the agent actions.

        When the end of an episode is reached (``terminated or truncated``), it is necessary to call :meth:`reset` to
        reset this environment's state for the next episode.

        .. versionchanged:: 0.26

            The Step API was changed removing ``done`` in favor of ``terminated`` and ``truncated`` to make it clearer
            to users when the environment had terminated or truncated which is critical for reinforcement learning
            bootstrapping algorithms.

        Args:
            action (ActType): an action provided by the agent to update the environment state.

        Returns:
            observation (ObsType): An element of the environment's :attr:`observation_space` as the next observation due to the agent actions.
                An example is a numpy array containing the positions and velocities of the pole in CartPole.
            reward (SupportsFloat): The reward as a result of taking the action.
            terminated (bool): Whether the agent reaches the terminal state (as defined under the MDP of the task)
                which can be positive or negative. An example is reaching the goal state or moving into the lava from
                the Sutton and Barto, Gridworld. If true, the user needs to call :meth:`reset`.
            truncated (bool): Whether the truncation condition outside the scope of the MDP is satisfied.
                Typically, this is a timelimit, but could also be used to indicate an agent physically going out of bounds.
                Can be used to end the episode prematurely before a terminal state is reached.
                If true, the user needs to call :meth:`reset`.
            info (dict): Contains auxiliary diagnostic information (helpful for debugging, learning, and logging).
                This might, for instance, contain: metrics that describe the agent's performance state, variables that are
                hidden from observations, or individual reward terms that are combined to produce the total reward.
                In OpenAI Gym <v26, it contains "TimeLimit.truncated" to distinguish truncation and termination,
                however this is deprecated in favour of returning terminated and truncated variables.

        Raises:
            NotImplementedError: Always, in this base class; subclasses must override this method.

        Note:
            Earlier APIs returned a single ``done`` (bool) flag signalling that the episode had ended, after which further
            :meth:`step` calls return undefined results. It was removed in OpenAI Gym v26 in favor of ``terminated`` and
            ``truncated`` and is *not* part of the returned tuple. A done signal could be emitted for different reasons:
            the task underlying the environment was solved successfully, a certain timelimit was exceeded, or the
            physics simulation entered an invalid state.
        """
        raise NotImplementedError

    def reset(
        self,
        *,
        seed: int | None = None,
        options: dict[str, Any] | None = None,
    ) -> tuple[ObsType, dict[str, Any]]:  # type: ignore
        """Resets the environment to an initial internal state, returning an initial observation and info.

        This method generates a new starting state often with some randomness to ensure that the agent explores the
        state space and learns a generalised policy about the environment. This randomness can be controlled
        with the ``seed`` parameter otherwise if the environment already has a random number generator and
        :meth:`reset` is called with ``seed=None``, the RNG is not reset.

        Therefore, :meth:`reset` should (in the typical use case) be called with a seed right after initialization and then never again.

        For Custom environments, the first line of :meth:`reset` should be ``super().reset(seed=seed)`` which implements
        the seeding correctly. This base implementation only (re)seeds the random number generator and does not
        itself return anything; subclasses are responsible for returning ``(observation, info)``.

        .. versionchanged:: v0.25

            The ``return_info`` parameter was removed and now info is expected to be returned.

        Args:
            seed (optional int): The seed that is used to initialize the environment's PRNG (`np_random`).
                If the environment does not already have a PRNG and ``seed=None`` (the default option) is passed,
                a seed will be chosen from some source of entropy (e.g. timestamp or /dev/urandom).
                However, if the environment already has a PRNG and ``seed=None`` is passed, the PRNG will *not* be reset.
                If you pass an integer, the PRNG will be reset even if it already exists.
                Usually, you want to pass an integer *right after the environment has been initialized and then never again*.
            options (optional dict): Additional information to specify how the environment is reset (optional,
                depending on the specific environment). Unused by this base implementation.

        Returns:
            observation (ObsType): Observation of the initial state. This will be an element of :attr:`observation_space`
                (typically a numpy array) and is analogous to the observation returned by :meth:`step`.
            info (dictionary): This dictionary contains auxiliary information complementing ``observation``. It should be analogous to
                the ``info`` returned by :meth:`step`.
        """
        # Initialize the RNG if the seed is manually passed; the seed value
        # handed back by `seeding.np_random` is not needed here.
        if seed is not None:
            self._np_random, _ = seeding.np_random(seed)

    def render(self) -> RenderFrame | list[RenderFrame] | None:
        """Compute the render frames as specified by :attr:`render_mode` during the initialization of the environment.

        The environment's :attr:`metadata` render modes (`env.metadata["render_modes"]`) should contain the possible
        ways to implement the render modes. In addition, list versions for most render modes is achieved through
        `gymnasium.make` which automatically applies a wrapper to collect rendered frames.

        Note:
            As the :attr:`render_mode` is known during ``__init__``, the objects used to render the environment state
            should be initialised in ``__init__``.

        By convention, if the :attr:`render_mode` is:

        - None (default): no render is computed.
        - "human": The environment is continuously rendered in the current display or terminal, usually for human consumption.
          This rendering should occur during :meth:`step` and :meth:`render` doesn't need to be called. Returns ``None``.
        - "rgb_array": Return a single frame representing the current state of the environment.
          A frame is a ``np.ndarray`` with shape ``(x, y, 3)`` representing RGB values for an x-by-y pixel image.
        - "ansi": Return a string (``str``) or ``StringIO.StringIO`` containing a terminal-style text representation
          for each time step. The text can include newlines and ANSI escape sequences (e.g. for colors).
        - "rgb_array_list" and "ansi_list": List based version of render modes are possible (except Human) through the
          wrapper, :py:class:`gymnasium.wrappers.RenderCollection` that is automatically applied during ``gymnasium.make(..., render_mode="rgb_array_list")``.
          The frames collected are popped after :meth:`render` is called or :meth:`reset`.

        Note:
            Make sure that your class's :attr:`metadata` ``"render_modes"`` key includes the list of supported modes.

        .. versionchanged:: 0.25.0

            The render function was changed to no longer accept parameters, rather these parameters should be specified
            in the environment initialised, i.e., ``gymnasium.make("CartPole-v1", render_mode="human")``

        Raises:
            NotImplementedError: Always, in this base class; subclasses that support rendering must override it.
        """
        raise NotImplementedError

    def close(self):
        """After the user has finished using the environment, close contains the code necessary to "clean up" the environment.

        This is critical for closing rendering windows, database or HTTP connections.
        Calling ``close`` on an already closed environment has no effect and won't raise an error.

        The base implementation does nothing.
        """

    @property
    def unwrapped(self) -> Env[ObsType, ActType]:
        """Returns the base non-wrapped environment.

        Returns:
            Env: The base non-wrapped :class:`gymnasium.Env` instance (for a plain ``Env`` this is ``self``)
        """
        return self

    @property
    def np_random(self) -> np.random.Generator:
        """Returns the environment's internal :attr:`_np_random` that if not set will initialise with a random seed.

        Returns:
            Instances of `np.random.Generator`
        """
        # Lazily create an unseeded generator on first access.
        if self._np_random is None:
            self._np_random, _ = seeding.np_random()
        return self._np_random

    @np_random.setter
    def np_random(self, value: np.random.Generator):
        """Replaces the environment's internal random number generator with ``value``."""
        self._np_random = value

    def __str__(self):
        """Returns a string of the environment, including the :attr:`spec` id if :attr:`spec` is set.

        Returns:
            A string identifying the environment
        """
        class_name = type(self).__name__
        if self.spec is None:
            return f"<{class_name} instance>"
        return f"<{class_name}<{self.spec.id}>>"

    def __enter__(self):
        """Support with-statement for the environment."""
        return self

    def __exit__(self, *args: Any):
        """Support with-statement for the environment and closes the environment."""
        self.close()
        # Returning False propagates any exception raised inside the with-block.
        return False

    def get_wrapper_attr(self, name: str) -> Any:
        """Gets the attribute `name` from the environment."""
        return getattr(self, name)

    def set_wrapper_attr(self, name: str, value: Any):
        """Sets the attribute `name` on the environment with `value`."""
        setattr(self, name, value)
2018-02-26 17:35:07 +01:00
2022-11-12 10:21:24 +00:00
# Type variables for the observation and action types exposed by a wrapper,
# as opposed to `ObsType` / `ActType`, which describe the environment being wrapped.
WrapperObsType = TypeVar ( " WrapperObsType " )
WrapperActType = TypeVar ( " WrapperActType " )
2023-02-22 13:58:29 +00:00
class Wrapper(
    Env[WrapperObsType, WrapperActType],
    Generic[WrapperObsType, WrapperActType, ObsType, ActType],
):
    """Wraps a :class:`gymnasium.Env` to allow a modular transformation of the :meth:`step` and :meth:`reset` methods.

    This class is the base class of all wrappers to change the behavior of the underlying environment.
    Wrappers that inherit from this class can modify the :attr:`action_space`, :attr:`observation_space`
    and :attr:`metadata` attributes, without changing the underlying environment's attributes.
    Moreover, the behavior of the :meth:`step` and :meth:`reset` methods can be changed by these wrappers.

    Some attributes (:attr:`spec`, :attr:`render_mode`, :attr:`np_random`) will point back to the wrapper's environment
    (i.e. to the corresponding attributes of :attr:`env`).

    Note:
        If you inherit from :class:`Wrapper`, don't forget to call ``super().__init__(env)``
    """

    def __init__(self, env: Env[ObsType, ActType]):
        """Wraps an environment to allow a modular transformation of the :meth:`step` and :meth:`reset` methods.

        Args:
            env: The environment to wrap

        Raises:
            AssertionError: If ``env`` is not an instance of :class:`Env`.
        """
        self.env = env
        # Kept as an assert (rather than a TypeError) so the exception type callers may rely on is unchanged.
        assert isinstance(
            env, Env
        ), f"Expected env to be a `gymnasium.Env` but got {type(env)}"

        # `None` means "not overridden": the matching property falls back to `self.env`.
        self._action_space: spaces.Space[WrapperActType] | None = None
        self._observation_space: spaces.Space[WrapperObsType] | None = None
        self._metadata: dict[str, Any] | None = None

        self._cached_spec: EnvSpec | None = None

    def step(
        self, action: WrapperActType
    ) -> tuple[WrapperObsType, SupportsFloat, bool, bool, dict[str, Any]]:
        """Uses the :meth:`step` of the :attr:`env` that can be overwritten to change the returned data."""
        return self.env.step(action)

    def reset(
        self, *, seed: int | None = None, options: dict[str, Any] | None = None
    ) -> tuple[WrapperObsType, dict[str, Any]]:
        """Uses the :meth:`reset` of the :attr:`env` that can be overwritten to change the returned data."""
        return self.env.reset(seed=seed, options=options)

    def render(self) -> RenderFrame | list[RenderFrame] | None:
        """Uses the :meth:`render` of the :attr:`env` that can be overwritten to change the returned data."""
        return self.env.render()

    def close(self):
        """Closes the wrapper and :attr:`env`."""
        return self.env.close()

    @property
    def unwrapped(self) -> Env[ObsType, ActType]:
        """Returns the base environment of the wrapper.

        This will be the bare :class:`gymnasium.Env` environment, underneath all layers of wrappers.
        """
        return self.env.unwrapped

    @property
    def spec(self) -> EnvSpec | None:
        """Returns the :attr:`Env` :attr:`spec` attribute with the `WrapperSpec` if the wrapper inherits from `EzPickle`.

        The result is cached on the instance once a non-``None`` spec has been built. If the wrapped
        environment has no spec, ``None`` is returned and nothing is cached.
        """
        if self._cached_spec is not None:
            return self._cached_spec

        env_spec = self.env.spec
        if env_spec is not None:
            # See if the wrapper inherits from `RecordConstructorArgs` then add the kwargs otherwise use `None`
            # for the wrapper kwargs. This will raise an error in `make`
            if isinstance(self, RecordConstructorArgs):
                kwargs = getattr(self, "_saved_kwargs")
                if "env" in kwargs:
                    # Copy before popping so the kwargs recorded on the instance are left untouched.
                    kwargs = deepcopy(kwargs)
                    kwargs.pop("env")
            else:
                kwargs = None

            # Imported locally, mirroring the TYPE_CHECKING-only import at the top of the file.
            from gymnasium.envs.registration import WrapperSpec

            wrapper_spec = WrapperSpec(
                name=self.class_name(),
                entry_point=f"{self.__module__}:{type(self).__name__}",
                kwargs=kwargs,
            )

            # to avoid reference issues we deepcopy the prior environments spec and add the new information
            env_spec = deepcopy(env_spec)
            env_spec.additional_wrappers += (wrapper_spec,)

        self._cached_spec = env_spec
        return env_spec

    @classmethod
    def wrapper_spec(cls, **kwargs: Any) -> WrapperSpec:
        """Generates a `WrapperSpec` for the wrappers.

        Args:
            **kwargs: The keyword arguments stored in the spec's ``kwargs`` field.

        Returns:
            A `WrapperSpec` whose name and entry point are derived from ``cls``.
        """
        from gymnasium.envs.registration import WrapperSpec

        return WrapperSpec(
            name=cls.class_name(),
            entry_point=f"{cls.__module__}:{cls.__name__}",
            kwargs=kwargs,
        )

    def get_wrapper_attr(self, name: str) -> Any:
        """Gets an attribute from the wrapper and lower environments if `name` doesn't exist in this object.

        Args:
            name: The variable name to get

        Returns:
            The variable with name in wrapper or lower environments

        Raises:
            AttributeError: If neither this wrapper nor any lower environment has the attribute.
        """
        # Single lookup instead of `hasattr` + `getattr`, which evaluated properties twice.
        try:
            return getattr(self, name)
        except AttributeError:
            pass

        try:
            return self.env.get_wrapper_attr(name)
        except AttributeError as e:
            raise AttributeError(
                f"wrapper {self.class_name()} has no attribute {name!r}"
            ) from e

    def set_wrapper_attr(self, name: str, value: Any):
        """Sets an attribute on this wrapper or lower environment if `name` is already defined.

        The search starts at this wrapper and walks down through the wrapped environments. The attribute
        is set on the first wrapper that already defines `name`; if no wrapper defines it, it is set on the
        innermost (non-wrapper) environment, whether or not it is defined there.

        Args:
            name: The variable name
            value: The new variable value
        """
        # Start at `self`: starting at `self.env` would skip attributes defined on this wrapper.
        target = self
        while isinstance(target, Wrapper):
            if hasattr(target, name):
                setattr(target, name, value)
                return
            target = target.env

        # No wrapper in the stack defines `name`; fall back to the base environment.
        setattr(target, name, value)

    def __str__(self):
        """Returns the wrapper name and the :attr:`env` representation string."""
        return f"<{type(self).__name__}{self.env}>"

    def __repr__(self):
        """Returns the string representation of the wrapper."""
        return str(self)

    @classmethod
    def class_name(cls) -> str:
        """Returns the class name of the wrapper."""
        return cls.__name__

    @property
    def action_space(
        self,
    ) -> spaces.Space[ActType] | spaces.Space[WrapperActType]:
        """Return the :attr:`Env` :attr:`action_space` unless overwritten then the wrapper :attr:`action_space` is used."""
        if self._action_space is None:
            return self.env.action_space
        return self._action_space

    @action_space.setter
    def action_space(self, space: spaces.Space[WrapperActType]):
        """Overrides the action space reported by this wrapper (the wrapped env is left unchanged)."""
        self._action_space = space

    @property
    def observation_space(
        self,
    ) -> spaces.Space[ObsType] | spaces.Space[WrapperObsType]:
        """Return the :attr:`Env` :attr:`observation_space` unless overwritten then the wrapper :attr:`observation_space` is used."""
        if self._observation_space is None:
            return self.env.observation_space
        return self._observation_space

    @observation_space.setter
    def observation_space(self, space: spaces.Space[WrapperObsType]):
        """Overrides the observation space reported by this wrapper (the wrapped env is left unchanged)."""
        self._observation_space = space

    @property
    def metadata(self) -> dict[str, Any]:
        """Returns the :attr:`Env` :attr:`metadata` unless overwritten on this wrapper."""
        if self._metadata is None:
            return self.env.metadata
        return self._metadata

    @metadata.setter
    def metadata(self, value: dict[str, Any]):
        """Overrides the metadata reported by this wrapper (the wrapped env is left unchanged)."""
        self._metadata = value

    @property
    def render_mode(self) -> str | None:
        """Returns the :attr:`Env` :attr:`render_mode`."""
        return self.env.render_mode

    @property
    def np_random(self) -> np.random.Generator:
        """Returns the :attr:`Env` :attr:`np_random` attribute."""
        return self.env.np_random

    @np_random.setter
    def np_random(self, value: np.random.Generator):
        """Sets the :attr:`np_random` of the wrapped :attr:`env`."""
        self.env.np_random = value

    @property
    def _np_random(self):
        """Disallows direct access to `_np_random` on a wrapper.

        The generator lives on the wrapped environment, so reading it here always raises. Because the
        error is an ``AttributeError``, ``hasattr(wrapper, "_np_random")`` is ``False``.

        Raises:
            AttributeError: Always, pointing to `.unwrapped._np_random` or `.np_random` instead.
        """
        raise AttributeError(
            "Can't access `_np_random` of a wrapper, use `.unwrapped._np_random` or `.np_random`."
        )
2016-09-04 00:38:03 -07:00
2023-02-22 13:58:29 +00:00
class ObservationWrapper(Wrapper[WrapperObsType, ActType, ObsType, ActType]):
    """Base class for wrappers that transform the observations of :meth:`Env.reset` and :meth:`Env.step`.

    To apply a function to the observation only, before it reaches the learning code, inherit from
    :class:`ObservationWrapper` and override :meth:`observation` with that transformation.
    The transformed observations must be elements of the :attr:`env` observation space; otherwise set
    the new observation space via :attr:`self.observation_space` in the :meth:`__init__` of your wrapper.
    """

    def __init__(self, env: Env[ObsType, ActType]):
        """Initialises the observation wrapper around ``env``."""
        # Explicit base-class call (not `super()`), exactly as in the original implementation.
        Wrapper.__init__(self, env)

    def reset(
        self, *, seed: int | None = None, options: dict[str, Any] | None = None
    ) -> tuple[WrapperObsType, dict[str, Any]]:
        """Resets the :attr:`env` and returns the initial observation passed through :meth:`self.observation`."""
        initial_obs, reset_info = self.env.reset(seed=seed, options=options)
        transformed_obs = self.observation(initial_obs)
        return transformed_obs, reset_info

    def step(
        self, action: ActType
    ) -> tuple[WrapperObsType, SupportsFloat, bool, bool, dict[str, Any]]:
        """Steps the :attr:`env` and returns its result with the observation passed through :meth:`self.observation`."""
        raw_obs, step_reward, is_terminated, is_truncated, step_info = self.env.step(
            action
        )
        transformed_obs = self.observation(raw_obs)
        return transformed_obs, step_reward, is_terminated, is_truncated, step_info

    def observation(self, observation: ObsType) -> WrapperObsType:
        """Transforms an observation; must be overridden by subclasses.

        Args:
            observation: The :attr:`env` observation

        Returns:
            The modified observation

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError
2016-09-04 00:38:03 -07:00
2023-02-22 13:58:29 +00:00
class RewardWrapper(Wrapper[ObsType, ActType, ObsType, ActType]):
    """Base class for wrappers that transform the reward returned by a step.

    To apply a function to the reward returned by the base environment before it reaches the learning
    code, inherit from :class:`RewardWrapper` and override :meth:`reward` with that transformation.

    NOTE(review): earlier documentation suggested defining ``self.reward_range`` in :meth:`__init__`
    when the transformation changes the range of rewards; no ``reward_range`` attribute is defined on
    the environment base class in this file - confirm before relying on it.
    """

    def __init__(self, env: Env[ObsType, ActType]):
        """Initialises the reward wrapper around ``env``."""
        # Explicit base-class call (not `super()`), exactly as in the original implementation.
        Wrapper.__init__(self, env)

    def step(
        self, action: ActType
    ) -> tuple[ObsType, SupportsFloat, bool, bool, dict[str, Any]]:
        """Steps the :attr:`env` and returns its result with the reward passed through :meth:`self.reward`."""
        next_obs, raw_reward, is_terminated, is_truncated, step_info = self.env.step(
            action
        )
        transformed_reward = self.reward(raw_reward)
        return next_obs, transformed_reward, is_terminated, is_truncated, step_info

    def reward(self, reward: SupportsFloat) -> SupportsFloat:
        """Transforms an environment ``reward``; must be overridden by subclasses.

        Args:
            reward: The :attr:`env` :meth:`step` reward

        Returns:
            The modified `reward`

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError
2016-09-04 00:38:03 -07:00
2023-02-22 13:58:29 +00:00
class ActionWrapper(Wrapper[ObsType, WrapperActType, ObsType, ActType]):
    """Base class for wrappers that transform the action before :meth:`env.step`.

    To apply a function to the action before it is passed to the base environment, inherit from
    :class:`ActionWrapper` and override :meth:`action` with that transformation.
    The values produced by that method must lie in the base environment's action space, although its
    domain may differ from the original action space. In that case, set the new action space of the
    wrapper via :attr:`self.action_space` in the :meth:`__init__` method of your wrapper.

    Among others, Gymnasium provides the action wrappers :class:`ClipAction` and :class:`RescaleAction` for clipping and rescaling actions.
    """

    def __init__(self, env: Env[ObsType, ActType]):
        """Initialises the action wrapper around ``env``."""
        # Explicit base-class call (not `super()`), exactly as in the original implementation.
        Wrapper.__init__(self, env)

    def step(
        self, action: WrapperActType
    ) -> tuple[ObsType, SupportsFloat, bool, bool, dict[str, Any]]:
        """Passes ``action`` through :meth:`self.action` and steps the :attr:`env` with the result."""
        env_action = self.action(action)
        return self.env.step(env_action)

    def action(self, action: WrapperActType) -> ActType:
        """Transforms an action before :meth:`env.step` is called; must be overridden by subclasses.

        Args:
            action: The original :meth:`step` actions

        Returns:
            The modified actions

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError