"""Core API for Environment, Wrapper, ActionWrapper, RewardWrapper and ObservationWrapper."""
import sys
from typing import (
    TYPE_CHECKING,
    Any,
    Dict,
    Generic,
    List,
    Optional,
    SupportsFloat,
    Tuple,
    TypeVar,
    Union,
)

from gym import spaces
from gym.logger import deprecation, warn
from gym.utils import seeding
from gym.utils.seeding import RandomNumberGenerator

if TYPE_CHECKING:
    from gym.envs.registration import EnvSpec

if sys.version_info[0:2] == (3, 6):
    warn(
        "Gym minimally supports python 3.6 as the python foundation no longer supports the version, "
        "please update your version to 3.7+"
    )

ObsType = TypeVar("ObsType")
ActType = TypeVar("ActType")
RenderFrame = TypeVar("RenderFrame")


class _EnvDecorator(type):  # TODO: remove with gym 1.0
    """Metaclass used for adding a deprecation warning to the mode kwarg in the render method."""

    def __new__(cls, name, bases, attr):
        if "render" in attr.keys():
            attr["render"] = _EnvDecorator._deprecate_mode(attr["render"])

        return super().__new__(cls, name, bases, attr)

    @staticmethod
    def _deprecate_mode(render_func):  # type: ignore
        render_return = Optional[Union[RenderFrame, List[RenderFrame]]]

        def render(
            self: object, *args: Tuple[Any], **kwargs: Dict[str, Any]
        ) -> render_return:
            if "mode" in kwargs.keys() or len(args) > 0:
                deprecation(
                    "The argument mode in render method is deprecated; "
                    "use render_mode during environment initialization instead.\n"
                    "See here for more information: https://www.gymlibrary.ml/content/api/"
                )
            elif self.spec is not None and "render_mode" not in self.spec.kwargs.keys():  # type: ignore
                deprecation(
                    "You are calling render method, "
                    "but you didn't specify the argument render_mode at environment initialization. "
                    "To maintain backward compatibility, the environment will render in human mode.\n"
                    "If you want to render in human mode, initialize the environment in this way: "
                    "gym.make('EnvName', render_mode='human') and don't call the render method.\n"
                    "See here for more information: https://www.gymlibrary.ml/content/api/"
                )

            return render_func(self, *args, **kwargs)

        return render


decorator = _EnvDecorator
if sys.version_info[0:2] == (3, 6):
    # needed for https://github.com/python/typing/issues/449
    from typing import GenericMeta

    class _GenericEnvDecorator(GenericMeta, _EnvDecorator):
        pass

    decorator = _GenericEnvDecorator


class Env(Generic[ObsType, ActType], metaclass=decorator):
    r"""The main OpenAI Gym class.

    It encapsulates an environment with arbitrary behind-the-scenes dynamics.
    An environment can be partially or fully observed.

    The main API methods that users of this class need to know are:

    - :meth:`step` - Takes a step in the environment using an action, returning the next observation, reward,
      whether the environment has terminated, and further information.
    - :meth:`reset` - Resets the environment to an initial state, returning the initial observation.
    - :meth:`render` - Renders the environment observation with modes depending on the output.
    - :meth:`close` - Closes the environment, important for rendering where pygame is imported.
    - :meth:`seed` - Seeds the environment's random number generator, :deprecated: in favor of `Env.reset(seed=seed)`.

    And set the following attributes:

    - :attr:`action_space` - The Space object corresponding to valid actions
    - :attr:`observation_space` - The Space object corresponding to valid observations
    - :attr:`reward_range` - A tuple corresponding to the minimum and maximum possible rewards
    - :attr:`spec` - An environment spec that contains the information used to initialise the environment from `gym.make`
    - :attr:`metadata` - The metadata of the environment, i.e. render modes
    - :attr:`np_random` - The random number generator for the environment

    Note: a default reward range set to :math:`(-\infty,+\infty)` already exists. Set it if you want a narrower range.
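
    A minimal interaction loop using the (still supported) old step API, as a sketch only
    (``"CartPole-v1"`` is used purely for illustration and assumes the classic-control
    environments are available)::

        import gym

        env = gym.make("CartPole-v1")
        observation = env.reset(seed=42)
        for _ in range(100):
            action = env.action_space.sample()  # a random, purely illustrative policy
            observation, reward, done, info = env.step(action)
            if done:
                observation = env.reset()
        env.close()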
"""

    # Set this in SOME subclasses
    metadata = {"render_modes": []}
    render_mode = None  # define render_mode if your environment supports rendering
    reward_range = (-float("inf"), float("inf"))
    spec: "EnvSpec" = None

    # Set these in ALL subclasses
    action_space: spaces.Space[ActType]
    observation_space: spaces.Space[ObsType]

    # Created
    _np_random: Optional[RandomNumberGenerator] = None

    @property
    def np_random(self) -> RandomNumberGenerator:
        """Returns the environment's internal :attr:`_np_random`; if it is not set, it will be initialised with a random seed."""
        if self._np_random is None:
            self._np_random, seed = seeding.np_random()
        return self._np_random

    @np_random.setter
    def np_random(self, value: RandomNumberGenerator):
        self._np_random = value

    def step(
        self, action: ActType
    ) -> Union[
        Tuple[ObsType, float, bool, bool, dict], Tuple[ObsType, float, bool, dict]
    ]:
        """Run one timestep of the environment's dynamics.

        When end of episode is reached, you are responsible for calling :meth:`reset` to reset this environment's state.
        Accepts an action and returns either a tuple `(observation, reward, terminated, truncated, info)`, or a tuple
        `(observation, reward, done, info)`. The latter is deprecated and will be removed in future versions.

        Args:
            action (ActType): an action provided by the agent

        Returns:
            observation (object): this will be an element of the environment's :attr:`observation_space`.
                This may, for instance, be a numpy array containing the positions and velocities of certain objects.
            reward (float): The amount of reward returned as a result of taking the action.
            terminated (bool): whether a `terminal state` (as defined under the MDP of the task) is reached.
                In this case further step() calls could return undefined results.
            truncated (bool): whether a truncation condition outside the scope of the MDP is satisfied.
                Typically a timelimit, but could also be used to indicate an agent physically going out of bounds.
                Can be used to end the episode prematurely before a `terminal state` is reached.
            info (dictionary): `info` contains auxiliary diagnostic information (helpful for debugging, learning, and logging).
                This might, for instance, contain: metrics that describe the agent's performance state, variables that are
                hidden from observations, or individual reward terms that are combined to produce the total reward.
                It also can contain information that distinguishes truncation and termination; however, this is deprecated in favour
                of returning two booleans, and will be removed in a future version.

            (deprecated)
            done (bool): A boolean value for whether the episode has ended, in which case further :meth:`step` calls will return undefined results.
                A done signal may be emitted for different reasons: Maybe the task underlying the environment was solved successfully,
                a certain timelimit was exceeded, or the physics simulation has entered an invalid state.
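
        Example (a sketch of an interaction loop using the new five-element return; ``env`` is assumed to be
        an already-created environment that follows the new step API)::

            observation = env.reset(seed=42)
            terminated = truncated = False
            while not (terminated or truncated):
                action = env.action_space.sample()  # illustrative random policy
                observation, reward, terminated, truncated, info = env.step(action)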
"""
        raise NotImplementedError

    def reset(
        self,
        *,
        seed: Optional[int] = None,
        return_info: bool = False,
        options: Optional[dict] = None,
    ) -> Union[ObsType, Tuple[ObsType, dict]]:
        """Resets the environment to an initial state and returns the initial observation.

        This method can reset the environment's random number generator(s) if ``seed`` is an integer or
        if the environment has not yet initialized a random number generator.
        If the environment already has a random number generator and :meth:`reset` is called with ``seed=None``,
        the RNG should not be reset. Moreover, :meth:`reset` should (in the typical use case) be called with an
        integer seed right after initialization and then never again.

        Args:
            seed (optional int): The seed that is used to initialize the environment's PRNG.
                If the environment does not already have a PRNG and ``seed=None`` (the default option) is passed,
                a seed will be chosen from some source of entropy (e.g. timestamp or /dev/urandom).
                However, if the environment already has a PRNG and ``seed=None`` is passed, the PRNG will *not* be reset.
                If you pass an integer, the PRNG will be reset even if it already exists.
                Usually, you want to pass an integer *right after the environment has been initialized and then never again*.
                Please refer to the minimal example above to see this paradigm in action.
            return_info (bool): If true, return additional information along with the initial observation.
                This info should be analogous to the info returned in :meth:`step`.
            options (optional dict): Additional information to specify how the environment is reset (optional,
                depending on the specific environment)

        Returns:
            observation (object): Observation of the initial state. This will be an element of :attr:`observation_space`
                (typically a numpy array) and is analogous to the observation returned by :meth:`step`.
            info (optional dictionary): This will *only* be returned if ``return_info=True`` is passed.
                It contains auxiliary information complementing ``observation``. This dictionary should be analogous to
                the ``info`` returned by :meth:`step`.
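
        Example (a sketch of the seeding paradigm described above; the seed value 42 is arbitrary)::

            observation = env.reset(seed=42)  # seed once, right after creating the environment
            ...
            observation = env.reset()  # later resets reuse the same RNG; no seed is passed again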
"""
        # Initialize the RNG if the seed is manually passed
        if seed is not None:
            self._np_random, seed = seeding.np_random(seed)

    # TODO: remove kwarg mode with gym 1.0
    def render(self, mode="human") -> Optional[Union[RenderFrame, List[RenderFrame]]]:
        """Compute the render frames as specified by render_mode attribute during initialization of the environment.

        The set of supported modes varies per environment. (And some
        third-party environments may not support rendering at all.)
        By convention, if render_mode is:

        - None (default): no render is computed.
        - human: render returns None.
          The environment is continuously rendered in the current display or terminal. Usually for human consumption.
        - single_rgb_array: return a single frame representing the current state of the environment.
          A frame is a numpy.ndarray with shape (x, y, 3) representing RGB values for an x-by-y pixel image.
        - rgb_array: return a list of frames representing the states of the environment since the last reset.
          Each frame is a numpy.ndarray with shape (x, y, 3), as with single_rgb_array.
        - ansi: Return a list of strings (str) or StringIO.StringIO containing a
          terminal-style text representation for each time step.
          The text can include newlines and ANSI escape sequences (e.g. for colors).

        Note:
            Rendering computations are performed internally even if you don't call render().
            To avoid this, you can set render_mode = None and, if the environment supports it,
            call render() specifying the argument 'mode'.

        Note:
            Make sure that your class's metadata 'render_modes' key includes
            the list of supported modes. It's recommended to call super()
            in implementations to use the functionality of this method.
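
        Example (a sketch; ``"CartPole-v1"`` is used purely for illustration and is assumed to support
        the ``rgb_array`` mode)::

            env = gym.make("CartPole-v1", render_mode="rgb_array")
            env.reset()
            frames = env.render()  # list of numpy arrays, one frame per step since the last reset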
"""
        raise NotImplementedError

    def close(self):
        """Override close in your subclass to perform any necessary cleanup.

        Environments will automatically :meth:`close()` themselves when
        garbage collected or when the program exits.
        """
        pass

    def seed(self, seed=None):
        """:deprecated: function that sets the seed for the environment's random number generator(s).

        Use `env.reset(seed=seed)` as the new API for setting the seed of the environment.

        Note:
            Some environments use multiple pseudorandom number generators.
            We want to capture all such seeds used in order to ensure that
            there aren't accidental correlations between multiple generators.

        Args:
            seed (Optional int): The seed value for the random number generator

        Returns:
            seeds (List[int]): Returns the list of seeds used in this environment's random
                number generators. The first value in the list should be the
                "main" seed, or the value which a reproducer should pass to
                'seed'. Often, the main seed equals the provided 'seed', but
                this won't be true if seed=None, for example.
        """
        deprecation(
            "Function `env.seed(seed)` is marked as deprecated and will be removed in the future. "
            "Please use `env.reset(seed=seed)` instead."
        )
        self._np_random, seed = seeding.np_random(seed)
        return [seed]

    @property
    def unwrapped(self) -> "Env":
        """Returns the base non-wrapped environment.

        Returns:
            Env: The base non-wrapped gym.Env instance
        """
        return self

    def __str__(self):
        """Returns a string of the environment with the spec id if specified."""
        if self.spec is None:
            return f"<{type(self).__name__} instance>"
        else:
            return f"<{type(self).__name__}<{self.spec.id}>>"

    def __enter__(self):
        """Support with-statement for the environment."""
        return self

    def __exit__(self, *args):
        """Support with-statement for the environment."""
        self.close()
        # propagate exception
        return False


class Wrapper(Env[ObsType, ActType]):
    """Wraps an environment to allow a modular transformation of the :meth:`step` and :meth:`reset` methods.

    This class is the base class for all wrappers. Subclasses can override
    some methods to change the behavior of the original environment without touching the
    original code.

    Note:
        Don't forget to call ``super().__init__(env)`` if the subclass overrides :meth:`__init__`.
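
    A minimal sketch of a custom wrapper (``StepCounter`` and its ``step_count`` attribute are purely
    illustrative names, not part of Gym)::

        class StepCounter(gym.Wrapper):
            def __init__(self, env):
                super().__init__(env)
                self.step_count = 0  # number of steps taken through this wrapper

            def step(self, action):
                self.step_count += 1
                return self.env.step(action)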
"""

    def __init__(self, env: Env, new_step_api: bool = False):
        """Wraps an environment to allow a modular transformation of the :meth:`step` and :meth:`reset` methods.

        Args:
            env: The environment to wrap
            new_step_api: Whether the wrapper's step method outputs in the new or the old step API
        """
        self.env = env

        self._action_space: Optional[spaces.Space] = None
        self._observation_space: Optional[spaces.Space] = None
        self._reward_range: Optional[Tuple[SupportsFloat, SupportsFloat]] = None
        self._metadata: Optional[dict] = None
        self.new_step_api = new_step_api

        if not self.new_step_api:
            deprecation(
                "Initializing wrapper in old step API which returns one bool instead of two. "
                "It is recommended to set `new_step_api=True` to use the new step API. "
                "This will be the default behaviour in future."
            )

    def __getattr__(self, name):
        """Returns an attribute with ``name``, unless ``name`` starts with an underscore."""
        if name.startswith("_"):
            raise AttributeError(f"accessing private attribute '{name}' is prohibited")
        return getattr(self.env, name)

    @property
    def spec(self):
        """Returns the environment specification."""
        return self.env.spec

    @classmethod
    def class_name(cls):
        """Returns the class name of the wrapper."""
        return cls.__name__

    @property
    def action_space(self) -> spaces.Space[ActType]:
        """Returns the action space of the environment."""
        if self._action_space is None:
            return self.env.action_space
        return self._action_space

    @action_space.setter
    def action_space(self, space: spaces.Space):
        self._action_space = space

    @property
    def observation_space(self) -> spaces.Space:
        """Returns the observation space of the environment."""
        if self._observation_space is None:
            return self.env.observation_space
        return self._observation_space

    @observation_space.setter
    def observation_space(self, space: spaces.Space):
        self._observation_space = space

    @property
    def reward_range(self) -> Tuple[SupportsFloat, SupportsFloat]:
        """Return the reward range of the environment."""
        if self._reward_range is None:
            return self.env.reward_range
        return self._reward_range

    @reward_range.setter
    def reward_range(self, value: Tuple[SupportsFloat, SupportsFloat]):
        self._reward_range = value

    @property
    def metadata(self) -> dict:
        """Returns the environment metadata."""
        if self._metadata is None:
            return self.env.metadata
        return self._metadata

    @metadata.setter
    def metadata(self, value):
        self._metadata = value

    @property
    def render_mode(self) -> Optional[str]:
        """Returns the environment render_mode."""
        return self.env.render_mode

    @property
    def np_random(self) -> RandomNumberGenerator:
        """Returns the environment np_random."""
        return self.env.np_random

    @np_random.setter
    def np_random(self, value):
        self.env.np_random = value

    @property
    def _np_random(self):
        raise AttributeError(
            "Can't access `_np_random` of a wrapper, use `.unwrapped._np_random` or `.np_random`."
        )

    def step(
        self, action: ActType
    ) -> Union[
        Tuple[ObsType, float, bool, bool, dict], Tuple[ObsType, float, bool, dict]
    ]:
        """Steps through the environment with action."""
        from gym.utils.step_api_compatibility import (  # avoid circular import
            step_api_compatibility,
        )

        return step_api_compatibility(self.env.step(action), self.new_step_api)

    def reset(self, **kwargs) -> Union[ObsType, Tuple[ObsType, dict]]:
        """Resets the environment with kwargs."""
        return self.env.reset(**kwargs)
2022-07-27 00:20:51 +02:00
def render (
self , * args , * * kwargs
) - > Optional [ Union [ RenderFrame , List [ RenderFrame ] ] ] :
2022-06-15 15:33:03 +02:00
""" Renders the environment. """
return self . env . render ( * args , * * kwargs )
2016-08-11 14:45:52 -07:00

    def close(self):
        """Closes the environment."""
        return self.env.close()

    def seed(self, seed=None):
        """Seeds the environment."""
        return self.env.seed(seed)

    def __str__(self):
        """Returns the wrapper name and the unwrapped environment string."""
        return f"<{type(self).__name__}{self.env}>"

    def __repr__(self):
        """Returns the string representation of the wrapper."""
        return str(self)
2016-08-17 15:16:45 -07:00
@property
2022-02-05 17:25:47 +01:00
def unwrapped ( self ) - > Env :
2022-05-10 15:35:45 +01:00
""" Returns the base environment of the wrapper. """
2017-02-26 00:01:00 -08:00
return self . env . unwrapped
2016-09-04 00:38:03 -07:00


class ObservationWrapper(Wrapper):
    """Superclass of wrappers that can modify observations using :meth:`observation` for :meth:`reset` and :meth:`step`.

    If you would like to apply a function to the observation that is returned by the base environment before
    passing it to learning code, you can simply inherit from :class:`ObservationWrapper` and overwrite the method
    :meth:`observation` to implement that transformation. The transformation defined in that method must be
    defined on the base environment's observation space. However, it may take values in a different space.
    In that case, you need to specify the new observation space of the wrapper by setting :attr:`self.observation_space`
    in the :meth:`__init__` method of your wrapper.

    For example, you might have a 2D navigation task where the environment returns dictionaries as observations with
    keys ``"agent_position"`` and ``"target_position"``. A common thing to do might be to throw away some degrees of
    freedom and only consider the position of the target relative to the agent, i.e.
    ``observation["target_position"] - observation["agent_position"]``. For this, you could implement an
    observation wrapper like this::

        class RelativePosition(gym.ObservationWrapper):
            def __init__(self, env):
                super().__init__(env)
                self.observation_space = Box(shape=(2,), low=-np.inf, high=np.inf)

            def observation(self, obs):
                return obs["target_position"] - obs["agent_position"]
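
    Such a wrapper is applied by simply wrapping an existing environment instance
    (``"MyNavigationEnv-v0"`` is a hypothetical, illustrative id)::

        env = gym.make("MyNavigationEnv-v0")
        wrapped_env = RelativePosition(env)
        obs = wrapped_env.reset()  # obs is now the relative position, a 2-element array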

    Among others, Gym provides the observation wrapper :class:`TimeAwareObservation`, which adds information about the
    index of the timestep to the observation.
"""

    def reset(self, **kwargs):
        """Resets the environment, returning a modified observation using :meth:`self.observation`."""
        if kwargs.get("return_info", False):
            obs, info = self.env.reset(**kwargs)
            return self.observation(obs), info
        else:
            return self.observation(self.env.reset(**kwargs))

    def step(self, action):
        """Returns a modified observation using :meth:`self.observation` after calling :meth:`env.step`."""
        step_returns = self.env.step(action)
        if len(step_returns) == 5:
            observation, reward, terminated, truncated, info = step_returns
            return self.observation(observation), reward, terminated, truncated, info
        else:
            observation, reward, done, info = step_returns
            return self.observation(observation), reward, done, info

    def observation(self, observation):
        """Returns a modified observation."""
        raise NotImplementedError


class RewardWrapper(Wrapper):
    """Superclass of wrappers that can modify the returning reward from a step.

    If you would like to apply a function to the reward that is returned by the base environment before
    passing it to learning code, you can simply inherit from :class:`RewardWrapper` and overwrite the method
    :meth:`reward` to implement that transformation.
    This transformation might change the reward range; to specify the reward range of your wrapper,
    you can simply define :attr:`self.reward_range` in :meth:`__init__`.

    Let us look at an example: Sometimes (especially when we do not have control over the reward
    because it is intrinsic), we want to clip the reward to a range to gain some numerical stability.
    To do that, we could, for instance, implement the following wrapper::

        class ClipReward(gym.RewardWrapper):
            def __init__(self, env, min_reward, max_reward):
                super().__init__(env)
                self.min_reward = min_reward
                self.max_reward = max_reward
                self.reward_range = (min_reward, max_reward)

            def reward(self, reward):
                return np.clip(reward, self.min_reward, self.max_reward)
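
    Applying it is then a single wrapping call (``"MyEnv-v0"`` and the reward bounds are purely illustrative)::

        env = gym.make("MyEnv-v0")
        env = ClipReward(env, min_reward=-1.0, max_reward=1.0)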
"""

    def step(self, action):
        """Modifies the reward using :meth:`self.reward` after the environment :meth:`env.step`."""
        step_returns = self.env.step(action)
        if len(step_returns) == 5:
            observation, reward, terminated, truncated, info = step_returns
            return observation, self.reward(reward), terminated, truncated, info
        else:
            observation, reward, done, info = step_returns
            return observation, self.reward(reward), done, info

    def reward(self, reward):
        """Returns a modified ``reward``."""
        raise NotImplementedError


class ActionWrapper(Wrapper):
    """Superclass of wrappers that can modify the action before :meth:`env.step`.

    If you would like to apply a function to the action before passing it to the base environment,
    you can simply inherit from :class:`ActionWrapper` and overwrite the method :meth:`action` to implement
    that transformation. The transformation defined in that method must take values in the base environment's
    action space. However, its domain might differ from the original action space.
    In that case, you need to specify the new action space of the wrapper by setting :attr:`self.action_space` in
    the :meth:`__init__` method of your wrapper.

    Let's say you have an environment with action space of type :class:`gym.spaces.Box`, but you would only like
    to use a finite subset of actions. Then, you might want to implement the following wrapper::

        class DiscreteActions(gym.ActionWrapper):
            def __init__(self, env, disc_to_cont):
                super().__init__(env)
                self.disc_to_cont = disc_to_cont
                self.action_space = Discrete(len(disc_to_cont))

            def action(self, act):
                return self.disc_to_cont[act]

        if __name__ == "__main__":
            env = gym.make("LunarLanderContinuous-v2")
            wrapped_env = DiscreteActions(env, [np.array([1, 0]), np.array([-1, 0]),
                                                np.array([0, 1]), np.array([0, -1])])
            print(wrapped_env.action_space)  # Discrete(4)

    Among others, Gym provides the action wrappers :class:`ClipAction` and :class:`RescaleAction`.
    """

    def step(self, action):
        """Runs the environment :meth:`env.step` using the modified ``action`` from :meth:`self.action`."""
        return self.env.step(self.action(action))

    def action(self, action):
        """Returns a modified action before :meth:`env.step` is called."""
        raise NotImplementedError

    def reverse_action(self, action):
        """Returns a reversed ``action``."""
        raise NotImplementedError