2022-05-10 15:35:45 +01:00
|
|
|
|
"""Core API for Environment, Wrapper, ActionWrapper, RewardWrapper and ObservationWrapper."""
|
2022-05-25 15:28:19 +01:00
|
|
|
|
import sys
|
|
|
|
|
from typing import Generic, Optional, SupportsFloat, Tuple, TypeVar, Union
|
2021-08-05 16:35:07 +02:00
|
|
|
|
|
2022-02-10 18:24:41 +01:00
|
|
|
|
from gym import spaces
|
2022-05-25 15:28:19 +01:00
|
|
|
|
from gym.logger import deprecation, warn
|
2022-03-31 12:50:38 -07:00
|
|
|
|
from gym.utils import seeding
|
2022-02-10 18:24:41 +01:00
|
|
|
|
from gym.utils.seeding import RandomNumberGenerator
|
2016-05-27 12:16:35 -07:00
|
|
|
|
|
2022-05-25 15:28:19 +01:00
|
|
|
|
# ``sys.version_info`` is a five-field named tuple (major, minor, micro, releaselevel, serial),
# so comparing it directly with a 2-tuple is never equal. Compare only the (major, minor) prefix
# so that the warning actually fires on Python 3.6.
if sys.version_info[:2] == (3, 6):
    warn(
        "Gym minimally supports python 3.6 as the python foundation not longer supports the version, please update your version to 3.7+"
    )

# Generic type parameters used by ``Env`` for the observation and action types.
ObsType = TypeVar("ObsType")
ActType = TypeVar("ActType")
|
|
|
|
|
|
2016-04-27 08:00:58 -07:00
|
|
|
|
|
2021-12-22 19:12:57 +01:00
|
|
|
|
class Env(Generic[ObsType, ActType]):
    r"""Base class for Gym environments.

    An environment hides arbitrary behind-the-scenes dynamics behind a small, uniform interface.
    It may be fully or only partially observed.

    The main API methods that users of this class need to know are:

    - :meth:`step` - Advances the environment by one timestep for a given action, returning the next
      observation, the reward, whether the episode is done, and an info dictionary.
    - :meth:`reset` - Resets the environment to an initial state, returning the initial observation.
    - :meth:`render` - Renders the environment; the available modes depend on the environment.
    - :meth:`close` - Closes the environment and performs any necessary cleanup.
    - :meth:`seed` - Seeds the environment's random number generator, :deprecated: in favor of
      ``Env.reset(seed=seed)``.

    And the following attributes:

    - :attr:`action_space` - The Space object corresponding to valid actions
    - :attr:`observation_space` - The Space object corresponding to valid observations
    - :attr:`reward_range` - A tuple with the minimum and maximum possible rewards
    - :attr:`spec` - An environment spec; ``None`` unless set
    - :attr:`metadata` - The metadata of the environment, i.e. render modes
    - :attr:`np_random` - The random number generator for the environment

    Note: :attr:`reward_range` defaults to :math:`(-\infty,+\infty)`. Set it if you want a narrower range.
    """

    # Set this in SOME subclasses
    metadata = {"render_modes": []}
    reward_range = (-float("inf"), float("inf"))
    spec = None

    # Set these in ALL subclasses
    action_space: spaces.Space[ActType]
    observation_space: spaces.Space[ObsType]

    # Created lazily by the ``np_random`` property, or by ``reset``/``seed``
    _np_random: Optional[RandomNumberGenerator] = None

    @property
    def np_random(self) -> RandomNumberGenerator:
        """Returns the internal :attr:`_np_random`, creating it via ``seeding.np_random()`` on first access."""
        rng = self._np_random
        if rng is None:
            # No generator yet: build one without an explicit seed and cache it on the instance.
            rng, _ = seeding.np_random()
            self._np_random = rng
        return rng

    @np_random.setter
    def np_random(self, value: RandomNumberGenerator):
        self._np_random = value

    def step(self, action: ActType) -> Tuple[ObsType, float, bool, dict]:
        """Run one timestep of the environment's dynamics.

        Once the end of an episode has been reached, the caller is responsible for calling
        :meth:`reset` to reset this environment's state.
        Accepts an action and returns a tuple ``(observation, reward, done, info)``.

        The base implementation raises :class:`NotImplementedError`; subclasses must override it.

        Args:
            action (ActType): an action provided by the agent

        Returns:
            observation (object): an element of the environment's :attr:`observation_space`.
                This may, for instance, be a numpy array containing the positions and velocities of certain objects.
            reward (float): The amount of reward returned as a result of taking the action.
            done (bool): Whether the episode has ended, in which case further :meth:`step` calls will return
                undefined results. A done signal may be emitted for different reasons: the underlying task was
                solved successfully, a certain timelimit was exceeded, or the physics simulation has entered
                an invalid state.
            info (dictionary): Auxiliary diagnostic information (helpful for debugging, learning, and logging).
                It may, for instance, contain the reason for a ``done`` signal, metrics describing the agent's
                performance state, variables that are hidden from observations, information that distinguishes
                truncation and termination, or the individual reward terms that are combined into the total reward.
        """
        raise NotImplementedError

    def reset(
        self,
        *,
        seed: Optional[int] = None,
        return_info: bool = False,
        options: Optional[dict] = None,
    ) -> Union[ObsType, Tuple[ObsType, dict]]:
        """Resets the environment to an initial state and returns the initial observation.

        The base implementation shown here only takes care of seeding: when ``seed`` is not ``None``
        the internal random number generator is replaced by one created from that seed. It does not
        itself build or return an observation.

        If the environment already has a random number generator and :meth:`reset` is called with
        ``seed=None``, the RNG is not reset. In the typical use case :meth:`reset` should be called
        with an integer seed right after initialization and then never again.

        Args:
            seed (optional int): The seed that is used to initialize the environment's PRNG.
                If the environment does not already have a PRNG and ``seed=None`` (the default option) is passed,
                one is created without an explicit seed the first time :attr:`np_random` is accessed.
                However, if the environment already has a PRNG and ``seed=None`` is passed, the PRNG will *not* be reset.
                If you pass an integer, the PRNG will be reset even if it already exists.
                Usually, you want to pass an integer *right after the environment has been initialized and then never again*.
            return_info (bool): If true, return additional information along with the initial observation.
                This info should be analogous to the info returned in :meth:`step`
            options (optional dict): Additional information to specify how the environment is reset (optional,
                depending on the specific environment)

        Returns:
            observation (object): Observation of the initial state. This will be an element of :attr:`observation_space`
                (typically a numpy array) and is analogous to the observation returned by :meth:`step`.
            info (optional dictionary): This will *only* be returned if ``return_info=True`` is passed.
                It contains auxiliary information complementing ``observation``. This dictionary should be analogous to
                the ``info`` returned by :meth:`step`.
        """
        # Only re-create the RNG when a seed was passed explicitly
        if seed is not None:
            self._np_random, _ = seeding.np_random(seed)

    def render(self, mode="human"):
        """Renders the environment.

        The set of supported modes varies per environment. (And some
        third-party environments may not support rendering at all.)
        By convention, if mode is:

        - human: render to the current display or terminal and
          return nothing. Usually for human consumption.
        - rgb_array: Return a numpy.ndarray with shape (x, y, 3),
          representing RGB values for an x-by-y pixel image, suitable
          for turning into a video.
        - ansi: Return a string (str) or StringIO.StringIO containing a
          terminal-style text representation. The text can include newlines
          and ANSI escape sequences (e.g. for colors).

        Note:
            Make sure that your class's metadata 'render_modes' key includes
            the list of supported modes. The base implementation raises
            :class:`NotImplementedError`, so calling ``super().render(mode=mode)``
            is a convenient way to reject unsupported modes.

        Example:
            >>> import numpy as np
            >>> class MyEnv(Env):
            ...    metadata = {'render_modes': ['human', 'rgb_array']}
            ...
            ...    def render(self, mode='human'):
            ...        if mode == 'rgb_array':
            ...            return np.array(...) # return RGB frame suitable for video
            ...        elif mode == 'human':
            ...            ... # pop up a window and render
            ...        else:
            ...            super().render(mode=mode) # just raise an exception

        Args:
            mode: the mode to render with, valid modes are `env.metadata["render_modes"]`
        """
        raise NotImplementedError

    def close(self):
        """Override close in your subclass to perform any necessary cleanup.

        The base implementation does nothing. It is also what :meth:`__exit__` calls when the
        environment is used in a ``with`` statement.
        """

    def seed(self, seed=None):
        """:deprecated: function that sets the seed for the environment's random number generator(s).

        Use `env.reset(seed=seed)` as the new API for setting the seed of the environment.

        Note:
            Some environments use multiple pseudorandom number generators.
            We want to capture all such seeds used in order to ensure that
            there aren't accidental correlations between multiple generators.

        Args:
            seed(Optional int): The seed value for the random number generator

        Returns:
            seeds (List[int]): Returns the list of seeds used in this environment's random
                number generators. The first value in the list should be the
                "main" seed, or the value which a reproducer should pass to
                'seed'. Often, the main seed equals the provided 'seed', but
                this won't be true `if seed=None`, for example.
        """
        deprecation(
            "Function `env.seed(seed)` is marked as deprecated and will be removed in the future. "
            "Please use `env.reset(seed=seed)` instead."
        )
        rng, used_seed = seeding.np_random(seed)
        self._np_random = rng
        return [used_seed]

    @property
    def unwrapped(self) -> "Env":
        """Returns the base non-wrapped environment.

        Returns:
            Env: The base non-wrapped gym.Env instance (for a plain ``Env`` this is the instance itself)
        """
        return self

    def __str__(self):
        """Returns a string of the environment with the spec id if specified."""
        cls_name = type(self).__name__
        if self.spec is not None:
            return f"<{cls_name}<{self.spec.id}>>"
        return f"<{cls_name} instance>"

    def __enter__(self):
        """Support with-statement for the environment."""
        return self

    def __exit__(self, *args):
        """Support with-statement for the environment; closes the environment on exit."""
        self.close()
        # Returning False propagates any exception raised inside the ``with`` body
        return False
|
|
|
|
|
|
2018-02-26 17:35:07 +01:00
|
|
|
|
|
2022-02-05 17:25:47 +01:00
|
|
|
|
class Wrapper(Env[ObsType, ActType]):
    """Wraps an environment to allow a modular transformation of the :meth:`step` and :meth:`reset` methods.

    This class is the base class for all wrappers. The subclass could override
    some methods to change the behavior of the original environment without touching the
    original code.

    Every method defined here forwards to the wrapped environment ``self.env``. The spaces,
    reward range and metadata can additionally be overridden per wrapper by assigning to the
    corresponding attribute.

    Note:
        Don't forget to call ``super().__init__(env)`` if the subclass overrides :meth:`__init__`.
    """

    def __init__(self, env: Env):
        """Wraps an environment to allow a modular transformation of the :meth:`step` and :meth:`reset` methods.

        Args:
            env: The environment to wrap
        """
        self.env = env

        # ``None`` means "not overridden": the matching property falls back to the wrapped environment.
        self._action_space: Optional[spaces.Space] = None
        self._observation_space: Optional[spaces.Space] = None
        self._reward_range: Optional[Tuple[SupportsFloat, SupportsFloat]] = None
        self._metadata: Optional[dict] = None

    def __getattr__(self, name):
        """Returns an attribute with ``name`` from the wrapped environment, unless ``name`` starts with an underscore.

        Raises:
            AttributeError: if ``name`` is private (starts with ``_``), or if ``name`` is ``"env"``
                and no environment has been attached to this wrapper yet.
        """
        if name.startswith("_"):
            raise AttributeError(f"accessing private attribute '{name}' is prohibited")
        if name == "env":
            # ``__getattr__`` is only consulted when normal lookup fails, so reaching this point means
            # ``self.env`` was never assigned. Evaluating ``self.env`` below would re-enter this method
            # forever; raise a regular AttributeError instead of a RecursionError.
            raise AttributeError(
                f"'{type(self).__name__}' object has no attribute 'env'; "
                "was `super().__init__(env)` called?"
            )
        return getattr(self.env, name)

    @property
    def spec(self):
        """Returns the environment specification of the wrapped environment."""
        return self.env.spec

    @classmethod
    def class_name(cls):
        """Returns the class name of the wrapper."""
        return cls.__name__

    @property
    def action_space(self) -> spaces.Space[ActType]:
        """Returns the action space of the wrapper, falling back to the wrapped environment's if not overridden."""
        if self._action_space is None:
            return self.env.action_space
        return self._action_space

    @action_space.setter
    def action_space(self, space: spaces.Space):
        self._action_space = space

    @property
    def observation_space(self) -> spaces.Space:
        """Returns the observation space of the wrapper, falling back to the wrapped environment's if not overridden."""
        if self._observation_space is None:
            return self.env.observation_space
        return self._observation_space

    @observation_space.setter
    def observation_space(self, space: spaces.Space):
        self._observation_space = space

    @property
    def reward_range(self) -> Tuple[SupportsFloat, SupportsFloat]:
        """Returns the reward range of the wrapper, falling back to the wrapped environment's if not overridden."""
        if self._reward_range is None:
            return self.env.reward_range
        return self._reward_range

    @reward_range.setter
    def reward_range(self, value: Tuple[SupportsFloat, SupportsFloat]):
        self._reward_range = value

    @property
    def metadata(self) -> dict:
        """Returns the metadata of the wrapper, falling back to the wrapped environment's if not overridden."""
        if self._metadata is None:
            return self.env.metadata
        return self._metadata

    @metadata.setter
    def metadata(self, value):
        self._metadata = value

    @property
    def np_random(self) -> RandomNumberGenerator:
        """Returns the random number generator of the wrapped environment.

        Without this override the wrapper would inherit ``Env.np_random`` and lazily create a
        generator of its own, unrelated to the one that ``reset(seed=...)`` and ``seed(...)``
        configure on the wrapped environment.
        """
        return self.env.np_random

    @np_random.setter
    def np_random(self, value: RandomNumberGenerator):
        self.env.np_random = value

    def step(self, action: ActType) -> Tuple[ObsType, float, bool, dict]:
        """Steps through the wrapped environment with ``action`` and returns its result unchanged."""
        return self.env.step(action)

    def reset(self, **kwargs) -> Union[ObsType, Tuple[ObsType, dict]]:
        """Resets the wrapped environment, forwarding all keyword arguments."""
        return self.env.reset(**kwargs)

    def render(self, *args, **kwargs):
        """Renders the wrapped environment, forwarding all arguments.

        Positional arguments are forwarded as well so that ``wrapper.render("rgb_array")`` behaves
        like ``env.render("rgb_array")``.
        """
        return self.env.render(*args, **kwargs)

    def close(self):
        """Closes the wrapped environment."""
        return self.env.close()

    def seed(self, seed=None):
        """Seeds the wrapped environment and returns whatever its ``seed`` method returns."""
        return self.env.seed(seed)

    def __str__(self):
        """Returns the wrapper name followed by the string of the wrapped environment."""
        return f"<{type(self).__name__}{self.env}>"

    def __repr__(self):
        """Returns the string representation of the wrapper (same as :meth:`__str__`)."""
        return str(self)

    @property
    def unwrapped(self) -> Env:
        """Returns the base environment of the wrapper by following ``env.unwrapped``."""
        return self.env.unwrapped
|
2016-09-04 00:38:03 -07:00
|
|
|
|
|
|
|
|
|
|
Cleanup, removal of unmaintained code (#836)
* add dtype to Box
* remove board_game, debugging, safety, parameter_tuning environments
* massive set of breaking changes
- remove python logging module
- _step, _reset, _seed, _close => non underscored method
- remove benchmark and scoring folder
* Improve render("human"), now resizable, closable window.
* get rid of default step and reset in wrappers, so it doesn’t silently fail for people with underscore methods
* CubeCrash unit test environment
* followup fixes
* MemorizeDigits unit test envrionment
* refactored spaces a bit
fixed indentation
disabled test_env_semantics
* fix unit tests
* fixes
* CubeCrash, MemorizeDigits tested
* gym backwards compatibility patch
* gym backwards compatibility, followup fixes
* changelist, add spaces to main namespaces
* undo_logger_setup for backwards compat
* remove configuration.py
2018-01-25 18:20:14 -08:00
|
|
|
|
class ObservationWrapper(Wrapper):
    """Superclass of wrappers that can modify observations using :meth:`observation` for :meth:`reset` and :meth:`step`.

    To apply a function to the observation returned by the base environment before it reaches
    learning code, inherit from :class:`ObservationWrapper` and override :meth:`observation` with
    that transformation. The transformation must be defined on the base environment's observation
    space, but it may produce values in a different space. In that case, specify the new observation
    space of the wrapper by setting :attr:`self.observation_space` in the :meth:`__init__` method of
    your wrapper.

    For example, you might have a 2D navigation task where the environment returns dictionaries as
    observations with keys ``"agent_position"`` and ``"target_position"``. A common thing to do might
    be to throw away some degrees of freedom and only consider the position of the target relative to
    the agent, i.e. ``observation["target_position"] - observation["agent_position"]``. For this, you
    could implement an observation wrapper like this::

        class RelativePosition(gym.ObservationWrapper):
            def __init__(self, env):
                super().__init__(env)
                self.observation_space = Box(shape=(2,), low=-np.inf, high=np.inf)

            def observation(self, obs):
                return obs["target_position"] - obs["agent_position"]

    Among others, Gym provides the observation wrapper :class:`TimeAwareObservation`, which adds
    information about the index of the timestep to the observation.
    """

    def reset(self, **kwargs):
        """Resets the environment, returning a modified observation using :meth:`self.observation`.

        When ``return_info=True`` is among the keyword arguments, the wrapped environment's result is
        unpacked as ``(observation, info)`` and only the observation is transformed.
        """
        outcome = self.env.reset(**kwargs)
        if not kwargs.get("return_info", False):
            return self.observation(outcome)
        first_obs, info = outcome
        return self.observation(first_obs), info

    def step(self, action):
        """Returns a modified observation using :meth:`self.observation` after calling :meth:`env.step`.

        Reward, done flag and info are passed through unchanged.
        """
        raw_obs, reward, done, info = self.env.step(action)
        return self.observation(raw_obs), reward, done, info

    def observation(self, observation):
        """Returns a modified observation; subclasses must override this method."""
        raise NotImplementedError
|
2016-09-04 00:38:03 -07:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class RewardWrapper(Wrapper):
    """Base class for wrappers that transform the reward returned by a step.

    To apply a function to the reward coming out of the base environment
    before it reaches the learning code, inherit from :class:`RewardWrapper`
    and override :meth:`reward` with that transformation.
    Such a transformation may change the reward range; to declare the range
    of your wrapper, set :attr:`self.reward_range` in :meth:`__init__`.

    Example: sometimes (especially when the reward is intrinsic and therefore
    outside our control) it helps numerical stability to clip the reward to a
    fixed range. A wrapper doing so could look like this::

        class ClipReward(gym.RewardWrapper):
            def __init__(self, env, min_reward, max_reward):
                super().__init__(env)
                self.min_reward = min_reward
                self.max_reward = max_reward
                self.reward_range = (min_reward, max_reward)

            def reward(self, reward):
                return np.clip(reward, self.min_reward, self.max_reward)
    """

    def step(self, action):
        """Step the wrapped environment and transform the reward it returns.

        Args:
            action: Passed unchanged to ``self.env.step``.

        Returns:
            The four-tuple produced by the wrapped step, with its second
            element (the reward) replaced by the result of
            :meth:`self.reward`. The observation, done flag and info are
            passed through untouched.
        """
        observation, raw_reward, done, info = self.env.step(action)
        shaped_reward = self.reward(raw_reward)
        return observation, shaped_reward, done, info

    def reward(self, reward):
        """Transform ``reward``; subclasses are expected to override this hook.

        Args:
            reward: The reward produced by the wrapped environment's step.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError
|
2016-09-04 00:38:03 -07:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ActionWrapper(Wrapper):
    """Base class for wrappers that transform the action before :meth:`env.step`.

    To apply a function to the action before it is handed to the base
    environment, inherit from :class:`ActionWrapper` and override
    :meth:`action` with that transformation. The transformation must produce
    values in the base environment's action space, but its domain may differ
    from that space. In that case, declare the wrapper's new action space by
    setting :attr:`self.action_space` in the :meth:`__init__` method of your
    wrapper.

    Example: suppose an environment has an action space of type
    :class:`gym.spaces.Box`, but you only want to use a finite subset of
    actions. A wrapper doing so could look like this::

        class DiscreteActions(gym.ActionWrapper):
            def __init__(self, env, disc_to_cont):
                super().__init__(env)
                self.disc_to_cont = disc_to_cont
                self.action_space = Discrete(len(disc_to_cont))

            def action(self, act):
                return self.disc_to_cont[act]

        if __name__ == "__main__":
            env = gym.make("LunarLanderContinuous-v2")
            wrapped_env = DiscreteActions(
                env,
                [
                    np.array([1, 0]),
                    np.array([-1, 0]),
                    np.array([0, 1]),
                    np.array([0, -1]),
                ],
            )
            print(wrapped_env.action_space)  # Discrete(4)

    Among others, Gym provides the action wrappers :class:`ClipAction` and
    :class:`RescaleAction`.
    """

    def step(self, action):
        """Step the wrapped environment with the transformed action.

        Args:
            action: The action to transform with :meth:`self.action` before
                it is passed to ``self.env.step``.

        Returns:
            Whatever the wrapped environment's step returns, unmodified.
        """
        transformed_action = self.action(action)
        return self.env.step(transformed_action)

    def action(self, action):
        """Transform ``action``; subclasses are expected to override this hook.

        :meth:`step` calls this on every action before forwarding it to the
        wrapped environment.

        Args:
            action: The action supplied to :meth:`step`.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError

    def reverse_action(self, action):
        """Return a reversed ``action``; not implemented in this base class.

        Args:
            action: The action to reverse.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError
|