mirror of
https://github.com/Farama-Foundation/Gymnasium.git
synced 2025-08-16 19:49:13 +00:00
fix and complete docstrings to match the documentation page (#2697)
This commit is contained in:
53
gym/core.py
53
gym/core.py
@@ -63,7 +63,7 @@ class Env(Generic[ObsType, ActType]):
|
|||||||
@abstractmethod
|
@abstractmethod
|
||||||
def step(self, action: ActType) -> Tuple[ObsType, float, bool, dict]:
|
def step(self, action: ActType) -> Tuple[ObsType, float, bool, dict]:
|
||||||
"""Run one timestep of the environment's dynamics. When end of
|
"""Run one timestep of the environment's dynamics. When end of
|
||||||
episode is reached, you are responsible for calling `reset()`
|
episode is reached, you are responsible for calling :meth:`reset`
|
||||||
to reset this environment's state.
|
to reset this environment's state.
|
||||||
|
|
||||||
Accepts an action and returns a tuple (observation, reward, done, info).
|
Accepts an action and returns a tuple (observation, reward, done, info).
|
||||||
@@ -71,11 +71,18 @@ class Env(Generic[ObsType, ActType]):
|
|||||||
Args:
|
Args:
|
||||||
action (object): an action provided by the agent
|
action (object): an action provided by the agent
|
||||||
|
|
||||||
|
This method returns a tuple ``(observation, reward, done, info)``
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
observation (object): agent's observation of the current environment
|
observation (object): agent's observation of the current environment. This will be an element of the environment's :attr:`observation_space`. This may, for instance, be a numpy array containing the positions and velocities of certain objects.
|
||||||
reward (float) : amount of reward returned after previous action
|
reward (float) : amount of reward returned after previous action
|
||||||
done (bool): whether the episode has ended, in which case further step() calls will return undefined results
|
done (bool): whether the episode has ended, in which case further :meth:`step` calls will return undefined results. A done signal may be emitted for different reasons: Maybe the task underlying the environment was solved successfully, a certain time limit was exceeded, or the physics simulation has entered an invalid state. ``info`` may contain additional information regarding the reason for a ``done`` signal.
|
||||||
info (dict): contains auxiliary diagnostic information (helpful for debugging, logging, and sometimes learning)
|
info (dict): contains auxiliary diagnostic information (helpful for debugging, learning, and logging). This might, for instance, contain:
|
||||||
|
|
||||||
|
- metrics that describe the agent's performance or
|
||||||
|
- state variables that are hidden from observations or
|
||||||
|
- information that distinguishes truncation and termination or
|
||||||
|
- individual reward terms that are combined to produce the total reward
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
@@ -91,16 +98,22 @@ class Env(Generic[ObsType, ActType]):
|
|||||||
observation.
|
observation.
|
||||||
|
|
||||||
This method should also reset the environment's random number
|
This method should also reset the environment's random number
|
||||||
generator(s) if `seed` is an integer or if the environment has not
|
generator(s) if ``seed`` is an integer or if the environment has not
|
||||||
yet initialized a random number generator. If the environment already
|
yet initialized a random number generator. If the environment already
|
||||||
has a random number generator and `reset` is called with `seed=None`,
|
has a random number generator and :meth:`reset` is called with ``seed=None``,
|
||||||
the RNG should not be reset.
|
the RNG should not be reset.
|
||||||
Moreover, `reset` should (in the typical use case) be called with an
|
Moreover, :meth:`reset` should (in the typical use case) be called with an
|
||||||
integer seed right after initialization and then never again.
|
integer seed right after initialization and then never again.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
seed (int or None): The seed that is used to initialize the environment's PRNG. If the environment does not already have a PRNG and ``seed=None`` (the default option) is passed, a seed will be chosen from some source of entropy (e.g. timestamp or /dev/urandom). However, if the environment already has a PRNG and ``seed=None`` is passed, the PRNG will *not* be reset. If you pass an integer, the PRNG will be reset even if it already exists. Usually, you want to pass an integer *right after the environment has been initialized and then never again*. Please refer to the minimal example above to see this paradigm in action.
|
||||||
|
return_info (bool): If true, return additional information along with initial observation. This info should be analogous to the info returned in :meth:`step`
|
||||||
|
options (dict or None): Additional information to specify how the environment is reset (optional, depending on the specific environment)
|
||||||
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
observation (object): the initial observation.
|
observation (object): Observation of the initial state. This will be an element of :attr:`observation_space` (usually a numpy array) and is analogous to the observation returned by :meth:`step`.
|
||||||
info (optional dictionary): a dictionary containing extra information, this is only returned if return_info is set to true
|
info (optional dictionary): This will *only* be returned if ``return_info=True`` is passed. It contains auxiliary information complementing ``observation``. This dictionary should be analogous to the ``info`` returned by :meth:`step`.
|
||||||
"""
|
"""
|
||||||
# Initialize the RNG if the seed is manually passed
|
# Initialize the RNG if the seed is manually passed
|
||||||
if seed is not None:
|
if seed is not None:
|
||||||
@@ -122,7 +135,7 @@ class Env(Generic[ObsType, ActType]):
|
|||||||
- ansi: Return a string (str) or StringIO.StringIO containing a
|
- ansi: Return a string (str) or StringIO.StringIO containing a
|
||||||
terminal-style text representation. The text can include newlines
|
terminal-style text representation. The text can include newlines
|
||||||
and ANSI escape sequences (e.g. for colors).
|
and ANSI escape sequences (e.g. for colors).
|
||||||
|
|
||||||
Note:
|
Note:
|
||||||
Make sure that your class's metadata 'render_modes' key includes
|
Make sure that your class's metadata 'render_modes' key includes
|
||||||
the list of supported modes. It's recommended to call super()
|
the list of supported modes. It's recommended to call super()
|
||||||
@@ -131,18 +144,18 @@ class Env(Generic[ObsType, ActType]):
|
|||||||
Args:
|
Args:
|
||||||
mode (str): the mode to render with
|
mode (str): the mode to render with
|
||||||
|
|
||||||
Example:
|
Example::
|
||||||
|
|
||||||
class MyEnv(Env):
|
class MyEnv(Env):
|
||||||
metadata = {'render_modes': ['human', 'rgb_array']}
|
metadata = {'render_modes': ['human', 'rgb_array']}
|
||||||
|
|
||||||
def render(self, mode='human'):
|
def render(self, mode='human'):
|
||||||
if mode == 'rgb_array':
|
if mode == 'rgb_array':
|
||||||
return np.array(...) # return RGB frame suitable for video
|
return np.array(...) # return RGB frame suitable for video
|
||||||
elif mode == 'human':
|
elif mode == 'human':
|
||||||
... # pop up a window and render
|
... # pop up a window and render
|
||||||
else:
|
else:
|
||||||
super(MyEnv, self).render(mode=mode) # just raise an exception
|
super(MyEnv, self).render(mode=mode) # just raise an exception
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
|
@@ -29,10 +29,12 @@ class Box(Space[np.ndarray]):
|
|||||||
There are two common use cases:
|
There are two common use cases:
|
||||||
|
|
||||||
* Identical bound for each dimension::
|
* Identical bound for each dimension::
|
||||||
|
|
||||||
>>> Box(low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32)
|
>>> Box(low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32)
|
||||||
Box(3, 4)
|
Box(3, 4)
|
||||||
|
|
||||||
* Independent bound for each dimension::
|
* Independent bound for each dimension::
|
||||||
|
|
||||||
>>> Box(low=np.array([-1.0, -2.0]), high=np.array([2.0, 4.0]), dtype=np.float32)
|
>>> Box(low=np.array([-1.0, -2.0]), high=np.array([2.0, 4.0]), dtype=np.float32)
|
||||||
Box(2,)
|
Box(2,)
|
||||||
|
|
||||||
|
@@ -13,30 +13,32 @@ class Dict(Space[TypingDict[str, Space]], Mapping):
|
|||||||
"""
|
"""
|
||||||
A dictionary of simpler spaces.
|
A dictionary of simpler spaces.
|
||||||
|
|
||||||
Example usage:
|
Example usage::
|
||||||
self.observation_space = spaces.Dict({"position": spaces.Discrete(2), "velocity": spaces.Discrete(3)})
|
|
||||||
|
|
||||||
Example usage [nested]:
|
self.observation_space = spaces.Dict({"position": spaces.Discrete(2), "velocity": spaces.Discrete(3)})
|
||||||
self.nested_observation_space = spaces.Dict({
|
|
||||||
'sensors': spaces.Dict({
|
Example usage [nested]::
|
||||||
'position': spaces.Box(low=-100, high=100, shape=(3,)),
|
|
||||||
'velocity': spaces.Box(low=-1, high=1, shape=(3,)),
|
self.nested_observation_space = spaces.Dict({
|
||||||
'front_cam': spaces.Tuple((
|
'sensors': spaces.Dict({
|
||||||
spaces.Box(low=0, high=1, shape=(10, 10, 3)),
|
'position': spaces.Box(low=-100, high=100, shape=(3,)),
|
||||||
spaces.Box(low=0, high=1, shape=(10, 10, 3))
|
'velocity': spaces.Box(low=-1, high=1, shape=(3,)),
|
||||||
)),
|
'front_cam': spaces.Tuple((
|
||||||
'rear_cam': spaces.Box(low=0, high=1, shape=(10, 10, 3)),
|
spaces.Box(low=0, high=1, shape=(10, 10, 3)),
|
||||||
}),
|
spaces.Box(low=0, high=1, shape=(10, 10, 3))
|
||||||
'ext_controller': spaces.MultiDiscrete((5, 2, 2)),
|
)),
|
||||||
'inner_state':spaces.Dict({
|
'rear_cam': spaces.Box(low=0, high=1, shape=(10, 10, 3)),
|
||||||
'charge': spaces.Discrete(100),
|
}),
|
||||||
'system_checks': spaces.MultiBinary(10),
|
'ext_controller': spaces.MultiDiscrete((5, 2, 2)),
|
||||||
'job_status': spaces.Dict({
|
'inner_state':spaces.Dict({
|
||||||
'task': spaces.Discrete(5),
|
'charge': spaces.Discrete(100),
|
||||||
'progress': spaces.Box(low=0, high=100, shape=()),
|
'system_checks': spaces.MultiBinary(10),
|
||||||
|
'job_status': spaces.Dict({
|
||||||
|
'task': spaces.Discrete(5),
|
||||||
|
'progress': spaces.Box(low=0, high=100, shape=()),
|
||||||
|
})
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
})
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
@@ -6,10 +6,10 @@ from .space import Space
|
|||||||
|
|
||||||
|
|
||||||
class Discrete(Space[int]):
|
class Discrete(Space[int]):
|
||||||
r"""A discrete space in :math:`\{ 0, 1, \\dots, n-1 \}`.
|
r"""A discrete space in :math:`\{ 0, 1, \dots, n-1 \}`.
|
||||||
|
|
||||||
A start value can be optionally specified to shift the range
|
A start value can be optionally specified to shift the range
|
||||||
to :math:`\{ a, a+1, \\dots, a+n-1 \}`.
|
to :math:`\{ a, a+1, \dots, a+n-1 \}`.
|
||||||
|
|
||||||
Example::
|
Example::
|
||||||
|
|
||||||
|
@@ -11,23 +11,18 @@ class MultiBinary(Space[np.ndarray]):
|
|||||||
"""
|
"""
|
||||||
An n-shape binary space.
|
An n-shape binary space.
|
||||||
|
|
||||||
The argument to MultiBinary defines n, which could be a number or a `list` of numbers.
|
The argument to MultiBinary defines n, which could be a number or a ``list`` of numbers.
|
||||||
|
|
||||||
Example Usage:
|
Example Usage::
|
||||||
|
|
||||||
>> self.observation_space = spaces.MultiBinary(5)
|
>>> self.observation_space = spaces.MultiBinary(5)
|
||||||
|
>>> self.observation_space.sample()
|
||||||
>> self.observation_space.sample()
|
array([0, 1, 0, 1, 0], dtype=int8)
|
||||||
|
>>> self.observation_space = spaces.MultiBinary([3, 2])
|
||||||
array([0, 1, 0, 1, 0], dtype=int8)
|
>>> self.observation_space.sample()
|
||||||
|
array([[0, 0],
|
||||||
>> self.observation_space = spaces.MultiBinary([3, 2])
|
[0, 1],
|
||||||
|
[1, 1]], dtype=int8)
|
||||||
>> self.observation_space.sample()
|
|
||||||
|
|
||||||
array([[0, 0],
|
|
||||||
[0, 1],
|
|
||||||
[1, 1]], dtype=int8)
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@@ -12,22 +12,19 @@ from .space import Space
|
|||||||
|
|
||||||
class MultiDiscrete(Space[np.ndarray]):
|
class MultiDiscrete(Space[np.ndarray]):
|
||||||
"""
|
"""
|
||||||
- The multi-discrete action space consists of a series of discrete action spaces with different number of actions in each
|
The multi-discrete action space consists of a series of discrete action spaces with a different number of actions in each. It is useful to represent game controllers or keyboards where each key can be represented as a discrete action space. It is parametrized by passing an array of positive integers specifying the number of actions for each discrete action space.
|
||||||
- It is useful to represent game controllers or keyboards where each key can be represented as a discrete action space
|
|
||||||
- It is parametrized by passing an array of positive integers specifying number of actions for each discrete action space
|
Note:
|
||||||
|
|
||||||
|
Some environment wrappers assume a value of 0 always represents the NOOP action.
|
||||||
|
|
||||||
Note: Some environment wrappers assume a value of 0 always represents the NOOP action.
|
e.g. Nintendo Game Controller - Can be conceptualized as 3 discrete action spaces:
|
||||||
|
|
||||||
e.g. Nintendo Game Controller
|
1. Arrow Keys: Discrete 5 - NOOP[0], UP[1], RIGHT[2], DOWN[3], LEFT[4] - params: min: 0, max: 4
|
||||||
- Can be conceptualized as 3 discrete action spaces:
|
2. Button A: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1
|
||||||
|
3. Button B: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1
|
||||||
|
|
||||||
1) Arrow Keys: Discrete 5 - NOOP[0], UP[1], RIGHT[2], DOWN[3], LEFT[4] - params: min: 0, max: 4
|
It can be initialized as ``MultiDiscrete([ 5, 2, 2 ])``
|
||||||
2) Button A: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1
|
|
||||||
3) Button B: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1
|
|
||||||
|
|
||||||
- Can be initialized as
|
|
||||||
|
|
||||||
MultiDiscrete([ 5, 2, 2 ])
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@@ -10,8 +10,9 @@ class Tuple(Space[tuple], Sequence):
|
|||||||
"""
|
"""
|
||||||
A tuple (i.e., product) of simpler spaces
|
A tuple (i.e., product) of simpler spaces
|
||||||
|
|
||||||
Example usage:
|
Example usage::
|
||||||
self.observation_space = spaces.Tuple((spaces.Discrete(2), spaces.Discrete(3)))
|
|
||||||
|
self.observation_space = spaces.Tuple((spaces.Discrete(2), spaces.Discrete(3)))
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
@@ -17,6 +17,12 @@ def flatdim(space: Space) -> int:
|
|||||||
|
|
||||||
Accepts a space and returns an integer. Raises ``NotImplementedError`` if
|
Accepts a space and returns an integer. Raises ``NotImplementedError`` if
|
||||||
the space is not defined in ``gym.spaces``.
|
the space is not defined in ``gym.spaces``.
|
||||||
|
|
||||||
|
Example usage::
|
||||||
|
|
||||||
|
>>> s = spaces.Dict({"position": spaces.Discrete(2), "velocity": spaces.Discrete(3)})
|
||||||
|
>>> spaces.flatdim(s)
|
||||||
|
5
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError(f"Unknown space: `{space}`")
|
raise NotImplementedError(f"Unknown space: `{space}`")
|
||||||
|
|
||||||
@@ -101,8 +107,8 @@ def _flatten_dict(space, x) -> np.ndarray:
|
|||||||
def unflatten(space: Space[T], x: np.ndarray) -> T:
|
def unflatten(space: Space[T], x: np.ndarray) -> T:
|
||||||
"""Unflatten a data point from a space.
|
"""Unflatten a data point from a space.
|
||||||
|
|
||||||
This reverses the transformation applied by ``flatten()``. You must ensure
|
This reverses the transformation applied by :func:`flatten`. You must ensure
|
||||||
that the ``space`` argument is the same as for the ``flatten()`` call.
|
that the ``space`` argument is the same as for the :func:`flatten` call.
|
||||||
|
|
||||||
Accepts a space and a flattened point. Returns a point with a structure
|
Accepts a space and a flattened point. Returns a point with a structure
|
||||||
that matches the space. Raises ``NotImplementedError`` if the space is not
|
that matches the space. Raises ``NotImplementedError`` if the space is not
|
||||||
@@ -156,9 +162,9 @@ def _unflatten_dict(space: Dict, x: np.ndarray) -> dict:
|
|||||||
def flatten_space(space: Space) -> Box:
|
def flatten_space(space: Space) -> Box:
|
||||||
"""Flatten a space into a single ``Box``.
|
"""Flatten a space into a single ``Box``.
|
||||||
|
|
||||||
This is equivalent to ``flatten()``, but operates on the space itself. The
|
This is equivalent to :func:`flatten`, but operates on the space itself. The
|
||||||
result always is a `Box` with flat boundaries. The box has exactly
|
result always is a `Box` with flat boundaries. The box has exactly
|
||||||
``flatdim(space)`` dimensions. Flattening a sample of the original space
|
:func:`flatdim` dimensions. Flattening a sample of the original space
|
||||||
has the same effect as taking a sample of the flattened space.
|
has the same effect as taking a sample of the flattened space.
|
||||||
|
|
||||||
Raises ``NotImplementedError`` if the space is not defined in
|
Raises ``NotImplementedError`` if the space is not defined in
|
||||||
|
@@ -73,7 +73,7 @@ class VectorEnv(gym.Env):
|
|||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
element of :attr:`observation_space`
|
observations : element of :attr:`observation_space`
|
||||||
A batch of observations from the vectorized environment.
|
A batch of observations from the vectorized environment.
|
||||||
"""
|
"""
|
||||||
self.reset_async(seed=seed, return_info=return_info, options=options)
|
self.reset_async(seed=seed, return_info=return_info, options=options)
|
||||||
|
Reference in New Issue
Block a user