mirror of
https://github.com/Farama-Foundation/Gymnasium.git
synced 2025-08-16 19:49:13 +00:00
fix and complete docstrings to match the documentation page (#2697)
This commit is contained in:
33
gym/core.py
33
gym/core.py
@@ -63,7 +63,7 @@ class Env(Generic[ObsType, ActType]):
|
||||
@abstractmethod
|
||||
def step(self, action: ActType) -> Tuple[ObsType, float, bool, dict]:
|
||||
"""Run one timestep of the environment's dynamics. When end of
|
||||
episode is reached, you are responsible for calling `reset()`
|
||||
episode is reached, you are responsible for calling :meth:`reset`
|
||||
to reset this environment's state.
|
||||
|
||||
Accepts an action and returns a tuple (observation, reward, done, info).
|
||||
@@ -71,11 +71,18 @@ class Env(Generic[ObsType, ActType]):
|
||||
Args:
|
||||
action (object): an action provided by the agent
|
||||
|
||||
This method returns a tuple ``(observation, reward, done, info)``
|
||||
|
||||
Returns:
|
||||
observation (object): agent's observation of the current environment
|
||||
observation (object): agent's observation of the current environment. This will be an element of the environment's :attr:`observation_space`. This may, for instance, be a numpy array containing the positions and velocities of certain objects.
|
||||
reward (float) : amount of reward returned after previous action
|
||||
done (bool): whether the episode has ended, in which case further step() calls will return undefined results
|
||||
info (dict): contains auxiliary diagnostic information (helpful for debugging, logging, and sometimes learning)
|
||||
done (bool): whether the episode has ended, in which case further :meth:`step` calls will return undefined results. A done signal may be emitted for different reasons: Maybe the task underlying the environment was solved successfully, a certain timelimit was exceeded, or the physics simulation has entered an invalid state. ``info`` may contain additional information regarding the reason for a ``done`` signal.
|
||||
info (dict): contains auxiliary diagnostic information (helpful for debugging, learning, and logging). This might, for instance, contain:
|
||||
|
||||
- metrics that describe the agent's performance or
|
||||
- state variables that are hidden from observations or
|
||||
- information that distinguishes truncation and termination or
|
||||
- individual reward terms that are combined to produce the total reward
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
@@ -91,16 +98,22 @@ class Env(Generic[ObsType, ActType]):
|
||||
observation.
|
||||
|
||||
This method should also reset the environment's random number
|
||||
generator(s) if `seed` is an integer or if the environment has not
|
||||
generator(s) if ``seed`` is an integer or if the environment has not
|
||||
yet initialized a random number generator. If the environment already
|
||||
has a random number generator and `reset` is called with `seed=None`,
|
||||
has a random number generator and :meth:`reset` is called with ``seed=None``,
|
||||
the RNG should not be reset.
|
||||
Moreover, `reset` should (in the typical use case) be called with an
|
||||
Moreover, :meth:`reset` should (in the typical use case) be called with an
|
||||
integer seed right after initialization and then never again.
|
||||
|
||||
Args:
|
||||
seed (int or None): The seed that is used to initialize the environment's PRNG. If the environment does not already have a PRNG and ``seed=None`` (the default option) is passed, a seed will be chosen from some source of entropy (e.g. timestamp or /dev/urandom). However, if the environment already has a PRNG and ``seed=None`` is passed, the PRNG will *not* be reset. If you pass an integer, the PRNG will be reset even if it already exists. Usually, you want to pass an integer *right after the environment has been initialized and then never again*. Please refer to the minimal example above to see this paradigm in action.
|
||||
return_info (bool): If true, return additional information along with initial observation. This info should be analogous to the info returned in :meth:`step`
|
||||
options (dict or None): Additional information to specify how the environment is reset (optional, depending on the specific environment)
|
||||
|
||||
|
||||
Returns:
|
||||
observation (object): the initial observation.
|
||||
info (optional dictionary): a dictionary containing extra information, this is only returned if return_info is set to true
|
||||
observation (object): Observation of the initial state. This will be an element of :attr:`observation_space` (usually a numpy array) and is analogous to the observation returned by :meth:`step`.
|
||||
info (optional dictionary): This will *only* be returned if ``return_info=True`` is passed. It contains auxiliary information complementing ``observation``. This dictionary should be analogous to the ``info`` returned by :meth:`step`.
|
||||
"""
|
||||
# Initialize the RNG if the seed is manually passed
|
||||
if seed is not None:
|
||||
@@ -131,7 +144,7 @@ class Env(Generic[ObsType, ActType]):
|
||||
Args:
|
||||
mode (str): the mode to render with
|
||||
|
||||
Example:
|
||||
Example::
|
||||
|
||||
class MyEnv(Env):
|
||||
metadata = {'render_modes': ['human', 'rgb_array']}
|
||||
|
@@ -29,10 +29,12 @@ class Box(Space[np.ndarray]):
|
||||
There are two common use cases:
|
||||
|
||||
* Identical bound for each dimension::
|
||||
|
||||
>>> Box(low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32)
|
||||
Box(3, 4)
|
||||
|
||||
* Independent bound for each dimension::
|
||||
|
||||
>>> Box(low=np.array([-1.0, -2.0]), high=np.array([2.0, 4.0]), dtype=np.float32)
|
||||
Box(2,)
|
||||
|
||||
|
@@ -13,10 +13,12 @@ class Dict(Space[TypingDict[str, Space]], Mapping):
|
||||
"""
|
||||
A dictionary of simpler spaces.
|
||||
|
||||
Example usage:
|
||||
Example usage::
|
||||
|
||||
self.observation_space = spaces.Dict({"position": spaces.Discrete(2), "velocity": spaces.Discrete(3)})
|
||||
|
||||
Example usage [nested]:
|
||||
Example usage [nested]::
|
||||
|
||||
self.nested_observation_space = spaces.Dict({
|
||||
'sensors': spaces.Dict({
|
||||
'position': spaces.Box(low=-100, high=100, shape=(3,)),
|
||||
|
@@ -6,10 +6,10 @@ from .space import Space
|
||||
|
||||
|
||||
class Discrete(Space[int]):
|
||||
r"""A discrete space in :math:`\{ 0, 1, \\dots, n-1 \}`.
|
||||
r"""A discrete space in :math:`\{ 0, 1, \dots, n-1 \}`.
|
||||
|
||||
A start value can be optionally specified to shift the range
|
||||
to :math:`\{ a, a+1, \\dots, a+n-1 \}`.
|
||||
to :math:`\{ a, a+1, \dots, a+n-1 \}`.
|
||||
|
||||
Example::
|
||||
|
||||
|
@@ -11,20 +11,15 @@ class MultiBinary(Space[np.ndarray]):
|
||||
"""
|
||||
An n-shape binary space.
|
||||
|
||||
The argument to MultiBinary defines n, which could be a number or a `list` of numbers.
|
||||
The argument to MultiBinary defines n, which could be a number or a ``list`` of numbers.
|
||||
|
||||
Example Usage:
|
||||
|
||||
>> self.observation_space = spaces.MultiBinary(5)
|
||||
|
||||
>> self.observation_space.sample()
|
||||
Example Usage::
|
||||
|
||||
>>> self.observation_space = spaces.MultiBinary(5)
|
||||
>>> self.observation_space.sample()
|
||||
array([0, 1, 0, 1, 0], dtype=int8)
|
||||
|
||||
>> self.observation_space = spaces.MultiBinary([3, 2])
|
||||
|
||||
>> self.observation_space.sample()
|
||||
|
||||
>>> self.observation_space = spaces.MultiBinary([3, 2])
|
||||
>>> self.observation_space.sample()
|
||||
array([[0, 0],
|
||||
[0, 1],
|
||||
[1, 1]], dtype=int8)
|
||||
|
@@ -12,22 +12,19 @@ from .space import Space
|
||||
|
||||
class MultiDiscrete(Space[np.ndarray]):
|
||||
"""
|
||||
- The multi-discrete action space consists of a series of discrete action spaces with different number of actions in each
|
||||
- It is useful to represent game controllers or keyboards where each key can be represented as a discrete action space
|
||||
- It is parametrized by passing an array of positive integers specifying number of actions for each discrete action space
|
||||
The multi-discrete action space consists of a series of discrete action spaces with different number of actions in each. It is useful to represent game controllers or keyboards where each key can be represented as a discrete action space. It is parametrized by passing an array of positive integers specifying number of actions for each discrete action space.
|
||||
|
||||
Note: Some environment wrappers assume a value of 0 always represents the NOOP action.
|
||||
Note:
|
||||
|
||||
e.g. Nintendo Game Controller
|
||||
- Can be conceptualized as 3 discrete action spaces:
|
||||
Some environment wrappers assume a value of 0 always represents the NOOP action.
|
||||
|
||||
1) Arrow Keys: Discrete 5 - NOOP[0], UP[1], RIGHT[2], DOWN[3], LEFT[4] - params: min: 0, max: 4
|
||||
2) Button A: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1
|
||||
3) Button B: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1
|
||||
e.g. Nintendo Game Controller - Can be conceptualized as 3 discrete action spaces:
|
||||
|
||||
- Can be initialized as
|
||||
1. Arrow Keys: Discrete 5 - NOOP[0], UP[1], RIGHT[2], DOWN[3], LEFT[4] - params: min: 0, max: 4
|
||||
2. Button A: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1
|
||||
3. Button B: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1
|
||||
|
||||
MultiDiscrete([ 5, 2, 2 ])
|
||||
It can be initialized as ``MultiDiscrete([ 5, 2, 2 ])``
|
||||
|
||||
"""
|
||||
|
||||
|
@@ -10,7 +10,8 @@ class Tuple(Space[tuple], Sequence):
|
||||
"""
|
||||
A tuple (i.e., product) of simpler spaces
|
||||
|
||||
Example usage:
|
||||
Example usage::
|
||||
|
||||
self.observation_space = spaces.Tuple((spaces.Discrete(2), spaces.Discrete(3)))
|
||||
"""
|
||||
|
||||
|
@@ -17,6 +17,12 @@ def flatdim(space: Space) -> int:
|
||||
|
||||
Accepts a space and returns an integer. Raises ``NotImplementedError`` if
|
||||
the space is not defined in ``gym.spaces``.
|
||||
|
||||
Example usage::
|
||||
|
||||
>>> s = spaces.Dict({"position": spaces.Discrete(2), "velocity": spaces.Discrete(3)})
|
||||
>>> spaces.flatdim(s)
|
||||
5
|
||||
"""
|
||||
raise NotImplementedError(f"Unknown space: `{space}`")
|
||||
|
||||
@@ -101,8 +107,8 @@ def _flatten_dict(space, x) -> np.ndarray:
|
||||
def unflatten(space: Space[T], x: np.ndarray) -> T:
|
||||
"""Unflatten a data point from a space.
|
||||
|
||||
This reverses the transformation applied by ``flatten()``. You must ensure
|
||||
that the ``space`` argument is the same as for the ``flatten()`` call.
|
||||
This reverses the transformation applied by :func:`flatten`. You must ensure
|
||||
that the ``space`` argument is the same as for the :func:`flatten` call.
|
||||
|
||||
Accepts a space and a flattened point. Returns a point with a structure
|
||||
that matches the space. Raises ``NotImplementedError`` if the space is not
|
||||
@@ -156,9 +162,9 @@ def _unflatten_dict(space: Dict, x: np.ndarray) -> dict:
|
||||
def flatten_space(space: Space) -> Box:
|
||||
"""Flatten a space into a single ``Box``.
|
||||
|
||||
This is equivalent to ``flatten()``, but operates on the space itself. The
|
||||
This is equivalent to :func:`flatten`, but operates on the space itself. The
|
||||
result always is a `Box` with flat boundaries. The box has exactly
|
||||
``flatdim(space)`` dimensions. Flattening a sample of the original space
|
||||
:func:`flatdim` dimensions. Flattening a sample of the original space
|
||||
has the same effect as taking a sample of the flattened space.
|
||||
|
||||
Raises ``NotImplementedError`` if the space is not defined in
|
||||
|
@@ -73,7 +73,7 @@ class VectorEnv(gym.Env):
|
||||
|
||||
Returns
|
||||
-------
|
||||
element of :attr:`observation_space`
|
||||
observations : element of :attr:`observation_space`
|
||||
A batch of observations from the vectorized environment.
|
||||
"""
|
||||
self.reset_async(seed=seed, return_info=return_info, options=options)
|
||||
|
Reference in New Issue
Block a user