mirror of
https://github.com/Farama-Foundation/Gymnasium.git
synced 2025-08-16 19:49:13 +00:00
fix and complete docstrings to match the documentation page (#2697)
This commit is contained in:
33
gym/core.py
33
gym/core.py
@@ -63,7 +63,7 @@ class Env(Generic[ObsType, ActType]):
|
||||
@abstractmethod
|
||||
def step(self, action: ActType) -> Tuple[ObsType, float, bool, dict]:
|
||||
"""Run one timestep of the environment's dynamics. When end of
|
||||
episode is reached, you are responsible for calling `reset()`
|
||||
episode is reached, you are responsible for calling :meth:`reset`
|
||||
to reset this environment's state.
|
||||
|
||||
Accepts an action and returns a tuple (observation, reward, done, info).
|
||||
@@ -71,11 +71,18 @@ class Env(Generic[ObsType, ActType]):
|
||||
Args:
|
||||
action (object): an action provided by the agent
|
||||
|
||||
This method returns a tuple ``(observation, reward, done, info)``
|
||||
|
||||
Returns:
|
||||
observation (object): agent's observation of the current environment
|
||||
observation (object): agent's observation of the current environment. This will be an element of the environment's :attr:`observation_space`. This may, for instance, be a numpy array containing the positions and velocities of certain objects.
|
||||
reward (float) : amount of reward returned after previous action
|
||||
done (bool): whether the episode has ended, in which case further step() calls will return undefined results
|
||||
info (dict): contains auxiliary diagnostic information (helpful for debugging, logging, and sometimes learning)
|
||||
done (bool): whether the episode has ended, in which case further :meth:`step` calls will return undefined results. A done signal may be emitted for different reasons: Maybe the task underlying the environment was solved successfully, a certain timelimit was exceeded, or the physics simulation has entered an invalid state. ``info`` may contain additional information regarding the reason for a ``done`` signal.
|
||||
info (dict): contains auxiliary diagnostic information (helpful for debugging, learning, and logging). This might, for instance, contain:
|
||||
|
||||
- metrics that describe the agent's performance or
|
||||
- state variables that are hidden from observations or
|
||||
- information that distinguishes truncation and termination or
|
||||
- individual reward terms that are combined to produce the total reward
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
@@ -91,16 +98,22 @@ class Env(Generic[ObsType, ActType]):
|
||||
observation.
|
||||
|
||||
This method should also reset the environment's random number
|
||||
generator(s) if `seed` is an integer or if the environment has not
|
||||
generator(s) if ``seed`` is an integer or if the environment has not
|
||||
yet initialized a random number generator. If the environment already
|
||||
has a random number generator and `reset` is called with `seed=None`,
|
||||
has a random number generator and :meth:`reset` is called with ``seed=None``,
|
||||
the RNG should not be reset.
|
||||
Moreover, `reset` should (in the typical use case) be called with an
|
||||
Moreover, :meth:`reset` should (in the typical use case) be called with an
|
||||
integer seed right after initialization and then never again.
|
||||
|
||||
Args:
|
||||
seed (int or None): The seed that is used to initialize the environment's PRNG. If the environment does not already have a PRNG and ``seed=None`` (the default option) is passed, a seed will be chosen from some source of entropy (e.g. timestamp or /dev/urandom). However, if the environment already has a PRNG and ``seed=None`` is passed, the PRNG will *not* be reset. If you pass an integer, the PRNG will be reset even if it already exists. Usually, you want to pass an integer *right after the environment has been initialized and then never again*. Please refer to the minimal example above to see this paradigm in action.
|
||||
return_info (bool): If true, return additional information along with initial observation. This info should be analogous to the info returned in :meth:`step`
|
||||
options (dict or None): Additional information to specify how the environment is reset (optional, depending on the specific environment)
|
||||
|
||||
|
||||
Returns:
|
||||
observation (object): the initial observation.
|
||||
info (optional dictionary): a dictionary containing extra information, this is only returned if return_info is set to true
|
||||
observation (object): Observation of the initial state. This will be an element of :attr:`observation_space` (usually a numpy array) and is analogous to the observation returned by :meth:`step`.
|
||||
info (optional dictionary): This will *only* be returned if ``return_info=True`` is passed. It contains auxiliary information complementing ``observation``. This dictionary should be analogous to the ``info`` returned by :meth:`step`.
|
||||
"""
|
||||
# Initialize the RNG if the seed is manually passed
|
||||
if seed is not None:
|
||||
@@ -131,7 +144,7 @@ class Env(Generic[ObsType, ActType]):
|
||||
Args:
|
||||
mode (str): the mode to render with
|
||||
|
||||
Example:
|
||||
Example::
|
||||
|
||||
class MyEnv(Env):
|
||||
metadata = {'render_modes': ['human', 'rgb_array']}
|
||||
|
@@ -29,10 +29,12 @@ class Box(Space[np.ndarray]):
|
||||
There are two common use cases:
|
||||
|
||||
* Identical bound for each dimension::
|
||||
|
||||
>>> Box(low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32)
|
||||
Box(3, 4)
|
||||
|
||||
* Independent bound for each dimension::
|
||||
|
||||
>>> Box(low=np.array([-1.0, -2.0]), high=np.array([2.0, 4.0]), dtype=np.float32)
|
||||
Box(2,)
|
||||
|
||||
|
@@ -13,10 +13,12 @@ class Dict(Space[TypingDict[str, Space]], Mapping):
|
||||
"""
|
||||
A dictionary of simpler spaces.
|
||||
|
||||
Example usage:
|
||||
Example usage::
|
||||
|
||||
self.observation_space = spaces.Dict({"position": spaces.Discrete(2), "velocity": spaces.Discrete(3)})
|
||||
|
||||
Example usage [nested]:
|
||||
Example usage [nested]::
|
||||
|
||||
self.nested_observation_space = spaces.Dict({
|
||||
'sensors': spaces.Dict({
|
||||
'position': spaces.Box(low=-100, high=100, shape=(3,)),
|
||||
|
@@ -6,10 +6,10 @@ from .space import Space
|
||||
|
||||
|
||||
class Discrete(Space[int]):
|
||||
r"""A discrete space in :math:`\{ 0, 1, \\dots, n-1 \}`.
|
||||
r"""A discrete space in :math:`\{ 0, 1, \dots, n-1 \}`.
|
||||
|
||||
A start value can be optionally specified to shift the range
|
||||
to :math:`\{ a, a+1, \\dots, a+n-1 \}`.
|
||||
to :math:`\{ a, a+1, \dots, a+n-1 \}`.
|
||||
|
||||
Example::
|
||||
|
||||
|
@@ -11,20 +11,15 @@ class MultiBinary(Space[np.ndarray]):
|
||||
"""
|
||||
An n-shape binary space.
|
||||
|
||||
The argument to MultiBinary defines n, which could be a number or a `list` of numbers.
|
||||
The argument to MultiBinary defines n, which could be a number or a ``list`` of numbers.
|
||||
|
||||
Example Usage:
|
||||
|
||||
>> self.observation_space = spaces.MultiBinary(5)
|
||||
|
||||
>> self.observation_space.sample()
|
||||
Example Usage::
|
||||
|
||||
>>> self.observation_space = spaces.MultiBinary(5)
|
||||
>>> self.observation_space.sample()
|
||||
array([0, 1, 0, 1, 0], dtype=int8)
|
||||
|
||||
>> self.observation_space = spaces.MultiBinary([3, 2])
|
||||
|
||||
>> self.observation_space.sample()
|
||||
|
||||
>>> self.observation_space = spaces.MultiBinary([3, 2])
|
||||
>>> self.observation_space.sample()
|
||||
array([[0, 0],
|
||||
[0, 1],
|
||||
[1, 1]], dtype=int8)
|
||||
|
@@ -12,22 +12,19 @@ from .space import Space
|
||||
|
||||
class MultiDiscrete(Space[np.ndarray]):
|
||||
"""
|
||||
- The multi-discrete action space consists of a series of discrete action spaces with different number of actions in each
|
||||
- It is useful to represent game controllers or keyboards where each key can be represented as a discrete action space
|
||||
- It is parametrized by passing an array of positive integers specifying number of actions for each discrete action space
|
||||
The multi-discrete action space consists of a series of discrete action spaces with different number of actions in each. It is useful to represent game controllers or keyboards where each key can be represented as a discrete action space. It is parametrized by passing an array of positive integers specifying number of actions for each discrete action space.
|
||||
|
||||
Note: Some environment wrappers assume a value of 0 always represents the NOOP action.
|
||||
Note:
|
||||
|
||||
e.g. Nintendo Game Controller
|
||||
- Can be conceptualized as 3 discrete action spaces:
|
||||
Some environment wrappers assume a value of 0 always represents the NOOP action.
|
||||
|
||||
1) Arrow Keys: Discrete 5 - NOOP[0], UP[1], RIGHT[2], DOWN[3], LEFT[4] - params: min: 0, max: 4
|
||||
2) Button A: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1
|
||||
3) Button B: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1
|
||||
e.g. Nintendo Game Controller - Can be conceptualized as 3 discrete action spaces:
|
||||
|
||||
- Can be initialized as
|
||||
1. Arrow Keys: Discrete 5 - NOOP[0], UP[1], RIGHT[2], DOWN[3], LEFT[4] - params: min: 0, max: 4
|
||||
2. Button A: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1
|
||||
3. Button B: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1
|
||||
|
||||
MultiDiscrete([ 5, 2, 2 ])
|
||||
It can be initialized as ``MultiDiscrete([ 5, 2, 2 ])``
|
||||
|
||||
"""
|
||||
|
||||
|
@@ -10,7 +10,8 @@ class Tuple(Space[tuple], Sequence):
|
||||
"""
|
||||
A tuple (i.e., product) of simpler spaces
|
||||
|
||||
Example usage:
|
||||
Example usage::
|
||||
|
||||
self.observation_space = spaces.Tuple((spaces.Discrete(2), spaces.Discrete(3)))
|
||||
"""
|
||||
|
||||
|
@@ -17,6 +17,12 @@ def flatdim(space: Space) -> int:
|
||||
|
||||
Accepts a space and returns an integer. Raises ``NotImplementedError`` if
|
||||
the space is not defined in ``gym.spaces``.
|
||||
|
||||
Example usage::
|
||||
|
||||
>>> s = spaces.Dict({"position": spaces.Discrete(2), "velocity": spaces.Discrete(3)})
|
||||
>>> spaces.flatdim(s)
|
||||
5
|
||||
"""
|
||||
raise NotImplementedError(f"Unknown space: `{space}`")
|
||||
|
||||
@@ -101,8 +107,8 @@ def _flatten_dict(space, x) -> np.ndarray:
|
||||
def unflatten(space: Space[T], x: np.ndarray) -> T:
|
||||
"""Unflatten a data point from a space.
|
||||
|
||||
This reverses the transformation applied by ``flatten()``. You must ensure
|
||||
that the ``space`` argument is the same as for the ``flatten()`` call.
|
||||
This reverses the transformation applied by :func:`flatten`. You must ensure
|
||||
that the ``space`` argument is the same as for the :func:`flatten` call.
|
||||
|
||||
Accepts a space and a flattened point. Returns a point with a structure
|
||||
that matches the space. Raises ``NotImplementedError`` if the space is not
|
||||
@@ -156,9 +162,9 @@ def _unflatten_dict(space: Dict, x: np.ndarray) -> dict:
|
||||
def flatten_space(space: Space) -> Box:
|
||||
"""Flatten a space into a single ``Box``.
|
||||
|
||||
This is equivalent to ``flatten()``, but operates on the space itself. The
|
||||
This is equivalent to :func:`flatten`, but operates on the space itself. The
|
||||
result always is a `Box` with flat boundaries. The box has exactly
|
||||
``flatdim(space)`` dimensions. Flattening a sample of the original space
|
||||
:func:`flatdim` dimensions. Flattening a sample of the original space
|
||||
has the same effect as taking a sample of the flattened space.
|
||||
|
||||
Raises ``NotImplementedError`` if the space is not defined in
|
||||
|
@@ -73,7 +73,7 @@ class VectorEnv(gym.Env):
|
||||
|
||||
Returns
|
||||
-------
|
||||
element of :attr:`observation_space`
|
||||
observations : element of :attr:`observation_space`
|
||||
A batch of observations from the vectorized environment.
|
||||
"""
|
||||
self.reset_async(seed=seed, return_info=return_info, options=options)
|
||||
|
Reference in New Issue
Block a user