fix and complete docstrings to match the documentation page (#2697)

2025-08-16 19:49:13 +00:00 · 2022-04-06 20:12:55 +01:00
parent 07fd227a4f
commit 658d4081c0
9 changed files with 94 additions and 78 deletions
--- a/gym/core.py
+++ b/gym/core.py
@@ -63,7 +63,7 @@ class Env(Generic[ObsType, ActType]):
    @abstractmethod
    def step(self, action: ActType) -> Tuple[ObsType, float, bool, dict]:
        """Run one timestep of the environment's dynamics. When end of
-        episode is reached, you are responsible for calling `reset()`
+        episode is reached, you are responsible for calling :meth:`reset`
        to reset this environment's state.
        Accepts an action and returns a tuple (observation, reward, done, info).
@@ -71,11 +71,18 @@ class Env(Generic[ObsType, ActType]):
        Args:
            action (object): an action provided by the agent
        This method returns a tuple ``(observation, reward, done, info)``
        Returns:
-            observation (object): agent's observation of the current environment
+            observation (object): agent's observation of the current environment. This will be an element of the environment's :attr:`observation_space`. This may, for instance, be a numpy array containing the positions and velocities of certain objects.
            reward (float): amount of reward returned after the previous action
-            done (bool): whether the episode has ended, in which case further step() calls will return undefined results
+            done (bool): whether the episode has ended, in which case further :meth:`step` calls will return undefined results. A done signal may be emitted for different reasons: maybe the task underlying the environment was solved successfully, a certain time limit was exceeded, or the physics simulation has entered an invalid state. ``info`` may contain additional information regarding the reason for a ``done`` signal.
-            info (dict): contains auxiliary diagnostic information (helpful for debugging, logging, and sometimes learning)
+            info (dict): contains auxiliary diagnostic information (helpful for debugging, learning, and logging). This might, for instance, contain:
                - metrics that describe the agent's performance or
                - state variables that are hidden from observations or
                - information that distinguishes truncation and termination or
                - individual reward terms that are combined to produce the total reward
        """
        raise NotImplementedError
@@ -91,16 +98,22 @@ class Env(Generic[ObsType, ActType]):
        observation.
        This method should also reset the environment's random number
-        generator(s) if `seed` is an integer or if the environment has not
+        generator(s) if ``seed`` is an integer or if the environment has not
        yet initialized a random number generator. If the environment already
-        has a random number generator and `reset` is called with `seed=None`,
+        has a random number generator and :meth:`reset` is called with ``seed=None``,
        the RNG should not be reset.
-        Moreover, `reset` should (in the typical use case) be called with an
+        Moreover, :meth:`reset` should (in the typical use case) be called with an
        integer seed right after initialization and then never again.
        Args:
            seed (int or None): The seed that is used to initialize the environment's PRNG. If the environment does not already have a PRNG and ``seed=None`` (the default option) is passed, a seed will be chosen from some source of entropy (e.g. timestamp or /dev/urandom). However, if the environment already has a PRNG and ``seed=None`` is passed, the PRNG will *not* be reset. If you pass an integer, the PRNG will be reset even if it already exists. Usually, you want to pass an integer *right after the environment has been initialized and then never again*. Please refer to the minimal example above to see this paradigm in action.
            return_info (bool): If true, return additional information along with initial observation. This info should be analogous to the info returned in :meth:`step`
            options (dict or None): Additional information to specify how the environment is reset (optional, depending on the specific environment)
        Returns:
-            observation (object): the initial observation.
+            observation (object): Observation of the initial state. This will be an element of :attr:`observation_space` (usually a numpy array) and is analogous to the observation returned by :meth:`step`.
-            info (optional dictionary): a dictionary containing extra information, this is only returned if return_info is set to true
+            info (optional dictionary): This will *only* be returned if ``return_info=True`` is passed. It contains auxiliary information complementing ``observation``. This dictionary should be analogous to the ``info`` returned by :meth:`step`.
        """
        # Initialize the RNG if the seed is manually passed
        if seed is not None:
@@ -122,7 +135,7 @@ class Env(Generic[ObsType, ActType]):
        - ansi: Return a string (str) or StringIO.StringIO containing a
          terminal-style text representation. The text can include newlines
          and ANSI escape sequences (e.g. for colors).
-
+                
        Note:
            Make sure that your class's metadata 'render_modes' key includes
              the list of supported modes. It's recommended to call super()
@@ -131,18 +144,18 @@ class Env(Generic[ObsType, ActType]):
        Args:
            mode (str): the mode to render with
-        Example:
+        Example::
-        class MyEnv(Env):
+            class MyEnv(Env):
-            metadata = {'render_modes': ['human', 'rgb_array']}
+                metadata = {'render_modes': ['human', 'rgb_array']}
-            def render(self, mode='human'):
+                def render(self, mode='human'):
-                if mode == 'rgb_array':
+                    if mode == 'rgb_array':
-                    return np.array(...) # return RGB frame suitable for video
+                        return np.array(...) # return RGB frame suitable for video
-                elif mode == 'human':
+                    elif mode == 'human':
-                    ... # pop up a window and render
+                        ... # pop up a window and render
-                else:
+                    else:
-                    super(MyEnv, self).render(mode=mode) # just raise an exception
+                        super(MyEnv, self).render(mode=mode) # just raise an exception
        """
        raise NotImplementedError
--- a/gym/spaces/box.py
+++ b/gym/spaces/box.py
@@ -29,10 +29,12 @@ class Box(Space[np.ndarray]):
    There are two common use cases:
    * Identical bound for each dimension::
        >>> Box(low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32)
        Box(3, 4)
    * Independent bound for each dimension::
        >>> Box(low=np.array([-1.0, -2.0]), high=np.array([2.0, 4.0]), dtype=np.float32)
        Box(2,)
--- a/gym/spaces/dict.py
+++ b/gym/spaces/dict.py
@@ -13,30 +13,32 @@ class Dict(Space[TypingDict[str, Space]], Mapping):
    """
    A dictionary of simpler spaces.
-    Example usage:
+    Example usage::
    self.observation_space = spaces.Dict({"position": spaces.Discrete(2), "velocity": spaces.Discrete(3)})
-    Example usage [nested]:
+        self.observation_space = spaces.Dict({"position": spaces.Discrete(2), "velocity": spaces.Discrete(3)})
-    self.nested_observation_space = spaces.Dict({
+
-        'sensors':  spaces.Dict({
+    Example usage [nested]::
-            'position': spaces.Box(low=-100, high=100, shape=(3,)),
+    
-            'velocity': spaces.Box(low=-1, high=1, shape=(3,)),
+        self.nested_observation_space = spaces.Dict({
-            'front_cam': spaces.Tuple((
+            'sensors':  spaces.Dict({
-                spaces.Box(low=0, high=1, shape=(10, 10, 3)),
+                'position': spaces.Box(low=-100, high=100, shape=(3,)),
-                spaces.Box(low=0, high=1, shape=(10, 10, 3))
+                'velocity': spaces.Box(low=-1, high=1, shape=(3,)),
-            )),
+                'front_cam': spaces.Tuple((
-            'rear_cam': spaces.Box(low=0, high=1, shape=(10, 10, 3)),
+                    spaces.Box(low=0, high=1, shape=(10, 10, 3)),
-        }),
+                    spaces.Box(low=0, high=1, shape=(10, 10, 3))
-        'ext_controller': spaces.MultiDiscrete((5, 2, 2)),
+                )),
-        'inner_state':spaces.Dict({
+                'rear_cam': spaces.Box(low=0, high=1, shape=(10, 10, 3)),
-            'charge': spaces.Discrete(100),
+            }),
-            'system_checks': spaces.MultiBinary(10),
+            'ext_controller': spaces.MultiDiscrete((5, 2, 2)),
-            'job_status': spaces.Dict({
+            'inner_state':spaces.Dict({
-                'task': spaces.Discrete(5),
+                'charge': spaces.Discrete(100),
-                'progress': spaces.Box(low=0, high=100, shape=()),
+                'system_checks': spaces.MultiBinary(10),
                'job_status': spaces.Dict({
                    'task': spaces.Discrete(5),
                    'progress': spaces.Box(low=0, high=100, shape=()),
                })
            })
        })
    })
    """
    def __init__(
--- a/gym/spaces/discrete.py
+++ b/gym/spaces/discrete.py
@@ -6,10 +6,10 @@ from .space import Space
 class Discrete(Space[int]):
-    r"""A discrete space in :math:`\{ 0, 1, \\dots, n-1 \}`.
+    r"""A discrete space in :math:`\{ 0, 1, \dots, n-1 \}`.
    A start value can be optionally specified to shift the range
-    to :math:`\{ a, a+1, \\dots, a+n-1 \}`.
+    to :math:`\{ a, a+1, \dots, a+n-1 \}`.
    Example::
--- a/gym/spaces/multi_binary.py
+++ b/gym/spaces/multi_binary.py
@@ -11,23 +11,18 @@ class MultiBinary(Space[np.ndarray]):
    """
    An n-shape binary space.
-    The argument to MultiBinary defines n, which could be a number or a `list` of numbers.
+    The argument to MultiBinary defines n, which could be a number or a ``list`` of numbers.
-    Example Usage:
+    Example Usage::
-    >> self.observation_space = spaces.MultiBinary(5)
+        >>> self.observation_space = spaces.MultiBinary(5)
-
+        >>> self.observation_space.sample()
-    >> self.observation_space.sample()
+            array([0, 1, 0, 1, 0], dtype=int8)
-
+        >>> self.observation_space = spaces.MultiBinary([3, 2])
-        array([0, 1, 0, 1, 0], dtype=int8)
+        >>> self.observation_space.sample()
-
+            array([[0, 0],
-    >> self.observation_space = spaces.MultiBinary([3, 2])
+                [0, 1],
-
+                [1, 1]], dtype=int8)
    >> self.observation_space.sample()
        array([[0, 0],
               [0, 1],
               [1, 1]], dtype=int8)
    """
--- a/gym/spaces/multi_discrete.py
+++ b/gym/spaces/multi_discrete.py
@@ -12,22 +12,19 @@ from .space import Space
 class MultiDiscrete(Space[np.ndarray]):
    """
-    - The multi-discrete action space consists of a series of discrete action spaces with different number of actions in each
+    The multi-discrete action space consists of a series of discrete action spaces with a different number of actions in each. It is useful to represent game controllers or keyboards where each key can be represented as a discrete action space. It is parametrized by passing an array of positive integers specifying the number of actions for each discrete action space.
-    - It is useful to represent game controllers or keyboards where each key can be represented as a discrete action space
+    
-    - It is parametrized by passing an array of positive integers specifying number of actions for each discrete action space
+    Note: 
        Some environment wrappers assume a value of 0 always represents the NOOP action.
-    Note: Some environment wrappers assume a value of 0 always represents the NOOP action.
+    e.g. Nintendo Game Controller - Can be conceptualized as 3 discrete action spaces:
-    e.g. Nintendo Game Controller
+    1. Arrow Keys: Discrete 5  - NOOP[0], UP[1], RIGHT[2], DOWN[3], LEFT[4]  - params: min: 0, max: 4
-    - Can be conceptualized as 3 discrete action spaces:
+    2. Button A:   Discrete 2  - NOOP[0], Pressed[1] - params: min: 0, max: 1
    3. Button B:   Discrete 2  - NOOP[0], Pressed[1] - params: min: 0, max: 1
-        1) Arrow Keys: Discrete 5  - NOOP[0], UP[1], RIGHT[2], DOWN[3], LEFT[4]  - params: min: 0, max: 4
+    It can be initialized as ``MultiDiscrete([ 5, 2, 2 ])``
        2) Button A:   Discrete 2  - NOOP[0], Pressed[1] - params: min: 0, max: 1
        3) Button B:   Discrete 2  - NOOP[0], Pressed[1] - params: min: 0, max: 1
    - Can be initialized as
        MultiDiscrete([ 5, 2, 2 ])
    """
--- a/gym/spaces/tuple.py
+++ b/gym/spaces/tuple.py
@@ -10,8 +10,9 @@ class Tuple(Space[tuple], Sequence):
    """
    A tuple (i.e., product) of simpler spaces
-    Example usage:
+    Example usage::
-    self.observation_space = spaces.Tuple((spaces.Discrete(2), spaces.Discrete(3)))
+
        self.observation_space = spaces.Tuple((spaces.Discrete(2), spaces.Discrete(3)))
    """
    def __init__(
--- a/gym/spaces/utils.py
+++ b/gym/spaces/utils.py
@@ -17,6 +17,12 @@ def flatdim(space: Space) -> int:
    Accepts a space and returns an integer. Raises ``NotImplementedError`` if
    the space is not defined in ``gym.spaces``.
    Example usage::
        >>> s = spaces.Dict({"position": spaces.Discrete(2), "velocity": spaces.Discrete(3)})
        >>> spaces.flatdim(s)
        5
    """
    raise NotImplementedError(f"Unknown space: `{space}`")
@@ -101,8 +107,8 @@ def _flatten_dict(space, x) -> np.ndarray:
 def unflatten(space: Space[T], x: np.ndarray) -> T:
    """Unflatten a data point from a space.
-    This reverses the transformation applied by ``flatten()``. You must ensure
+    This reverses the transformation applied by :func:`flatten`. You must ensure
-    that the ``space`` argument is the same as for the ``flatten()`` call.
+    that the ``space`` argument is the same as for the :func:`flatten` call.
    Accepts a space and a flattened point. Returns a point with a structure
    that matches the space. Raises ``NotImplementedError`` if the space is not
@@ -156,9 +162,9 @@ def _unflatten_dict(space: Dict, x: np.ndarray) -> dict:
 def flatten_space(space: Space) -> Box:
    """Flatten a space into a single ``Box``.
-    This is equivalent to ``flatten()``, but operates on the space itself. The
+    This is equivalent to :func:`flatten`, but operates on the space itself. The
    result is always a ``Box`` with flat boundaries. The box has exactly
-    ``flatdim(space)`` dimensions. Flattening a sample of the original space
+    :func:`flatdim` dimensions. Flattening a sample of the original space
    has the same effect as taking a sample of the flattened space.
    Raises ``NotImplementedError`` if the space is not defined in
--- a/gym/vector/vector_env.py
+++ b/gym/vector/vector_env.py
@@ -73,7 +73,7 @@ class VectorEnv(gym.Env):
        Returns
        -------
-        element of :attr:`observation_space`
+        observations : element of :attr:`observation_space`
            A batch of observations from the vectorized environment.
        """
        self.reset_async(seed=seed, return_info=return_info, options=options)