Rename gymnasium as gym in docs (#24)

Mark Towers
2022-09-19 21:14:09 +01:00
committed by GitHub
parent 5674a52d4c
commit c9056e37e1
68 changed files with 1134 additions and 1131 deletions

View File

@@ -10,8 +10,8 @@ firstpage:
Initializing environments is very easy in Gymnasium and can be done via:
```python
-import gymnasium
-env = gymnasium.make('CartPole-v0')
+import gymnasium as gym
+env = gym.make('CartPole-v0')
```
## Interacting with the Environment
@@ -46,14 +46,15 @@ Let's see what the agent-environment loop looks like in Gymnasium.
This example will run an instance of the `LunarLander-v2` environment for 1000 timesteps. Since we pass `render_mode="human"`, you should see a window pop up rendering the environment.
```python
-import gymnasium
-env = gymnasium.make("LunarLander-v2", render_mode="human")
+import gymnasium as gym
+env = gym.make("LunarLander-v2", render_mode="human")
env.action_space.seed(42)
observation, info = env.reset(seed=42)
for _ in range(1000):
-    observation, reward, terminated, truncated, info = env.step(env.action_space.sample())
+    action = env.action_space.sample()
+    observation, reward, terminated, truncated, info = env.step(action)

    if terminated or truncated:
        observation, info = env.reset()
@@ -201,7 +202,7 @@ For example, if pressing the keys `w` and `space` at the same time is supposed t
```
As a more complete example, let's say we wish to play with `CartPole-v0` using our left and right arrow keys. The code would be as follows:
```python
-import gymnasium
+import gymnasium as gym
import pygame
from gymnasium.utils.play import play
mapping = {(pygame.K_LEFT,): 0, (pygame.K_RIGHT,): 1}
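# Hedged sketch, not part of this hunk: the mapping would then be handed to
# `play` (assuming the `keys_to_action` parameter of `gymnasium.utils.play.play`):
play(gym.make("CartPole-v0"), keys_to_action=mapping)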

View File

@@ -69,13 +69,13 @@ may look like ` {"agent": array([1, 0]), "target": array([0, 3])}`.
Since we have 4 actions in our environment ("right", "up", "left", "down"), we will use `Discrete(4)` as an action space.
Here is the declaration of `GridWorldEnv` and the implementation of `__init__`:
```python
-import gymnasium
+import gymnasium as gym
from gymnasium import spaces
import pygame
import numpy as np
-class GridWorldEnv(gymnasium.Env):
+class GridWorldEnv(gym.Env):
    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 4}

    def __init__(self, render_mode=None, size=5):
@@ -354,14 +354,14 @@ After you have installed your package locally with `pip install -e gym-examples`
```python
import gym_examples
-env = gymnasium.make('gym_examples/GridWorld-v0')
+env = gym.make('gym_examples/GridWorld-v0')
```
You can also pass keyword arguments of your environment's constructor to `gymnasium.make` to customize the environment.
In our case, we could do:
```python
-env = gymnasium.make('gym_examples/GridWorld-v0', size=10)
+env = gym.make('gym_examples/GridWorld-v0', size=10)
```
Sometimes, you may find it more convenient to skip registration and call the environment's
@@ -382,7 +382,7 @@ a wrapper on top of environment instances to flatten observations into a single
import gym_examples
from gymnasium.wrappers import FlattenObservation
-env = gymnasium.make('gym_examples/GridWorld-v0')
+env = gym.make('gym_examples/GridWorld-v0')
wrapped_env = FlattenObservation(env)
print(wrapped_env.reset()) # E.g. [3 0 3 3], {}
```
@@ -396,7 +396,7 @@ a wrapper that does this job. This wrapper is also available in gym-examples:
import gym_examples
from gym_examples.wrappers import RelativePosition
-env = gymnasium.make('gym_examples/GridWorld-v0')
+env = gym.make('gym_examples/GridWorld-v0')
wrapped_env = RelativePosition(env)
print(wrapped_env.reset()) # E.g. [-3 3], {}
```
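For reference, a minimal sketch of what such a `RelativePosition` observation wrapper could look like (hedged: the actual implementation in gym-examples may differ; the space bounds here are assumptions):
```python
import numpy as np
import gymnasium as gym
from gymnasium import spaces

class RelativePosition(gym.ObservationWrapper):
    def __init__(self, env):
        super().__init__(env)
        # observe the vector pointing from the agent to the target
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(2,))

    def observation(self, obs):
        return obs["target"] - obs["agent"]
```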

View File

@@ -19,10 +19,11 @@ Similar to `gymnasium.make`, you can run a vectorized version of a registered en
The following example runs 3 copies of the ``CartPole-v1`` environment in parallel, taking as input a vector of 3 binary actions (one for each copy of the environment). It returns an array of 3 observations stacked along the first dimension, an array of rewards returned by each copy, and an array of booleans indicating if the episode in each parallel environment has ended.
```python
->>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
+>>> import gymnasium as gym
+>>> envs = gym.vector.make("CartPole-v1", num_envs=3)
>>> envs.reset()
>>> actions = np.array([1, 0, 1])
->>> observations, rewards, dones, infos = envs.step(actions)
+>>> observations, rewards, terminated, truncated, infos = envs.step(actions)
>>> observations
array([[ 0.00122802, 0.16228443, 0.02521779, -0.23700266],
@@ -31,7 +32,7 @@ array([[ 0.00122802, 0.16228443, 0.02521779, -0.23700266],
dtype=float32)
>>> rewards
array([1., 1., 1.])
->>> dones
+>>> terminated
array([False, False, False])
>>> infos
{}
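# Hedged sketch, not part of this hunk: a complete stepping loop over the
# batched API shown above, closing the environments when done
>>> for _ in range(100):
...     actions = envs.action_space.sample()  # one action per environment copy
...     observations, rewards, terminated, truncated, infos = envs.step(actions)
>>> envs.close()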
@@ -48,25 +49,25 @@ The function `gymnasium.vector.make` is meant to be used only in basic cases (e.
To create a vectorized environment that runs multiple environment copies, you can wrap your parallel environments inside `gymnasium.vector.SyncVectorEnv` (for sequential execution), or `gymnasium.vector.AsyncVectorEnv` (for parallel execution, with [multiprocessing](https://docs.python.org/3/library/multiprocessing.html)). These vectorized environments take as input a list of callables specifying how the copies are created.
```python
->>> envs = gymnasium.vector.AsyncVectorEnv([
-...     lambda: gymnasium.make("CartPole-v1"),
-...     lambda: gymnasium.make("CartPole-v1"),
-...     lambda: gymnasium.make("CartPole-v1")
+>>> envs = gym.vector.AsyncVectorEnv([
+...     lambda: gym.make("CartPole-v1"),
+...     lambda: gym.make("CartPole-v1"),
+...     lambda: gym.make("CartPole-v1")
... ])
```
Alternatively, to create a vectorized environment of multiple copies of the same registered environment, you can use the function `gymnasium.vector.make()`.
```python
>>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3) # Equivalent
>>> envs = gym.vector.make("CartPole-v1", num_envs=3) # Equivalent
```
To enable automatic batching of actions and observations, all of the environment copies must share the same `action_space` and `observation_space`. However, the parallel environments are not required to be exact copies of one another. For example, you can run 2 instances of ``Pendulum-v0`` with different values for gravity in a vectorized environment with:
```python
->>> env = gymnasium.vector.AsyncVectorEnv([
-...     lambda: gymnasium.make("Pendulum-v0", g=9.81),
-...     lambda: gymnasium.make("Pendulum-v0", g=1.62)
+>>> env = gym.vector.AsyncVectorEnv([
+...     lambda: gym.make("Pendulum-v0", g=9.81),
+...     lambda: gym.make("Pendulum-v0", g=1.62)
... ])
```
@@ -76,14 +77,14 @@ When using `AsyncVectorEnv` with either the ``spawn`` or ``forkserver`` start me
```python
if __name__ == "__main__":
envs = gymnasium.vector.make("CartPole-v1", num_envs=3, context="spawn")
envs = gym.vector.make("CartPole-v1", num_envs=3, context="spawn")
```
### Working with vectorized environments
While standard Gymnasium environments take a single action and return a single observation (with a reward and a boolean indicating termination), vectorized environments take a *batch of actions* as input, and return a *batch of observations*, together with an array of rewards and booleans indicating if the episode ended in each environment copy.
```python
->>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
+>>> envs = gym.vector.make("CartPole-v1", num_envs=3)
>>> envs.reset()
(array([[-0.02792548, -0.04423395, 0.00026012, 0.04486719],
[-0.04906582, 0.02779809, 0.02881928, -0.04467649],
@@ -91,7 +92,7 @@ While standard Gymnasium environments take a single action and return a single o
dtype=float32), {})
>>> actions = np.array([1, 0, 1])
->>> observations, rewards, dones, infos = envs.step(actions)
+>>> observations, rewards, terminated, truncated, infos = envs.step(actions)
>>> observations
array([[ 0.00187507, 0.18986781, -0.03168437, -0.301252 ],
@@ -100,7 +101,7 @@ array([[ 0.00187507, 0.18986781, -0.03168437, -0.301252 ],
dtype=float32)
>>> rewards
array([1., 1., 1.])
->>> dones
+>>> terminated
array([False, False, False])
>>> infos
{}
@@ -109,15 +110,15 @@ array([False, False, False])
Vectorized environments are compatible with any environment, regardless of the action and observation spaces (e.g. container spaces like `gymnasium.spaces.Dict`, or any arbitrarily nested spaces). In particular, vectorized environments can automatically batch the observations returned by `VectorEnv.reset` and `VectorEnv.step` for any standard Gymnasium `Space` (e.g. `gymnasium.spaces.Box`, `gymnasium.spaces.Discrete`, `gymnasium.spaces.Dict`, or any nested structure thereof). Similarly, vectorized environments can take batches of actions from any standard Gymnasium `Space`.
```python
->>> class DictEnv(gymnasium.Env):
-...     observation_space = gymnasium.spaces.Dict({
-...         "position": gymnasium.spaces.Box(-1., 1., (3,), np.float32),
-...         "velocity": gymnasium.spaces.Box(-1., 1., (2,), np.float32)
+>>> class DictEnv(gym.Env):
+...     observation_space = gym.spaces.Dict({
+...         "position": gym.spaces.Box(-1., 1., (3,), np.float32),
+...         "velocity": gym.spaces.Box(-1., 1., (2,), np.float32)
...     })
-...     action_space = gymnasium.spaces.Dict({
-...         "fire": gymnasium.spaces.Discrete(2),
-...         "jump": gymnasium.spaces.Discrete(2),
-...         "acceleration": gymnasium.spaces.Box(-1., 1., (2,), np.float32)
+...     action_space = gym.spaces.Dict({
+...         "fire": gym.spaces.Discrete(2),
+...         "jump": gym.spaces.Discrete(2),
+...         "acceleration": gym.spaces.Box(-1., 1., (2,), np.float32)
...     })
...
...     def reset(self):
@@ -125,9 +126,9 @@ Vectorized environments are compatible with any environment, regardless of the a
...
...     def step(self, action):
...         observation = self.observation_space.sample()
-...         return (observation, 0., False, {})
+...         return observation, 0., False, False, {}
->>> envs = gymnasium.vector.AsyncVectorEnv([lambda: DictEnv()] * 3)
+>>> envs = gym.vector.AsyncVectorEnv([lambda: DictEnv()] * 3)
>>> envs.observation_space
Dict(position:Box(-1.0, 1.0, (3, 3), float32), velocity:Box(-1.0, 1.0, (3, 2), float32))
>>> envs.action_space
@@ -139,7 +140,7 @@ Dict(fire:MultiDiscrete([2 2 2]), jump:MultiDiscrete([2 2 2]), acceleration:Box(
...     "jump": np.array([0, 1, 0]),
...     "acceleration": np.random.uniform(-1., 1., size=(3, 2))
... }
->>> observations, rewards, dones, infos = envs.step(actions)
+>>> observations, rewards, terminated, truncated, infos = envs.step(actions)
>>> observations
{"position": array([[-0.5337036 , 0.7439302 , 0.41748118],
[ 0.9373266 , -0.5780453 , 0.8987405 ],
@@ -152,13 +153,13 @@ Dict(fire:MultiDiscrete([2 2 2]), jump:MultiDiscrete([2 2 2]), acceleration:Box(
The environment copies inside a vectorized environment automatically call `gymnasium.Env.reset` at the end of an episode. In the following example, the episode of the 3rd copy ends after 2 steps (the agent fell in a hole), and the parallel environment gets reset (observation ``0``).
```python
>>> envs = gymnasium.vector.make("FrozenLake-v1", num_envs=3, is_slippery=False)
>>> envs = gym.vector.make("FrozenLake-v1", num_envs=3, is_slippery=False)
>>> envs.reset()
(array([0, 0, 0]), {'prob': array([1, 1, 1]), '_prob': array([ True, True, True])})
->>> observations, rewards, dones, infos = envs.step(np.array([1, 2, 2]))
->>> observations, rewards, dones, infos = envs.step(np.array([1, 2, 1]))
+>>> observations, rewards, terminated, truncated, infos = envs.step(np.array([1, 2, 2]))
+>>> observations, rewards, terminated, truncated, infos = envs.step(np.array([1, 2, 1]))
->>> dones
+>>> terminated
array([False, False, True])
>>> observations
array([8, 2, 0])
@@ -170,22 +171,23 @@ If the _dtype_ of the returned info is whether `int`, `float`, `bool` or any _dt
```python
->>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
+>>> envs = gym.vector.make("CartPole-v1", num_envs=3)
>>> observations, infos = envs.reset()
>>> actions = np.array([1, 0, 1])
->>> observations, rewards, dones, infos = envs.step(actions)
+>>> observations, rewards, terminated, truncated, infos = envs.step(actions)
+>>> dones = np.logical_or(terminated, truncated)
>>> while not any(dones):
-...     observations, rewards, dones, infos = envs.step(actions)
+...     observations, rewards, terminated, truncated, infos = envs.step(actions)
>>> print(dones)
[False, True, False]
>>> print(infos)
-{'terminal_observation': array([None,
+{'final_observation': array([None,
array([-0.11350546, -1.8090094 , 0.23710881, 2.8017728 ], dtype=float32),
-None], dtype=object), '_terminal_observation': array([False, True, False])}
+None], dtype=object), '_final_observation': array([False, True, False])}
```
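As a usage sketch (hedged: it assumes the ``final_observation`` and ``_final_observation`` info keys shown above and the standard vector API), the terminal observation of a finished copy can be recovered like this:
```python
import numpy as np
import gymnasium as gym

envs = gym.vector.make("CartPole-v1", num_envs=3)
observations, infos = envs.reset(seed=42)
for _ in range(200):
    actions = envs.action_space.sample()
    observations, rewards, terminated, truncated, infos = envs.step(actions)
    if "final_observation" in infos:
        # `_final_observation` masks the copies that actually finished this step
        for i, finished in enumerate(infos["_final_observation"]):
            if finished:
                final_obs = infos["final_observation"][i]  # observation before the auto-reset
envs.close()
```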
@@ -193,7 +195,7 @@ If the _dtype_ of the returned info is whether `int`, `float`, `bool` or any _dt
Like any Gymnasium environment, vectorized environments contain the two properties `VectorEnv.observation_space` and `VectorEnv.action_space` to specify the observation and action spaces of the environments. Since vectorized environments operate on multiple environment copies, where the actions taken and observations returned by all of the copies are batched together, the observation and action *spaces* are batched as well so that the input actions are valid elements of `VectorEnv.action_space`, and the observations are valid elements of `VectorEnv.observation_space`.
```python
->>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
+>>> envs = gym.vector.make("CartPole-v1", num_envs=3)
>>> envs.observation_space
Box([[-4.8 ...]], [[4.8 ...]], (3, 4), float32)
>>> envs.action_space
@@ -203,9 +205,9 @@ MultiDiscrete([2 2 2])
In order to appropriately batch the observations and actions in vectorized environments, the observation and action spaces of all of the copies are required to be identical.
```python
->>> envs = gymnasium.vector.AsyncVectorEnv([
-...     lambda: gymnasium.make("CartPole-v1"),
-...     lambda: gymnasium.make("MountainCar-v0")
+>>> envs = gym.vector.AsyncVectorEnv([
+...     lambda: gym.make("CartPole-v1"),
+...     lambda: gym.make("MountainCar-v0")
... ])
RuntimeError: Some environments have an observation space different from `Box([-4.8 ...], [4.8 ...], (4,), float32)`.
In order to batch observations, the observation spaces from all environments must be equal.
@@ -213,7 +215,7 @@ In order to batch observations, the observation spaces from all environments mus
However, sometimes it may be handy to have access to the observation and action spaces of a particular copy, and not the batched spaces. You can access those with the properties `VectorEnv.single_observation_space` and `VectorEnv.single_action_space` of the vectorized environment.
```python
->>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
+>>> envs = gym.vector.make("CartPole-v1", num_envs=3)
>>> envs.single_observation_space
Box([-4.8 ...], [4.8 ...], (4,), float32)
>>> envs.single_action_space
@@ -229,14 +231,14 @@ This is convenient, for example, if you instantiate a policy. In the following e
...     logits = np.dot(observations, weights)
...     return softmax(logits, axis=1)
->>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
+>>> envs = gym.vector.make("CartPole-v1", num_envs=3)
>>> weights = np.random.randn(
...     flatdim(envs.single_observation_space),
...     envs.single_action_space.n
... )
>>> observations, infos = envs.reset()
>>> actions = policy(weights, observations).argmax(axis=1)
->>> observations, rewards, dones, infos = envs.step(actions)
+>>> observations, rewards, terminated, truncated, infos = envs.step(actions)
```
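The definition of ``policy`` is cut off by the hunk above; a hedged reconstruction of the missing lines (assuming ``scipy.special.softmax`` and ``gymnasium.spaces.flatdim``, which the snippet appears to use) would be:
```python
import numpy as np
from scipy.special import softmax  # assumption: softmax comes from SciPy
from gymnasium.spaces import flatdim

def policy(weights, observations):
    # linear policy: one logit per action, normalized to probabilities
    logits = np.dot(observations, weights)
    return softmax(logits, axis=1)
```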
## Intermediate Usage
@@ -245,14 +247,14 @@ This is convenient, for example, if you instantiate a policy. In the following e
`AsyncVectorEnv` runs each environment copy inside an individual process. At each call to `AsyncVectorEnv.reset` or `AsyncVectorEnv.step`, the observations of all of the parallel environments are sent back to the main process. To avoid expensive transfers of data between processes, especially with large observations (e.g. images), `AsyncVectorEnv` uses shared memory by default (``shared_memory=True``) that processes can write to and read from at minimal cost. This can increase the throughput of the vectorized environment.
```python
->>> env_fns = [lambda: gymnasium.make("BreakoutNoFrameskip-v4")] * 5
+>>> env_fns = [lambda: gym.make("BreakoutNoFrameskip-v4")] * 5
->>> envs = gymnasium.vector.AsyncVectorEnv(env_fns, shared_memory=False)
+>>> envs = gym.vector.AsyncVectorEnv(env_fns, shared_memory=False)
>>> envs.reset()
>>> %timeit envs.step(envs.action_space.sample())
2.23 ms ± 136 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
->>> envs = gymnasium.vector.AsyncVectorEnv(env_fns, shared_memory=True)
+>>> envs = gym.vector.AsyncVectorEnv(env_fns, shared_memory=True)
>>> envs.reset()
>>> %timeit envs.step(envs.action_space.sample())
1.36 ms ± 15.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
@@ -262,9 +264,9 @@ This is convenient, for example, if you instantiate a policy. In the following e
Because sometimes things may not go as planned, the exceptions raised in any given environment copy are re-raised in the vectorized environment, even when the copies run in parallel with `AsyncVectorEnv`. This way, you can choose how to handle these exceptions yourself (with ``try ... except``).
```python
->>> class ErrorEnv(gymnasium.Env):
-...     observation_space = gymnasium.spaces.Box(-1., 1., (2,), np.float32)
-...     action_space = gymnasium.spaces.Discrete(2)
+>>> class ErrorEnv(gym.Env):
+...     observation_space = gym.spaces.Box(-1., 1., (2,), np.float32)
+...     action_space = gym.spaces.Discrete(2)
...
...     def reset(self):
...         return np.zeros((2,), dtype=np.float32), {}
@@ -273,11 +275,11 @@ Because sometimes things may not go as planned, the exceptions raised in any giv
...         if action == 1:
...             raise ValueError("An error occurred.")
...         observation = self.observation_space.sample()
-...         return (observation, 0., False, {})
+...         return observation, 0., False, False, {}
>>> envs = gymnasium.vector.AsyncVectorEnv([lambda: ErrorEnv()] * 3)
>>> observations, infos = envs.reset()
->>> observations, rewards, dones, infos = envs.step(np.array([0, 0, 1]))
+>>> observations, rewards, terminated, truncated, infos = envs.step(np.array([0, 0, 1]))
ERROR: Received the following error from Worker-2: ValueError: An error occurred.
ERROR: Shutting down Worker-2.
ERROR: Raising the last exception back to the main process.
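# Hedged sketch, not part of this hunk: catching the re-raised error in the
# main process, as the paragraph above suggests
>>> try:
...     observations, rewards, terminated, truncated, infos = envs.step(np.array([0, 0, 1]))
... except ValueError as err:
...     print(f"Caught worker error: {err}")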
@@ -292,7 +294,7 @@ Vectorized environments will batch actions and observations if they are elements
In the following example, we create a new environment `SMILESEnv`, whose observations are strings representing the [SMILES](https://en.wikipedia.org/wiki/Simplified_molecular-input_line-entry_system) notation of a molecular structure, with a custom observation space `SMILES`. The observations returned by the vectorized environment are contained in a tuple of strings.
```python
->>> class SMILES(gymnasium.Space):
+>>> class SMILES(gym.Space):
...     def __init__(self, symbols):
...         super().__init__()
...         self.symbols = symbols
@@ -300,9 +302,9 @@ In the following example, we create a new environment `SMILESEnv`, whose observa
...     def __eq__(self, other):
...         return self.symbols == other.symbols
->>> class SMILESEnv(gymnasium.Env):
+>>> class SMILESEnv(gym.Env):
...     observation_space = SMILES("][()CO=")
-...     action_space = gymnasium.spaces.Discrete(7)
+...     action_space = gym.spaces.Discrete(7)
...
...     def reset(self):
...         self._state = "["
@@ -310,15 +312,15 @@ In the following example, we create a new environment `SMILESEnv`, whose observa
...
...     def step(self, action):
...         self._state += self.observation_space.symbols[action]
-...         reward = done = (action == 0)
-...         return (self._state, float(reward), done, {})
+...         reward = terminated = (action == 0)
+...         return self._state, float(reward), terminated, False, {}
->>> envs = gymnasium.vector.AsyncVectorEnv(
+>>> envs = gym.vector.AsyncVectorEnv(
...     [lambda: SMILESEnv()] * 3,
...     shared_memory=False
... )
>>> envs.reset()
->>> observations, rewards, dones, infos = envs.step(np.array([2, 5, 4]))
+>>> observations, rewards, terminated, truncated, infos = envs.step(np.array([2, 5, 4]))
>>> observations
('[(', '[O', '[C')
```