Rename gymnasium as gym in docs (#24)

Mark Towers
2022-09-19 21:14:09 +01:00
committed by GitHub
parent 5674a52d4c
commit c9056e37e1
68 changed files with 1134 additions and 1131 deletions

View File

@@ -10,8 +10,8 @@ firstpage:
Initializing environments is very easy in Gymnasium and can be done via:
```python
-import gymnasium
-env = gymnasium.make('CartPole-v0')
+import gymnasium as gym
+env = gym.make('CartPole-v0')
```
## Interacting with the Environment
@@ -46,14 +46,15 @@ Let's see what the agent-environment loop looks like in Gymnasium.
This example will run an instance of the `LunarLander-v2` environment for 1000 timesteps. Since we pass `render_mode="human"`, you should see a window pop up rendering the environment.
```python
-import gymnasium
-env = gymnasium.make("LunarLander-v2", render_mode="human")
+import gymnasium as gym
+env = gym.make("LunarLander-v2", render_mode="human")
env.action_space.seed(42)
observation, info = env.reset(seed=42)
for _ in range(1000):
-    observation, reward, terminated, truncated, info = env.step(env.action_space.sample())
+    action = env.action_space.sample()
+    observation, reward, terminated, truncated, info = env.step(action)

    if terminated or truncated:
        observation, info = env.reset()
@@ -201,7 +202,7 @@ For example, if pressing the keys `w` and `space` at the same time is supposed t
```
As a more complete example, let's say we wish to play with `CartPole-v0` using our left and right arrow keys. The code would be as follows:
```python
-import gymnasium
+import gymnasium as gym
import pygame
from gymnasium.utils.play import play
mapping = {(pygame.K_LEFT,): 0, (pygame.K_RIGHT,): 1}
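# Hedged sketch, not part of this hunk: the mapping would then be handed to
# `play` (assuming the `keys_to_action` parameter of `gymnasium.utils.play.play`):
play(gym.make("CartPole-v0"), keys_to_action=mapping)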

View File

@@ -69,13 +69,13 @@ may look like ` {"agent": array([1, 0]), "target": array([0, 3])}`.
Since we have 4 actions in our environment ("right", "up", "left", "down"), we will use `Discrete(4)` as an action space.
Here is the declaration of `GridWorldEnv` and the implementation of `__init__`:
```python
-import gymnasium
+import gymnasium as gym
from gymnasium import spaces
import pygame
import numpy as np
-class GridWorldEnv(gymnasium.Env):
+class GridWorldEnv(gym.Env):
    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 4}

    def __init__(self, render_mode=None, size=5):
@@ -354,14 +354,14 @@ After you have installed your package locally with `pip install -e gym-examples`
```python
import gym_examples
-env = gymnasium.make('gym_examples/GridWorld-v0')
+env = gym.make('gym_examples/GridWorld-v0')
```
You can also pass keyword arguments of your environment's constructor to `gymnasium.make` to customize the environment.
In our case, we could do:
```python
-env = gymnasium.make('gym_examples/GridWorld-v0', size=10)
+env = gym.make('gym_examples/GridWorld-v0', size=10)
```
Sometimes, you may find it more convenient to skip registration and call the environment's
@@ -382,7 +382,7 @@ a wrapper on top of environment instances to flatten observations into a single
import gym_examples
from gymnasium.wrappers import FlattenObservation
-env = gymnasium.make('gym_examples/GridWorld-v0')
+env = gym.make('gym_examples/GridWorld-v0')
wrapped_env = FlattenObservation(env)
print(wrapped_env.reset()) # E.g. [3 0 3 3], {}
```
@@ -396,7 +396,7 @@ a wrapper that does this job. This wrapper is also available in gym-examples:
import gym_examples
from gym_examples.wrappers import RelativePosition
-env = gymnasium.make('gym_examples/GridWorld-v0')
+env = gym.make('gym_examples/GridWorld-v0')
wrapped_env = RelativePosition(env)
print(wrapped_env.reset()) # E.g. [-3 3], {}
```
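For reference, a minimal sketch of what such a `RelativePosition` observation wrapper could look like (hedged: the actual implementation in gym-examples may differ; the space bounds here are assumptions):
```python
import numpy as np
import gymnasium as gym
from gymnasium import spaces

class RelativePosition(gym.ObservationWrapper):
    def __init__(self, env):
        super().__init__(env)
        # observe the vector pointing from the agent to the target
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(2,))

    def observation(self, obs):
        return obs["target"] - obs["agent"]
```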

View File

@@ -19,10 +19,11 @@ Similar to `gymnasium.make`, you can run a vectorized version of a registered en
The following example runs 3 copies of the ``CartPole-v1`` environment in parallel, taking as input a vector of 3 binary actions (one for each copy of the environment). It returns an array of 3 observations stacked along the first dimension, an array of rewards returned by each copy, and an array of booleans indicating if the episode in each parallel environment has ended.
```python
->>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
+>>> import gymnasium as gym
+>>> envs = gym.vector.make("CartPole-v1", num_envs=3)
>>> envs.reset()
>>> actions = np.array([1, 0, 1])
->>> observations, rewards, dones, infos = envs.step(actions)
+>>> observations, rewards, terminated, truncated, infos = envs.step(actions)
>>> observations
array([[ 0.00122802, 0.16228443, 0.02521779, -0.23700266],
@@ -31,7 +32,7 @@ array([[ 0.00122802, 0.16228443, 0.02521779, -0.23700266],
dtype=float32)
>>> rewards
array([1., 1., 1.])
->>> dones
+>>> terminated
array([False, False, False])
>>> infos
{}
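# Hedged sketch, not part of this hunk: a complete stepping loop over the
# batched API shown above, closing the environments when done
>>> for _ in range(100):
...     actions = envs.action_space.sample()  # one action per environment copy
...     observations, rewards, terminated, truncated, infos = envs.step(actions)
>>> envs.close()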
@@ -48,25 +49,25 @@ The function `gymnasium.vector.make` is meant to be used only in basic cases (e.
To create a vectorized environment that runs multiple environment copies, you can wrap your parallel environments inside `gymnasium.vector.SyncVectorEnv` (for sequential execution), or `gymnasium.vector.AsyncVectorEnv` (for parallel execution, with [multiprocessing](https://docs.python.org/3/library/multiprocessing.html)). These vectorized environments take as input a list of callables specifying how the copies are created.
```python
->>> envs = gymnasium.vector.AsyncVectorEnv([
-...     lambda: gymnasium.make("CartPole-v1"),
-...     lambda: gymnasium.make("CartPole-v1"),
-...     lambda: gymnasium.make("CartPole-v1")
+>>> envs = gym.vector.AsyncVectorEnv([
+...     lambda: gym.make("CartPole-v1"),
+...     lambda: gym.make("CartPole-v1"),
+...     lambda: gym.make("CartPole-v1")
... ])
```
Alternatively, to create a vectorized environment of multiple copies of the same registered environment, you can use the function `gymnasium.vector.make()`.
```python
>>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3) # Equivalent
>>> envs = gym.vector.make("CartPole-v1", num_envs=3) # Equivalent
```
To enable automatic batching of actions and observations, all of the environment copies must share the same `action_space` and `observation_space`. However, the parallel environments are not required to be exact copies of one another. For example, you can run 2 instances of ``Pendulum-v0`` with different values for gravity in a vectorized environment with:
```python
->>> env = gymnasium.vector.AsyncVectorEnv([
-...     lambda: gymnasium.make("Pendulum-v0", g=9.81),
-...     lambda: gymnasium.make("Pendulum-v0", g=1.62)
+>>> env = gym.vector.AsyncVectorEnv([
+...     lambda: gym.make("Pendulum-v0", g=9.81),
+...     lambda: gym.make("Pendulum-v0", g=1.62)
... ])
```
@@ -76,14 +77,14 @@ When using `AsyncVectorEnv` with either the ``spawn`` or ``forkserver`` start me
```python
if __name__ == "__main__":
envs = gymnasium.vector.make("CartPole-v1", num_envs=3, context="spawn")
envs = gym.vector.make("CartPole-v1", num_envs=3, context="spawn")
```
### Working with vectorized environments
While standard Gymnasium environments take a single action and return a single observation (with a reward and a boolean indicating termination), vectorized environments take a *batch of actions* as input, and return a *batch of observations*, together with an array of rewards and booleans indicating if the episode ended in each environment copy.
```python
->>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
+>>> envs = gym.vector.make("CartPole-v1", num_envs=3)
>>> envs.reset()
(array([[-0.02792548, -0.04423395, 0.00026012, 0.04486719],
[-0.04906582, 0.02779809, 0.02881928, -0.04467649],
@@ -91,7 +92,7 @@ While standard Gymnasium environments take a single action and return a single o
dtype=float32), {})
>>> actions = np.array([1, 0, 1])
->>> observations, rewards, dones, infos = envs.step(actions)
+>>> observations, rewards, terminated, truncated, infos = envs.step(actions)
>>> observations
array([[ 0.00187507, 0.18986781, -0.03168437, -0.301252 ],
@@ -100,7 +101,7 @@ array([[ 0.00187507, 0.18986781, -0.03168437, -0.301252 ],
dtype=float32)
>>> rewards
array([1., 1., 1.])
->>> dones
+>>> terminated
array([False, False, False])
>>> infos
{}
@@ -109,15 +110,15 @@ array([False, False, False])
Vectorized environments are compatible with any environment, regardless of the action and observation spaces (e.g. container spaces like `gymnasium.spaces.Dict`, or any arbitrarily nested spaces). In particular, vectorized environments can automatically batch the observations returned by `VectorEnv.reset` and `VectorEnv.step` for any standard Gymnasium `Space` (e.g. `gymnasium.spaces.Box`, `gymnasium.spaces.Discrete`, `gymnasium.spaces.Dict`, or any nested structure thereof). Similarly, vectorized environments can take batches of actions from any standard Gymnasium `Space`.
```python
->>> class DictEnv(gymnasium.Env):
-...     observation_space = gymnasium.spaces.Dict({
-...         "position": gymnasium.spaces.Box(-1., 1., (3,), np.float32),
-...         "velocity": gymnasium.spaces.Box(-1., 1., (2,), np.float32)
+>>> class DictEnv(gym.Env):
+...     observation_space = gym.spaces.Dict({
+...         "position": gym.spaces.Box(-1., 1., (3,), np.float32),
+...         "velocity": gym.spaces.Box(-1., 1., (2,), np.float32)
...     })
-...     action_space = gymnasium.spaces.Dict({
-...         "fire": gymnasium.spaces.Discrete(2),
-...         "jump": gymnasium.spaces.Discrete(2),
-...         "acceleration": gymnasium.spaces.Box(-1., 1., (2,), np.float32)
+...     action_space = gym.spaces.Dict({
+...         "fire": gym.spaces.Discrete(2),
+...         "jump": gym.spaces.Discrete(2),
+...         "acceleration": gym.spaces.Box(-1., 1., (2,), np.float32)
...     })
...
...     def reset(self):
@@ -125,9 +126,9 @@ Vectorized environments are compatible with any environment, regardless of the a
...
...     def step(self, action):
...         observation = self.observation_space.sample()
-...         return (observation, 0., False, {})
+...         return observation, 0., False, False, {}
->>> envs = gymnasium.vector.AsyncVectorEnv([lambda: DictEnv()] * 3)
+>>> envs = gym.vector.AsyncVectorEnv([lambda: DictEnv()] * 3)
>>> envs.observation_space
Dict(position:Box(-1.0, 1.0, (3, 3), float32), velocity:Box(-1.0, 1.0, (3, 2), float32))
>>> envs.action_space
@@ -139,7 +140,7 @@ Dict(fire:MultiDiscrete([2 2 2]), jump:MultiDiscrete([2 2 2]), acceleration:Box(
...     "jump": np.array([0, 1, 0]),
...     "acceleration": np.random.uniform(-1., 1., size=(3, 2))
... }
->>> observations, rewards, dones, infos = envs.step(actions)
+>>> observations, rewards, terminated, truncated, infos = envs.step(actions)
>>> observations
{"position": array([[-0.5337036 , 0.7439302 , 0.41748118],
[ 0.9373266 , -0.5780453 , 0.8987405 ],
@@ -152,13 +153,13 @@ Dict(fire:MultiDiscrete([2 2 2]), jump:MultiDiscrete([2 2 2]), acceleration:Box(
The environment copies inside a vectorized environment automatically call `gymnasium.Env.reset` at the end of an episode. In the following example, the episode of the 3rd copy ends after 2 steps (the agent fell in a hole), and the parallel environment gets reset (observation ``0``).
```python
>>> envs = gymnasium.vector.make("FrozenLake-v1", num_envs=3, is_slippery=False)
>>> envs = gym.vector.make("FrozenLake-v1", num_envs=3, is_slippery=False)
>>> envs.reset()
(array([0, 0, 0]), {'prob': array([1, 1, 1]), '_prob': array([ True, True, True])})
->>> observations, rewards, dones, infos = envs.step(np.array([1, 2, 2]))
->>> observations, rewards, dones, infos = envs.step(np.array([1, 2, 1]))
+>>> observations, rewards, terminated, truncated, infos = envs.step(np.array([1, 2, 2]))
+>>> observations, rewards, terminated, truncated, infos = envs.step(np.array([1, 2, 1]))
->>> dones
+>>> terminated
array([False, False, True])
>>> observations
array([8, 2, 0])
@@ -170,22 +171,23 @@ If the _dtype_ of the returned info is whether `int`, `float`, `bool` or any _dt
```python
->>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
+>>> envs = gym.vector.make("CartPole-v1", num_envs=3)
>>> observations, infos = envs.reset()
>>> actions = np.array([1, 0, 1])
->>> observations, rewards, dones, infos = envs.step(actions)
+>>> observations, rewards, terminated, truncated, infos = envs.step(actions)
+>>> dones = np.logical_or(terminated, truncated)
>>> while not any(dones):
-...     observations, rewards, dones, infos = envs.step(actions)
+...     observations, rewards, terminated, truncated, infos = envs.step(actions)
>>> print(dones)
[False, True, False]
>>> print(infos)
-{'terminal_observation': array([None,
+{'final_observation': array([None,
array([-0.11350546, -1.8090094 , 0.23710881, 2.8017728 ], dtype=float32),
-None], dtype=object), '_terminal_observation': array([False, True, False])}
+None], dtype=object), '_final_observation': array([False, True, False])}
```
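As a usage sketch (hedged: it assumes the ``final_observation`` and ``_final_observation`` info keys shown above and the standard vector API), the terminal observation of a finished copy can be recovered like this:
```python
import numpy as np
import gymnasium as gym

envs = gym.vector.make("CartPole-v1", num_envs=3)
observations, infos = envs.reset(seed=42)
for _ in range(200):
    actions = envs.action_space.sample()
    observations, rewards, terminated, truncated, infos = envs.step(actions)
    if "final_observation" in infos:
        # `_final_observation` masks the copies that actually finished this step
        for i, finished in enumerate(infos["_final_observation"]):
            if finished:
                final_obs = infos["final_observation"][i]  # observation before the auto-reset
envs.close()
```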
@@ -193,7 +195,7 @@ If the _dtype_ of the returned info is whether `int`, `float`, `bool` or any _dt
Like any Gymnasium environment, vectorized environments contain the two properties `VectorEnv.observation_space` and `VectorEnv.action_space` to specify the observation and action spaces of the environments. Since vectorized environments operate on multiple environment copies, where the actions taken and observations returned by all of the copies are batched together, the observation and action *spaces* are batched as well so that the input actions are valid elements of `VectorEnv.action_space`, and the observations are valid elements of `VectorEnv.observation_space`.
```python
->>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
+>>> envs = gym.vector.make("CartPole-v1", num_envs=3)
>>> envs.observation_space
Box([[-4.8 ...]], [[4.8 ...]], (3, 4), float32)
>>> envs.action_space
@@ -203,9 +205,9 @@ MultiDiscrete([2 2 2])
In order to appropriately batch the observations and actions in vectorized environments, the observation and action spaces of all of the copies are required to be identical.
```python
->>> envs = gymnasium.vector.AsyncVectorEnv([
-...     lambda: gymnasium.make("CartPole-v1"),
-...     lambda: gymnasium.make("MountainCar-v0")
+>>> envs = gym.vector.AsyncVectorEnv([
+...     lambda: gym.make("CartPole-v1"),
+...     lambda: gym.make("MountainCar-v0")
... ])
RuntimeError: Some environments have an observation space different from `Box([-4.8 ...], [4.8 ...], (4,), float32)`.
In order to batch observations, the observation spaces from all environments must be equal.
@@ -213,7 +215,7 @@ In order to batch observations, the observation spaces from all environments mus
However, sometimes it may be handy to have access to the observation and action spaces of a particular copy, and not the batched spaces. You can access those with the properties `VectorEnv.single_observation_space` and `VectorEnv.single_action_space` of the vectorized environment.
```python
->>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
+>>> envs = gym.vector.make("CartPole-v1", num_envs=3)
>>> envs.single_observation_space
Box([-4.8 ...], [4.8 ...], (4,), float32)
>>> envs.single_action_space
@@ -229,14 +231,14 @@ This is convenient, for example, if you instantiate a policy. In the following e
...     logits = np.dot(observations, weights)
...     return softmax(logits, axis=1)
->>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
+>>> envs = gym.vector.make("CartPole-v1", num_envs=3)
>>> weights = np.random.randn(
...     flatdim(envs.single_observation_space),
...     envs.single_action_space.n
... )
>>> observations, infos = envs.reset()
>>> actions = policy(weights, observations).argmax(axis=1)
->>> observations, rewards, dones, infos = envs.step(actions)
+>>> observations, rewards, terminated, truncated, infos = envs.step(actions)
```
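The definition of ``policy`` is cut off by the hunk above; a hedged reconstruction of the missing lines (assuming ``scipy.special.softmax`` and ``gymnasium.spaces.flatdim``, which the snippet appears to use) would be:
```python
import numpy as np
from scipy.special import softmax  # assumption: softmax comes from SciPy
from gymnasium.spaces import flatdim

def policy(weights, observations):
    # linear policy: one logit per action, normalized to probabilities
    logits = np.dot(observations, weights)
    return softmax(logits, axis=1)
```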
## Intermediate Usage
@@ -245,14 +247,14 @@ This is convenient, for example, if you instantiate a policy. In the following e
`AsyncVectorEnv` runs each environment copy inside an individual process. At each call to `AsyncVectorEnv.reset` or `AsyncVectorEnv.step`, the observations of all of the parallel environments are sent back to the main process. To avoid expensive transfers of data between processes, especially with large observations (e.g. images), `AsyncVectorEnv` uses shared memory by default (``shared_memory=True``) that processes can write to and read from at minimal cost. This can increase the throughput of the vectorized environment.
```python
->>> env_fns = [lambda: gymnasium.make("BreakoutNoFrameskip-v4")] * 5
+>>> env_fns = [lambda: gym.make("BreakoutNoFrameskip-v4")] * 5
->>> envs = gymnasium.vector.AsyncVectorEnv(env_fns, shared_memory=False)
+>>> envs = gym.vector.AsyncVectorEnv(env_fns, shared_memory=False)
>>> envs.reset()
>>> %timeit envs.step(envs.action_space.sample())
2.23 ms ± 136 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
->>> envs = gymnasium.vector.AsyncVectorEnv(env_fns, shared_memory=True)
+>>> envs = gym.vector.AsyncVectorEnv(env_fns, shared_memory=True)
>>> envs.reset()
>>> %timeit envs.step(envs.action_space.sample())
1.36 ms ± 15.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
@@ -262,9 +264,9 @@ This is convenient, for example, if you instantiate a policy. In the following e
Because sometimes things may not go as planned, the exceptions raised in any given environment copy are re-raised in the vectorized environment, even when the copies run in parallel with `AsyncVectorEnv`. This way, you can choose how to handle these exceptions yourself (with ``try ... except``).
```python
->>> class ErrorEnv(gymnasium.Env):
-...     observation_space = gymnasium.spaces.Box(-1., 1., (2,), np.float32)
-...     action_space = gymnasium.spaces.Discrete(2)
+>>> class ErrorEnv(gym.Env):
+...     observation_space = gym.spaces.Box(-1., 1., (2,), np.float32)
+...     action_space = gym.spaces.Discrete(2)
...
...     def reset(self):
...         return np.zeros((2,), dtype=np.float32), {}
@@ -273,11 +275,11 @@ Because sometimes things may not go as planned, the exceptions raised in any giv
...         if action == 1:
...             raise ValueError("An error occurred.")
...         observation = self.observation_space.sample()
-...         return (observation, 0., False, {})
+...         return observation, 0., False, False, {}
>>> envs = gymnasium.vector.AsyncVectorEnv([lambda: ErrorEnv()] * 3)
>>> observations, infos = envs.reset()
->>> observations, rewards, dones, infos = envs.step(np.array([0, 0, 1]))
+>>> observations, rewards, terminated, truncated, infos = envs.step(np.array([0, 0, 1]))
ERROR: Received the following error from Worker-2: ValueError: An error occurred.
ERROR: Shutting down Worker-2.
ERROR: Raising the last exception back to the main process.
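# Hedged sketch, not part of this hunk: catching the re-raised error in the
# main process, as the paragraph above suggests
>>> try:
...     observations, rewards, terminated, truncated, infos = envs.step(np.array([0, 0, 1]))
... except ValueError as err:
...     print(f"Caught worker error: {err}")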
@@ -292,7 +294,7 @@ Vectorized environments will batch actions and observations if they are elements
In the following example, we create a new environment `SMILESEnv`, whose observations are strings representing the [SMILES](https://en.wikipedia.org/wiki/Simplified_molecular-input_line-entry_system) notation of a molecular structure, with a custom observation space `SMILES`. The observations returned by the vectorized environment are contained in a tuple of strings.
```python
->>> class SMILES(gymnasium.Space):
+>>> class SMILES(gym.Space):
...     def __init__(self, symbols):
...         super().__init__()
...         self.symbols = symbols
@@ -300,9 +302,9 @@ In the following example, we create a new environment `SMILESEnv`, whose observa
...     def __eq__(self, other):
...         return self.symbols == other.symbols
->>> class SMILESEnv(gymnasium.Env):
+>>> class SMILESEnv(gym.Env):
...     observation_space = SMILES("][()CO=")
-...     action_space = gymnasium.spaces.Discrete(7)
+...     action_space = gym.spaces.Discrete(7)
...
...     def reset(self):
...         self._state = "["
@@ -310,15 +312,15 @@ In the following example, we create a new environment `SMILESEnv`, whose observa
...
...     def step(self, action):
...         self._state += self.observation_space.symbols[action]
-...         reward = done = (action == 0)
-...         return (self._state, float(reward), done, {})
+...         reward = terminated = (action == 0)
+...         return self._state, float(reward), terminated, False, {}
->>> envs = gymnasium.vector.AsyncVectorEnv(
+>>> envs = gym.vector.AsyncVectorEnv(
...     [lambda: SMILESEnv()] * 3,
...     shared_memory=False
... )
>>> envs.reset()
->>> observations, rewards, dones, infos = envs.step(np.array([2, 5, 4]))
+>>> observations, rewards, terminated, truncated, infos = envs.step(np.array([2, 5, 4]))
>>> observations
('[(', '[O', '[C')
```