Rename gymnasium as gym in docs (#24)

This commit is contained in:
Mark Towers
2022-09-19 21:14:09 +01:00
committed by GitHub
parent 5674a52d4c
commit c9056e37e1
68 changed files with 1134 additions and 1131 deletions

View File

@@ -54,7 +54,8 @@ title: Vector
```
```python
>>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
>>> import gymnasium as gym
>>> envs = gym.vector.make("CartPole-v1", num_envs=3)
>>> envs.reset()
(array([[-0.02240574, -0.03439831, -0.03904812, 0.02810693],
[ 0.01586068, 0.01929009, 0.02394426, 0.04016077],
@@ -68,10 +69,10 @@ title: Vector
```
```python
>>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
>>> envs = gym.vector.make("CartPole-v1", num_envs=3)
>>> envs.reset()
>>> actions = np.array([1, 0, 1])
>>> observations, rewards, dones, infos = envs.step(actions)
>>> observations, rewards, terminated, truncated, infos = envs.step(actions)
>>> observations
array([[ 0.00122802, 0.16228443, 0.02521779, -0.23700266],
@@ -80,7 +81,7 @@ array([[ 0.00122802, 0.16228443, 0.02521779, -0.23700266],
dtype=float32)
>>> rewards
array([1., 1., 1.])
>>> dones
>>> terminated
array([False, False, False])
>>> infos
{}

View File

@@ -12,9 +12,9 @@ also be chained to combine their effects. Most environments that are generated v
In order to wrap an environment, you must first initialize a base environment. Then you can pass this environment along
with (possibly optional) parameters to the wrapper's constructor:
```python
>>> import gymnasium
>>> import gymnasium as gym
>>> from gymnasium.wrappers import RescaleAction
>>> base_env = gymnasium.make("BipedalWalker-v3")
>>> base_env = gym.make("BipedalWalker-v3")
>>> base_env.action_space
Box([-1. -1. -1. -1.], [1. 1. 1. 1.], (4,), float32)
>>> wrapped_env = RescaleAction(base_env, min_action=0, max_action=1)
@@ -64,7 +64,7 @@ Let's say you have an environment with action space of type `Box`, but you would
only like to use a finite subset of actions. Then, you might want to implement the following wrapper
```python
class DiscreteActions(gymnasium.ActionWrapper):
class DiscreteActions(gym.ActionWrapper):
def __init__(self, env, disc_to_cont):
super().__init__(env)
self.disc_to_cont = disc_to_cont
@@ -74,7 +74,7 @@ class DiscreteActions(gymnasium.ActionWrapper):
return self.disc_to_cont[act]
if __name__ == "__main__":
env = gymnasium.make("LunarLanderContinuous-v2")
env = gym.make("LunarLanderContinuous-v2")
wrapped_env = DiscreteActions(env, [np.array([1,0]), np.array([-1,0]),
np.array([0,1]), np.array([0,-1])])
print(wrapped_env.action_space) #Discrete(4)
@@ -95,7 +95,7 @@ the position of the target relative to the agent, i.e. `observation["target_posi
For this, you could implement an observation wrapper like this:
```python
class RelativePosition(gymnasium.ObservationWrapper):
class RelativePosition(gym.ObservationWrapper):
def __init__(self, env):
super().__init__(env)
self.observation_space = Box(shape=(2,), low=-np.inf, high=np.inf)
@@ -117,7 +117,7 @@ Let us look at an example: Sometimes (especially when we do not have control ove
to a range to gain some numerical stability. To do that, we could, for instance, implement the following wrapper:
```python
class ClipReward(gymnasium.RewardWrapper):
class ClipReward(gym.RewardWrapper):
def __init__(self, env, min_reward, max_reward):
super().__init__(env)
self.min_reward = min_reward
@@ -137,7 +137,7 @@ When calling step causes `self.env.step()` to return `done=True`,
and the return format of `self.step()` is as follows:
```python
new_obs, terminal_reward, terminal_done, info
new_obs, terminal_reward, terminated, truncated, info
```
`new_obs` is the first observation after calling `self.env.reset()`,
@@ -145,7 +145,7 @@ new_obs, terminal_reward, terminal_done, info
`terminal_reward` is the reward after calling `self.env.step()`,
prior to calling `self.env.reset()`
`terminal_done` is always `True`
`terminated or truncated` is always `True`
`info` is a dict containing all the keys from the info dict returned by
the call to `self.env.reset()`, with additional keys `terminal_observation`
@@ -156,7 +156,7 @@ to `self.env.step()`.
If `done` is not true when `self.env.step()` is called, `self.step()` returns
```python
obs, reward, done, info
obs, reward, terminated, truncated, info
```
as normal.
@@ -164,12 +164,12 @@ as normal.
The AutoResetWrapper is not applied by default when calling `gymnasium.make()`, but can be applied by setting the optional `autoreset` argument to `True`:
```python
env = gymnasium.make("CartPole-v1", autoreset=True)
env = gym.make("CartPole-v1", autoreset=True)
```
The AutoResetWrapper can also be applied using its constructor:
```python
env = gymnasium.make("CartPole-v1")
env = gym.make("CartPole-v1")
env = AutoResetWrapper(env)
```
@@ -204,7 +204,7 @@ initialization of the environment. However, *Reacher* does not allow you to do t
of the reward are returned in `info`, so let us build a wrapper for Reacher that allows us to weight those terms:
```python
class ReacherRewardWrapper(gymnasium.Wrapper):
class ReacherRewardWrapper(gym.Wrapper):
def __init__(self, env, reward_dist_weight, reward_ctrl_weight):
super().__init__(env)
self.reward_dist_weight = reward_dist_weight
@@ -226,7 +226,7 @@ It is *not* sufficient to use a `RewardWrapper` in this case!
## Available Wrappers
| Name | Type | Arguments | Description |
|---------------------------|--------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|---------------------------|--------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `AtariPreprocessing` | `gymnasium.Wrapper` | `env: gymnasium.Env`, `noop_max: int = 30`, `frame_skip: int = 4`, `screen_size: int = 84`, `terminal_on_life_loss: bool = False`, `grayscale_obs: bool = True`, `grayscale_newaxis: bool = False`, `scale_obs: bool = False` | Implements the best practices from Machado et al. (2018), "Revisiting the Arcade Learning Environment: Evaluation Protocols and Open Problems for General Agents" but will be deprecated soon. |
| `AutoResetWrapper` | `gymnasium.Wrapper` | `env` | The wrapped environment will automatically reset when the done state is reached. Make sure you read the documentation before using this wrapper! |
| `ClipAction` | `gymnasium.ActionWrapper` | `env` | Clip the continuous action to the valid bound specified by the environment's `action_space` |

View File

@@ -10,8 +10,8 @@ firstpage:
Initializing environments is very easy in Gymnasium and can be done via:
```python
import gymnasium
env = gymnasium.make('CartPole-v0')
import gymnasium as gym
env = gym.make('CartPole-v0')
```
## Interacting with the Environment
@@ -46,14 +46,15 @@ Let's see what the agent-environment loop looks like in Gymnasium.
This example will run an instance of `LunarLander-v2` environment for 1000 timesteps. Since we pass `render_mode="human"`, you should see a window pop up rendering the environment.
```python
import gymnasium
env = gymnasium.make("LunarLander-v2", render_mode="human")
import gymnasium as gym
env = gym.make("LunarLander-v2", render_mode="human")
env.action_space.seed(42)
observation, info = env.reset(seed=42)
for _ in range(1000):
observation, reward, terminated, truncated, info = env.step(env.action_space.sample())
action = env.action_space.sample()
observation, reward, terminated, truncated, info = env.step(action)
if terminated or truncated:
observation, info = env.reset()
@@ -201,7 +202,7 @@ For example, if pressing the keys `w` and `space` at the same time is supposed t
```
As a more complete example, let's say we wish to play with `CartPole-v0` using our left and right arrow keys. The code would be as follows:
```python
import gymnasium
import gymnasium as gym
import pygame
from gymnasium.utils.play import play
mapping = {(pygame.K_LEFT,): 0, (pygame.K_RIGHT,): 1}

View File

@@ -69,13 +69,13 @@ may look like ` {"agent": array([1, 0]), "target": array([0, 3])}`.
Since we have 4 actions in our environment ("right", "up", "left", "down"), we will use `Discrete(4)` as an action space.
Here is the declaration of `GridWorldEnv` and the implementation of `__init__`:
```python
import gymnasium
import gymnasium as gym
from gymnasium import spaces
import pygame
import numpy as np
class GridWorldEnv(gymnasium.Env):
class GridWorldEnv(gym.Env):
metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 4}
def __init__(self, render_mode=None, size=5):
@@ -354,14 +354,14 @@ After you have installed your package locally with `pip install -e gym-examples`
```python
import gym_examples
env = gymnasium.make('gym_examples/GridWorld-v0')
env = gym.make('gym_examples/GridWorld-v0')
```
You can also pass keyword arguments of your environment's constructor to `gymnasium.make` to customize the environment.
In our case, we could do:
```python
env = gymnasium.make('gym_examples/GridWorld-v0', size=10)
env = gym.make('gym_examples/GridWorld-v0', size=10)
```
Sometimes, you may find it more convenient to skip registration and call the environment's
@@ -382,7 +382,7 @@ a wrapper on top of environment instances to flatten observations into a single
import gym_examples
from gymnasium.wrappers import FlattenObservation
env = gymnasium.make('gym_examples/GridWorld-v0')
env = gym.make('gym_examples/GridWorld-v0')
wrapped_env = FlattenObservation(env)
print(wrapped_env.reset()) # E.g. [3 0 3 3], {}
```
@@ -396,7 +396,7 @@ a wrapper that does this job. This wrapper is also available in gym-examples:
import gym_examples
from gym_examples.wrappers import RelativePosition
env = gymnasium.make('gym_examples/GridWorld-v0')
env = gym.make('gym_examples/GridWorld-v0')
wrapped_env = RelativePosition(env)
print(wrapped_env.reset()) # E.g. [-3 3], {}
```

View File

@@ -19,10 +19,11 @@ Similar to `gymnasium.make`, you can run a vectorized version of a registered en
The following example runs 3 copies of the ``CartPole-v1`` environment in parallel, taking as input a vector of 3 binary actions (one for each copy of the environment), and returning an array of 3 observations stacked along the first dimension, with an array of rewards returned by each copy, and an array of booleans indicating if the episode in each parallel environment has ended.
```python
>>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
>>> import gymnasium as gym
>>> envs = gym.vector.make("CartPole-v1", num_envs=3)
>>> envs.reset()
>>> actions = np.array([1, 0, 1])
>>> observations, rewards, dones, infos = envs.step(actions)
>>> observations, rewards, terminated, truncated, infos = envs.step(actions)
>>> observations
array([[ 0.00122802, 0.16228443, 0.02521779, -0.23700266],
@@ -31,7 +32,7 @@ array([[ 0.00122802, 0.16228443, 0.02521779, -0.23700266],
dtype=float32)
>>> rewards
array([1., 1., 1.])
>>> dones
>>> terminated
array([False, False, False])
>>> infos
{}
@@ -48,25 +49,25 @@ The function `gymnasium.vector.make` is meant to be used only in basic cases (e.
To create a vectorized environment that runs multiple environment copies, you can wrap your parallel environments inside `gymnasium.vector.SyncVectorEnv` (for sequential execution), or `gymnasium.vector.AsyncVectorEnv` (for parallel execution, with [multiprocessing](https://docs.python.org/3/library/multiprocessing.html)). These vectorized environments take as input a list of callables specifying how the copies are created.
```python
>>> envs = gymnasium.vector.AsyncVectorEnv([
... lambda: gymnasium.make("CartPole-v1"),
... lambda: gymnasium.make("CartPole-v1"),
... lambda: gymnasium.make("CartPole-v1")
>>> envs = gym.vector.AsyncVectorEnv([
... lambda: gym.make("CartPole-v1"),
... lambda: gym.make("CartPole-v1"),
... lambda: gym.make("CartPole-v1")
... ])
```
Alternatively, to create a vectorized environment of multiple copies of the same registered environment, you can use the function `gymnasium.vector.make()`.
```python
>>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3) # Equivalent
>>> envs = gym.vector.make("CartPole-v1", num_envs=3) # Equivalent
```
To enable automatic batching of actions and observations, all of the environment copies must share the same `action_space` and `observation_space`. However, the parallel environments are not required to be exact copies of one another. For example, you can run 2 instances of ``Pendulum-v0`` with different values for gravity in a vectorized environment with:
```python
>>> env = gymnasium.vector.AsyncVectorEnv([
... lambda: gymnasium.make("Pendulum-v0", g=9.81),
... lambda: gymnasium.make("Pendulum-v0", g=1.62)
>>> env = gym.vector.AsyncVectorEnv([
... lambda: gym.make("Pendulum-v0", g=9.81),
... lambda: gym.make("Pendulum-v0", g=1.62)
... ])
```
@@ -76,14 +77,14 @@ When using `AsyncVectorEnv` with either the ``spawn`` or ``forkserver`` start me
```python
if __name__ == "__main__":
envs = gymnasium.vector.make("CartPole-v1", num_envs=3, context="spawn")
envs = gym.vector.make("CartPole-v1", num_envs=3, context="spawn")
```
### Working with vectorized environments
While standard Gymnasium environments take a single action and return a single observation (with a reward, and boolean indicating termination), vectorized environments take a *batch of actions* as input, and return a *batch of observations*, together with an array of rewards and booleans indicating if the episode ended in each environment copy.
```python
>>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
>>> envs = gym.vector.make("CartPole-v1", num_envs=3)
>>> envs.reset()
(array([[-0.02792548, -0.04423395, 0.00026012, 0.04486719],
[-0.04906582, 0.02779809, 0.02881928, -0.04467649],
@@ -91,7 +92,7 @@ While standard Gymnasium environments take a single action and return a single o
dtype=float32), {})
>>> actions = np.array([1, 0, 1])
>>> observations, rewards, dones, infos = envs.step(actions)
>>> observations, rewards, terminated, truncated, infos = envs.step(actions)
>>> observations
array([[ 0.00187507, 0.18986781, -0.03168437, -0.301252 ],
@@ -100,7 +101,7 @@ array([[ 0.00187507, 0.18986781, -0.03168437, -0.301252 ],
dtype=float32)
>>> rewards
array([1., 1., 1.])
>>> dones
>>> terminated
array([False, False, False])
>>> infos
{}
@@ -109,15 +110,15 @@ array([False, False, False])
Vectorized environments are compatible with any environment, regardless of the action and observation spaces (e.g. container spaces like `gymnasium.spaces.Dict`, or any arbitrarily nested spaces). In particular, vectorized environments can automatically batch the observations returned by `VectorEnv.reset` and `VectorEnv.step` for any standard Gymnasium `Space` (e.g. `gymnasium.spaces.Box`, `gymnasium.spaces.Discrete`, `gymnasium.spaces.Dict`, or any nested structure thereof). Similarly, vectorized environments can take batches of actions from any standard Gymnasium `Space`.
```python
>>> class DictEnv(gymnasium.Env):
... observation_space = gymnasium.spaces.Dict({
... "position": gymnasium.spaces.Box(-1., 1., (3,), np.float32),
... "velocity": gymnasium.spaces.Box(-1., 1., (2,), np.float32)
>>> class DictEnv(gym.Env):
... observation_space = gym.spaces.Dict({
... "position": gym.spaces.Box(-1., 1., (3,), np.float32),
... "velocity": gym.spaces.Box(-1., 1., (2,), np.float32)
... })
... action_space = gymnasium.spaces.Dict({
... "fire": gymnasium.spaces.Discrete(2),
... "jump": gymnasium.spaces.Discrete(2),
... "acceleration": gymnasium.spaces.Box(-1., 1., (2,), np.float32)
... action_space = gym.spaces.Dict({
... "fire": gym.spaces.Discrete(2),
... "jump": gym.spaces.Discrete(2),
... "acceleration": gym.spaces.Box(-1., 1., (2,), np.float32)
... })
...
... def reset(self):
@@ -125,9 +126,9 @@ Vectorized environments are compatible with any environment, regardless of the a
...
... def step(self, action):
... observation = self.observation_space.sample()
... return (observation, 0., False, {})
... return observation, 0., False, False, {}
>>> envs = gymnasium.vector.AsyncVectorEnv([lambda: DictEnv()] * 3)
>>> envs = gym.vector.AsyncVectorEnv([lambda: DictEnv()] * 3)
>>> envs.observation_space
Dict(position:Box(-1.0, 1.0, (3, 3), float32), velocity:Box(-1.0, 1.0, (3, 2), float32))
>>> envs.action_space
@@ -139,7 +140,7 @@ Dict(fire:MultiDiscrete([2 2 2]), jump:MultiDiscrete([2 2 2]), acceleration:Box(
... "jump": np.array([0, 1, 0]),
... "acceleration": np.random.uniform(-1., 1., size=(3, 2))
... }
>>> observations, rewards, dones, infos = envs.step(actions)
>>> observations, rewards, terminated, truncated, infos = envs.step(actions)
>>> observations
{"position": array([[-0.5337036 , 0.7439302 , 0.41748118],
[ 0.9373266 , -0.5780453 , 0.8987405 ],
@@ -152,13 +153,13 @@ Dict(fire:MultiDiscrete([2 2 2]), jump:MultiDiscrete([2 2 2]), acceleration:Box(
The environment copies inside a vectorized environment automatically call `gymnasium.Env.reset` at the end of an episode. In the following example, the episode of the 3rd copy ends after 2 steps (the agent fell in a hole), and the parallel environment gets reset (observation ``0``).
```python
>>> envs = gymnasium.vector.make("FrozenLake-v1", num_envs=3, is_slippery=False)
>>> envs = gym.vector.make("FrozenLake-v1", num_envs=3, is_slippery=False)
>>> envs.reset()
(array([0, 0, 0]), {'prob': array([1, 1, 1]), '_prob': array([ True, True, True])})
>>> observations, rewards, dones, infos = envs.step(np.array([1, 2, 2]))
>>> observations, rewards, dones, infos = envs.step(np.array([1, 2, 1]))
>>> observations, rewards, terminated, truncated, infos = envs.step(np.array([1, 2, 2]))
>>> observations, rewards, terminated, truncated, infos = envs.step(np.array([1, 2, 1]))
>>> dones
>>> terminated
array([False, False, True])
>>> observations
array([8, 2, 0])
@@ -170,22 +171,23 @@ If the _dtype_ of the returned info is whether `int`, `float`, `bool` or any _dt
```python
>>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
>>> envs = gym.vector.make("CartPole-v1", num_envs=3)
>>> observations, infos = envs.reset()
>>> actions = np.array([1, 0, 1])
>>> observations, rewards, dones, infos = envs.step(actions)
>>> observations, rewards, terminated, truncated, infos = envs.step(actions)
>>> dones = np.logical_or(terminated, truncated)
>>> while not any(dones):
... observations, rewards, dones, infos = envs.step(actions)
... observations, rewards, terminated, truncated, infos = envs.step(actions)
... dones = np.logical_or(terminated, truncated)
>>> print(dones)
[False, True, False]
>>> print(infos)
{'terminal_observation': array([None,
{'final_observation': array([None,
array([-0.11350546, -1.8090094 , 0.23710881, 2.8017728 ], dtype=float32),
None], dtype=object), '_terminal_observation': array([False, True, False])}
None], dtype=object), '_final_observation': array([False, True, False])}
```
@@ -193,7 +195,7 @@ If the _dtype_ of the returned info is whether `int`, `float`, `bool` or any _dt
Like any Gymnasium environment, vectorized environments contain the two properties `VectorEnv.observation_space` and `VectorEnv.action_space` to specify the observation and action spaces of the environments. Since vectorized environments operate on multiple environment copies, where the actions taken and observations returned by all of the copies are batched together, the observation and action *spaces* are batched as well so that the input actions are valid elements of `VectorEnv.action_space`, and the observations are valid elements of `VectorEnv.observation_space`.
```python
>>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
>>> envs = gym.vector.make("CartPole-v1", num_envs=3)
>>> envs.observation_space
Box([[-4.8 ...]], [[4.8 ...]], (3, 4), float32)
>>> envs.action_space
@@ -203,9 +205,9 @@ MultiDiscrete([2 2 2])
In order to appropriately batch the observations and actions in vectorized environments, the observation and action spaces of all of the copies are required to be identical.
```python
>>> envs = gymnasium.vector.AsyncVectorEnv([
... lambda: gymnasium.make("CartPole-v1"),
... lambda: gymnasium.make("MountainCar-v0")
>>> envs = gym.vector.AsyncVectorEnv([
... lambda: gym.make("CartPole-v1"),
... lambda: gym.make("MountainCar-v0")
... ])
RuntimeError: Some environments have an observation space different from `Box([-4.8 ...], [4.8 ...], (4,), float32)`.
In order to batch observations, the observation spaces from all environments must be equal.
@@ -213,7 +215,7 @@ In order to batch observations, the observation spaces from all environments mus
However, sometimes it may be handy to have access to the observation and action spaces of a particular copy, and not the batched spaces. You can access those with the properties `VectorEnv.single_observation_space` and `VectorEnv.single_action_space` of the vectorized environment.
```python
>>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
>>> envs = gym.vector.make("CartPole-v1", num_envs=3)
>>> envs.single_observation_space
Box([-4.8 ...], [4.8 ...], (4,), float32)
>>> envs.single_action_space
@@ -229,14 +231,14 @@ This is convenient, for example, if you instantiate a policy. In the following e
... logits = np.dot(observations, weights)
... return softmax(logits, axis=1)
>>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
>>> envs = gym.vector.make("CartPole-v1", num_envs=3)
>>> weights = np.random.randn(
... flatdim(envs.single_observation_space),
... envs.single_action_space.n
... )
>>> observations, infos = envs.reset()
>>> actions = policy(weights, observations).argmax(axis=1)
>>> observations, rewards, dones, infos = envs.step(actions)
>>> observations, rewards, terminated, truncated, infos = envs.step(actions)
```
## Intermediate Usage
@@ -245,14 +247,14 @@ This is convenient, for example, if you instantiate a policy. In the following e
`AsyncVectorEnv` runs each environment copy inside an individual process. At each call to `AsyncVectorEnv.reset` or `AsyncVectorEnv.step`, the observations of all of the parallel environments are sent back to the main process. To avoid expensive transfers of data between processes, especially with large observations (e.g. images), `AsyncVectorEnv` uses a shared memory by default (``shared_memory=True``) that processes can write to and read from at minimal cost. This can increase the throughput of the vectorized environment.
```python
>>> env_fns = [lambda: gymnasium.make("BreakoutNoFrameskip-v4")] * 5
>>> env_fns = [lambda: gym.make("BreakoutNoFrameskip-v4")] * 5
>>> envs = gymnasium.vector.AsyncVectorEnv(env_fns, shared_memory=False)
>>> envs = gym.vector.AsyncVectorEnv(env_fns, shared_memory=False)
>>> envs.reset()
>>> %timeit envs.step(envs.action_space.sample())
2.23 ms ± 136 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
>>> envs = gymnasium.vector.AsyncVectorEnv(env_fns, shared_memory=True)
>>> envs = gym.vector.AsyncVectorEnv(env_fns, shared_memory=True)
>>> envs.reset()
>>> %timeit envs.step(envs.action_space.sample())
1.36 ms ± 15.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
@@ -262,9 +264,9 @@ This is convenient, for example, if you instantiate a policy. In the following e
Because sometimes things may not go as planned, the exceptions raised in any given environment copy are re-raised in the vectorized environment, even when the copy runs in parallel with `AsyncVectorEnv`. This way, you can choose how to handle these exceptions yourself (with ``try ... except``).
```python
>>> class ErrorEnv(gymnasium.Env):
... observation_space = gymnasium.spaces.Box(-1., 1., (2,), np.float32)
... action_space = gymnasium.spaces.Discrete(2)
>>> class ErrorEnv(gym.Env):
... observation_space = gym.spaces.Box(-1., 1., (2,), np.float32)
... action_space = gym.spaces.Discrete(2)
...
... def reset(self):
... return np.zeros((2,), dtype=np.float32), {}
@@ -273,11 +275,11 @@ Because sometimes things may not go as planned, the exceptions raised in any giv
... if action == 1:
... raise ValueError("An error occurred.")
... observation = self.observation_space.sample()
... return (observation, 0., False, {})
... return observation, 0., False, False, {}
>>> envs = gym.vector.AsyncVectorEnv([lambda: ErrorEnv()] * 3)
>>> observations, infos = envs.reset()
>>> observations, rewards, dones, infos = envs.step(np.array([0, 0, 1]))
>>> observations, rewards, terminated, truncated, infos = envs.step(np.array([0, 0, 1]))
ERROR: Received the following error from Worker-2: ValueError: An error occurred.
ERROR: Shutting down Worker-2.
ERROR: Raising the last exception back to the main process.
@@ -292,7 +294,7 @@ Vectorized environments will batch actions and observations if they are elements
In the following example, we create a new environment `SMILESEnv`, whose observations are strings representing the [SMILES](https://en.wikipedia.org/wiki/Simplified_molecular-input_line-entry_system) notation of a molecular structure, with a custom observation space `SMILES`. The observations returned by the vectorized environment are contained in a tuple of strings.
```python
>>> class SMILES(gymnasium.Space):
>>> class SMILES(gym.Space):
... def __init__(self, symbols):
... super().__init__()
... self.symbols = symbols
@@ -300,9 +302,9 @@ In the following example, we create a new environment `SMILESEnv`, whose observa
... def __eq__(self, other):
... return self.symbols == other.symbols
>>> class SMILESEnv(gymnasium.Env):
>>> class SMILESEnv(gym.Env):
... observation_space = SMILES("][()CO=")
... action_space = gymnasium.spaces.Discrete(7)
... action_space = gym.spaces.Discrete(7)
...
... def reset(self):
... self._state = "["
@@ -310,15 +312,15 @@ In the following example, we create a new environment `SMILESEnv`, whose observa
...
... def step(self, action):
... self._state += self.observation_space.symbols[action]
... reward = done = (action == 0)
... return (self._state, float(reward), done, {})
... reward = terminated = (action == 0)
... return self._state, float(reward), terminated, False, {}
>>> envs = gymnasium.vector.AsyncVectorEnv(
>>> envs = gym.vector.AsyncVectorEnv(
... [lambda: SMILESEnv()] * 3,
... shared_memory=False
... )
>>> envs.reset()
>>> observations, rewards, dones, infos = envs.step(np.array([2, 5, 4]))
>>> observations, rewards, terminated, truncated, infos = envs.step(np.array([2, 5, 4]))
>>> observations
('[(', '[O', '[C')
```

View File

@@ -13,7 +13,7 @@ firstpage:
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|-------------------|--------------------------------|
|-------------------|--------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (250, 160, 3) |
| Observation High | 255 |
@@ -61,7 +61,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------|----------------------|----------------|
|-------------|-------------|--------------------|--------------|
| Adventure | `[0, 1, 2]` | `[0, ..., 3]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -12,7 +12,7 @@ title: Air Raid
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (250, 160, 3) |
| Observation High | 255 |
@@ -31,7 +31,7 @@ on the flavor of the environment (the combination of `mode` and `difficulty`). T
flavor looks like this:
| Num | Action |
|-----|------------------------|
|-----|-----------|
| 0 | NOOP |
| 1 | FIRE |
| 2 | RIGHT |
@@ -69,7 +69,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|---------------|--------------------|--------------|
| AirRaid | `[1, ..., 8]` | `[0]` | `1` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -12,7 +12,7 @@ title: Alien
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|----------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -66,7 +66,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|---------------|--------------------|--------------|
| Alien | `[0, ..., 3]` | `[0, ..., 3]` | `0` |

View File

@@ -12,7 +12,7 @@ title: Amidar
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|-----------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -33,7 +33,7 @@ flavor looks like this:
| Num | Action |
|-----|------------------------|
|-----|-----------|
| 0 | NOOP |
| 1 | FIRE |
| 2 | UP |
@@ -79,7 +79,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|-------------|--------------------|--------------|
| Amidar | `[0]` | `[0, 3]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: Assault
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -30,7 +30,7 @@ on the flavor of the environment (the combination of `mode` and `difficulty`). T
flavor looks like this:
| Num | Action |
|-----|------------------------|
|-----|-----------|
| 0 | NOOP |
| 1 | FIRE |
| 2 | UP |
@@ -66,7 +66,7 @@ env = gymnasium.make("ALE/Assault-v5")
The various ways to configure the environment are described in detail in the article on Atari environments.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|-------------|--------------------|--------------|
| Assault | `[0]` | `[0]` | `0` |

View File

@@ -11,7 +11,7 @@ title: Asterix
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -34,7 +34,7 @@ on the flavor of the environment (the combination of `mode` and `difficulty`). T
flavor looks like this:
| Num | Action |
|-----|------------------------|
|-----|-----------|
| 0 | NOOP |
| 1 | UP |
| 2 | RIGHT |
@@ -75,7 +75,7 @@ env = gymnasium.make("ALE/Asterix-v5")
The various ways to configure the environment are described in detail in the article on Atari environments.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|-------------|--------------------|--------------|
| Asterix | `[0]` | `[0]` | `0` |

View File

@@ -11,7 +11,7 @@ title: Asteroids
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|--------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -84,7 +84,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|---------------------|--------------------|--------------|
| Asteroids | `[0, ..., 31, 128]` | `[0, 3]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -12,7 +12,7 @@ title: Atlantis
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|-------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -75,7 +75,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|---------------|--------------------|--------------|
| Atlantis | `[0, ..., 3]` | `[0]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: Bank Heist
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|--------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -66,7 +66,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|---------------------------------|--------------------|--------------|
| BankHeist | `[0, 4, 8, 12, 16, 20, 24, 28]` | `[0, ..., 3]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -12,7 +12,7 @@ title: Battle Zone
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|---------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -66,7 +66,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|-------------|--------------------|--------------|
| BattleZone | `[1, 2, 3]` | `[0]` | `1` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -18,7 +18,7 @@ grid:
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|--------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -85,7 +85,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|-------------|--------------------|--------------|
| BeamRider | `[0]` | `[0, 1]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: Berzerk
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -63,7 +63,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|---------------------------|--------------------|--------------|
| Berzerk | `[1, ..., 9, 16, 17, 18]` | `[0]` | `1` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: Bowling
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -77,7 +77,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|-------------|--------------------|--------------|
| Bowling | `[0, 2, 4]` | `[0, 1]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: Boxing
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|-----------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -62,7 +62,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|-------------|--------------------|--------------|
| Boxing | `[0]` | `[0, ..., 3]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: Breakout
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|-------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -32,7 +32,7 @@ on the flavor of the environment (the combination of `mode` and `difficulty`). T
flavor looks like this:
| Num | Action |
|-----|------------------------|
|-----|--------|
| 0 | NOOP |
| 1 | FIRE |
| 2 | RIGHT |
@@ -72,7 +72,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|-------------------------------------------------|--------------------|--------------|
| Breakout | `[0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44]` | `[0, 1]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: Carnival
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|-------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (214, 160, 3) |
| Observation High | 255 |
@@ -76,7 +76,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|-------------|--------------------|--------------|
| Carnival | `[0]` | `[0]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: Centipede
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|--------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -67,7 +67,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|-------------|--------------------|--------------|
| Centipede | `[22, 86]` | `[0]` | `22` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: Chopper Command
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|-------------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -64,7 +64,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|----------------|-------------|--------------------|--------------|
| ChopperCommand | `[0, 2]` | `[0, 1]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: Crazy Climber
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|-----------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (250, 160, 3) |
| Observation High | 255 |
@@ -36,7 +36,7 @@ on the flavor of the environment (the combination of `mode` and `difficulty`). T
flavor looks like this:
| Num | Action |
|-----|------------------------|
|-----|-----------|
| 0 | NOOP |
| 1 | UP |
| 2 | RIGHT |
@@ -79,7 +79,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|--------------|---------------|--------------------|--------------|
| CrazyClimber | `[0, ..., 3]` | `[0, 1]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -12,7 +12,7 @@ title: Defender
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|-------------------|-------------------------------|
|-------------------|-------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -67,7 +67,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------|--------------------|---------------|
|-------------|-------------------|--------------------|--------------|
| Defender | `[1, ..., 9, 16]` | `[0, 1]` | `1` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: Demon Attack
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|----------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -35,7 +35,7 @@ on the flavor of the environment (the combination of `mode` and `difficulty`). T
flavor looks like this:
| Num | Action |
|-----|------------------------|
|-----|-----------|
| 0 | NOOP |
| 1 | UP |
| 2 | RIGHT |
@@ -80,7 +80,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|----------------|--------------------|--------------|
| DemonAttack | `[1, 3, 5, 7]` | `[0, 1]` | `1` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: Double Dunk
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|---------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (250, 160, 3) |
| Observation High | 255 |
@@ -33,7 +33,7 @@ on the flavor of the environment (the combination of `mode` and `difficulty`). T
flavor looks like this:
| Num | Action |
|-----|------------------------|
|-----|-----------|
| 0 | NOOP |
| 1 | UP |
| 2 | RIGHT |
@@ -78,7 +78,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|----------------|--------------------|--------------|
| DoubleDunk | `[0, ..., 15]` | `[0]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: Elevator Action
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|-------------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (250, 160, 3) |
| Observation High | 255 |
@@ -36,7 +36,7 @@ on the flavor of the environment (the combination of `mode` and `difficulty`). T
flavor looks like this:
| Num | Action |
|-----|------------------------|
|-----|-----------|
| 0 | NOOP |
| 1 | UP |
| 2 | RIGHT |
@@ -81,7 +81,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|----------------|-------------|--------------------|--------------|
| ElevatorAction | `[0]` | `[0]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: Enduro
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|-----------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (250, 160, 3) |
| Observation High | 255 |
@@ -33,7 +33,7 @@ on the flavor of the environment (the combination of `mode` and `difficulty`). T
flavor looks like this:
| Num | Action |
|-----|------------------------|
|-----|-----------|
| 0 | NOOP |
| 1 | UP |
| 2 | RIGHT |
@@ -76,7 +76,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|-------------|--------------------|--------------|
| Enduro | `[0]` | `[0]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: FishingDerby
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|-----------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -31,7 +31,7 @@ Atari environments are simulated via the Arcade Learning Environment (ALE) [[1]]
The action space is a subset of the following discrete set of legal actions:
| Num | Action |
|-----|------------------------|
|-----|---------------|
| 0 | NOOP |
| 1 | FIRE |
| 2 | UP |
@@ -84,10 +84,10 @@ env = gymnasium.make("ALE/FishingDerby-v5")
```
The various ways to configure the environment are described in detail in the article on Atari environments.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
| FishingDerby | `[0]` | `[0, ..., 3]` | `0` |
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|--------------|-------------|--------------------|--------------|
| FishingDerby | `[0]` | `[0, ..., 3]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
@@ -100,7 +100,7 @@ A thorough discussion of the intricate differences between the versions and conf
general article on Atari environments.
| Version | `frameskip=` | `repeat_action_probability=` | `full_action_space=` |
| ----- | --------- | ------------------------- | ---------|
|---------|--------------|------------------------------|----------------------|
| v0 | `(2, 5,)` | `0.25` | `False` |
| v4 | `(2, 5,)` | `0.0` | `False` |
| v5 | `5` | `0.25` | `True` |

View File

@@ -11,7 +11,7 @@ title: Freeway
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -31,7 +31,7 @@ Atari environments are simulated via the Arcade Learning Environment (ALE) [[1]]
The action space is a subset of the following discrete set of legal actions:
| Num | Action |
|-----|------------------------|
|-----|---------------|
| 0 | NOOP |
| 1 | FIRE |
| 2 | UP |
@@ -84,8 +84,9 @@ env = gymnasium.make("ALE/Freeway-v5")
```
The various ways to configure the environment are described in detail in the article on Atari environments.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|---------------|--------------------|--------------|
| Freeway | `[0, ..., 7]` | `[0, 1]` | `0` |

View File

@@ -11,7 +11,7 @@ title: Frostbite
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|--------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -31,7 +31,7 @@ Atari environments are simulated via the Arcade Learning Environment (ALE) [[1]]
The action space is a subset of the following discrete set of legal actions:
| Num | Action |
|-----|------------------------|
|-----|---------------|
| 0 | NOOP |
| 1 | FIRE |
| 2 | UP |
@@ -84,8 +84,9 @@ env = gymnasium.make("ALE/Frostbite-v5")
```
The various ways to configure the environment are described in detail in the article on Atari environments.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|-------------|--------------------|--------------|
| Frostbite | `[0, 2]` | `[0]` | `0` |
@@ -100,7 +101,7 @@ A thorough discussion of the intricate differences between the versions and conf
general article on Atari environments.
| Version | `frameskip=` | `repeat_action_probability=` | `full_action_space=` |
| ----- | --------- | ------------------------- | ---------|
|---------|--------------|------------------------------|----------------------|
| v0 | `(2, 5,)` | `0.25` | `False` |
| v4 | `(2, 5,)` | `0.0` | `False` |
| v5 | `5` | `0.25` | `True` |

View File

@@ -11,7 +11,7 @@ title: Gopher
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|-----------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -31,7 +31,7 @@ Atari environments are simulated via the Arcade Learning Environment (ALE) [[1]]
The action space is a subset of the following discrete set of legal actions:
| Num | Action |
|-----|------------------------|
|-----|---------------|
| 0 | NOOP |
| 1 | FIRE |
| 2 | UP |
@@ -84,8 +84,9 @@ env = gymnasium.make("ALE/Gopher-v5")
```
The various ways to configure the environment are described in detail in the article on Atari environments.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|-------------|--------------------|--------------|
| Gopher | `[0, 2]` | `[0, 1]` | `0` |
@@ -100,7 +101,7 @@ A thorough discussion of the intricate differences between the versions and conf
general article on Atari environments.
| Version | `frameskip=` | `repeat_action_probability=` | `full_action_space=` |
| ----- | --------- | ------------------------- | ---------|
|---------|--------------|------------------------------|----------------------|
| v0 | `(2, 5,)` | `0.25` | `False` |
| v4 | `(2, 5,)` | `0.0` | `False` |
| v5 | `5` | `0.25` | `True` |

View File

@@ -11,7 +11,7 @@ title: Gravitar
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|-------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -31,7 +31,7 @@ Atari environments are simulated via the Arcade Learning Environment (ALE) [[1]]
The action space is a subset of the following discrete set of legal actions:
| Num | Action |
|-----|------------------------|
|-----|---------------|
| 0 | NOOP |
| 1 | FIRE |
| 2 | UP |
@@ -84,8 +84,9 @@ env = gymnasium.make("ALE/Gravitar-v5")
```
The various ways to configure the environment are described in detail in the article on Atari environments.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|---------------|--------------------|--------------|
| Gravitar | `[0, ..., 4]` | `[0]` | `0` |
@@ -100,7 +101,7 @@ A thorough discussion of the intricate differences between the versions and conf
general article on Atari environments.
| Version | `frameskip=` | `repeat_action_probability=` | `full_action_space=` |
| ----- | --------- | ------------------------- | ---------|
|---------|--------------|------------------------------|----------------------|
| v0 | `(2, 5,)` | `0.25` | `False` |
| v4 | `(2, 5,)` | `0.0` | `False` |
| v5 | `5` | `0.25` | `True` |

View File

@@ -12,7 +12,7 @@ title: Hero
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|-------------------|---------------------------|
|-------------------|---------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -68,7 +68,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|--------------------------|--------------------|---------------|
|-------------|---------------|--------------------|--------------|
| Hero | `[0, ..., 4]` | `[0]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: IceHockey
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|--------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -65,7 +65,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|-------------|--------------------|--------------|
| IceHockey | `[0, 2]` | `[0, ..., 3]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -84,7 +84,7 @@ Atari environments are simulated via the Arcade Learning Environment (ALE) [[1]]
The action space is a subset of the following discrete set of legal actions:
| Num | Action |
|-----|------------------------|
|-----|---------------|
| 0 | NOOP |
| 1 | FIRE |
| 2 | UP |
@@ -169,7 +169,7 @@ All Atari games are available in three versions. They differ in the default sett
The differences are listed in the following table:
| Version | `frameskip=` | `repeat_action_probability=` | `full_action_space=` |
| ----- | --------- | ------------------------- | ---------|
|---------|--------------|------------------------------|----------------------|
| v0 | `(2, 5,)` | `0.25` | `False` |
| v4 | `(2, 5,)` | `0.0` | `False` |
| v5 | `5` | `0.25` | `True` |
@@ -181,7 +181,7 @@ For each Atari game, several different configurations are registered in Gymnasiu
v0 and v4. Let us take a look at all variations of Amidar-v0 that are registered with gymnasium:
| Name | `obs_type=` | `frameskip=` | `repeat_action_probability=` | `full_action_space=` |
| ---------------------------- | -------- | --------- | ------------------------- | ----------------- |
|----------------------------|-------------|--------------|------------------------------|----------------------|
| Amidar-v0 | `"rgb"` | `(2, 5,)` | `0.25` | `False` |
| AmidarDeterministic-v0 | `"rgb"` | `4` | `0.0` | `False` |
| AmidarNoFrameskip-v0 | `"rgb"` | `1` | `0.25` | `False` |
@@ -194,7 +194,7 @@ environment configuration via arguments passed to `gymnasium.make`. Moreover, th
are in the "ALE" namespace. The suffix "-ram" is still available. Thus, we get the following table:
| Name | `obs_type=` | `frameskip=` | `repeat_action_probability=` | `full_action_space=` |
| ---------------------------- | -------- | --------- | ------------------------- | ----------------- |
|-------------------|-------------|--------------|------------------------------|----------------------|
| ALE/Amidar-v5 | `"rgb"` | `5` | `0.25` | `True` |
| ALE/Amidar-ram-v5 | `"ram"` | `5` | `0.25` | `True` |

View File

@@ -11,7 +11,7 @@ title: Jamesbond
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|--------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -66,7 +66,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|-------------|--------------------|--------------|
| Jamesbond | `[0, 1]` | `[0]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: JourneyEscape
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|------------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -31,7 +31,7 @@ on the flavor of the environment (the combination of `mode` and `difficulty`). T
flavor looks like this:
| Num | Action |
|-----|------------------------|
|-----|---------------|
| 0 | NOOP |
| 2 | UP |
| 3 | RIGHT |
@@ -49,9 +49,6 @@ flavor looks like this:
| 16 | DOWNRIGHTFIRE |
| 17 | DOWNLEFTFIRE |
### Observations
By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
possible to observe

View File

@@ -11,7 +11,7 @@ title: Kangaroo
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|-------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -64,7 +64,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|-------------|--------------------|--------------|
| Kangaroo | `[0, 1]` | `[0]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: Krull
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|----------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -64,7 +64,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|-------------|--------------------|--------------|
| Krull | `[0]` | `[0]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: Kung Fu Master
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|-----------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -25,7 +25,7 @@ You are a Kung-Fu Master fighting your way through the Evil Wizard's temple. You
By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify full_action_space=False during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of mode and difficulty). The reduced action space for the default flavor looks like this:
| Num | Action |
|-----|------------------------|
|-----|---------------|
| 0 | NOOP |
| 1 | UP |
| 2 | RIGHT |
@@ -68,7 +68,7 @@ env = gymnasium.make("ALE/KungFuMaster-v5")
The various ways to configure the environment are described in detail in the article on Atari environments.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|--------------|-------------|--------------------|--------------|
| KungFuMaster | `[0]` | `[0]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: Montezuma Revenge
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|---------------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -53,7 +53,7 @@ env = gymnasium.make("ALE/MontezumaRevenge-v5")
The various ways to configure the environment are described in detail in the article on Atari environments.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|------------------|-------------|--------------------|--------------|
| MontezumaRevenge | `[0]` | `[0]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: Ms Pacman
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|-------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -25,7 +25,7 @@ Your goal is to collect all of the pellets on the screen while avoiding the ghos
By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify full_action_space=False during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of mode and difficulty). The reduced action space for the default flavor looks like this:
| Num | Action |
|-----|------------------------|
|-----|-----------|
| 0 | NOOP |
| 1 | UP |
| 2 | RIGHT |
@@ -63,7 +63,7 @@ env = gymnasium.make("ALE/MsPacman-v5")
The various ways to configure the environment are described in detail in the article on Atari environments.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|---------------|--------------------|--------------|
| MsPacman | `[0, ..., 3]` | `[0]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: Name This Game
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|-----------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -25,7 +25,7 @@ Your goal is to defend the treasure that you have discovered. You must fight off
By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify full_action_space=False during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of mode and difficulty). The reduced action space for the default flavor looks like this:
| Num | Action |
|-----|------------------------|
|-----|-----------|
| 0 | NOOP |
| 1 | FIRE |
| 2 | RIGHT |
@@ -61,7 +61,7 @@ env = gymnasium.make("ALE/NameThisGame-v5")
The various ways to configure the environment are described in detail in the article on Atari environments.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|--------------|---------------|--------------------|--------------|
| NameThisGame | `[8, 24, 40]` | `[0, 1]` | `8` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: Phoenix
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -25,7 +25,7 @@ Your goal is to reach and shoot the alien pilot. On your way there, you must eli
By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify full_action_space=False during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of mode and difficulty). The reduced action space for the default flavor looks like this:
| Num | Action |
|-----|------------------------|
|-----|-----------|
| 0 | NOOP |
| 1 | FIRE |
| 2 | RIGHT |
@@ -62,7 +62,7 @@ env = gymnasium.make("ALE/Phoenix-v5")
The various ways to configure the environment are described in detail in the article on Atari environments.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|-------------|--------------------|--------------|
| Phoenix | `[0]` | `[0]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: Pitfall
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -59,7 +59,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|-------------|--------------------|--------------|
| Pitfall | `[0]` | `[0]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: Pong
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|---------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -29,7 +29,7 @@ number of actions (those that are meaningful in this game) are available. The re
flavor looks like this:
| Num | Action |
|-----|------------------------|
|-----|-----------|
| 0 | NOOP |
| 1 | FIRE |
| 2 | RIGHT |
@@ -71,7 +71,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|-------------|--------------------|--------------|
| Pong | `[0, 1]` | `[0, ..., 3]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: Pooyan
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|-----------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -30,7 +30,7 @@ number of actions (those that are meaningful in this game) are available. The re
flavor looks like this:
| Num | Action |
|-----|------------------------|
|-----|----------|
| 0 | NOOP |
| 1 | FIRE |
| 2 | UP |
@@ -38,8 +38,6 @@ flavor looks like this:
| 4 | UPFIRE |
| 5 | DOWNFIRE |
### Observations
By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe
- The 128 Bytes of RAM of the console
@@ -72,7 +70,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|--------------------|--------------------|--------------|
| Pooyan | `[10, 30, 50, 70]` | `[0]` | `10` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: PrivateEye
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|---------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -59,7 +59,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|---------------|--------------------|--------------|
| PrivateEye | `[0, ..., 4]` | `[0, ..., 3]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: Qbert
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|----------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -29,7 +29,7 @@ number of actions (those that are meaningful in this game) are available. The re
flavor looks like this:
| Num | Action |
|-----|------------------------|
|-----|--------|
| 0 | NOOP |
| 1 | FIRE |
| 2 | UP |
@@ -70,7 +70,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|-------------|--------------------|--------------|
| Qbert | `[0]` | `[0, 1]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: Riverraid
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|--------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -74,7 +74,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-----------|--------------------|--------------|
|-------------|-------------|--------------------|--------------|
| Riverraid | `[0]` | `[0, 1]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: Road Runner
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|---------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -57,7 +57,7 @@ via `gymnasium.make`.
Score points are your only reward. You get score points each time you:
| actions | points |
|-----------------------------------------------------------|-----|
|-------------------------------------------------------|--------|
| eat a pile of birdseed | 100 |
| eat steel shot | 100 |
| get the coyote hit by a mine (cannonball, rock, etc.) | 200 |
@@ -76,7 +76,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------------|------|----------|--------------|
|-------------|-------------|--------------------|--------------|
| RoadRunner | `[0]` | `[0]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: Robot Tank
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|-------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -75,7 +75,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------|--------------------|--------------|
|-------------|-------------|--------------------|--------------|
| Robotank | `[0]` | `[0]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: Seaquest
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|-------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -85,7 +85,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|------------------------------|---------------|--------------|
|-------------|-------------|--------------------|--------------|
| Seaquest | `[0]` | `[0, 1]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: Skiing
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|-----------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -37,7 +37,7 @@ on the flavor of the environment (the combination of `mode` and `difficulty`). T
flavor looks like this:
| Num | Action |
|-----|------|
|-----|--------|
| 0 | NOOP |
| 1 | RIGHT |
| 2 | LEFT |
@@ -76,7 +76,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|------------------------------|---------------|--------------|
|-------------|-------------|--------------------|--------------|
| Skiing | `[0]` | `[0]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: Solaris
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -57,7 +57,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|-------------|--------------------|--------------|
| Solaris | `[0]` | `[0]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: SpaceInvaders
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|------------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -29,7 +29,7 @@ number of actions (those that are meaningful in this game) are available. The re
flavor looks like this:
| Num | Action |
|-----|------------------------|
|-----|-----------|
| 0 | NOOP |
| 1 | FIRE |
| 2 | RIGHT |
@@ -70,7 +70,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|---------------|----------------|--------------------|--------------|
| SpaceInvaders | `[0, ..., 15]` | `[0, 1]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: StarGunner
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|---------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -29,7 +29,7 @@ number of actions (those that are meaningful in this game) are available. The re
flavor looks like this:
| Num | Action |
|-----|------------------------|
|-----|--------|
| 0 | NOOP |
| 1 | FIRE |
| 2 | UP |
@@ -71,7 +71,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|---------------|--------------------|--------------|
| StarGunner | `[0, ..., 3]` | `[0]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: Tennis
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|-----------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -60,7 +60,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|-------------|--------------------|--------------|
| Tennis | `[0, 2]` | `[0, ..., 3]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: TimePilot
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|--------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -29,7 +29,7 @@ number of actions (those that are meaningful in this game) are available. The re
flavor looks like this:
| Num | Action |
|-----|------------------------|
|-----|-----------|
| 0 | NOOP |
| 1 | FIRE |
| 2 | UP |
@@ -76,7 +76,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|-------------|--------------------|--------------|
| TimePilot | `[0]` | `[0, 1, 2]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in

View File

@@ -11,7 +11,7 @@ title: Tutankham
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|--------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -25,7 +25,7 @@ Your goal is to rack up points by finding treasures in the mazes of the tomb whi
By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify full_action_space=False during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of mode and difficulty). The reduced action space for the default flavor looks like this:
| Num | Action |
|-----|------------------------|
|-----|-----------|
| 0 | NOOP |
| 1 | UP |
| 2 | RIGHT |
@@ -62,7 +62,7 @@ env = gymnasium.make("ALE/Tutankham-v5")
The various ways to configure the environment are described in detail in the article on Atari environments.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|-------------|--------------------|--------------|
| Tutankham | `[0]` | `[0]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: Up n' Down
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -25,7 +25,7 @@ Your goal is to steer your baja bugger to collect prizes and eliminate opponents
By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify full_action_space=False during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of mode and difficulty). The reduced action space for the default flavor looks like this:
| Num | Action |
|-----|------------------------|
|-----|----------|
| 0 | NOOP |
| 1 | FIRE |
| 2 | UP |
@@ -61,7 +61,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|-------------|--------------------|--------------|
| UpNDown | `[0]` | `[0, ..., 3]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: Venture
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -53,7 +53,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|-------------|--------------------|--------------|
| Venture | `[0]` | `[0, ..., 3]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: Video Pinball
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|-----------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -25,7 +25,7 @@ Your goal is to keep the ball in play as long as possible and to score as many p
By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify full_action_space=False during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of mode and difficulty). The reduced action space for the default flavor looks like this:
| Num | Action |
|-----|------------------------|
|-----|-----------|
| 0 | NOOP |
| 1 | FIRE |
| 2 | UP |
@@ -65,7 +65,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|--------------|---------------|--------------------|--------------|
| VideoPinball | `[0, ..., 2]` | `[0, 1]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -11,7 +11,7 @@ title: Wizard of Wor
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|----------------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -25,7 +25,7 @@ Your goal is to beat the Wizard using your laser and radar scanner. Detailed doc
By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify full_action_space=False during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of mode and difficulty). The reduced action space for the default flavor looks like this:
| Num | Action |
|-----|------------------------|
|-----|-----------|
| 0 | NOOP |
| 1 | FIRE |
| 2 | UP |
@@ -66,7 +66,7 @@ It is possible to specify various flavors of the environment via the keyword arg
A flavor is a combination of a game mode and a difficulty setting.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|--------------|-------------|--------------------|--------------|
| WizardOfWor | `[0]` | `[0, 1]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -12,7 +12,7 @@ lastpage:
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
| | |
|---|---|
|-------------------|-----------------------------------|
| Action Space | Discrete(18) |
| Observation Space | (210, 160, 3) |
| Observation High | 255 |
@@ -52,7 +52,7 @@ env = gymnasium.make("ALE/Zaxxon-v5")
The various ways to configure the environment are described in detail in the article on Atari environments.
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|-------------|-------------|--------------------|--------------|
| Zaxxon | `[0]` | `[0]` | `0` |
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"

View File

@@ -16,8 +16,8 @@ lastpage:
```{code-block} python
import gymnasium
env = gymnasium.make("LunarLander-v2", render_mode="human")
import gymnasium as gym
env = gym.make("LunarLander-v2", render_mode="human")
observation, info = env.reset(seed=42)
for _ in range(1000):
action = policy(observation) # User-defined policy function