mirror of
https://github.com/Farama-Foundation/Gymnasium.git
synced 2025-08-01 22:11:25 +00:00
Rename gymnasium as gym in docs (#24)
This commit is contained in:
@@ -54,7 +54,8 @@ title: Vector
|
||||
```
|
||||
|
||||
```python
|
||||
>>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
|
||||
>>> import gymnasium as gym
|
||||
>>> envs = gym.vector.make("CartPole-v1", num_envs=3)
|
||||
>>> envs.reset()
|
||||
(array([[-0.02240574, -0.03439831, -0.03904812, 0.02810693],
|
||||
[ 0.01586068, 0.01929009, 0.02394426, 0.04016077],
|
||||
@@ -68,10 +69,10 @@ title: Vector
|
||||
```
|
||||
|
||||
```python
|
||||
>>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
|
||||
>>> envs = gym.vector.make("CartPole-v1", num_envs=3)
|
||||
>>> envs.reset()
|
||||
>>> actions = np.array([1, 0, 1])
|
||||
>>> observations, rewards, dones, infos = envs.step(actions)
|
||||
>>> observations, rewards, terminated, truncated, infos = envs.step(actions)
|
||||
|
||||
>>> observations
|
||||
array([[ 0.00122802, 0.16228443, 0.02521779, -0.23700266],
|
||||
@@ -80,7 +81,7 @@ array([[ 0.00122802, 0.16228443, 0.02521779, -0.23700266],
|
||||
dtype=float32)
|
||||
>>> rewards
|
||||
array([1., 1., 1.])
|
||||
>>> dones
|
||||
>>> terminated
|
||||
array([False, False, False])
|
||||
>>> infos
|
||||
{}
|
||||
|
@@ -12,9 +12,9 @@ also be chained to combine their effects. Most environments that are generated v
|
||||
In order to wrap an environment, you must first initialize a base environment. Then you can pass this environment along
|
||||
with (possibly optional) parameters to the wrapper's constructor:
|
||||
```python
|
||||
>>> import gymnasium
|
||||
>>> import gymnasium as gym
|
||||
>>> from gymnasium.wrappers import RescaleAction
|
||||
>>> base_env = gymnasium.make("BipedalWalker-v3")
|
||||
>>> base_env = gym.make("BipedalWalker-v3")
|
||||
>>> base_env.action_space
|
||||
Box([-1. -1. -1. -1.], [1. 1. 1. 1.], (4,), float32)
|
||||
>>> wrapped_env = RescaleAction(base_env, min_action=0, max_action=1)
|
||||
@@ -64,7 +64,7 @@ Let's say you have an environment with action space of type `Box`, but you would
|
||||
only like to use a finite subset of actions. Then, you might want to implement the following wrapper
|
||||
|
||||
```python
|
||||
class DiscreteActions(gymnasium.ActionWrapper):
|
||||
class DiscreteActions(gym.ActionWrapper):
|
||||
def __init__(self, env, disc_to_cont):
|
||||
super().__init__(env)
|
||||
self.disc_to_cont = disc_to_cont
|
||||
@@ -74,7 +74,7 @@ class DiscreteActions(gymnasium.ActionWrapper):
|
||||
return self.disc_to_cont[act]
|
||||
|
||||
if __name__ == "__main__":
|
||||
env = gymnasium.make("LunarLanderContinuous-v2")
|
||||
env = gym.make("LunarLanderContinuous-v2")
|
||||
wrapped_env = DiscreteActions(env, [np.array([1,0]), np.array([-1,0]),
|
||||
np.array([0,1]), np.array([0,-1])])
|
||||
print(wrapped_env.action_space) #Discrete(4)
|
||||
@@ -95,7 +95,7 @@ the position of the target relative to the agent, i.e. `observation["target_posi
|
||||
For this, you could implement an observation wrapper like this:
|
||||
|
||||
```python
|
||||
class RelativePosition(gymnasium.ObservationWrapper):
|
||||
class RelativePosition(gym.ObservationWrapper):
|
||||
def __init__(self, env):
|
||||
super().__init__(env)
|
||||
self.observation_space = Box(shape=(2,), low=-np.inf, high=np.inf)
|
||||
@@ -117,7 +117,7 @@ Let us look at an example: Sometimes (especially when we do not have control ove
|
||||
to a range to gain some numerical stability. To do that, we could, for instance, implement the following wrapper:
|
||||
|
||||
```python
|
||||
class ClipReward(gymnasium.RewardWrapper):
|
||||
class ClipReward(gym.RewardWrapper):
|
||||
def __init__(self, env, min_reward, max_reward):
|
||||
super().__init__(env)
|
||||
self.min_reward = min_reward
|
||||
@@ -137,7 +137,7 @@ When calling step causes `self.env.step()` to return `done=True`,
|
||||
and the return format of `self.step()` is as follows:
|
||||
|
||||
```python
|
||||
new_obs, terminal_reward, terminal_done, info
|
||||
new_obs, terminal_reward, terminated, truncated, info
|
||||
```
|
||||
|
||||
`new_obs` is the first observation after calling `self.env.reset()`,
|
||||
@@ -145,7 +145,7 @@ new_obs, terminal_reward, terminal_done, info
|
||||
`terminal_reward` is the reward after calling `self.env.step()`,
|
||||
prior to calling `self.env.reset()`
|
||||
|
||||
`terminal_done` is always `True`
|
||||
`terminated or truncated` is always `True`
|
||||
|
||||
`info` is a dict containing all the keys from the info dict returned by
|
||||
the call to `self.env.reset()`, with additional keys `terminal_observation`
|
||||
@@ -156,7 +156,7 @@ to `self.env.step()`.
|
||||
If neither `terminated` nor `truncated` is true when `self.env.step()` is called, `self.step()` returns
|
||||
|
||||
```python
|
||||
obs, reward, done, info
|
||||
obs, reward, terminated, truncated, info
|
||||
```
|
||||
as normal.
|
||||
|
||||
@@ -164,12 +164,12 @@ as normal.
|
||||
The AutoResetWrapper is not applied by default when calling `gymnasium.make()`, but can be applied by setting the optional `autoreset` argument to `True`:
|
||||
|
||||
```python
|
||||
env = gymnasium.make("CartPole-v1", autoreset=True)
|
||||
env = gym.make("CartPole-v1", autoreset=True)
|
||||
```
|
||||
|
||||
The AutoResetWrapper can also be applied using its constructor:
|
||||
```python
|
||||
env = gymnasium.make("CartPole-v1")
|
||||
env = gym.make("CartPole-v1")
|
||||
env = AutoResetWrapper(env)
|
||||
```
|
||||
|
||||
@@ -204,7 +204,7 @@ initialization of the environment. However, *Reacher* does not allow you to do t
|
||||
of the reward are returned in `info`, so let us build a wrapper for Reacher that allows us to weight those terms:
|
||||
|
||||
```python
|
||||
class ReacherRewardWrapper(gymnasium.Wrapper):
|
||||
class ReacherRewardWrapper(gym.Wrapper):
|
||||
def __init__(self, env, reward_dist_weight, reward_ctrl_weight):
|
||||
super().__init__(env)
|
||||
self.reward_dist_weight = reward_dist_weight
|
||||
@@ -226,7 +226,7 @@ It is *not* sufficient to use a `RewardWrapper` in this case!
|
||||
## Available Wrappers
|
||||
|
||||
| Name | Type | Arguments | Description |
|
||||
|---------------------------|--------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
|---------------------------|--------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| `AtariPreprocessing` | `gymnasium.Wrapper` | `env: gymnasium.Env`, `noop_max: int = 30`, `frame_skip: int = 4`, `screen_size: int = 84`, `terminal_on_life_loss: bool = False`, `grayscale_obs: bool = True`, `grayscale_newaxis: bool = False`, `scale_obs: bool = False` | Implements the best practices from Machado et al. (2018), "Revisiting the Arcade Learning Environment: Evaluation Protocols and Open Problems for General Agents" but will be deprecated soon. |
|
||||
| `AutoResetWrapper` | `gymnasium.Wrapper` | `env` | The wrapped environment will automatically reset when the done state is reached. Make sure you read the documentation before using this wrapper! |
|
||||
| `ClipAction` | `gymnasium.ActionWrapper` | `env` | Clip the continuous action to the valid bound specified by the environment's `action_space` |
|
||||
|
@@ -10,8 +10,8 @@ firstpage:
|
||||
Initializing environments is very easy in Gymnasium and can be done via:
|
||||
|
||||
```python
|
||||
import gymnasium
|
||||
env = gymnasium.make('CartPole-v0')
|
||||
import gymnasium as gym
|
||||
env = gym.make('CartPole-v0')
|
||||
```
|
||||
|
||||
## Interacting with the Environment
|
||||
@@ -46,14 +46,15 @@ Let's see what the agent-environment loop looks like in Gymnasium.
|
||||
This example will run an instance of the `LunarLander-v2` environment for 1000 timesteps. Since we pass `render_mode="human"`, you should see a window pop up rendering the environment.
|
||||
|
||||
```python
|
||||
import gymnasium
|
||||
env = gymnasium.make("LunarLander-v2", render_mode="human")
|
||||
import gymnasium as gym
|
||||
env = gym.make("LunarLander-v2", render_mode="human")
|
||||
env.action_space.seed(42)
|
||||
|
||||
observation, info = env.reset(seed=42)
|
||||
|
||||
for _ in range(1000):
|
||||
observation, reward, terminated, truncated, info = env.step(env.action_space.sample())
|
||||
action = env.action_space.sample()
|
||||
observation, reward, terminated, truncated, info = env.step(action)
|
||||
|
||||
if terminated or truncated:
|
||||
observation, info = env.reset()
|
||||
@@ -201,7 +202,7 @@ For example, if pressing the keys `w` and `space` at the same time is supposed t
|
||||
```
|
||||
As a more complete example, let's say we wish to play with `CartPole-v0` using our left and right arrow keys. The code would be as follows:
|
||||
```python
|
||||
import gymnasium
|
||||
import gymnasium as gym
|
||||
import pygame
|
||||
from gymnasium.utils.play import play
|
||||
mapping = {(pygame.K_LEFT,): 0, (pygame.K_RIGHT,): 1}
|
||||
|
@@ -69,13 +69,13 @@ may look like ` {"agent": array([1, 0]), "target": array([0, 3])}`.
|
||||
Since we have 4 actions in our environment ("right", "up", "left", "down"), we will use `Discrete(4)` as an action space.
|
||||
Here is the declaration of `GridWorldEnv` and the implementation of `__init__`:
|
||||
```python
|
||||
import gymnasium
|
||||
import gymnasium as gym
|
||||
from gymnasium import spaces
|
||||
import pygame
|
||||
import numpy as np
|
||||
|
||||
|
||||
class GridWorldEnv(gymnasium.Env):
|
||||
class GridWorldEnv(gym.Env):
|
||||
metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 4}
|
||||
|
||||
def __init__(self, render_mode=None, size=5):
|
||||
@@ -354,14 +354,14 @@ After you have installed your package locally with `pip install -e gym-examples`
|
||||
|
||||
```python
|
||||
import gym_examples
|
||||
env = gymnasium.make('gym_examples/GridWorld-v0')
|
||||
env = gym.make('gym_examples/GridWorld-v0')
|
||||
```
|
||||
|
||||
You can also pass keyword arguments of your environment's constructor to `gymnasium.make` to customize the environment.
|
||||
In our case, we could do:
|
||||
|
||||
```python
|
||||
env = gymnasium.make('gym_examples/GridWorld-v0', size=10)
|
||||
env = gym.make('gym_examples/GridWorld-v0', size=10)
|
||||
```
|
||||
|
||||
Sometimes, you may find it more convenient to skip registration and call the environment's
|
||||
@@ -382,7 +382,7 @@ a wrapper on top of environment instances to flatten observations into a single
|
||||
import gym_examples
|
||||
from gymnasium.wrappers import FlattenObservation
|
||||
|
||||
env = gymnasium.make('gym_examples/GridWorld-v0')
|
||||
env = gym.make('gym_examples/GridWorld-v0')
|
||||
wrapped_env = FlattenObservation(env)
|
||||
print(wrapped_env.reset()) # E.g. [3 0 3 3], {}
|
||||
```
|
||||
@@ -396,7 +396,7 @@ a wrapper that does this job. This wrapper is also available in gym-examples:
|
||||
import gym_examples
|
||||
from gym_examples.wrappers import RelativePosition
|
||||
|
||||
env = gymnasium.make('gym_examples/GridWorld-v0')
|
||||
env = gym.make('gym_examples/GridWorld-v0')
|
||||
wrapped_env = RelativePosition(env)
|
||||
print(wrapped_env.reset()) # E.g. [-3 3], {}
|
||||
```
|
||||
|
@@ -19,10 +19,11 @@ Similar to `gymnasium.make`, you can run a vectorized version of a registered en
|
||||
The following example runs 3 copies of the ``CartPole-v1`` environment in parallel, taking as input a vector of 3 binary actions (one for each copy of the environment), and returning an array of 3 observations stacked along the first dimension, with an array of rewards returned by each copy, and an array of booleans indicating if the episode in each parallel environment has ended.
|
||||
|
||||
```python
|
||||
>>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
|
||||
>>> import gymnasium as gym
|
||||
>>> envs = gym.vector.make("CartPole-v1", num_envs=3)
|
||||
>>> envs.reset()
|
||||
>>> actions = np.array([1, 0, 1])
|
||||
>>> observations, rewards, dones, infos = envs.step(actions)
|
||||
>>> observations, rewards, terminated, truncated, infos = envs.step(actions)
|
||||
|
||||
>>> observations
|
||||
array([[ 0.00122802, 0.16228443, 0.02521779, -0.23700266],
|
||||
@@ -31,7 +32,7 @@ array([[ 0.00122802, 0.16228443, 0.02521779, -0.23700266],
|
||||
dtype=float32)
|
||||
>>> rewards
|
||||
array([1., 1., 1.])
|
||||
>>> dones
|
||||
>>> terminated
|
||||
array([False, False, False])
|
||||
>>> infos
|
||||
{}
|
||||
@@ -48,25 +49,25 @@ The function `gymnasium.vector.make` is meant to be used only in basic cases (e.
|
||||
To create a vectorized environment that runs multiple environment copies, you can wrap your parallel environments inside `gymnasium.vector.SyncVectorEnv` (for sequential execution), or `gymnasium.vector.AsyncVectorEnv` (for parallel execution, with [multiprocessing](https://docs.python.org/3/library/multiprocessing.html)). These vectorized environments take as input a list of callables specifying how the copies are created.
|
||||
|
||||
```python
|
||||
>>> envs = gymnasium.vector.AsyncVectorEnv([
|
||||
... lambda: gymnasium.make("CartPole-v1"),
|
||||
... lambda: gymnasium.make("CartPole-v1"),
|
||||
... lambda: gymnasium.make("CartPole-v1")
|
||||
>>> envs = gym.vector.AsyncVectorEnv([
|
||||
... lambda: gym.make("CartPole-v1"),
|
||||
... lambda: gym.make("CartPole-v1"),
|
||||
... lambda: gym.make("CartPole-v1")
|
||||
... ])
|
||||
```
|
||||
|
||||
Alternatively, to create a vectorized environment of multiple copies of the same registered environment, you can use the function `gymnasium.vector.make()`.
|
||||
|
||||
```python
|
||||
>>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3) # Equivalent
|
||||
>>> envs = gym.vector.make("CartPole-v1", num_envs=3) # Equivalent
|
||||
```
|
||||
|
||||
To enable automatic batching of actions and observations, all of the environment copies must share the same `action_space` and `observation_space`. However, all of the parallel environments are not required to be exact copies of one another. For example, you can run 2 instances of ``Pendulum-v0`` with different values for gravity in a vectorized environment with:
|
||||
|
||||
```python
|
||||
>>> env = gymnasium.vector.AsyncVectorEnv([
|
||||
... lambda: gymnasium.make("Pendulum-v0", g=9.81),
|
||||
... lambda: gymnasium.make("Pendulum-v0", g=1.62)
|
||||
>>> env = gym.vector.AsyncVectorEnv([
|
||||
... lambda: gym.make("Pendulum-v0", g=9.81),
|
||||
... lambda: gym.make("Pendulum-v0", g=1.62)
|
||||
... ])
|
||||
```
|
||||
|
||||
@@ -76,14 +77,14 @@ When using `AsyncVectorEnv` with either the ``spawn`` or ``forkserver`` start me
|
||||
|
||||
```python
|
||||
if __name__ == "__main__":
|
||||
envs = gymnasium.vector.make("CartPole-v1", num_envs=3, context="spawn")
|
||||
envs = gym.vector.make("CartPole-v1", num_envs=3, context="spawn")
|
||||
```
|
||||
### Working with vectorized environments
|
||||
While standard Gymnasium environments take a single action and return a single observation (with a reward, and boolean indicating termination), vectorized environments take a *batch of actions* as input, and return a *batch of observations*, together with an array of rewards and booleans indicating if the episode ended in each environment copy.
|
||||
|
||||
|
||||
```python
|
||||
>>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
|
||||
>>> envs = gym.vector.make("CartPole-v1", num_envs=3)
|
||||
>>> envs.reset()
|
||||
(array([[-0.02792548, -0.04423395, 0.00026012, 0.04486719],
|
||||
[-0.04906582, 0.02779809, 0.02881928, -0.04467649],
|
||||
@@ -91,7 +92,7 @@ While standard Gymnasium environments take a single action and return a single o
|
||||
dtype=float32), {})
|
||||
|
||||
>>> actions = np.array([1, 0, 1])
|
||||
>>> observations, rewards, dones, infos = envs.step(actions)
|
||||
>>> observations, rewards, terminated, truncated, infos = envs.step(actions)
|
||||
|
||||
>>> observations
|
||||
array([[ 0.00187507, 0.18986781, -0.03168437, -0.301252 ],
|
||||
@@ -100,7 +101,7 @@ array([[ 0.00187507, 0.18986781, -0.03168437, -0.301252 ],
|
||||
dtype=float32)
|
||||
>>> rewards
|
||||
array([1., 1., 1.])
|
||||
>>> dones
|
||||
>>> terminated
|
||||
array([False, False, False])
|
||||
>>> infos
|
||||
{}
|
||||
@@ -109,15 +110,15 @@ array([False, False, False])
|
||||
Vectorized environments are compatible with any environment, regardless of the action and observation spaces (e.g. container spaces like `gymnasium.spaces.Dict`, or any arbitrarily nested spaces). In particular, vectorized environments can automatically batch the observations returned by `VectorEnv.reset` and `VectorEnv.step` for any standard Gymnasium `Space` (e.g. `gymnasium.spaces.Box`, `gymnasium.spaces.Discrete`, `gymnasium.spaces.Dict`, or any nested structure thereof). Similarly, vectorized environments can take batches of actions from any standard Gymnasium `Space`.
|
||||
|
||||
```python
|
||||
>>> class DictEnv(gymnasium.Env):
|
||||
... observation_space = gymnasium.spaces.Dict({
|
||||
... "position": gymnasium.spaces.Box(-1., 1., (3,), np.float32),
|
||||
... "velocity": gymnasium.spaces.Box(-1., 1., (2,), np.float32)
|
||||
>>> class DictEnv(gym.Env):
|
||||
... observation_space = gym.spaces.Dict({
|
||||
... "position": gym.spaces.Box(-1., 1., (3,), np.float32),
|
||||
... "velocity": gym.spaces.Box(-1., 1., (2,), np.float32)
|
||||
... })
|
||||
... action_space = gymnasium.spaces.Dict({
|
||||
... "fire": gymnasium.spaces.Discrete(2),
|
||||
... "jump": gymnasium.spaces.Discrete(2),
|
||||
... "acceleration": gymnasium.spaces.Box(-1., 1., (2,), np.float32)
|
||||
... action_space = gym.spaces.Dict({
|
||||
... "fire": gym.spaces.Discrete(2),
|
||||
... "jump": gym.spaces.Discrete(2),
|
||||
... "acceleration": gym.spaces.Box(-1., 1., (2,), np.float32)
|
||||
... })
|
||||
...
|
||||
... def reset(self):
|
||||
@@ -125,9 +126,9 @@ Vectorized environments are compatible with any environment, regardless of the a
|
||||
...
|
||||
... def step(self, action):
|
||||
... observation = self.observation_space.sample()
|
||||
... return (observation, 0., False, {})
|
||||
... return observation, 0., False, False, {}
|
||||
|
||||
>>> envs = gymnasium.vector.AsyncVectorEnv([lambda: DictEnv()] * 3)
|
||||
>>> envs = gym.vector.AsyncVectorEnv([lambda: DictEnv()] * 3)
|
||||
>>> envs.observation_space
|
||||
Dict(position:Box(-1.0, 1.0, (3, 3), float32), velocity:Box(-1.0, 1.0, (3, 2), float32))
|
||||
>>> envs.action_space
|
||||
@@ -139,7 +140,7 @@ Dict(fire:MultiDiscrete([2 2 2]), jump:MultiDiscrete([2 2 2]), acceleration:Box(
|
||||
... "jump": np.array([0, 1, 0]),
|
||||
... "acceleration": np.random.uniform(-1., 1., size=(3, 2))
|
||||
... }
|
||||
>>> observations, rewards, dones, infos = envs.step(actions)
|
||||
>>> observations, rewards, terminated, truncated, infos = envs.step(actions)
|
||||
>>> observations
|
||||
{"position": array([[-0.5337036 , 0.7439302 , 0.41748118],
|
||||
[ 0.9373266 , -0.5780453 , 0.8987405 ],
|
||||
@@ -152,13 +153,13 @@ Dict(fire:MultiDiscrete([2 2 2]), jump:MultiDiscrete([2 2 2]), acceleration:Box(
|
||||
The environment copies inside a vectorized environment automatically call `gymnasium.Env.reset` at the end of an episode. In the following example, the episode of the 3rd copy ends after 2 steps (the agent fell in a hole), and the parallel environment gets reset (observation ``0``).
|
||||
|
||||
```python
|
||||
>>> envs = gymnasium.vector.make("FrozenLake-v1", num_envs=3, is_slippery=False)
|
||||
>>> envs = gym.vector.make("FrozenLake-v1", num_envs=3, is_slippery=False)
|
||||
>>> envs.reset()
|
||||
(array([0, 0, 0]), {'prob': array([1, 1, 1]), '_prob': array([ True, True, True])})
|
||||
>>> observations, rewards, dones, infos = envs.step(np.array([1, 2, 2]))
|
||||
>>> observations, rewards, dones, infos = envs.step(np.array([1, 2, 1]))
|
||||
>>> observations, rewards, terminated, truncated, infos = envs.step(np.array([1, 2, 2]))
|
||||
>>> observations, rewards, terminated, truncated, infos = envs.step(np.array([1, 2, 1]))
|
||||
|
||||
>>> dones
|
||||
>>> terminated
|
||||
array([False, False, True])
|
||||
>>> observations
|
||||
array([8, 2, 0])
|
||||
@@ -170,22 +171,23 @@ If the _dtype_ of the returned info is whether `int`, `float`, `bool` or any _dt
|
||||
|
||||
|
||||
```python
|
||||
>>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
|
||||
>>> envs = gym.vector.make("CartPole-v1", num_envs=3)
|
||||
>>> observations, infos = envs.reset()
|
||||
|
||||
>>> actions = np.array([1, 0, 1])
|
||||
>>> observations, rewards, dones, infos = envs.step(actions)
|
||||
>>> observations, rewards, terminated, truncated, infos = envs.step(actions)
|
||||
>>> dones = np.logical_or(terminated, truncated)
|
||||
|
||||
>>> while not any(dones):
|
||||
... observations, rewards, dones, infos = envs.step(actions)
|
||||
... observations, rewards, terminated, truncated, infos = envs.step(actions)
... dones = np.logical_or(terminated, truncated)
|
||||
|
||||
>>> print(dones)
|
||||
[False, True, False]
|
||||
|
||||
>>> print(infos)
|
||||
{'terminal_observation': array([None,
|
||||
{'final_observation': array([None,
|
||||
array([-0.11350546, -1.8090094 , 0.23710881, 2.8017728 ], dtype=float32),
|
||||
None], dtype=object), '_terminal_observation': array([False, True, False])}
|
||||
None], dtype=object), '_final_observation': array([False, True, False])}
|
||||
```
|
||||
|
||||
|
||||
@@ -193,7 +195,7 @@ If the _dtype_ of the returned info is whether `int`, `float`, `bool` or any _dt
|
||||
Like any Gymnasium environment, vectorized environments contain the two properties `VectorEnv.observation_space` and `VectorEnv.action_space` to specify the observation and action spaces of the environments. Since vectorized environments operate on multiple environment copies, where the actions taken and observations returned by all of the copies are batched together, the observation and action *spaces* are batched as well so that the input actions are valid elements of `VectorEnv.action_space`, and the observations are valid elements of `VectorEnv.observation_space`.
|
||||
|
||||
```python
|
||||
>>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
|
||||
>>> envs = gym.vector.make("CartPole-v1", num_envs=3)
|
||||
>>> envs.observation_space
|
||||
Box([[-4.8 ...]], [[4.8 ...]], (3, 4), float32)
|
||||
>>> envs.action_space
|
||||
@@ -203,9 +205,9 @@ MultiDiscrete([2 2 2])
|
||||
In order to appropriately batch the observations and actions in vectorized environments, the observation and action spaces of all of the copies are required to be identical.
|
||||
|
||||
```python
|
||||
>>> envs = gymnasium.vector.AsyncVectorEnv([
|
||||
... lambda: gymnasium.make("CartPole-v1"),
|
||||
... lambda: gymnasium.make("MountainCar-v0")
|
||||
>>> envs = gym.vector.AsyncVectorEnv([
|
||||
... lambda: gym.make("CartPole-v1"),
|
||||
... lambda: gym.make("MountainCar-v0")
|
||||
... ])
|
||||
RuntimeError: Some environments have an observation space different from `Box([-4.8 ...], [4.8 ...], (4,), float32)`.
|
||||
In order to batch observations, the observation spaces from all environments must be equal.
|
||||
@@ -213,7 +215,7 @@ In order to batch observations, the observation spaces from all environments mus
|
||||
However, sometimes it may be handy to have access to the observation and action spaces of a particular copy, and not the batched spaces. You can access those with the properties `VectorEnv.single_observation_space` and `VectorEnv.single_action_space` of the vectorized environment.
|
||||
|
||||
```python
|
||||
>>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
|
||||
>>> envs = gym.vector.make("CartPole-v1", num_envs=3)
|
||||
>>> envs.single_observation_space
|
||||
Box([-4.8 ...], [4.8 ...], (4,), float32)
|
||||
>>> envs.single_action_space
|
||||
@@ -229,14 +231,14 @@ This is convenient, for example, if you instantiate a policy. In the following e
|
||||
... logits = np.dot(observations, weights)
|
||||
... return softmax(logits, axis=1)
|
||||
|
||||
>>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
|
||||
>>> envs = gym.vector.make("CartPole-v1", num_envs=3)
|
||||
>>> weights = np.random.randn(
|
||||
... flatdim(envs.single_observation_space),
|
||||
... envs.single_action_space.n
|
||||
... )
|
||||
>>> observations, infos = envs.reset()
|
||||
>>> actions = policy(weights, observations).argmax(axis=1)
|
||||
>>> observations, rewards, dones, infos = envs.step(actions)
|
||||
>>> observations, rewards, terminated, truncated, infos = envs.step(actions)
|
||||
```
|
||||
|
||||
## Intermediate Usage
|
||||
@@ -245,14 +247,14 @@ This is convenient, for example, if you instantiate a policy. In the following e
|
||||
`AsyncVectorEnv` runs each environment copy inside an individual process. At each call to `AsyncVectorEnv.reset` or `AsyncVectorEnv.step`, the observations of all of the parallel environments are sent back to the main process. To avoid expensive transfers of data between processes, especially with large observations (e.g. images), `AsyncVectorEnv` uses shared memory by default (``shared_memory=True``) that processes can write to and read from at minimal cost. This can increase the throughput of the vectorized environment.
|
||||
|
||||
```python
|
||||
>>> env_fns = [lambda: gymnasium.make("BreakoutNoFrameskip-v4")] * 5
|
||||
>>> env_fns = [lambda: gym.make("BreakoutNoFrameskip-v4")] * 5
|
||||
|
||||
>>> envs = gymnasium.vector.AsyncVectorEnv(env_fns, shared_memory=False)
|
||||
>>> envs = gym.vector.AsyncVectorEnv(env_fns, shared_memory=False)
|
||||
>>> envs.reset()
|
||||
>>> %timeit envs.step(envs.action_space.sample())
|
||||
2.23 ms ± 136 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
|
||||
|
||||
>>> envs = gymnasium.vector.AsyncVectorEnv(env_fns, shared_memory=True)
|
||||
>>> envs = gym.vector.AsyncVectorEnv(env_fns, shared_memory=True)
|
||||
>>> envs.reset()
|
||||
>>> %timeit envs.step(envs.action_space.sample())
|
||||
1.36 ms ± 15.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
|
||||
@@ -262,9 +264,9 @@ This is convenient, for example, if you instantiate a policy. In the following e
|
||||
Because sometimes things may not go as planned, the exceptions raised in any given environment copy are re-raised in the vectorized environment, even when the copy runs in parallel with `AsyncVectorEnv`. This way, you can choose how to handle these exceptions yourself (with ``try ... except``).
|
||||
|
||||
```python
|
||||
>>> class ErrorEnv(gymnasium.Env):
|
||||
... observation_space = gymnasium.spaces.Box(-1., 1., (2,), np.float32)
|
||||
... action_space = gymnasium.spaces.Discrete(2)
|
||||
>>> class ErrorEnv(gym.Env):
|
||||
... observation_space = gym.spaces.Box(-1., 1., (2,), np.float32)
|
||||
... action_space = gym.spaces.Discrete(2)
|
||||
...
|
||||
... def reset(self):
|
||||
... return np.zeros((2,), dtype=np.float32), {}
|
||||
@@ -273,11 +275,11 @@ Because sometimes things may not go as planned, the exceptions raised in any giv
|
||||
... if action == 1:
|
||||
... raise ValueError("An error occurred.")
|
||||
... observation = self.observation_space.sample()
|
||||
... return (observation, 0., False, {})
|
||||
... return observation, 0., False, False, {}
|
||||
|
||||
>>> envs = gymnasium.vector.AsyncVectorEnv([lambda: ErrorEnv()] * 3)
|
||||
>>> observations, infos = envs.reset()
|
||||
>>> observations, rewards, dones, infos = envs.step(np.array([0, 0, 1]))
|
||||
>>> observations, rewards, terminated, truncated, infos = envs.step(np.array([0, 0, 1]))
|
||||
ERROR: Received the following error from Worker-2: ValueError: An error occurred.
|
||||
ERROR: Shutting down Worker-2.
|
||||
ERROR: Raising the last exception back to the main process.
|
||||
@@ -292,7 +294,7 @@ Vectorized environments will batch actions and observations if they are elements
|
||||
In the following example, we create a new environment `SMILESEnv`, whose observations are strings representing the [SMILES](https://en.wikipedia.org/wiki/Simplified_molecular-input_line-entry_system) notation of a molecular structure, with a custom observation space `SMILES`. The observations returned by the vectorized environment are contained in a tuple of strings.
|
||||
|
||||
```python
|
||||
>>> class SMILES(gymnasium.Space):
|
||||
>>> class SMILES(gym.Space):
|
||||
... def __init__(self, symbols):
|
||||
... super().__init__()
|
||||
... self.symbols = symbols
|
||||
@@ -300,9 +302,9 @@ In the following example, we create a new environment `SMILESEnv`, whose observa
|
||||
... def __eq__(self, other):
|
||||
... return self.symbols == other.symbols
|
||||
|
||||
>>> class SMILESEnv(gymnasium.Env):
|
||||
>>> class SMILESEnv(gym.Env):
|
||||
... observation_space = SMILES("][()CO=")
|
||||
... action_space = gymnasium.spaces.Discrete(7)
|
||||
... action_space = gym.spaces.Discrete(7)
|
||||
...
|
||||
... def reset(self):
|
||||
... self._state = "["
|
||||
@@ -310,15 +312,15 @@ In the following example, we create a new environment `SMILESEnv`, whose observa
|
||||
...
|
||||
... def step(self, action):
|
||||
... self._state += self.observation_space.symbols[action]
|
||||
... reward = done = (action == 0)
|
||||
... return (self._state, float(reward), done, {})
|
||||
... reward = terminated = (action == 0)
|
||||
... return self._state, float(reward), terminated, False, {}
|
||||
|
||||
>>> envs = gymnasium.vector.AsyncVectorEnv(
|
||||
>>> envs = gym.vector.AsyncVectorEnv(
|
||||
... [lambda: SMILESEnv()] * 3,
|
||||
... shared_memory=False
|
||||
... )
|
||||
>>> envs.reset()
|
||||
>>> observations, rewards, dones, infos = envs.step(np.array([2, 5, 4]))
|
||||
>>> observations, rewards, terminated, truncated, infos = envs.step(np.array([2, 5, 4]))
|
||||
>>> observations
|
||||
('[(', '[O', '[C')
|
||||
```
|
||||
|
@@ -13,7 +13,7 @@ firstpage:
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|-------------------|--------------------------------|
|
||||
|-------------------|--------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (250, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -61,7 +61,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------|----------------------|----------------|
|
||||
|-------------|-------------|--------------------|--------------|
|
||||
| Adventure | `[0, 1, 2]` | `[0, ..., 3]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -12,7 +12,7 @@ title: Air Raid
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (250, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -31,7 +31,7 @@ on the flavor of the environment (the combination of `mode` and `difficulty`). T
|
||||
flavor looks like this:
|
||||
|
||||
| Num | Action |
|
||||
|-----|------------------------|
|
||||
|-----|-----------|
|
||||
| 0 | NOOP |
|
||||
| 1 | FIRE |
|
||||
| 2 | RIGHT |
|
||||
@@ -69,7 +69,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|---------------|--------------------|--------------|
|
||||
| AirRaid | `[1, ..., 8]` | `[0]` | `1` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -12,7 +12,7 @@ title: Alien
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|----------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -66,7 +66,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|---------------|--------------------|--------------|
|
||||
| Alien | `[0, ..., 3]` | `[0, ..., 3]` | `0` |
|
||||
|
||||
|
||||
|
@@ -12,7 +12,7 @@ title: Amidar
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|-----------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -33,7 +33,7 @@ flavor looks like this:
|
||||
|
||||
|
||||
| Num | Action |
|
||||
|-----|------------------------|
|
||||
|-----|-----------|
|
||||
| 0 | NOOP |
|
||||
| 1 | FIRE |
|
||||
| 2 | UP |
|
||||
@@ -79,7 +79,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|-------------|--------------------|--------------|
|
||||
| Amidar | `[0]` | `[0, 3]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: Assault
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -30,7 +30,7 @@ on the flavor of the environment (the combination of `mode` and `difficulty`). T
|
||||
flavor looks like this:
|
||||
|
||||
| Num | Action |
|
||||
|-----|------------------------|
|
||||
|-----|-----------|
|
||||
| 0 | NOOP |
|
||||
| 1 | FIRE |
|
||||
| 2 | UP |
|
||||
@@ -66,7 +66,7 @@ env = gymnasium.make("ALE/Assault-v5")
|
||||
The various ways to configure the environment are described in detail in the article on Atari environments.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|-------------|--------------------|--------------|
|
||||
| Assault | `[0]` | `[0]` | `0` |
|
||||
|
||||
|
||||
|
@@ -11,7 +11,7 @@ title: Asterix
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -34,7 +34,7 @@ on the flavor of the environment (the combination of `mode` and `difficulty`). T
|
||||
flavor looks like this:
|
||||
|
||||
| Num | Action |
|
||||
|-----|------------------------|
|
||||
|-----|-----------|
|
||||
| 0 | NOOP |
|
||||
| 1 | UP |
|
||||
| 2 | RIGHT |
|
||||
@@ -75,7 +75,7 @@ env = gymnasium.make("ALE/Asterix-v5")
|
||||
The various ways to configure the environment are described in detail in the article on Atari environments.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|-------------|--------------------|--------------|
|
||||
| Asterix | `[0]` | `[0]` | `0` |
|
||||
|
||||
|
||||
|
@@ -11,7 +11,7 @@ title: Asteroids
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|--------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -84,7 +84,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|---------------------|--------------------|--------------|
|
||||
| Asteroids | `[0, ..., 31, 128]` | `[0, 3]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -12,7 +12,7 @@ title: Atlantis
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|-------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -75,7 +75,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|---------------|--------------------|--------------|
|
||||
| Atlantis | `[0, ..., 3]` | `[0]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: Bank Heist
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|--------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -66,7 +66,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|---------------------------------|--------------------|--------------|
|
||||
| BankHeist | `[0, 4, 8, 12, 16, 20, 24, 28]` | `[0, ..., 3]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -12,7 +12,7 @@ title: Battle Zone
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|---------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -66,7 +66,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|-------------|--------------------|--------------|
|
||||
| BattleZone | `[1, 2, 3]` | `[0]` | `1` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -18,7 +18,7 @@ grid:
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|--------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -85,7 +85,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|-------------|--------------------|--------------|
|
||||
| BeamRider | `[0]` | `[0, 1]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: Berzerk
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -63,7 +63,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|---------------------------|--------------------|--------------|
|
||||
| Berzerk | `[1, ..., 9, 16, 17, 18]` | `[0]` | `1` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: Bowling
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -77,7 +77,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|-------------|--------------------|--------------|
|
||||
| Bowling | `[0, 2, 4]` | `[0, 1]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: Boxing
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|-----------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -62,7 +62,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|-------------|--------------------|--------------|
|
||||
| Boxing | `[0]` | `[0, ..., 3]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: Breakout
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|-------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -32,7 +32,7 @@ on the flavor of the environment (the combination of `mode` and `difficulty`). T
|
||||
flavor looks like this:
|
||||
|
||||
| Num | Action |
|
||||
|-----|------------------------|
|
||||
|-----|--------|
|
||||
| 0 | NOOP |
|
||||
| 1 | FIRE |
|
||||
| 2 | RIGHT |
|
||||
@@ -72,7 +72,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|-------------------------------------------------|--------------------|--------------|
|
||||
| Breakout | `[0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44]` | `[0, 1]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: Carnival
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|-------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (214, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -76,7 +76,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|-------------|--------------------|--------------|
|
||||
| Carnival | `[0]` | `[0]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: Centipede
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|--------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -67,7 +67,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|-------------|--------------------|--------------|
|
||||
| Centipede | `[22, 86]` | `[0]` | `22` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: Chopper Command
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|-------------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -64,7 +64,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|----------------|-------------|--------------------|--------------|
|
||||
| ChopperCommand | `[0, 2]` | `[0, 1]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: Crazy Climber
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|-----------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (250, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -36,7 +36,7 @@ on the flavor of the environment (the combination of `mode` and `difficulty`). T
|
||||
flavor looks like this:
|
||||
|
||||
| Num | Action |
|
||||
|-----|------------------------|
|
||||
|-----|-----------|
|
||||
| 0 | NOOP |
|
||||
| 1 | UP |
|
||||
| 2 | RIGHT |
|
||||
@@ -79,7 +79,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|--------------|---------------|--------------------|--------------|
|
||||
| CrazyClimber | `[0, ..., 3]` | `[0, 1]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -12,7 +12,7 @@ title: Defender
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|-------------------|-------------------------------|
|
||||
|-------------------|-------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -67,7 +67,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------|--------------------|---------------|
|
||||
|-------------|-------------------|--------------------|--------------|
|
||||
| Defender | `[1, ..., 9, 16]` | `[0, 1]` | `1` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: Demon Attack
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|----------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -35,7 +35,7 @@ on the flavor of the environment (the combination of `mode` and `difficulty`). T
|
||||
flavor looks like this:
|
||||
|
||||
| Num | Action |
|
||||
|-----|------------------------|
|
||||
|-----|-----------|
|
||||
| 0 | NOOP |
|
||||
| 1 | UP |
|
||||
| 2 | RIGHT |
|
||||
@@ -80,7 +80,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|----------------|--------------------|--------------|
|
||||
| DemonAttack | `[1, 3, 5, 7]` | `[0, 1]` | `1` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: Double Dunk
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|---------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (250, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -33,7 +33,7 @@ on the flavor of the environment (the combination of `mode` and `difficulty`). T
|
||||
flavor looks like this:
|
||||
|
||||
| Num | Action |
|
||||
|-----|------------------------|
|
||||
|-----|-----------|
|
||||
| 0 | NOOP |
|
||||
| 1 | UP |
|
||||
| 2 | RIGHT |
|
||||
@@ -78,7 +78,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|----------------|--------------------|--------------|
|
||||
| DoubleDunk | `[0, ..., 15]` | `[0]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: Elevator Action
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|-------------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (250, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -36,7 +36,7 @@ on the flavor of the environment (the combination of `mode` and `difficulty`). T
|
||||
flavor looks like this:
|
||||
|
||||
| Num | Action |
|
||||
|-----|------------------------|
|
||||
|-----|-----------|
|
||||
| 0 | NOOP |
|
||||
| 1 | UP |
|
||||
| 2 | RIGHT |
|
||||
@@ -81,7 +81,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|----------------|-------------|--------------------|--------------|
|
||||
| ElevatorAction | `[0]` | `[0]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: Enduro
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|-----------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (250, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -33,7 +33,7 @@ on the flavor of the environment (the combination of `mode` and `difficulty`). T
|
||||
flavor looks like this:
|
||||
|
||||
| Num | Action |
|
||||
|-----|------------------------|
|
||||
|-----|-----------|
|
||||
| 0 | NOOP |
|
||||
| 1 | UP |
|
||||
| 2 | RIGHT |
|
||||
@@ -76,7 +76,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|-------------|--------------------|--------------|
|
||||
| Enduro | `[0]` | `[0]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: FishingDerby
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|-----------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -31,7 +31,7 @@ Atari environments are simulated via the Arcade Learning Environment (ALE) [[1]]
|
||||
The action space is a subset of the following discrete set of legal actions:
|
||||
|
||||
| Num | Action |
|
||||
|-----|------------------------|
|
||||
|-----|---------------|
|
||||
| 0 | NOOP |
|
||||
| 1 | FIRE |
|
||||
| 2 | UP |
|
||||
@@ -84,10 +84,10 @@ env = gymnasium.make("ALE/FishingDerby-v5")
|
||||
```
|
||||
|
||||
The various ways to configure the environment are described in detail in the article on Atari environments.
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
| FishingDerby | `[0]` | `[0, ..., 3]` | `0` |
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|--------------|-------------|--------------------|--------------|
|
||||
| FishingDerby | `[0]` | `[0, ..., 3]` | `0` |
|
||||
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
@@ -100,7 +100,7 @@ A thorough discussion of the intricate differences between the versions and conf
|
||||
general article on Atari environments.
|
||||
|
||||
| Version | `frameskip=` | `repeat_action_probability=` | `full_action_space=` |
|
||||
| ----- | --------- | ------------------------- | ---------|
|
||||
|---------|--------------|------------------------------|----------------------|
|
||||
| v0 | `(2, 5,)` | `0.25` | `False` |
|
||||
| v4 | `(2, 5,)` | `0.0` | `False` |
|
||||
| v5 | `5` | `0.25` | `True` |
|
||||
|
@@ -11,7 +11,7 @@ title: Freeway
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -31,7 +31,7 @@ Atari environments are simulated via the Arcade Learning Environment (ALE) [[1]]
|
||||
The action space is a subset of the following discrete set of legal actions:
|
||||
|
||||
| Num | Action |
|
||||
|-----|------------------------|
|
||||
|-----|---------------|
|
||||
| 0 | NOOP |
|
||||
| 1 | FIRE |
|
||||
| 2 | UP |
|
||||
@@ -84,8 +84,9 @@ env = gymnasium.make("ALE/Freeway-v5")
|
||||
```
|
||||
|
||||
The various ways to configure the environment are described in detail in the article on Atari environments.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|---------------|--------------------|--------------|
|
||||
| Freeway | `[0, ..., 7]` | `[0, 1]` | `0` |
|
||||
|
||||
|
||||
|
@@ -11,7 +11,7 @@ title: Frostbite
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|--------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -31,7 +31,7 @@ Atari environments are simulated via the Arcade Learning Environment (ALE) [[1]]
|
||||
The action space is a subset of the following discrete set of legal actions:
|
||||
|
||||
| Num | Action |
|
||||
|-----|------------------------|
|
||||
|-----|---------------|
|
||||
| 0 | NOOP |
|
||||
| 1 | FIRE |
|
||||
| 2 | UP |
|
||||
@@ -84,8 +84,9 @@ env = gymnasium.make("ALE/Frostbite-v5")
|
||||
```
|
||||
|
||||
The various ways to configure the environment are described in detail in the article on Atari environments.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|-------------|--------------------|--------------|
|
||||
| Frostbite | `[0, 2]` | `[0]` | `0` |
|
||||
|
||||
|
||||
@@ -100,7 +101,7 @@ A thorough discussion of the intricate differences between the versions and conf
|
||||
general article on Atari environments.
|
||||
|
||||
| Version | `frameskip=` | `repeat_action_probability=` | `full_action_space=` |
|
||||
| ----- | --------- | ------------------------- | ---------|
|
||||
|---------|--------------|------------------------------|----------------------|
|
||||
| v0 | `(2, 5,)` | `0.25` | `False` |
|
||||
| v4 | `(2, 5,)` | `0.0` | `False` |
|
||||
| v5 | `5` | `0.25` | `True` |
|
||||
|
@@ -11,7 +11,7 @@ title: Gopher
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|-----------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -31,7 +31,7 @@ Atari environments are simulated via the Arcade Learning Environment (ALE) [[1]]
|
||||
The action space is a subset of the following discrete set of legal actions:
|
||||
|
||||
| Num | Action |
|
||||
|-----|------------------------|
|
||||
|-----|---------------|
|
||||
| 0 | NOOP |
|
||||
| 1 | FIRE |
|
||||
| 2 | UP |
|
||||
@@ -84,8 +84,9 @@ env = gymnasium.make("ALE/Gopher-v5")
|
||||
```
|
||||
|
||||
The various ways to configure the environment are described in detail in the article on Atari environments.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|-------------|--------------------|--------------|
|
||||
| Gopher | `[0, 2]` | `[0, 1]` | `0` |
|
||||
|
||||
|
||||
@@ -100,7 +101,7 @@ A thorough discussion of the intricate differences between the versions and conf
|
||||
general article on Atari environments.
|
||||
|
||||
| Version | `frameskip=` | `repeat_action_probability=` | `full_action_space=` |
|
||||
| ----- | --------- | ------------------------- | ---------|
|
||||
|---------|--------------|------------------------------|----------------------|
|
||||
| v0 | `(2, 5,)` | `0.25` | `False` |
|
||||
| v4 | `(2, 5,)` | `0.0` | `False` |
|
||||
| v5 | `5` | `0.25` | `True` |
|
||||
|
@@ -11,7 +11,7 @@ title: Gravitar
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|-------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -31,7 +31,7 @@ Atari environments are simulated via the Arcade Learning Environment (ALE) [[1]]
|
||||
The action space is a subset of the following discrete set of legal actions:
|
||||
|
||||
| Num | Action |
|
||||
|-----|------------------------|
|
||||
|-----|---------------|
|
||||
| 0 | NOOP |
|
||||
| 1 | FIRE |
|
||||
| 2 | UP |
|
||||
@@ -84,8 +84,9 @@ env = gymnasium.make("ALE/Gravitar-v5")
|
||||
```
|
||||
|
||||
The various ways to configure the environment are described in detail in the article on Atari environments.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|---------------|--------------------|--------------|
|
||||
| Gravitar | `[0, ..., 4]` | `[0]` | `0` |
|
||||
|
||||
|
||||
@@ -100,7 +101,7 @@ A thorough discussion of the intricate differences between the versions and conf
|
||||
general article on Atari environments.
|
||||
|
||||
| Version | `frameskip=` | `repeat_action_probability=` | `full_action_space=` |
|
||||
| ----- | --------- | ------------------------- | ---------|
|
||||
|---------|--------------|------------------------------|----------------------|
|
||||
| v0 | `(2, 5,)` | `0.25` | `False` |
|
||||
| v4 | `(2, 5,)` | `0.0` | `False` |
|
||||
| v5 | `5` | `0.25` | `True` |
|
||||
|
@@ -12,7 +12,7 @@ title: Hero
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|-------------------|---------------------------|
|
||||
|-------------------|---------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -68,7 +68,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|--------------------------|--------------------|---------------|
|
||||
|-------------|---------------|--------------------|--------------|
|
||||
| Hero | `[0, ..., 4]` | `[0]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: IceHockey
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|--------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -65,7 +65,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|-------------|--------------------|--------------|
|
||||
| IceHockey | `[0, 2]` | `[0, ..., 3]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -84,7 +84,7 @@ Atari environments are simulated via the Arcade Learning Environment (ALE) [[1]]
|
||||
The action space is a subset of the following discrete set of legal actions:
|
||||
|
||||
| Num | Action |
|
||||
|-----|------------------------|
|
||||
|-----|---------------|
|
||||
| 0 | NOOP |
|
||||
| 1 | FIRE |
|
||||
| 2 | UP |
|
||||
@@ -169,7 +169,7 @@ All Atari games are available in three versions. They differ in the default sett
|
||||
The differences are listed in the following table:
|
||||
|
||||
| Version | `frameskip=` | `repeat_action_probability=` | `full_action_space=` |
|
||||
| ----- | --------- | ------------------------- | ---------|
|
||||
|---------|--------------|------------------------------|----------------------|
|
||||
| v0 | `(2, 5,)` | `0.25` | `False` |
|
||||
| v4 | `(2, 5,)` | `0.0` | `False` |
|
||||
| v5 | `5` | `0.25` | `True` |
|
||||
@@ -181,7 +181,7 @@ For each Atari game, several different configurations are registered in Gymnasiu
|
||||
v0 and v4. Let us take a look at all variations of Amidar-v0 that are registered with gymnasium:
|
||||
|
||||
| Name | `obs_type=` | `frameskip=` | `repeat_action_probability=` | `full_action_space=` |
|
||||
| ---------------------------- | -------- | --------- | ------------------------- | ----------------- |
|
||||
|----------------------------|-------------|--------------|------------------------------|----------------------|
|
||||
| Amidar-v0 | `"rgb"` | `(2, 5,)` | `0.25` | `False` |
|
||||
| AmidarDeterministic-v0 | `"rgb"` | `4` | `0.0` | `False` |
|
||||
| AmidarNoFrameskip-v0 | `"rgb"` | `1` | `0.25` | `False` |
|
||||
@@ -194,7 +194,7 @@ environment configuration via arguments passed to `gymnasium.make`. Moreover, th
|
||||
are in the "ALE" namespace. The suffix "-ram" is still available. Thus, we get the following table:
|
||||
|
||||
| Name | `obs_type=` | `frameskip=` | `repeat_action_probability=` | `full_action_space=` |
|
||||
| ---------------------------- | -------- | --------- | ------------------------- | ----------------- |
|
||||
|-------------------|-------------|--------------|------------------------------|----------------------|
|
||||
| ALE/Amidar-v5 | `"rgb"` | `5` | `0.25` | `True` |
|
||||
| ALE/Amidar-ram-v5 | `"ram"` | `5` | `0.25` | `True` |
|
||||
|
||||
|
@@ -11,7 +11,7 @@ title: Jamesbond
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|--------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -66,7 +66,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|-------------|--------------------|--------------|
|
||||
| Jamesbond | `[0, 1]` | `[0]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: JourneyEscape
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|------------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -31,7 +31,7 @@ on the flavor of the environment (the combination of `mode` and `difficulty`). T
|
||||
flavor looks like this:
|
||||
|
||||
| Num | Action |
|
||||
|-----|------------------------|
|
||||
|-----|---------------|
|
||||
| 0 | NOOP |
|
||||
| 2 | UP |
|
||||
| 3 | RIGHT |
|
||||
@@ -49,9 +49,6 @@ flavor looks like this:
|
||||
| 16 | DOWNRIGHTFIRE |
|
||||
| 17 | DOWNLEFTFIRE |
|
||||
|
||||
|
||||
|
||||
|
||||
### Observations
|
||||
By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
|
||||
possible to observe
|
||||
|
@@ -11,7 +11,7 @@ title: Kangaroo
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|-------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -64,7 +64,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|-------------|--------------------|--------------|
|
||||
| Kangaroo | `[0, 1]` | `[0]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: Krull
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|----------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -64,7 +64,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|-------------|--------------------|--------------|
|
||||
| Krull | `[0]` | `[0]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: Kung Fu Master
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|-----------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -25,7 +25,7 @@ You are a Kung-Fu Master fighting your way through the Evil Wizard's temple. You
|
||||
By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify full_action_space=False during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of mode and difficulty). The reduced action space for the default flavor looks like this:
|
||||
|
||||
| Num | Action |
|
||||
|-----|------------------------|
|
||||
|-----|---------------|
|
||||
| 0 | NOOP |
|
||||
| 1 | UP |
|
||||
| 2 | RIGHT |
|
||||
@@ -68,7 +68,7 @@ env = gymnasium.make("ALE/KungFuMaster-v5")
|
||||
The various ways to configure the environment are described in detail in the article on Atari environments.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|--------------|-------------|--------------------|--------------|
|
||||
| KungFuMaster | `[0]` | `[0]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: Montezuma Revenge
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|---------------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -53,7 +53,7 @@ env = gymnasium.make("ALE/MontezumaRevenge-v5")
|
||||
The various ways to configure the environment are described in detail in the article on Atari environments.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|------------------|-------------|--------------------|--------------|
|
||||
| MontezumaRevenge | `[0]` | `[0]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: Ms Pacman
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|-------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -25,7 +25,7 @@ Your goal is to collect all of the pellets on the screen while avoiding the ghos
|
||||
By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify full_action_space=False during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of mode and difficulty). The reduced action space for the default flavor looks like this:
|
||||
|
||||
| Num | Action |
|
||||
|-----|------------------------|
|
||||
|-----|-----------|
|
||||
| 0 | NOOP |
|
||||
| 1 | UP |
|
||||
| 2 | RIGHT |
|
||||
@@ -63,7 +63,7 @@ env = gymnasium.make("ALE/MsPacman-v5")
|
||||
The various ways to configure the environment are described in detail in the article on Atari environments.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|---------------|--------------------|--------------|
|
||||
| MsPacman | `[0, ..., 3]` | `[0]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: Name This Game
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|-----------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -25,7 +25,7 @@ Your goal is to defend the treasure that you have discovered. You must fight off
|
||||
By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify full_action_space=False during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of mode and difficulty). The reduced action space for the default flavor looks like this:
|
||||
|
||||
| Num | Action |
|
||||
|-----|------------------------|
|
||||
|-----|-----------|
|
||||
| 0 | NOOP |
|
||||
| 1 | FIRE |
|
||||
| 2 | RIGHT |
|
||||
@@ -61,7 +61,7 @@ env = gymnasium.make("ALE/NameThisGame-v5")
|
||||
The various ways to configure the environment are described in detail in the article on Atari environments.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|--------------|---------------|--------------------|--------------|
|
||||
| NameThisGame | `[8, 24, 40]` | `[0, 1]` | `8` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: Phoenix
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -25,7 +25,7 @@ Your goal is to reach and shoot the alien pilot. On your way there, you must eli
|
||||
By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify full_action_space=False during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of mode and difficulty). The reduced action space for the default flavor looks like this:
|
||||
|
||||
| Num | Action |
|
||||
|-----|------------------------|
|
||||
|-----|-----------|
|
||||
| 0 | NOOP |
|
||||
| 1 | FIRE |
|
||||
| 2 | RIGHT |
|
||||
@@ -62,7 +62,7 @@ env = gymnasium.make("ALE/Phoenix-v5")
|
||||
The various ways to configure the environment are described in detail in the article on Atari environments.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|-------------|--------------------|--------------|
|
||||
| Phoenix | `[0]` | `[0]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: Pitfall
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -59,7 +59,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|-------------|--------------------|--------------|
|
||||
| Pitfall | `[0]` | `[0]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: Pong
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|---------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -29,7 +29,7 @@ number of actions (those that are meaningful in this game) are available. The re
|
||||
flavor looks like this:
|
||||
|
||||
| Num | Action |
|
||||
|-----|------------------------|
|
||||
|-----|-----------|
|
||||
| 0 | NOOP |
|
||||
| 1 | FIRE |
|
||||
| 2 | RIGHT |
|
||||
@@ -71,7 +71,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|-------------|--------------------|--------------|
|
||||
| Pong | `[0, 1]` | `[0, ..., 3]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: Pooyan
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|-----------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -30,7 +30,7 @@ number of actions (those that are meaningful in this game) are available. The re
|
||||
flavor looks like this:
|
||||
|
||||
| Num | Action |
|
||||
|-----|------------------------|
|
||||
|-----|----------|
|
||||
| 0 | NOOP |
|
||||
| 1 | FIRE |
|
||||
| 2 | UP |
|
||||
@@ -38,8 +38,6 @@ flavor looks like this:
|
||||
| 4 | UPFIRE |
|
||||
| 5 | DOWNFIRE |
|
||||
|
||||
|
||||
|
||||
### Observations
|
||||
By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe
|
||||
- The 128 Bytes of RAM of the console
|
||||
@@ -72,7 +70,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|--------------------|--------------------|--------------|
|
||||
| Pooyan | `[10, 30, 50, 70]` | `[0]` | `10` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: PrivateEye
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|---------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -59,7 +59,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|---------------|--------------------|--------------|
|
||||
| PrivateEye | `[0, ..., 4]` | `[0, ..., 3]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: Qbert
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|----------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -29,7 +29,7 @@ number of actions (those that are meaningful in this game) are available. The re
|
||||
flavor looks like this:
|
||||
|
||||
| Num | Action |
|
||||
|-----|------------------------|
|
||||
|-----|--------|
|
||||
| 0 | NOOP |
|
||||
| 1 | FIRE |
|
||||
| 2 | UP |
|
||||
@@ -70,7 +70,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|-------------|--------------------|--------------|
|
||||
| Qbert | `[0]` | `[0, 1]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: Riverraid
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|--------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -74,7 +74,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-----------|--------------------|--------------|
|
||||
|-------------|-------------|--------------------|--------------|
|
||||
| Riverraid | `[0]` | `[0, 1]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: Road Runner
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|---------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -57,7 +57,7 @@ via `gymnasium.make`.
|
||||
Score points are your only reward. You get score points each time you:
|
||||
|
||||
| actions | points |
|
||||
|-----------------------------------------------------------|-----|
|
||||
|-------------------------------------------------------|--------|
|
||||
| eat a pile of birdseed | 100 |
|
||||
| eat steel shot | 100 |
|
||||
| get the coyote hit by a mine (cannonball, rock, etc.) | 200 |
|
||||
@@ -76,7 +76,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------------|------|----------|--------------|
|
||||
|-------------|-------------|--------------------|--------------|
|
||||
| RoadRunner | `[0]` | `[0]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: Robot Tank
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|-------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -75,7 +75,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------|--------------------|--------------|
|
||||
|-------------|-------------|--------------------|--------------|
|
||||
| Robotank | `[0]` | `[0]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: Seaquest
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|-------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -85,7 +85,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|------------------------------|---------------|--------------|
|
||||
|-------------|-------------|--------------------|--------------|
|
||||
| Seaquest | `[0]` | `[0, 1]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: Skiing
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|-----------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -37,7 +37,7 @@ on the flavor of the environment (the combination of `mode` and `difficulty`). T
|
||||
flavor looks like this:
|
||||
|
||||
| Num | Action |
|
||||
|-----|------|
|
||||
|-----|--------|
|
||||
| 0 | NOOP |
|
||||
| 1 | RIGHT |
|
||||
| 2 | LEFT |
|
||||
@@ -76,7 +76,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|------------------------------|---------------|--------------|
|
||||
|-------------|-------------|--------------------|--------------|
|
||||
| Skiing | `[0]` | `[0]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: Solaris
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -57,7 +57,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|-------------|--------------------|--------------|
|
||||
| Solaris | `[0]` | `[0]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: SpaceInvaders
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|------------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -29,7 +29,7 @@ number of actions (those that are meaningful in this game) are available. The re
|
||||
flavor looks like this:
|
||||
|
||||
| Num | Action |
|
||||
|-----|------------------------|
|
||||
|-----|-----------|
|
||||
| 0 | NOOP |
|
||||
| 1 | FIRE |
|
||||
| 2 | RIGHT |
|
||||
@@ -70,7 +70,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|---------------|----------------|--------------------|--------------|
|
||||
| SpaceInvaders | `[0, ..., 15]` | `[0, 1]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: StarGunner
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|---------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -29,7 +29,7 @@ number of actions (those that are meaningful in this game) are available. The re
|
||||
flavor looks like this:
|
||||
|
||||
| Num | Action |
|
||||
|-----|------------------------|
|
||||
|-----|--------|
|
||||
| 0 | NOOP |
|
||||
| 1 | FIRE |
|
||||
| 2 | UP |
|
||||
@@ -71,7 +71,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|---------------|--------------------|--------------|
|
||||
| StarGunner | `[0, ..., 3]` | `[0]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: Tennis
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|-----------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -60,7 +60,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|-------------|--------------------|--------------|
|
||||
| Tennis | `[0, 2]` | `[0, ..., 3]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: TimePilot
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|--------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -29,7 +29,7 @@ number of actions (those that are meaningful in this game) are available. The re
|
||||
flavor looks like this:
|
||||
|
||||
| Num | Action |
|
||||
|-----|------------------------|
|
||||
|-----|-----------|
|
||||
| 0 | NOOP |
|
||||
| 1 | FIRE |
|
||||
| 2 | UP |
|
||||
@@ -76,7 +76,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|-------------|--------------------|--------------|
|
||||
| TimePilot | `[0]` | `[0, 1, 2]` | `0` |
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
|
||||
|
@@ -11,7 +11,7 @@ title: Tutankham
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|--------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -25,7 +25,7 @@ Your goal is to rack up points by finding treasures in the mazes of the tomb whi
|
||||
By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify full_action_space=False during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of mode and difficulty). The reduced action space for the default flavor looks like this:
|
||||
|
||||
| Num | Action |
|
||||
|-----|------------------------|
|
||||
|-----|-----------|
|
||||
| 0 | NOOP |
|
||||
| 1 | UP |
|
||||
| 2 | RIGHT |
|
||||
@@ -62,7 +62,7 @@ env = gymnasium.make("ALE/Tutankham-v5")
|
||||
The various ways to configure the environment are described in detail in the article on Atari environments.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|-------------|--------------------|--------------|
|
||||
| Tutankham | `[0]` | `[0]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: Up n' Down
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -25,7 +25,7 @@ Your goal is to steer your baja bugger to collect prizes and eliminate opponents
|
||||
By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify full_action_space=False during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of mode and difficulty). The reduced action space for the default flavor looks like this:
|
||||
|
||||
| Num | Action |
|
||||
|-----|------------------------|
|
||||
|-----|----------|
|
||||
| 0 | NOOP |
|
||||
| 1 | FIRE |
|
||||
| 2 | UP |
|
||||
@@ -61,7 +61,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|-------------|--------------------|--------------|
|
||||
| UpNDown | `[0]` | `[0, ..., 3]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: Venture
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -53,7 +53,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|-------------|--------------------|--------------|
|
||||
| Venture | `[0]` | `[0, ..., 3]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: Video Pinball
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|-----------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -25,7 +25,7 @@ Your goal is to keep the ball in play as long as possible and to score as many p
|
||||
By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify full_action_space=False during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of mode and difficulty). The reduced action space for the default flavor looks like this:
|
||||
|
||||
| Num | Action |
|
||||
|-----|------------------------|
|
||||
|-----|-----------|
|
||||
| 0 | NOOP |
|
||||
| 1 | FIRE |
|
||||
| 2 | UP |
|
||||
@@ -65,7 +65,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|--------------|---------------|--------------------|--------------|
|
||||
| VideoPinball | `[0, ..., 2]` | `[0, 1]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -11,7 +11,7 @@ title: Wizard of Wor
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|----------------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -25,7 +25,7 @@ Your goal is to beat the Wizard using your laser and radar scanner. Detailed doc
|
||||
By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify full_action_space=False during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of mode and difficulty). The reduced action space for the default flavor looks like this:
|
||||
|
||||
| Num | Action |
|
||||
|-----|------------------------|
|
||||
|-----|-----------|
|
||||
| 0 | NOOP |
|
||||
| 1 | FIRE |
|
||||
| 2 | UP |
|
||||
@@ -66,7 +66,7 @@ It is possible to specify various flavors of the environment via the keyword arg
|
||||
A flavor is a combination of a game mode and a difficulty setting.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|--------------|-------------|--------------------|--------------|
|
||||
| WizardOfWor | `[0]` | `[0, 1]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -12,7 +12,7 @@ lastpage:
|
||||
This environment is part of the <a href='..'>Atari environments</a>. Please read that page first for general information.
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
|-------------------|-----------------------------------|
|
||||
| Action Space | Discrete(18) |
|
||||
| Observation Space | (210, 160, 3) |
|
||||
| Observation High | 255 |
|
||||
@@ -52,7 +52,7 @@ env = gymnasium.make("ALE/Zaxxon-v5")
|
||||
The various ways to configure the environment are described in detail in the article on Atari environments.
|
||||
|
||||
| Environment | Valid Modes | Valid Difficulties | Default Mode |
|
||||
|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
|
||||
|-------------|-------------|--------------------|--------------|
|
||||
| Zaxxon | `[0]` | `[0]` | `0` |
|
||||
|
||||
You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
|
||||
|
@@ -16,8 +16,8 @@ lastpage:
|
||||
|
||||
```{code-block} python
|
||||
|
||||
import gymnasium
|
||||
env = gymnasium.make("LunarLander-v2", render_mode="human")
|
||||
import gymnasium as gym
|
||||
env = gym.make("LunarLander-v2", render_mode="human")
|
||||
observation, info = env.reset(seed=42)
|
||||
for _ in range(1000):
|
||||
action = policy(observation) # User-defined policy function
|
||||
|
Reference in New Issue
Block a user