Rename gymnasium as gym in docs (#24)

Mark Towers
2022-09-19 21:14:09 +01:00
committed by GitHub
parent 5674a52d4c
commit c9056e37e1
68 changed files with 1134 additions and 1131 deletions

@@ -54,7 +54,8 @@ title: Vector
```
```python
->>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
+>>> import gymnasium as gym
+>>> envs = gym.vector.make("CartPole-v1", num_envs=3)
>>> envs.reset()
(array([[-0.02240574, -0.03439831, -0.03904812, 0.02810693],
[ 0.01586068, 0.01929009, 0.02394426, 0.04016077],
@@ -68,10 +69,10 @@ title: Vector
```
```python
->>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
+>>> envs = gym.vector.make("CartPole-v1", num_envs=3)
>>> envs.reset()
>>> actions = np.array([1, 0, 1])
->>> observations, rewards, dones, infos = envs.step(actions)
+>>> observations, rewards, terminated, truncated, infos = envs.step(actions)
>>> observations
array([[ 0.00122802, 0.16228443, 0.02521779, -0.23700266],
@@ -80,7 +81,7 @@ array([[ 0.00122802, 0.16228443, 0.02521779, -0.23700266],
dtype=float32)
>>> rewards
array([1., 1., 1.])
->>> dones
+>>> terminated
array([False, False, False])
>>> infos
{}
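For context on the new step signature used above, here is a minimal sketch of a vectorized rollout loop under the five-value return (`terminated` and `truncated` instead of a single `dones` array); the random actions and the step count are illustrative assumptions, not part of this page.
```python
import gymnasium as gym
import numpy as np

envs = gym.vector.make("CartPole-v1", num_envs=3)
observations, infos = envs.reset(seed=42)

episodes_finished = 0
for _ in range(100):
    # One action per sub-environment, sampled from the batched action space.
    actions = envs.action_space.sample()
    observations, rewards, terminated, truncated, infos = envs.step(actions)
    # A sub-environment's episode ends when it terminated *or* was truncated;
    # the vector environment resets finished sub-environments automatically.
    episodes_finished += int(np.logical_or(terminated, truncated).sum())

envs.close()
```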

@@ -12,9 +12,9 @@ also be chained to combine their effects. Most environments that are generated v
In order to wrap an environment, you must first initialize a base environment. Then you can pass this environment along
with (possibly optional) parameters to the wrapper's constructor:
```python
->>> import gymnasium
+>>> import gymnasium as gym
>>> from gymnasium.wrappers import RescaleAction
->>> base_env = gymnasium.make("BipedalWalker-v3")
+>>> base_env = gym.make("BipedalWalker-v3")
>>> base_env.action_space
Box([-1. -1. -1. -1.], [1. 1. 1. 1.], (4,), float32)
>>> wrapped_env = RescaleAction(base_env, min_action=0, max_action=1)
@@ -64,7 +64,7 @@ Let's say you have an environment with action space of type `Box`, but you would
only like to use a finite subset of actions. Then, you might want to implement the following wrapper
```python
-class DiscreteActions(gymnasium.ActionWrapper):
+class DiscreteActions(gym.ActionWrapper):
def __init__(self, env, disc_to_cont):
super().__init__(env)
self.disc_to_cont = disc_to_cont
@@ -74,7 +74,7 @@ class DiscreteActions(gymnasium.ActionWrapper):
return self.disc_to_cont[act]
if __name__ == "__main__":
env = gymnasium.make("LunarLanderContinuous-v2")
env = gym.make("LunarLanderContinuous-v2")
wrapped_env = DiscreteActions(env, [np.array([1,0]), np.array([-1,0]),
np.array([0,1]), np.array([0,-1])])
print(wrapped_env.action_space) #Discrete(4)
@@ -95,7 +95,7 @@ the position of the target relative to the agent, i.e. `observation["target_posi
For this, you could implement an observation wrapper like this:
```python
-class RelativePosition(gymnasium.ObservationWrapper):
+class RelativePosition(gym.ObservationWrapper):
def __init__(self, env):
super().__init__(env)
self.observation_space = Box(shape=(2,), low=-np.inf, high=np.inf)
@@ -117,7 +117,7 @@ Let us look at an example: Sometimes (especially when we do not have control ove
to a range to gain some numerical stability. To do that, we could, for instance, implement the following wrapper:
```python
-class ClipReward(gymnasium.RewardWrapper):
+class ClipReward(gym.RewardWrapper):
def __init__(self, env, min_reward, max_reward):
super().__init__(env)
self.min_reward = min_reward
@@ -137,7 +137,7 @@ When calling step causes `self.env.step()` to return `done=True`,
and the return format of `self.step()` is as follows:
```python
-new_obs, terminal_reward, terminal_done, info
+new_obs, terminal_reward, terminated, truncated, info
```
`new_obs` is the first observation after calling `self.env.reset()`,
@@ -145,7 +145,7 @@ new_obs, terminal_reward, terminal_done, info
`terminal_reward` is the reward after calling `self.env.step()`,
prior to calling `self.env.reset()`
-`terminal_done` is always `True`
+`terminated or truncated` is always `True`
`info` is a dict containing all the keys from the info dict returned by
the call to `self.env.reset()`, with additional keys `terminal_observation`
@@ -156,7 +156,7 @@ to `self.env.step()`.
If `done` is not true when `self.env.step()` is called, `self.step()` returns
```python
-obs, reward, done, info
+obs, reward, terminated, truncated, info
```
as normal.
@@ -164,12 +164,12 @@ as normal.
The AutoResetWrapper is not applied by default when calling `gymnasium.make()`, but can be applied by setting the optional `autoreset` argument to `True`:
```python
env = gymnasium.make("CartPole-v1", autoreset=True)
env = gym.make("CartPole-v1", autoreset=True)
```
The AutoResetWrapper can also be applied using its constructor:
```python
env = gymnasium.make("CartPole-v1")
env = gym.make("CartPole-v1")
env = AutoResetWrapper(env)
```
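To make the autoreset behaviour above concrete, here is a small sketch of an episode-return loop that relies on it; the bookkeeping and the step budget are illustrative assumptions rather than part of the documented wrapper.
```python
import gymnasium as gym

env = gym.make("CartPole-v1", autoreset=True)
obs, info = env.reset(seed=0)

episode_return, episode_returns = 0.0, []
for _ in range(500):
    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
    episode_return += reward
    if terminated or truncated:
        # On this step `obs` already belongs to the freshly reset episode; the
        # last observation of the finished episode is kept in the info dict
        # (the `terminal_observation` key described above).
        episode_returns.append(episode_return)
        episode_return = 0.0

env.close()
```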
@@ -204,7 +204,7 @@ initialization of the environment. However, *Reacher* does not allow you to do t
of the reward are returned in `info`, so let us build a wrapper for Reacher that allows us to weight those terms:
```python
-class ReacherRewardWrapper(gymnasium.Wrapper):
+class ReacherRewardWrapper(gym.Wrapper):
def __init__(self, env, reward_dist_weight, reward_ctrl_weight):
super().__init__(env)
self.reward_dist_weight = reward_dist_weight
@@ -225,25 +225,25 @@ It is *not* sufficient to use a `RewardWrapper` in this case!
## Available Wrappers
| Name | Type | Arguments | Description |
|---------------------------|--------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `AtariPreprocessing` | `gymnasium.Wrapper` | `env: gymnasium.Env`, `noop_max: int = 30`, `frame_skip: int = 4`, `screen_size: int = 84`, `terminal_on_life_loss: bool = False`, `grayscale_obs: bool = True`, `grayscale_newaxis: bool = False`, `scale_obs: bool = False` | Implements the best practices from Machado et al. (2018), "Revisiting the Arcade Learning Environment: Evaluation Protocols and Open Problems for General Agents" but will be deprecated soon. |
| `AutoResetWrapper` | `gymnasium.Wrapper` | `env` | The wrapped environment will automatically reset when the done state is reached. Make sure you read the documentation before using this wrapper! |
| `ClipAction` | `gymnasium.ActionWrapper` | `env` | Clip the continuous action to the valid bound specified by the environment's `action_space` |
| `FilterObservation` | `gymnasium.ObservationWrapper` | `env`, `filter_keys=None` | If you have an environment that returns dictionaries as observations, but you would like to only keep a subset of the entries, you can use this wrapper. `filter_keys` should be an iterable that contains the keys that are kept in the new observation. If it is `None`, all keys will be kept and the wrapper has no effect. |
| `FlattenObservation` | `gymnasium.ObservationWrapper` | `env` | Observation wrapper that flattens the observation |
| `FrameStack` | `gymnasium.ObservationWrapper` | `env`, `num_stack`, `lz4_compress=False` | Observation wrapper that stacks the observations in a rolling manner. For example, if the number of stacks is 4, then the returned observation contains the most recent 4 observations. Observations will be objects of type `LazyFrames`. This object can be cast to a numpy array via `np.asarray(obs)`. You can also access single frames or slices via the usual `__getitem__` syntax. If `lz4_compress` is set to true, the `LazyFrames` object will compress the frames internally (losslessly). The first observation (i.e. the one returned by `reset`) will consist of `num_stack` repetitions of the first frame. |
| `GrayScaleObservation` | `gymnasium.ObservationWrapper` | `env`, `keep_dim=False` | Convert the image observation from RGB to gray scale. By default, the resulting observation will be 2-dimensional. If `keep_dim` is set to true, a singleton dimension will be added (i.e. the observations are of shape AxBx1). |
| `NormalizeReward` | `gymnasium.Wrapper` | `env`, `gamma=0.99`, `epsilon=1e-8` | This wrapper will normalize immediate rewards s.t. their exponential moving average has a fixed variance. `epsilon` is a stability parameter and `gamma` is the discount factor that is used in the exponential moving average. The exponential moving average will have variance `(1 - gamma)**2`. The scaling depends on past trajectories and rewards will not be scaled correctly if the wrapper was newly instantiated or the policy was changed recently. |
| `NormalizeObservation` | `gymnasium.Wrapper` | `env`, `epsilon=1e-8` | This wrapper will normalize observations s.t. each coordinate is centered with unit variance. The normalization depends on past trajectories and observations will not be normalized correctly if the wrapper was newly instantiated or the policy was changed recently. `epsilon` is a stability parameter that is used when scaling the observations. |
| `OrderEnforcing` | `gymnasium.Wrapper` | `env` | This will produce an error if `step` is called before an initial `reset` |
| `PixelObservationWrapper` | `gymnasium.ObservationWrapper` | `env`, `pixels_only=True`, `render_kwargs=None`, `pixel_keys=("pixels",)` | Augment observations by pixel values obtained via `render`. You can specify whether the original observations should be discarded entirely or be augmented by setting `pixels_only`. Also, you can provide keyword arguments for `render`. |
| `RecordEpisodeStatistics` | `gymnasium.Wrapper` | `env`, `deque_size=100` | This will keep track of cumulative rewards and episode lengths. At the end of an episode, the statistics of the episode will be added to `info`. Moreover, the rewards and episode lengths are stored in buffers that can be accessed via `wrapped_env.return_queue` and `wrapped_env.length_queue` respectively. The size of these buffers can be set via `deque_size`. |
| `RecordVideo` | `gymnasium.Wrapper` | `env`, `video_folder: str`, `episode_trigger: Callable[[int], bool] = None`, `step_trigger: Callable[[int], bool] = None`, `video_length: int = 0`, `name_prefix: str = "rl-video"` | This wrapper will record videos of rollouts. The results will be saved in the folder specified via `video_folder`. You can specify a prefix for the filenames via `name_prefix`. Usually, you only want to record the environment intermittently, say every hundredth episode. To allow this, you can pass `episode_trigger` or `step_trigger`. At most one of these should be passed. These functions will accept an episode index or step index, respectively. They should return a boolean that indicates whether a recording should be started at this point. If neither `episode_trigger` nor `step_trigger` is passed, a default `episode_trigger` will be used. By default, the recording will be stopped once a done signal has been emitted by the environment. However, you can also create recordings of fixed length (possibly spanning several episodes) by passing a strictly positive value for `video_length`. |
| `RescaleAction` | `gymnasium.ActionWrapper` | `env`, `min_action`, `max_action` | Rescales the continuous action space of the environment to a range \[`min_action`, `max_action`], where `min_action` and `max_action` are numpy arrays or floats. |
| `ResizeObservation` | `gymnasium.ObservationWrapper` | `env`, `shape` | This wrapper works on environments with image observations (or more generally observations of shape AxBxC) and resizes the observation to the shape given by the tuple `shape`. The argument `shape` may also be an integer. In that case, the observation is scaled to a square of side length `shape`. |
| `TimeAwareObservation` | `gymnasium.ObservationWrapper` | `env` | Augment the observation with current time step in the trajectory (by appending it to the observation). This can be useful to ensure that things stay Markov. Currently it only works with one-dimensional observation spaces. |
| `TimeLimit` | `gymnasium.Wrapper` | `env`, `max_episode_steps=None` | Probably the most useful wrapper in Gymnasium. This wrapper will emit a done signal if the specified number of steps is exceeded in an episode. In order to be able to distinguish termination and truncation, you need to check `info`. If it does not contain the key `"TimeLimit.truncated"`, the environment did not reach the time limit. Otherwise, `info["TimeLimit.truncated"]` will be true if the episode ended because of the time limit. |
| `TransformObservation` | `gymnasium.ObservationWrapper` | `env`, `f` | This wrapper will apply `f` to observations |
| `TransformReward` | `gymnasium.RewardWrapper` | `env`, `f` | This wrapper will apply `f` to rewards |
| `VectorListInfo` | `gymnasium.Wrapper` | `env` | This wrapper will convert the info of a vectorized environment from the `dict` format to a `list` of dictionaries where the _i-th_ dictionary contains the info of the _i-th_ environment. If using other wrappers that perform operations on info, like `RecordEpisodeStatistics`, it needs to be the outermost wrapper. |
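As a rough illustration of how several of the wrappers listed above can be chained, the sketch below combines `ClipAction`, `NormalizeObservation` and `RecordEpisodeStatistics` on a continuous-control task; the environment choice, wrapper order, and step budget are assumptions made for the example, not recommendations from this table.
```python
import gymnasium as gym
from gymnasium.wrappers import ClipAction, NormalizeObservation, RecordEpisodeStatistics

env = gym.make("Pendulum-v1")
env = ClipAction(env)                                # clip actions to the valid Box bounds
env = NormalizeObservation(env)                      # running mean/variance normalization
env = RecordEpisodeStatistics(env, deque_size=100)   # episode returns/lengths reported in `info`

obs, info = env.reset(seed=0)
for _ in range(200):
    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
    if terminated or truncated:
        obs, info = env.reset()
env.close()
```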