From 2a7ebc4271e50556368fda0b98480944e80c4d0c Mon Sep 17 00:00:00 2001 From: Mark Towers Date: Wed, 12 Oct 2022 15:58:01 +0100 Subject: [PATCH] Updates the Env, Wrapper and Vector API documentation (#48) --- docs/api/core.md | 85 ----- docs/api/env.md | 79 +++++ docs/api/registry.md | 31 ++ docs/api/spaces.md | 62 +++- docs/api/spaces/composite.md | 12 +- docs/api/spaces/fundamental.md | 19 +- docs/api/spaces/utils.md | 3 - docs/api/spaces/vector_utils.md | 20 ++ docs/api/utils.md | 25 +- docs/api/vector.md | 81 ++--- docs/api/wrappers.md | 313 +++++++---------- docs/api/wrappers/action_wrappers.md | 22 ++ docs/api/wrappers/misc_wrappers.md | 68 ++++ docs/api/wrappers/observation_wrappers.md | 62 ++++ docs/api/wrappers/reward_wrappers.md | 22 ++ docs/content/basic_usage.md | 7 + docs/content/environment_creation.md | 10 + docs/content/handling_timelimits.md | 46 ++- docs/content/vectorising.md | 11 +- docs/environments/atari.md | 21 +- docs/environments/atari/adventure.md | 12 +- docs/environments/atari/air_raid.md | 12 +- docs/environments/atari/alien.md | 12 +- docs/environments/atari/amidar.md | 12 +- docs/environments/atari/assault.md | 12 +- docs/environments/atari/asterix.md | 12 +- docs/environments/atari/asteroids.md | 12 +- docs/environments/atari/atlantis.md | 12 +- docs/environments/atari/bank_heist.md | 12 +- docs/environments/atari/battle_zone.md | 12 +- docs/environments/atari/beam_rider.md | 12 +- docs/environments/atari/berzerk.md | 12 +- docs/environments/atari/bowling.md | 12 +- docs/environments/atari/boxing.md | 12 +- docs/environments/atari/breakout.md | 12 +- docs/environments/atari/carnival.md | 12 +- docs/environments/atari/centipede.md | 12 +- docs/environments/atari/chopper_command.md | 12 +- docs/environments/atari/crazy_climber.md | 12 +- docs/environments/atari/defender.md | 12 +- docs/environments/atari/demon_attack.md | 12 +- docs/environments/atari/double_dunk.md | 12 +- docs/environments/atari/elevator_action.md | 12 +- docs/environments/atari/enduro.md | 12 +- docs/environments/atari/fishing_derby.md | 10 +- docs/environments/atari/freeway.md | 10 +- docs/environments/atari/frostbite.md | 10 +- docs/environments/atari/gopher.md | 10 +- docs/environments/atari/gravitar.md | 10 +- docs/environments/atari/hero.md | 12 +- docs/environments/atari/ice_hockey.md | 12 +- docs/environments/atari/jamesbond.md | 12 +- docs/environments/atari/journey_escape.md | 12 +- docs/environments/atari/kangaroo.md | 12 +- docs/environments/atari/krull.md | 12 +- docs/environments/atari/kung_fu_master.md | 12 +- docs/environments/atari/montezuma_revenge.md | 12 +- docs/environments/atari/ms_pacman.md | 12 +- docs/environments/atari/name_this_game.md | 12 +- docs/environments/atari/phoenix.md | 12 +- docs/environments/atari/pitfall.md | 12 +- docs/environments/atari/pong.md | 12 +- docs/environments/atari/pooyan.md | 12 +- docs/environments/atari/private_eye.md | 12 +- docs/environments/atari/qbert.md | 12 +- docs/environments/atari/riverraid.md | 12 +- docs/environments/atari/road_runner.md | 12 +- docs/environments/atari/robotank.md | 12 +- docs/environments/atari/seaquest.md | 12 +- docs/environments/atari/skiing.md | 12 +- docs/environments/atari/solaris.md | 12 +- docs/environments/atari/space_invaders.md | 12 +- docs/environments/atari/star_gunner.md | 12 +- docs/environments/atari/tennis.md | 12 +- docs/environments/atari/time_pilot.md | 12 +- docs/environments/atari/tutankham.md | 12 +- docs/environments/atari/up_n_down.md | 12 +- docs/environments/atari/venture.md | 12 +- 
docs/environments/atari/video_pinball.md | 12 +- docs/environments/atari/wizard_of_wor.md | 12 +- docs/environments/atari/zaxxon.md | 12 +- docs/environments/box2d.md | 2 +- docs/environments/classic_control.md | 2 +- docs/environments/mujoco.md | 23 +- docs/environments/toy_text.md | 2 +- docs/index.md | 12 +- gymnasium/core.py | 321 ++++++++++++------ gymnasium/envs/box2d/bipedal_walker.py | 20 +- gymnasium/envs/box2d/car_racing.py | 22 +- gymnasium/envs/box2d/lunar_lander.py | 20 +- gymnasium/envs/classic_control/acrobot.py | 18 +- gymnasium/envs/classic_control/cartpole.py | 14 +- .../continuous_mountain_car.py | 18 +- .../envs/classic_control/mountain_car.py | 18 +- gymnasium/envs/classic_control/pendulum.py | 18 +- gymnasium/envs/mujoco/ant_v4.py | 16 +- gymnasium/envs/mujoco/half_cheetah_v4.py | 16 +- gymnasium/envs/mujoco/hopper_v4.py | 16 +- gymnasium/envs/mujoco/humanoid_v4.py | 16 +- gymnasium/envs/mujoco/humanoidstandup_v4.py | 16 +- .../mujoco/inverted_double_pendulum_v4.py | 16 +- gymnasium/envs/mujoco/inverted_pendulum_v4.py | 16 +- gymnasium/envs/mujoco/pusher_v4.py | 16 +- gymnasium/envs/mujoco/reacher_v4.py | 16 +- gymnasium/envs/mujoco/swimmer_v4.py | 18 +- gymnasium/envs/mujoco/walker2d_v4.py | 16 +- gymnasium/envs/toy_text/blackjack.py | 12 +- gymnasium/envs/toy_text/cliffwalking.py | 12 +- gymnasium/envs/toy_text/frozen_lake.py | 10 +- gymnasium/envs/toy_text/taxi.py | 14 +- gymnasium/spaces/graph.py | 4 +- gymnasium/utils/play.py | 8 +- gymnasium/vector/__init__.py | 2 +- gymnasium/vector/vector_env.py | 80 ++++- gymnasium/wrappers/__init__.py | 4 +- gymnasium/wrappers/atari_preprocessing.py | 1 + gymnasium/wrappers/autoreset.py | 1 + gymnasium/wrappers/frame_stack.py | 3 +- gymnasium/wrappers/pixel_observation.py | 10 +- gymnasium/wrappers/transform_observation.py | 2 +- gymnasium/wrappers/transform_reward.py | 2 +- gymnasium/wrappers/vector_list_info.py | 17 +- 122 files changed, 1412 insertions(+), 1179 deletions(-) delete mode 100644 docs/api/core.md create mode 100644 docs/api/env.md create mode 100644 docs/api/registry.md create mode 100644 docs/api/spaces/vector_utils.md create mode 100644 docs/api/wrappers/action_wrappers.md create mode 100644 docs/api/wrappers/misc_wrappers.md create mode 100644 docs/api/wrappers/observation_wrappers.md create mode 100644 docs/api/wrappers/reward_wrappers.md diff --git a/docs/api/core.md b/docs/api/core.md deleted file mode 100644 index c03bf3289..000000000 --- a/docs/api/core.md +++ /dev/null @@ -1,85 +0,0 @@ -# Core - -## gymnasium.Env - -```{eval-rst} -.. autofunction:: gymnasium.Env.step -``` - -```{eval-rst} -.. autofunction:: gymnasium.Env.reset -``` - -```{eval-rst} -.. autofunction:: gymnasium.Env.render -``` - -### Attributes - -```{eval-rst} -.. autoattribute:: gymnasium.Env.action_space - - This attribute gives the format of valid actions. It is of datatype `Space` provided by Gymnasium. For example, if the action space is of type `Discrete` and gives the value `Discrete(2)`, this means there are two valid discrete actions: 0 & 1. - - .. code:: - - >>> env.action_space - Discrete(2) - >>> env.observation_space - Box(-3.4028234663852886e+38, 3.4028234663852886e+38, (4,), float32) -``` - -```{eval-rst} -.. autoattribute:: gymnasium.Env.observation_space - - This attribute gives the format of valid observations. It is of datatype :class:`Space` provided by Gymnasium. 
For example, if the observation space is of type :class:`Box` and the shape of the object is ``(4,)``, this denotes a valid observation will be an array of 4 numbers. We can check the box bounds as well with attributes. - - .. code:: - - >>> env.observation_space.high - array([4.8000002e+00, 3.4028235e+38, 4.1887903e-01, 3.4028235e+38], dtype=float32) - >>> env.observation_space.low - array([-4.8000002e+00, -3.4028235e+38, -4.1887903e-01, -3.4028235e+38], dtype=float32) -``` - -```{eval-rst} -.. autoattribute:: gymnasium.Env.reward_range - - This attribute is a tuple corresponding to min and max possible rewards. Default range is set to ``(-inf,+inf)``. You can set it if you want a narrower range. -``` - -### Additional Methods - -```{eval-rst} -.. autofunction:: gymnasium.Env.close -``` - -```{eval-rst} -.. autofunction:: gymnasium.Env.seed -``` - - -## gymnasium.Wrapper - -```{eval-rst} -.. autoclass:: gymnasium.Wrapper -``` - -## gymnasium.ObservationWrapper - -```{eval-rst} -.. autoclass:: gymnasium.ObservationWrapper -``` - - -## gymnasium.RewardWrapper - -```{eval-rst} -.. autoclass:: gymnasium.RewardWrapper -``` - -## gymnasium.ActionWrapper - -```{eval-rst} -.. autoclass:: gymnasium.ActionWrapper -``` \ No newline at end of file diff --git a/docs/api/env.md b/docs/api/env.md new file mode 100644 index 000000000..765e69d87 --- /dev/null +++ b/docs/api/env.md @@ -0,0 +1,79 @@ +--- +title: Utils +--- + +# Env + +## gymnasium.Env + +```{eval-rst} +.. autoclass:: gymnasium.Env +``` + +### Methods + +```{eval-rst} +.. autofunction:: gymnasium.Env.step +.. autofunction:: gymnasium.Env.reset +.. autofunction:: gymnasium.Env.render +``` + +### Attributes + +```{eval-rst} +.. autoattribute:: gymnasium.Env.action_space + + The Space object corresponding to valid actions, all valid actions should be contained with the space. For example, if the action space is of type `Discrete` and gives the value `Discrete(2)`, this means there are two valid discrete actions: 0 & 1. + + .. code:: + + >>> env.action_space + Discrete(2) + >>> env.observation_space + Box(-3.4028234663852886e+38, 3.4028234663852886e+38, (4,), float32) + +.. autoattribute:: gymnasium.Env.observation_space + + The Space object corresponding to valid observations, all valid observations should be contained with the space. For example, if the observation space is of type :class:`Box` and the shape of the object is ``(4,)``, this denotes a valid observation will be an array of 4 numbers. We can check the box bounds as well with attributes. + + .. code:: + + >>> env.observation_space.high + array([4.8000002e+00, 3.4028235e+38, 4.1887903e-01, 3.4028235e+38], dtype=float32) + >>> env.observation_space.low + array([-4.8000002e+00, -3.4028235e+38, -4.1887903e-01, -3.4028235e+38], dtype=float32) + +.. autoattribute:: gymnasium.Env.metadata + + The metadata of the environment containing rendering modes, rendering fps, etc + +.. autoattribute:: gymnasium.Env.render_mode + + The render mode of the environment determined at initialisation + +.. autoattribute:: gymnasium.Env.reward_range + + A tuple corresponding to the minimum and maximum possible rewards for an agent over an episode. The default reward range is set to :math:`(-\infty,+\infty)`. + +.. autoattribute:: gymnasium.Env.spec + + The ``EnvSpec`` of the environment normally set during :py:meth:`gymnasium.make` +``` + +### Additional Methods + +```{eval-rst} +.. autofunction:: gymnasium.Env.close +.. autoproperty:: gymnasium.Env.unwrapped +.. 
autoproperty:: gymnasium.Env.np_random +``` + +### Implementing environments + +```{eval-rst} +.. py:currentmodule:: gymnasium + +When implementing an environment, the :meth:`Env.reset` and :meth:`Env.step` functions must be created describing the +dynamics of the environment. +For more information, see the environment creation tutorial. +``` diff --git a/docs/api/registry.md b/docs/api/registry.md new file mode 100644 index 000000000..b0f39941b --- /dev/null +++ b/docs/api/registry.md @@ -0,0 +1,31 @@ +--- +title: Registry +--- + +# Registry + +Gymnasium allows users to automatically load environments, pre-wrapped with several important wrappers. +Environments can also be created through Python imports. + +## Make + +```{eval-rst} +.. autofunction:: gymnasium.make +``` + +## Register + +```{eval-rst} +.. autofunction:: gymnasium.register +``` + +## All registered environments + +To find all the registered Gymnasium environments, use `gymnasium.envs.registry.keys()`. +This will not include environments registered only in OpenAI Gym; however, these can still be loaded by `gymnasium.make`. + +## Spec + +```{eval-rst} +.. autofunction:: gymnasium.spec +``` diff --git a/docs/api/spaces.md b/docs/api/spaces.md index ee258b05c..4a0ecb391 100644 --- a/docs/api/spaces.md +++ b/docs/api/spaces.md @@ -1,3 +1,8 @@ +--- +title: Spaces +--- + + # Spaces ```{toctree} @@ -5,37 +10,38 @@ spaces/fundamental spaces/composite spaces/utils +spaces/vector_utils ``` - ```{eval-rst} .. autoclass:: gymnasium.spaces.Space ``` -## General Functions +## Attributes + +```{eval-rst} +.. autoproperty:: gymnasium.spaces.space.Space.shape +.. property:: Space.dtype + + Return the data type of this space. +``` + +## Methods Each space implements the following functions: ```{eval-rst} -.. autofunction:: gymnasium.spaces.Space.sample - -.. autofunction:: gymnasium.spaces.Space.contains - -.. autoproperty:: gymnasium.spaces.Space.shape - -.. property:: gymnasium.spaces.Space.dtype - - Return the data type of this space. - -.. autofunction:: gymnasium.spaces.Space.seed - -.. autofunction:: gymnasium.spaces.Space.to_jsonable - -.. autofunction:: gymnasium.spaces.Space.from_jsonable +.. autofunction:: gymnasium.spaces.space.Space.sample +.. autofunction:: gymnasium.spaces.space.Space.contains +.. autofunction:: gymnasium.spaces.space.Space.seed +.. autofunction:: gymnasium.spaces.space.Space.to_jsonable +.. autofunction:: gymnasium.spaces.space.Space.from_jsonable ``` ## Fundamental Spaces +Gymnasium has a number of fundamental spaces that are used as building blocks for more complex spaces. + ```{eval-rst} .. currentmodule:: gymnasium.spaces @@ -48,6 +54,8 @@ Each space implements the following functions: ## Composite Spaces +Often environment spaces require joining fundamental spaces together for vectorised environments, separate agents or readability of the space. + ```{eval-rst} * :py:class:`Dict` - Supports a dictionary of keys and subspaces, used for a fixed number of unordered spaces * :py:class:`Tuple` - Supports a tuple of subspaces, used for a fixed number of ordered spaces @@ -57,9 +65,29 @@ Each space implements the following functions: ## Utils +Gymnasium contains a number of helpful utility functions for flattening and unflattening spaces. +This can be important for passing information to neural networks.
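As a rough, hedged sketch of how the helpers listed below fit together (the particular `Dict` space is an arbitrary illustration, not something the API requires):

```python
from gymnasium import spaces
from gymnasium.spaces.utils import flatdim, flatten, unflatten

# An arbitrary composite space: a discrete choice plus a 2-D continuous vector.
space = spaces.Dict(
    {"choice": spaces.Discrete(3), "velocity": spaces.Box(-1.0, 1.0, shape=(2,))}
)

sample = space.sample()
flat = flatten(space, sample)            # 1-D numpy array: one-hot(3) + 2 floats
assert flat.shape == (flatdim(space),)   # flatdim(space) == 5 for this example
assert unflatten(space, flat) in space   # unflatten round-trips back into the space
```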
+ ```{eval-rst} * :py:class:`utils.flatdim` - The number of dimensions the flattened space will contain * :py:class:`utils.flatten_space` - Flattens a space for which the `flattened` space instances will contain * :py:class:`utils.flatten` - Flattens an instance of a space that is contained within the flattened version of the space * :py:class:`utils.unflatten` - The reverse of the `flatten_space` function ``` + +## Vector Utils + +When vectorizing environments, it is necessary to modify the observation and action spaces for new batched spaces sizes. +Therefore, Gymnasium provides a number of additional functions used when using a space with a Vector environment. + +```{eval-rst} +.. currentmodule:: gymnasium + +* :py:class:`vector.utils.batch_space` +* :py:class:`vector.utils.concatenate` +* :py:class:`vector.utils.iterate` +* :py:class:`vector.utils.create_empty_array` +* :py:class:`vector.utils.create_shared_memory` +* :py:class:`vector.utils.read_from_shared_memory` +* :py:class:`vector.utils.write_to_shared_memory` +``` diff --git a/docs/api/spaces/composite.md b/docs/api/spaces/composite.md index b93d0b722..618d4e061 100644 --- a/docs/api/spaces/composite.md +++ b/docs/api/spaces/composite.md @@ -5,7 +5,8 @@ ```{eval-rst} .. autoclass:: gymnasium.spaces.Dict - .. automethod:: sample +.. automethod:: gymnasium.spaces.Dict.sample +.. automethod:: gymnasium.spaces.Dict.seed ``` ## Tuple @@ -13,7 +14,8 @@ ```{eval-rst} .. autoclass:: gymnasium.spaces.Tuple - .. automethod:: sample +.. automethod:: gymnasium.spaces.Tuple.sample +.. automethod:: gymnasium.spaces.Tuple.seed ``` ## Sequence @@ -21,7 +23,8 @@ ```{eval-rst} .. autoclass:: gymnasium.spaces.Sequence - .. automethod:: sample +.. automethod:: gymnasium.spaces.Sequence.sample +.. automethod:: gymnasium.spaces.Sequence.seed ``` ## Graph @@ -29,5 +32,6 @@ ```{eval-rst} .. autoclass:: gymnasium.spaces.Graph - .. automethod:: sample +.. automethod:: gymnasium.spaces.Graph.sample +.. automethod:: gymnasium.spaces.Graph.seed ``` diff --git a/docs/api/spaces/fundamental.md b/docs/api/spaces/fundamental.md index 4eab2310c..d83f43854 100644 --- a/docs/api/spaces/fundamental.md +++ b/docs/api/spaces/fundamental.md @@ -9,24 +9,25 @@ title: Fundamental Spaces ```{eval-rst} .. autoclass:: gymnasium.spaces.Box - .. automethod:: is_bounded - .. automethod:: sample +.. automethod:: gymnasium.spaces.Box.sample +.. automethod:: gymnasium.spaces.Box.seed +.. automethod:: gymnasium.spaces.Box.is_bounded ``` ## Discrete ```{eval-rst} .. autoclass:: gymnasium.spaces.Discrete - - .. automethod:: sample +.. automethod:: gymnasium.spaces.Discrete.sample +.. automethod:: gymnasium.spaces.Discrete.seed ``` ## MultiBinary ```{eval-rst} .. autoclass:: gymnasium.spaces.MultiBinary - - .. automethod:: sample +.. automethod:: gymnasium.spaces.MultiBinary.sample +.. automethod:: gymnasium.spaces.MultiBinary.seed ``` ## MultiDiscrete @@ -34,7 +35,8 @@ title: Fundamental Spaces ```{eval-rst} .. autoclass:: gymnasium.spaces.MultiDiscrete - .. automethod:: sample +.. automethod:: gymnasium.spaces.MultiDiscrete.sample +.. automethod:: gymnasium.spaces.MultiDiscrete.seed ``` ## Text @@ -42,5 +44,6 @@ title: Fundamental Spaces ```{eval-rst} .. autoclass:: gymnasium.spaces.Text - .. automethod:: sample +.. automethod:: gymnasium.spaces.Text.sample +.. 
automethod:: gymnasium.spaces.Text.seed ``` \ No newline at end of file diff --git a/docs/api/spaces/utils.md b/docs/api/spaces/utils.md index 8315a9a54..e26ecc636 100644 --- a/docs/api/spaces/utils.md +++ b/docs/api/spaces/utils.md @@ -6,10 +6,7 @@ title: Utils ```{eval-rst} .. autofunction:: gymnasium.spaces.utils.flatdim - .. autofunction:: gymnasium.spaces.utils.flatten_space - .. autofunction:: gymnasium.spaces.utils.flatten - .. autofunction:: gymnasium.spaces.utils.unflatten ``` \ No newline at end of file diff --git a/docs/api/spaces/vector_utils.md b/docs/api/spaces/vector_utils.md new file mode 100644 index 000000000..9f448447f --- /dev/null +++ b/docs/api/spaces/vector_utils.md @@ -0,0 +1,20 @@ +--- +title: Vector Utils +--- + +# Spaces Vector Utils + +```{eval-rst} +.. autofunction:: gymnasium.vector.utils.batch_space +.. autofunction:: gymnasium.vector.utils.concatenate +.. autofunction:: gymnasium.vector.utils.iterate +``` + +## Shared Memory Utils + +```{eval-rst} +.. autofunction:: gymnasium.vector.utils.create_empty_array +.. autofunction:: gymnasium.vector.utils.create_shared_memory +.. autofunction:: gymnasium.vector.utils.read_from_shared_memory +.. autofunction:: gymnasium.vector.utils.write_to_shared_memory +``` diff --git a/docs/api/utils.md b/docs/api/utils.md index 9fd55430e..7d1becfb6 100644 --- a/docs/api/utils.md +++ b/docs/api/utils.md @@ -7,32 +7,29 @@ title: Utils ## Visualization ```{eval-rst} -.. autoclass:: gymnasium.utils.play.PlayableGame - - .. automethod:: process_event - +.. autofunction:: gymnasium.utils.play.play .. autoclass:: gymnasium.utils.play.PlayPlot .. automethod:: callback -.. autofunction:: gymnasium.utils.play.display_arr -.. autofunction:: gymnasium.utils.play.play - +.. autoclass:: gymnasium.utils.play.PlayableGame + + .. automethod:: process_event ``` ## Save Rendering Videos ```{eval-rst} -.. autofunction:: gymnasium.utils.save_video.capped_cubic_video_schedule .. autofunction:: gymnasium.utils.save_video.save_video +.. autofunction:: gymnasium.utils.save_video.capped_cubic_video_schedule ``` ## Old to New Step API Compatibility ```{eval-rst} +.. autofunction:: gymnasium.utils.step_api_compatibility.step_api_compatibility .. autofunction:: gymnasium.utils.step_api_compatibility.convert_to_terminated_truncated_step_api .. autofunction:: gymnasium.utils.step_api_compatibility.convert_to_done_step_api -.. autofunction:: gymnasium.utils.step_api_compatibility.step_api_compatibility ``` ## Seeding @@ -43,16 +40,6 @@ title: Utils ## Environment Checking -### Invasive - ```{eval-rst} .. autofunction:: gymnasium.utils.env_checker.check_env -.. autofunction:: gymnasium.utils.env_checker.data_equivalence -.. autofunction:: gymnasium.utils.env_checker.check_reset_seed -.. autofunction:: gymnasium.utils.env_checker.check_reset_options -.. autofunction:: gymnasium.utils.env_checker.check_reset_return_info_deprecation -.. autofunction:: gymnasium.utils.env_checker.check_seed_deprecation -.. autofunction:: gymnasium.utils.env_checker.check_reset_return_type -.. autofunction:: gymnasium.utils.env_checker.check_space_limit ``` - diff --git a/docs/api/vector.md b/docs/api/vector.md index 3e53de43e..88569f65e 100644 --- a/docs/api/vector.md +++ b/docs/api/vector.md @@ -4,15 +4,26 @@ title: Vector # Vector -```{eval-rst} -.. autofunction:: gymnasium.vector.make -``` - - -## VectorEnv +## Gymnasium.vector.VectorEnv ```{eval-rst} -.. attribute:: gymnasium.vector.VectorEnv.action_space +.. 
autoclass:: gymnasium.vector.VectorEnv +``` + +### Methods + +```{eval-rst} +.. automethod:: gymnasium.vector.VectorEnv.reset + +.. automethod:: gymnasium.vector.VectorEnv.step + +.. automethod:: gymnasium.vector.VectorEnv.close +``` + +### Attributes + +```{eval-rst} +.. attribute:: action_space The (batched) action space. The input actions of `step` must be valid elements of `action_space`.:: @@ -20,7 +31,7 @@ title: Vector >>> envs.action_space MultiDiscrete([2 2 2]) -.. attribute:: gymnasium.vector.VectorEnv.observation_space +.. attribute:: observation_space The (batched) observation space. The observations returned by `reset` and `step` are valid elements of `observation_space`.:: @@ -28,7 +39,7 @@ title: Vector >>> envs.observation_space Box([[-4.8 ...]], [[4.8 ...]], (3, 4), float32) -.. attribute:: gymnasium.vector.VectorEnv.single_action_space +.. attribute:: single_action_space The action space of an environment copy.:: @@ -36,55 +47,29 @@ title: Vector >>> envs.single_action_space Discrete(2) -.. attribute:: gymnasium.vector.VectorEnv.single_observation_space +.. attribute:: single_observation_space The observation space of an environment copy.:: >>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3) >>> envs.single_action_space Box([-4.8 ...], [4.8 ...], (4,), float32) -``` +``` - - -### Reset +## Making Vector Environments ```{eval-rst} -.. automethod:: gymnasium.vector.VectorEnv.reset -``` - -```python ->>> import gymnasium as gym ->>> envs = gym.vector.make("CartPole-v1", num_envs=3) ->>> envs.reset() -(array([[-0.02240574, -0.03439831, -0.03904812, 0.02810693], - [ 0.01586068, 0.01929009, 0.02394426, 0.04016077], - [-0.01314174, 0.03893502, -0.02400815, 0.0038326 ]], - dtype=float32), {}) +.. autofunction:: gymnasium.vector.make ``` -### Step + +## Async Vector Env ```{eval-rst} -.. automethod:: gymnasium.vector.VectorEnv.step -``` - -```python ->>> envs = gym.vector.make("CartPole-v1", num_envs=3) ->>> envs.reset() ->>> actions = np.array([1, 0, 1]) ->>> observations, rewards, termination, truncation, infos = envs.step(actions) - ->>> observations -array([[ 0.00122802, 0.16228443, 0.02521779, -0.23700266], - [ 0.00788269, -0.17490888, 0.03393489, 0.31735462], - [ 0.04918966, 0.19421194, 0.02938497, -0.29495203]], - dtype=float32) ->>> rewards -array([1., 1., 1.]) ->>> termination -array([False, False, False]) ->>> termination -array([False, False, False]) ->>> infos -{} +.. autoclass:: gymnasium.vector.AsyncVectorEnv +``` + +## Sync Vector Env + +```{eval-rst} +.. autoclass:: gymnasium.vector.SyncVectorEnv ``` diff --git a/docs/api/wrappers.md b/docs/api/wrappers.md index 9a7044b47..a1ef35a16 100644 --- a/docs/api/wrappers.md +++ b/docs/api/wrappers.md @@ -1,196 +1,136 @@ --- -title: Wrappers -lastpage: +title: Wrapper --- # Wrappers -Wrappers are a convenient way to modify an existing environment without having to alter the underlying code directly. -Using wrappers will allow you to avoid a lot of boilerplate code and make your environment more modular. Wrappers can -also be chained to combine their effects. Most environments that are generated via `gymnasium.make` will already be wrapped by default. - -In order to wrap an environment, you must first initialize a base environment. Then you can pass this environment along -with (possibly optional) parameters to the wrapper's constructor: -```python ->>> import gymnasium as gym ->>> from gymnasium.wrappers import RescaleAction ->>> base_env = gym.make("BipedalWalker-v3") ->>> base_env.action_space -Box([-1. -1. -1. 
-1.], [1. 1. 1. 1.], (4,), float32) ->>> wrapped_env = RescaleAction(base_env, min_action=0, max_action=1) ->>> wrapped_env.action_space -Box([0. 0. 0. 0.], [1. 1. 1. 1.], (4,), float32) -``` -You can access the environment underneath the **first** wrapper by using -the `.env` attribute: - -```python ->>> wrapped_env ->>>> ->>> wrapped_env.env ->>> +```{toctree} +:hidden: +wrappers/misc_wrappers +wrappers/action_wrappers +wrappers/observation_wrappers +wrappers/reward_wrappers ``` -If you want to get to the environment underneath **all** of the layers of wrappers, -you can use the `.unwrapped` attribute. -If the environment is already a bare environment, the `.unwrapped` attribute will just return itself. +## gymnasium.Wrapper -```python ->>> wrapped_env ->>>> ->>> wrapped_env.unwrapped - +```{eval-rst} +.. autoclass:: gymnasium.Wrapper ``` -There are three common things you might want a wrapper to do: +### Methods -- Transform actions before applying them to the base environment -- Transform observations that are returned by the base environment -- Transform rewards that are returned by the base environment +```{eval-rst} +.. autofunction:: gymnasium.Wrapper.step +.. autofunction:: gymnasium.Wrapper.reset +.. autofunction:: gymnasium.Wrapper.close +``` -Such wrappers can be easily implemented by inheriting from `ActionWrapper`, `ObservationWrapper`, or `RewardWrapper` and implementing the -respective transformation. If you need a wrapper to do more complicated tasks, you can inherit from the `Wrapper` class directly. -The code that is presented in the following sections can also be found in -the [gym-examples](https://github.com/Farama-Foundation/gym-examples) repository +### Attributes -## ActionWrapper -If you would like to apply a function to the action before passing it to the base environment, -you can simply inherit from `ActionWrapper` and overwrite the method `action` to implement that transformation. -The transformation defined in that method must take values in the base environment's action space. -However, its domain might differ from the original action space. In that case, you need to specify the new -action space of the wrapper by setting `self.action_space` in the `__init__` method of your wrapper. +```{eval-rst} +.. autoproperty:: gymnasium.Wrapper.action_space +.. autoproperty:: gymnasium.Wrapper.observation_space +.. autoproperty:: gymnasium.Wrapper.reward_range +.. autoproperty:: gymnasium.Wrapper.spec +.. autoproperty:: gymnasium.Wrapper.metadata +.. autoproperty:: gymnasium.Wrapper.np_random +.. autoproperty:: gymnasium.Wrapper.unwrapped +``` -Let's say you have an environment with action space of type `Box`, but you would -only like to use a finite subset of actions. Then, you might want to implement the following wrapper +## Gymnasium Wrappers -```python -class DiscreteActions(gym.ActionWrapper): - def __init__(self, env, disc_to_cont): - super().__init__(env) - self.disc_to_cont = disc_to_cont - self.action_space = Discrete(len(disc_to_cont)) +Gymnasium provides a number of commonly used wrappers listed below. More information can be found on the particular +wrapper in the page on the wrapper type + +```{eval-rst} +.. py:currentmodule:: gymnasium.wrappers + +.. 
list-table:: + :header-rows: 1 - def action(self, act): - return self.disc_to_cont[act] - -if __name__ == "__main__": - env = gym.make("LunarLanderContinuous-v2") - wrapped_env = DiscreteActions(env, [np.array([1,0]), np.array([-1,0]), - np.array([0,1]), np.array([0,-1])]) - print(wrapped_env.action_space) #Discrete(4) + * - Name + - Type + - Description + * - :class:`AtariPreprocessing` + - Misc Wrapper + - Implements the common preprocessing applied tp Atari environments + * - :class:`AutoResetWrapper` + - Misc Wrapper + - The wrapped environment will automatically reset when the terminated or truncated state is reached. + * - :class:`ClipAction` + - Action Wrapper + - Clip the continuous action to the valid bound specified by the environment's `action_space` + * - :class:`EnvCompatibility` + - Misc Wrapper + - Provides compatibility for environments written in the OpenAI Gym v0.21 API to look like Gymnasium environments + * - :class:`FilterObservation` + - Observation Wrapper + - Filters a dictionary observation spaces to only requested keys + * - :class:`FlattenObservation` + - Observation Wrapper + - An Observation wrapper that flattens the observation + * - :class:`FrameStack` + - Observation Wrapper + - AnObservation wrapper that stacks the observations in a rolling manner. + * - :class:`GrayScaleObservation` + - Observation Wrapper + - Convert the image observation from RGB to gray scale. + * - :class:`HumanRendering` + - Misc Wrapper + - Allows human like rendering for environments that support "rgb_array" rendering + * - :class:`NormalizeObservation` + - Observation Wrapper + - This wrapper will normalize observations s.t. each coordinate is centered with unit variance. + * - :class:`NormalizeReward` + - Reward Wrapper + - This wrapper will normalize immediate rewards s.t. their exponential moving average has a fixed variance. + * - :class:`OrderEnforcing` + - Misc Wrapper + - This will produce an error if `step` or `render` is called before `reset` + * - :class:`PixelObservationWrapper` + - Observation Wrapper + - Augment observations by pixel values obtained via `render` that can be added to or replaces the environments observation. + * - :class:`RecordEpisodeStatistics` + - Misc Wrapper + - This will keep track of cumulative rewards and episode lengths returning them at the end. + * - :class:`RecordVideo` + - Misc Wrapper + - This wrapper will record videos of rollouts. + * - :class:`RenderCollection` + - Misc Wrapper + - Enable list versions of render modes, i.e. "rgb_array_list" for "rgb_array" such that the rendering for each step are saved in a list until `render` is called. + * - :class:`RescaleAction` + - Action Wrapper + - Rescales the continuous action space of the environment to a range \[`min_action`, `max_action`], where `min_action` and `max_action` are numpy arrays or floats. + * - :class:`ResizeObservation` + - Observation Wrapper + - This wrapper works on environments with image observations (or more generally observations of shape AxBxC) and resizes the observation to the shape given by the tuple `shape`. + * - :class:`StepAPICompatibility` + - Misc Wrapper + - Modifies an environment step function from (old) done to the (new) termination / truncation API. + * - :class:`TimeAwareObservation` + - Observation Wrapper + - Augment the observation with current time step in the trajectory (by appending it to the observation). 
+ * - :class:`TimeLimit` + - Misc Wrapper + - This wrapper will emit a truncated signal if the specified number of steps is exceeded in an episode. + * - :class:`TransformObservation` + - Observation Wrapper + - This wrapper will apply function to observations + * - :class:`TransformReward` + - Reward Wrapper + - This wrapper will apply function to rewards + * - :class:`VectorListInfo` + - Misc Wrapper + - This wrapper will convert the info of a vectorized environment from the `dict` format to a `list` of dictionaries where the i-th dictionary contains info of the i-th environment. ``` -Among others, Gymnasium provides the action wrappers `ClipAction` and `RescaleAction`. - -## ObservationWrapper -If you would like to apply a function to the observation that is returned by the base environment before passing -it to learning code, you can simply inherit from `ObservationWrapper` and overwrite the method `observation` to -implement that transformation. The transformation defined in that method must be defined on the base environment's -observation space. However, it may take values in a different space. In that case, you need to specify the new -observation space of the wrapper by setting `self.observation_space` in the `__init__` method of your wrapper. - -For example, you might have a 2D navigation task where the environment returns dictionaries as observations with keys `"agent_position"` -and `"target_position"`. A common thing to do might be to throw away some degrees of freedom and only consider -the position of the target relative to the agent, i.e. `observation["target_position"] - observation["agent_position"]`. -For this, you could implement an observation wrapper like this: - -```python -class RelativePosition(gym.ObservationWrapper): - def __init__(self, env): - super().__init__(env) - self.observation_space = Box(shape=(2,), low=-np.inf, high=np.inf) - - def observation(self, obs): - return obs["target"] - obs["agent"] -``` - -Among others, Gymnasium provides the observation wrapper `TimeAwareObservation`, which adds information about the index of the timestep -to the observation. - -## RewardWrapper -If you would like to apply a function to the reward that is returned by the base environment before passing -it to learning code, you can simply inherit from `RewardWrapper` and overwrite the method `reward` to -implement that transformation. This transformation might change the reward range; to specify the reward range of -your wrapper, you can simply define `self.reward_range` in `__init__`. - -Let us look at an example: Sometimes (especially when we do not have control over the reward because it is intrinsic), we want to clip the reward -to a range to gain some numerical stability. To do that, we could, for instance, implement the following wrapper: - -```python -class ClipReward(gym.RewardWrapper): - def __init__(self, env, min_reward, max_reward): - super().__init__(env) - self.min_reward = min_reward - self.max_reward = max_reward - self.reward_range = (min_reward, max_reward) - - def reward(self, reward): - return np.clip(reward, self.min_reward, self.max_reward) -``` - -## AutoResetWrapper - -Some users may want a wrapper which will automatically reset its wrapped environment when its wrapped environment reaches the done state. An advantage of this environment is that it will never produce undefined behavior as standard gymnasium environments do when stepping beyond the done state. 
- -When calling step causes `self.env.step()` to return `(terminated or truncated)=True`, -`self.env.reset()` is called, -and the return format of `self.step()` is as follows: - -```python -new_obs, final_reward, final_terminated, final_truncated, info -``` - -`new_obs` is the first observation after calling `self.env.reset()`, - -`final_reward` is the reward after calling `self.env.step()`, -prior to calling `self.env.reset()` - -The expression `(final_terminated or final_truncated)` is always `True` - -`info` is a dict containing all the keys from the info dict returned by -the call to `self.env.reset()`, with additional keys `final_observation` -containing the observation returned by the last call to `self.env.step()` -and `final_info` containing the info dict returned by the last call -to `self.env.step()`. - -If `(terminated or truncated)` is not true when `self.env.step()` is called, `self.step()` returns - -```python -obs, reward, terminated, truncated, info -``` -as normal. - - -The AutoResetWrapper is not applied by default when calling `gymnasium.make()`, but can be applied by setting the optional `autoreset` argument to `True`: - -```python - env = gym.make("CartPole-v1", autoreset=True) -``` - -The AutoResetWrapper can also be applied using its constructor: -```python - env = gym.make("CartPole-v1") - env = AutoResetWrapper(env) -``` - - -```{note} -When using the AutoResetWrapper to collect rollouts, note -that the when `self.env.step()` returns `done`, a -new observation from after calling `self.env.reset()` is returned -by `self.step()` alongside the terminal reward and done state from the -previous episode . If you need the terminal state from the previous -episode, you need to retrieve it via the the `final_observation` key -in the info dict. Make sure you know what you're doing if you -use this wrapper! -``` - - -## General Wrappers +## Implementing a custom wrapper Sometimes you might need to implement a wrapper that does some more complicated modifications (e.g. modify the reward based on data in `info` or change the rendering behavior). -Such wrappers can be implemented by inheriting from `Wrapper`. +Such wrappers can be implemented by inheriting from Misc Wrapper. - You can set a new action or observation space by defining `self.action_space` or `self.observation_space` in `__init__`, respectively - You can set new metadata and reward range by defining `self.metadata` and `self.reward_range` in `__init__`, respectively @@ -204,6 +144,8 @@ initialization of the environment. However, *Reacher* does not allow you to do t of the reward are returned in `info`, so let us build a wrapper for Reacher that allows us to weight those terms: ```python +import gymnasium as gym + class ReacherRewardWrapper(gym.Wrapper): def __init__(self, env, reward_dist_weight, reward_ctrl_weight): super().__init__(env) @@ -221,29 +163,4 @@ class ReacherRewardWrapper(gym.Wrapper): ```{note} It is *not* sufficient to use a `RewardWrapper` in this case! 
-``` - -## Available Wrappers - -| Name | Type | Arguments | Description | -|---------------------------|--------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `AtariPreprocessing` | `gymnasium.Wrapper` | `env: gymnasium.Env`, `noop_max: int = 30`, `frame_skip: int = 4`, `screen_size: int = 84`, `terminal_on_life_loss: bool = False`, `grayscale_obs: bool = True`, `grayscale_newaxis: bool = False`, `scale_obs: bool = False` | Implements the best practices from Machado et al. (2018), "Revisiting the Arcade Learning Environment: Evaluation Protocols and Open Problems for General Agents" but will be deprecated soon. | -| `AutoResetWrapper` | `gymnasium.Wrapper` | `env` | The wrapped environment will automatically reset when the done state is reached. Make sure you read the documentation before using this wrapper! | -| `ClipAction` | `gymnasium.ActionWrapper` | `env` | Clip the continuous action to the valid bound specified by the environment's `action_space` | -| `FilterObservation` | `gymnasium.ObservationWrapper` | `env`, `filter_keys=None` | If you have an environment that returns dictionaries as observations, but you would like to only keep a subset of the entries, you can use this wrapper. `filter_keys` should be an iterable that contains the keys that are kept in the new observation. If it is `None`, all keys will be kept and the wrapper has no effect. | -| `FlattenObservation` | `gymnasium.ObservationWrapper` | `env` | Observation wrapper that flattens the observation | -| `FrameStack` | `gymnasium.ObservationWrapper` | `env`, `num_stack`, `lz4_compress=False` | Observation wrapper that stacks the observations in a rolling manner. For example, if the number of stacks is 4, then the returned observation contains the most recent 4 observations. Observations will be objects of type `LazyFrames`. This object can be cast to a numpy array via `np.asarray(obs)`. You can also access single frames or slices via the usual `__getitem__` syntax. If `lz4_compress` is set to true, the `LazyFrames` object will compress the frames internally (losslessly). The first observation (i.e. the one returned by `reset`) will consist of `num_stack` repitions of the first frame. | -| `GrayScaleObservation` | `gymnasium.ObservationWrapper` | `env`, `keep_dim=False` | Convert the image observation from RGB to gray scale. By default, the resulting observation will be 2-dimensional. 
If `keep_dim` is set to true, a singleton dimension will be added (i.e. the observations are of shape AxBx1). | -| `NormalizeReward` | `gymnasium.Wrapper` | `env`, `gamma=0.99`, `epsilon=1e-8` | This wrapper will normalize immediate rewards s.t. their exponential moving average has a fixed variance. `epsilon` is a stability parameter and `gamma` is the discount factor that is used in the exponential moving average. The exponential moving average will have variance `(1 - gamma)**2`. The scaling depends on past trajectories and rewards will not be scaled correctly if the wrapper was newly instantiated or the policy was changed recently. | -| `NormalizeObservation` | `gymnasium.Wrapper` | `env`, `epsilon=1e-8` | This wrapper will normalize observations s.t. each coordinate is centered with unit variance. The normalization depends on past trajectories and observations will not be normalized correctly if the wrapper was newly instantiated or the policy was changed recently. `epsilon` is a stability parameter that is used when scaling the observations. | -| `OrderEnforcing` | `gymnasium.Wrapper` | `env` | This will produce an error if `step` is called before an initial `reset` | -| `PixelObservationWrapper` | `gymnasium.ObservationWrapper` | `env`, `pixels_only=True`, `render_kwargs=None`, `pixel_keys=("pixels",)` | Augment observations by pixel values obtained via `render`. You can specify whether the original observations should be discarded entirely or be augmented by setting `pixels_only`. Also, you can provide keyword arguments for `render`. | -| `RecordEpisodeStatistics` | `gymnasium.Wrapper` | `env`, `deque_size=100` | This will keep track of cumulative rewards and episode lengths. At the end of an episode, the statistics of the episode will be added to `info`. Moreover, the rewards and episode lengths are stored in buffers that can be accessed via `wrapped_env.return_queue` and `wrapped_env.length_queue` respectively. The size of these buffers can be set via `deque_size`. | -| `RecordVideo` | `gymnasium.Wrapper` | `env`, `video_folder: str`, `episode_trigger: Callable[[int], bool] = None`, `step_trigger: Callable[[int], bool] = None`, `video_length: int = 0`, `name_prefix: str = "rl-video"` | This wrapper will record videos of rollouts. The results will be saved in the folder specified via `video_folder`. You can specify a prefix for the filenames via `name_prefix`. Usually, you only want to record the environment intermittently, say every hundreth episode. To allow this, you can pass `episode_trigger` or `step_trigger`. At most one of these should be passed. These functions will accept an episode index or step index, respectively. They should return a boolean that indicates whether a recording should be started at this point. If neither `episode_trigger`, nor `step_trigger` is passed, a default `episode_trigger` will be used. By default, the recording will be stopped once a done signal has been emitted by the environment. However, you can also create recordings of fixed length (possibly spanning several episodes) by passing a strictly positive value for `video_length`. | -| `RescaleAction` | `gymnasium.ActionWrapper` | `env`, `min_action`, `max_action` | Rescales the continuous action space of the environment to a range \[`min_action`, `max_action`], where `min_action` and `max_action` are numpy arrays or floats. 
| -| `ResizeObservation` | `gymnasium.ObservationWrapper` | `env`, `shape` | This wrapper works on environments with image observations (or more generally observations of shape AxBxC) and resizes the observation to the shape given by the tuple `shape`. The argument `shape` may also be an integer. In that case, the observation is scaled to a square of sidelength `shape` | -| `TimeAwareObservation` | `gymnasium.ObservationWrapper` | `env` | Augment the observation with current time step in the trajectory (by appending it to the observation). This can be useful to ensure that things stay Markov. Currently it only works with one-dimensional observation spaces. | -| `TimeLimit` | `gymnasium.Wrapper` | `env`, `max_episode_steps=None` | Probably the most useful wrapper in Gymnasium. This wrapper will emit a done signal if the specified number of steps is exceeded in an episode. In order to be able to distinguish termination and truncation, you need to check `info`. If it does not contain the key `"TimeLimit.truncated"`, the environment did not reach the timelimit. Otherwise, `info["TimeLimit.truncated"]` will be true if the episode was terminated because of the time limit. | -| `TransformObservation` | `gymnasium.ObservationWrapper` | `env`, `f` | This wrapper will apply `f` to observations | -| `TransformReward` | `gymnasium.RewardWrapper` | `env`, `f` | This wrapper will apply `f` to rewards | -| `VectorListInfo` | `gymnasium.Wrapper` | `env` | This wrapper will convert the info of a vectorized environment from the `dict` format to a `list` of dictionaries where the _i-th_ dictionary contains info of the _i-th_ environment. If using other wrappers that perform operation on info like `RecordEpisodeStatistics`, this need to be the outermost wrapper. | +``` \ No newline at end of file diff --git a/docs/api/wrappers/action_wrappers.md b/docs/api/wrappers/action_wrappers.md new file mode 100644 index 000000000..c8fa8d8ff --- /dev/null +++ b/docs/api/wrappers/action_wrappers.md @@ -0,0 +1,22 @@ +# Action Wrappers + +## Action Wrapper + +```{eval-rst} +.. autoclass:: gymnasium.ActionWrapper + + .. autofunction:: gymnasium.ActionWrapper.action +``` + +## Clip Action + +```{eval-rst} +.. autoclass:: gymnasium.wrappers.ClipAction +``` + +## Rescale Action + +```{eval-rst} +.. autoclass:: gymnasium.wrappers.RescaleAction +``` + diff --git a/docs/api/wrappers/misc_wrappers.md b/docs/api/wrappers/misc_wrappers.md new file mode 100644 index 000000000..065db00f9 --- /dev/null +++ b/docs/api/wrappers/misc_wrappers.md @@ -0,0 +1,68 @@ +# Misc Wrappers + +## Atari Preprocessing + +```{eval-rst} +.. autoclass:: gymnasium.wrappers.AtariPreprocessing +``` + +## Autoreset + +```{eval-rst} +.. autoclass:: gymnasium.wrappers.AutoResetWrapper +``` + +## Compatibility + +```{eval-rst} +.. autoclass:: gymnasium.wrappers.EnvCompatibility +.. autoclass:: gymnasium.wrappers.StepAPICompatibility +``` + +## Passive Environment Checker + +```{eval-rst} +.. autoclass:: gymnasium.wrappers.PassiveEnvChecker +``` + +## Human Rendering + +```{eval-rst} +.. autoclass:: gymnasium.wrappers.HumanRendering +``` + +## Order Enforcing + +```{eval-rst} +.. autoclass:: gymnasium.wrappers.OrderEnforcing +``` + +## Record Episode Statistics + +```{eval-rst} +.. autoclass:: gymnasium.wrappers.RecordEpisodeStatistics +``` + +## Record Video + +```{eval-rst} +.. autoclass:: gymnasium.wrappers.RecordVideo +``` + +## Render Collection + +```{eval-rst} +.. autoclass:: gymnasium.wrappers.RenderCollection +``` + +## Time Limit + +```{eval-rst} +.. 
autoclass:: gymnasium.wrappers.TimeLimit +``` + +## Vector List Info + +```{eval-rst} +.. autoclass:: gymnasium.wrappers.VectorListInfo +``` diff --git a/docs/api/wrappers/observation_wrappers.md b/docs/api/wrappers/observation_wrappers.md new file mode 100644 index 000000000..33bf97b2d --- /dev/null +++ b/docs/api/wrappers/observation_wrappers.md @@ -0,0 +1,62 @@ +# Observation Wrappers + +## Observation Wrapper + +```{eval-rst} +.. autoclass:: gymnasium.ObservationWrapper +.. autofunction:: gymnasium.ObservationWrapper.observation +``` + +## Transform Observation + +```{eval-rst} +.. autoclass:: gymnasium.wrappers.TransformObservation +``` + +## Filter Observation + +```{eval-rst} +.. autoclass:: gymnasium.wrappers.FilterObservation +``` + +## Flatten Observation + +```{eval-rst} +.. autoclass:: gymnasium.wrappers.FlattenObservation +``` + +## Framestack Observations + +```{eval-rst} +.. autoclass:: gymnasium.wrappers.FrameStack +``` + +## Gray Scale Observation + +```{eval-rst} +.. autoclass:: gymnasium.wrappers.GrayScaleObservation +``` + +## Normalize Observation + +```{eval-rst} +.. autoclass:: gymnasium.wrappers.NormalizeObservation +``` + +## Pixel Observation Wrapper + +```{eval-rst} +.. autoclass:: gymnasium.wrappers.PixelObservationWrapper +``` + +## Resize Observation + +```{eval-rst} +.. autoclass:: gymnasium.wrappers.ResizeObservation +``` + +## Time Aware Observation + +```{eval-rst} +.. autoclass:: gymnasium.wrappers.TimeAwareObservation +``` diff --git a/docs/api/wrappers/reward_wrappers.md b/docs/api/wrappers/reward_wrappers.md new file mode 100644 index 000000000..d590f892c --- /dev/null +++ b/docs/api/wrappers/reward_wrappers.md @@ -0,0 +1,22 @@ + +# Reward Wrappers + +## Reward Wrapper + +```{eval-rst} +.. autoclass:: gymnasium.RewardWrapper + + .. autofunction:: gymnasium.RewardWrapper.reward +``` + +## Transform Reward + +```{eval-rst} +.. autoclass:: gymnasium.wrappers.TransformReward +``` + +## Normalize Reward + +```{eval-rst} +.. autoclass:: gymnasium.wrappers.NormalizeReward +``` diff --git a/docs/content/basic_usage.md b/docs/content/basic_usage.md index d7ed4bed8..84e198a21 100644 --- a/docs/content/basic_usage.md +++ b/docs/content/basic_usage.md @@ -7,6 +7,7 @@ firstpage: # Basic Usage ## Initializing Environments + Initializing environments is very easy in Gymnasium and can be done via: ```python @@ -15,6 +16,7 @@ env = gym.make('CartPole-v1') ``` ## Interacting with the Environment + Gymnasium implements the classic "agent-environment loop": ```{image} /_static/diagrams/AE_loop.png @@ -84,6 +86,7 @@ It is possible for `terminated=True` and `truncated=True` to occur at the same t This is explained in detail in the `Handling Time Limits` section. #### Backward compatibility + Gym will retain support for the old API through compatibility wrappers. Users can toggle the old API through `make` by setting `apply_api_compatibility=True`. @@ -100,6 +103,7 @@ For more details see the wrappers section. ## Checking API-Conformity + If you have implemented a custom environment and would like to perform a sanity check to make sure that it conforms to the API, you can run: @@ -117,6 +121,7 @@ not check the `render` method. To change this behavior, you can pass `skip_rende been closed! ## Spaces + Spaces are usually used to specify the format of valid actions and observations. Every environment should have the attributes `action_space` and `observation_space`, both of which should be instances of classes that inherit from `Space`. 
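As a minimal, hedged sketch (`CartPole-v1` is just an illustrative choice), both attributes can be inspected and sampled directly:

```python
import gymnasium as gym

env = gym.make("CartPole-v1")
print(env.observation_space)  # Box([-4.8 ...], [4.8 ...], (4,), float32)
print(env.action_space)       # Discrete(2)

action = env.action_space.sample()        # a random valid action
assert env.action_space.contains(action)  # equivalent to `action in env.action_space`
```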
@@ -163,6 +168,7 @@ OrderedDict([('position', 0), ('velocity', 1)]) ``` ## Wrappers + Wrappers are a convenient way to modify an existing environment without having to alter the underlying code directly. Using wrappers will allow you to avoid a lot of boilerplate code and make your environment more modular. Wrappers can also be chained to combine their effects. Most environments that are generated via `gymnasium.make` will already be wrapped by default. @@ -210,6 +216,7 @@ If you have a wrapped environment, and you want to get the unwrapped environment ``` ## Playing within an environment + You can also play the environment using your keyboard using the `play` function in `gymnasium.utils.play`. ```python from gymnasium.utils.play import play diff --git a/docs/content/environment_creation.md b/docs/content/environment_creation.md index e97694a27..b6779f735 100644 --- a/docs/content/environment_creation.md +++ b/docs/content/environment_creation.md @@ -2,6 +2,7 @@ layout: "contents" title: Environment Creation --- + # Make your own custom environment This documentation overviews creating new environments and relevant useful wrappers, utilities and tests included in Gymnasium designed for the creation of new environments. @@ -59,6 +60,7 @@ where the blue dot is the agent and the red square represents the target. Let us look at the source code of `GridWorldEnv` piece by piece: ### Declaration and Initialization + Our custom environment will inherit from the abstract class `gymnasium.Env`. You shouldn't forget to add the `metadata` attribute to your class. There, you should specify the render-modes that are supported by your environment (e.g. `"human"`, `"rgb_array"`, `"ansi"`) and the framerate at which your environment should be rendered. Every environment should support `None` as render-mode; you don't need to add it in the metadata. @@ -71,6 +73,7 @@ We will choose to represent observations in the form of dictionaries with keys ` may look like ` {"agent": array([1, 0]), "target": array([0, 3])}`. Since we have 4 actions in our environment ("right", "up", "left", "down"), we will use `Discrete(4)` as an action space. Here is the declaration of `GridWorldEnv` and the implementation of `__init__`: + ```python import gymnasium as gym from gymnasium import spaces @@ -125,6 +128,7 @@ class GridWorldEnv(gym.Env): ``` ### Constructing Observations From Environment States + Since we will need to compute observations both in `reset` and `step`, it is often convenient to have a (private) method `_get_obs` that translates the environment's state into an observation. However, this is not mandatory and you may as well compute observations in `reset` and `step` separately: @@ -142,6 +146,7 @@ Oftentimes, info will also contain some data that is only available inside the ` terms). In that case, we would have to update the dictionary that is returned by `_get_info` in `step`. ### Reset + The `reset` method will be called to initiate a new episode. You may assume that the `step` method will not be called before `reset` has been called. Moreover, `reset` should be called whenever a done signal has been issued. Users may pass the `seed` keyword to `reset` to initialize any random number generator that is used by the environment @@ -180,6 +185,7 @@ and `_get_info` that we implemented earlier for that: ``` ### Step + The `step` method usually contains most of the logic of your environment. 
It accepts an `action`, computes the state of the environment after applying that action and returns the 4-tuple `(observation, reward, done, info)`. Once the new state of the environment has been computed, we can check whether it is a terminal state and we set `done` @@ -207,6 +213,7 @@ accordingly. Since we are using sparse binary rewards in `GridWorldEnv`, computi ``` ### Rendering + Here, we are using PyGame for rendering. A similar approach to rendering is used in many environments that are included with Gymnasium and you can use it as a skeleton for your own environments: @@ -279,6 +286,7 @@ with Gymnasium and you can use it as a skeleton for your own environments: ``` ### Close + The `close` method should close any open resources that were used by the environment. In many cases, you don't actually have to bother to implement this method. However, in our example `render_mode` may be `"human"` and we might need to close the window that has been opened: @@ -353,6 +361,7 @@ setup( ``` ## Creating Environment Instances + After you have installed your package locally with `pip install -e gym-examples`, you can create an instance of the environment via: ```python @@ -372,6 +381,7 @@ constructor yourself. Some may find this approach more pythonic and environments also perfectly fine (but remember to add wrappers as well!). ## Using Wrappers + Oftentimes, we want to use different variants of a custom environment, or we want to modify the behavior of an environment that is provided by Gymnasium or some other party. Wrappers allow us to do this without changing the environment implementation or adding any boilerplate code. diff --git a/docs/content/handling_timelimits.md b/docs/content/handling_timelimits.md index ea392b12c..bd0d716d0 100644 --- a/docs/content/handling_timelimits.md +++ b/docs/content/handling_timelimits.md @@ -1,27 +1,45 @@ # Handling Time Limits + In using Gymnasium environments with reinforcement learning code, a common problem observed is how time limits are incorrectly handled. The `done` signal received (in previous versions of OpenAI Gym < 0.26) from `env.step` indicated whether an episode has ended. However, this signal did not distinguish whether the episode ended due to `termination` or `truncation`. -### Termination -Termination refers to the episode ending after reaching a terminal state that is defined as part of the environment definition. Examples are - task success, task failure, robot falling down etc. Notably, this also includes episodes ending in finite-horizon environments due to a time-limit inherent to the environment. Note that to preserve Markov property, a representation of the remaining time must be present in the agent's observation in finite-horizon environments. [(Reference)](https://arxiv.org/abs/1712.00378) +In using Gymnasium environments with reinforcement learning code, a common problem observed is how time limits are +incorrectly handled. The `done` signal received (in previous versions of gymnasium < 0.26) from `env.step` indicated +whether an episode has ended. However, this signal did not distinguish whether the episode ended due to `termination` or `truncation`. +## Termination -### Truncation -Truncation refers to the episode ending after an externally defined condition (that is outside the scope of the Markov Decision Process). This could be a time-limit, a robot going out of bounds etc. +Termination refers to the episode ending after reaching a terminal state that is defined as part of the environment +definition. 
Examples are - task success, task failure, robot falling down etc. Notably, this also includes episodes +ending in finite-horizon environments due to a time-limit inherent to the environment. Note that to preserve Markov +property, a representation of the remaining time must be present in the agent's observation in finite-horizon environments. +[(Reference)](https://arxiv.org/abs/1712.00378) -An infinite-horizon environment is an obvious example of where this is needed. We cannot wait forever for the episode to complete, so we set a practical time-limit after which we forcibly halt the episode. The last state in this case is not a terminal state since it has a non-zero transition probability of moving to another state as per the Markov Decision Process that defines the RL problem. This is also different from time-limits in finite horizon environments as the agent in this case has no idea about this time-limit. +## Truncation +Truncation refers to the episode ending after an externally defined condition (that is outside the scope of the Markov +Decision Process). This could be a time-limit, a robot going out of bounds etc. -### Importance in learning code +An infinite-horizon environment is an obvious example of where this is needed. We cannot wait forever for the episode +to complete, so we set a practical time-limit after which we forcibly halt the episode. The last state in this case is +not a terminal state since it has a non-zero transition probability of moving to another state as per the Markov +Decision Process that defines the RL problem. This is also different from time-limits in finite horizon environments +as the agent in this case has no idea about this time-limit. -Bootstrapping (using one or more estimated values of a variable to update estimates of the same variable) is a key aspect of Reinforcement Learning. A value function will tell you how much discounted reward you will get from a particular state if you follow a given policy. When an episode stops at any given point, by looking at the value of the final state, the agent is able to estimate how much discounted reward could have been obtained if the episode has continued. This is an example of handling truncation. +## Importance in learning code +Bootstrapping (using one or more estimated values of a variable to update estimates of the same variable) is a key +aspect of Reinforcement Learning. A value function will tell you how much discounted reward you will get from a +particular state if you follow a given policy. When an episode stops at any given point, by looking at the value of +the final state, the agent is able to estimate how much discounted reward could have been obtained if the episode has +continued. This is an example of handling truncation. More formally, a common example of bootstrapping in RL is updating the estimate of the Q-value function, ```math Q_{target}(o_t, a_t) = r_t + \gamma . \max_a(Q(o_{t+1}, a_{t+1})) ``` -In classical RL, the new `Q` estimate is a weighted average of the previous `Q` estimate and `Q_target` while in Deep Q-Learning, the error between `Q_target` and the previous `Q` estimate is minimized. +In classical RL, the new `Q` estimate is a weighted average of the previous `Q` estimate and `Q_target` while in Deep +Q-Learning, the error between `Q_target` and the previous `Q` estimate is minimized. 
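As an illustration only (not part of any specific algorithm), a tabular version of this bootstrapped update might look as follows, where `q_table`, the learning rate `alpha` and the discount factor `gamma` are assumed to be defined by the user:

```python
import numpy as np

def q_update(q_table, obs, action, reward, next_obs, alpha=0.1, gamma=0.99):
    # Bootstrapped target: immediate reward plus the discounted value of the best next action
    q_target = reward + gamma * np.max(q_table[next_obs])
    # Classical RL: the new estimate is a weighted average of the old estimate and the target
    q_table[obs, action] = (1 - alpha) * q_table[obs, action] + alpha * q_target
```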
However, at the terminal state, bootstrapping is not done, @@ -29,9 +47,11 @@ However, at the terminal state, bootstrapping is not done, Q_{target}(o_t, a_t) = r_t ``` -This is where the distinction between termination and truncation becomes important. When an episode ends due to termination we don't bootstrap, when it ends due to truncation, we bootstrap. +This is where the distinction between termination and truncation becomes important. When an episode ends due to +termination we don't bootstrap, when it ends due to truncation, we bootstrap. -While using gymnasium environments, the `done` signal (default for < v0.26) is frequently used to determine whether to bootstrap or not. However, this is incorrect since it does not differentiate between termination and truncation. +While using gymnasium environments, the `done` signal (default for < v0.26) is frequently used to determine whether to +bootstrap or not. However, this is incorrect since it does not differentiate between termination and truncation. A simple example of value functions is shown below. This is an illustrative example and not part of any specific algorithm. @@ -42,9 +62,11 @@ vf_target = rew + gamma * (1-done)* vf_next_state This is incorrect in the case of episode ending due to a truncation, where bootstrapping needs to happen but it doesn't. -### Solution +## Solution -From v0.26 onwards, Gymnasium's `env.step` API returns both termination and truncation information explicitly. In the previous version truncation information was supplied through the info key `TimeLimit.truncated`. The correct way to handle terminations and truncations now is, +From v0.26 onwards, Gymnasium's `env.step` API returns both termination and truncation information explicitly. +In the previous version truncation information was supplied through the info key `TimeLimit.truncated`. +The correct way to handle terminations and truncations now is, ```python # terminated = done and 'TimeLimit.truncated' not in info # This was needed in previous versions. diff --git a/docs/content/vectorising.md b/docs/content/vectorising.md index d402d2104..d2f66edb1 100644 --- a/docs/content/vectorising.md +++ b/docs/content/vectorising.md @@ -3,9 +3,10 @@ layout: "contents" title: Vectorising your environments --- -# Vectorising your environments +# Vectorizing your environments ## Vectorized Environments + *Vectorized environments* are environments that run multiple independent copies of the same environment in parallel using [multiprocessing](https://docs.python.org/3/library/multiprocessing.html). Vectorized environments take as input a batch of actions, and return a batch of observations. This is particularly useful, for example, when the policy is defined as a neural network that operates over a batch of observations. Gymnasium provides two types of vectorized environments: @@ -48,6 +49,7 @@ The function `gymnasium.vector.make` is meant to be used only in basic cases (e. ### Creating a vectorized environment + To create a vectorized environment that runs multiple environment copies, you can wrap your parallel environments inside `gymnasium.vector.SyncVectorEnv` (for sequential execution), or `gymnasium.vector.AsyncVectorEnv` (for parallel execution, with [multiprocessing](https://docs.python.org/3/library/multiprocessing.html)). These vectorized environments take as input a list of callables specifying how the copies are created. 
```python @@ -81,7 +83,9 @@ When using `AsyncVectorEnv` with either the ``spawn`` or ``forkserver`` start me if __name__ == "__main__": envs = gymnasium.vector.make("CartPole-v1", num_envs=3, context="spawn") ``` + ### Working with vectorized environments + While standard Gymnasium environments take a single action and return a single observation (with a reward, and boolean indicating termination), vectorized environments take a *batch of actions* as input, and return a *batch of observations*, together with an array of rewards and booleans indicating if the episode ended in each environment copy. @@ -192,8 +196,8 @@ If the _dtype_ of the returned info is whether `int`, `float`, `bool` or any _dt None], dtype=object), '_final_observation': array([False, True, False])} ``` - ## Observation & Action spaces + Like any Gymnasium environment, vectorized environments contain the two properties `VectorEnv.observation_space` and `VectorEnv.action_space` to specify the observation and action spaces of the environments. Since vectorized environments operate on multiple environment copies, where the actions taken and observations returned by all of the copies are batched together, the observation and action *spaces* are batched as well so that the input actions are valid elements of `VectorEnv.action_space`, and the observations are valid elements of `VectorEnv.observation_space`. ```python @@ -246,6 +250,7 @@ This is convenient, for example, if you instantiate a policy. In the following e ## Intermediate Usage ### Shared memory + `AsyncVectorEnv` runs each environment copy inside an individual process. At each call to `AsyncVectorEnv.reset` or `AsyncVectorEnv.step`, the observations of all of the parallel environments are sent back to the main process. To avoid expensive transfers of data between processes, especially with large observations (e.g. images), `AsyncVectorEnv` uses a shared memory by default (``shared_memory=True``) that processes can write to and read from at minimal cost. This can increase the throughput of the vectorized environment. ```python @@ -263,6 +268,7 @@ This is convenient, for example, if you instantiate a policy. In the following e ``` ### Exception handling + Because sometimes things may not go as planned, the exceptions raised in any given environment copy are re-raised in the vectorized environment, even when the copy runs in parallel with `AsyncVectorEnv`. This way, you can choose how to handle these exceptions yourself (with ``try ... except``). ```python @@ -291,6 +297,7 @@ ValueError: An error occurred. ## Advanced Usage ### Custom spaces + Vectorized environments will batch actions and observations if they are elements from standard Gymnasium spaces, such as `gymnasium.spaces.Box`, `gymnasium.spaces.Discrete`, or `gymnasium.spaces.Dict`. However, if you create your own environment with a custom action and/or observation space (inheriting from `gymnasium.Space`), the vectorized environment will not attempt to automatically batch the actions/observations, and instead, it will return the raw tuple of elements from all parallel environments. In the following example, we create a new environment `SMILESEnv`, whose observations are strings representing the [SMILES](https://en.wikipedia.org/wiki/Simplified_molecular-input_line-entry_system) notation of a molecular structure, with a custom observation space `SMILES`. The observations returned by the vectorized environment are contained in a tuple of strings. 
diff --git a/docs/environments/atari.md b/docs/environments/atari.md index f80f56d10..d81a46250 100644 --- a/docs/environments/atari.md +++ b/docs/environments/atari.md @@ -79,7 +79,7 @@ atari/zaxxon Atari environments are simulated via the Arcade Learning Environment (ALE) [[1]](#1). -### AutoROM (installing the ROMs) +## AutoROM (installing the ROMs) ALE-py doesn't include the atari ROMs (`pip install gymnasium[atari]`) which are necessary to make any of the atari environments. To install the atari ROM, use `pip install gymnasium[accept-rom-license]` which will install AutoROM and download the ROMs, install them in the default location. @@ -87,7 +87,7 @@ In doing so, you agree to TODO It is possible to install the ROMs in an alternative location, [repo](https://github.com/Farama-Foundation/AutoROM) has more information. -### Action Space +## Action Space The action space is a subset of the following discrete set of legal actions: @@ -120,17 +120,17 @@ The reduced action space of an Atari environment may depend on the "flavor" of t the arguments `difficulty` and `mode` when constructing the environment. This documentation only provides details on the action spaces of default flavor choices. -### Observation Space +## Observation Space The observation issued by an Atari environment may be: - the RGB image that is displayed to a human player, - a grayscale version of that image or - the state of the 128 Bytes of RAM of the console. -### Rewards +## Rewards The exact reward dynamics depend on the environment and are usually documented in the game's manual. You can find these manuals on [AtariAge](https://atariage.com/). -### Stochasticity +## Stochasticity It was pointed out in [[1]](#1) that Atari games are entirely deterministic. Thus, agents could achieve state-of-the-art performance by simply memorizing an optimal sequence of actions while completely ignoring observations from the environment. To avoid this, ALE implements sticky actions: Instead of always simulating the action passed to the environment, there is a small @@ -142,12 +142,10 @@ a tuple of two positive integers. If `frameskip` is an integer, frame skipping i repeated `frameskip` many times. Otherwise, if `frameskip` is a tuple, the number of skipped frames is chosen uniformly at random between `frameskip[0]` (inclusive) and `frameskip[1]` (exclusive) in each environment step. - -### Common Arguments +## Common Arguments When initializing Atari environments via `gymnasium.make`, you may pass some additional arguments. These work for any Atari environment. However, legal values for `mode` and `difficulty` depend on the environment. - - **mode**: `int`. Game mode, see [[2]](#2). Legal values depend on the environment and are listed in the table above. - **difficulty**: `int`. The difficulty of the game, see [[2]](#2). Legal values depend on the environment and are listed in @@ -171,8 +169,7 @@ action space will be reduced to a subset. > It is highly recommended to specify `render_mode` during construction instead of calling `env.render()`. > This will guarantee proper scaling, audio support, and proper framerates - -### Version History and Naming Schemes +## Version History and Naming Schemes All Atari games are available in three versions. They differ in the default settings of the arguments above. The differences are listed in the following table: @@ -206,7 +203,7 @@ are in the "ALE" namespace. The suffix "-ram" is still available. 
Thus, we get t | ALE/Amidar-v5 | `"rgb"` | `5` | `0.25` | `True` | | ALE/Amidar-ram-v5 | `"ram"` | `5` | `0.25` | `True` | -### Flavors +## Flavors Some games allow the user to set a difficulty level and a game mode. Different modes/difficulties may have different game dynamics and (if a reduced action space is used) different action spaces. We follow the convention of [[2]](#2) and refer to the combination of difficulty level and game mode as a flavor of a game. The following table shows @@ -279,7 +276,7 @@ the available modes and difficulty levels for different Atari games: > Each game also has a valid difficulty for the opposing AI, which has a different range depending on the game. These values can have a range of 0 - n, where n can be found at [the ALE documentation](https://github.com/mgbellemare/Arcade-Learning-Environment/blob/master/docs/games.md) -### References +## References (#1)= [1] diff --git a/docs/environments/atari/adventure.md b/docs/environments/atari/adventure.md index f6caca768..2ada0d993 100644 --- a/docs/environments/atari/adventure.md +++ b/docs/environments/atari/adventure.md @@ -20,18 +20,17 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/Adventure-v5")` | -### Description +## Description You must find the enchanted chalice and return it to the golden castle. You can pick up various objects (keys, a sword, a bridge, or a magnet) and have to fight or outmanoeuvre dragons. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=1). -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions will be available in the default flavor. - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -49,8 +48,7 @@ instead. The respective observation spaces are respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments via `gymnasium.make`. - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/Adventure-v5") @@ -69,7 +67,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Adventure-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/air_raid.md b/docs/environments/atari/air_raid.md index 32dd78c51..a760e604e 100644 --- a/docs/environments/atari/air_raid.md +++ b/docs/environments/atari/air_raid.md @@ -19,11 +19,11 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/AirRaid-v5")` | -### Description +## Description You control a ship that can move sideways. You must protect two buildings (one on the right and one on the left side of the screen) from flying saucers that are trying to drop bombs on them. -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. 
However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend @@ -39,8 +39,7 @@ flavor looks like this: | 4 | RIGHTFIRE | | 5 | LEFTFIRE | - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -57,8 +56,7 @@ instead. The respective observation spaces are respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments via `gymnasium.make`. - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/AirRaid-v5") @@ -77,7 +75,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("AirRaid-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/alien.md b/docs/environments/atari/alien.md index 26b8fcc7f..93bb666d3 100644 --- a/docs/environments/atari/alien.md +++ b/docs/environments/atari/alien.md @@ -19,19 +19,18 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/Alien-v5")` | -### Description +## Description You are stuck in a maze-like space ship with three aliens. You goal is to destroy their eggs that are scattered all over the ship while simultaneously avoiding the aliens (they are trying to kill you). You have a flamethrower that can help you turn them away in tricky situations. Moreover, you can occasionally collect a power-up (pulsar) that gives you the temporary ability to kill aliens. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=815). -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions will be available in the default flavor. - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -54,8 +53,7 @@ via `gymnasium.make`. You score points by destroying eggs, killing aliens, using pulsars, and collecting special prizes. When you are caught by an alien, you will lose one of your lives. The number of lives you have depends on the game flavor. For a table of scores corresponding to the different achievements, consult [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=815). - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/Alien-v5") @@ -75,7 +73,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Alien-v0")`. 
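As a minimal, illustrative sketch (the argument names follow the general article on Atari environments and the ALE-py package; the values shown are not the defaults and the legal values differ per game), the default flavor settings can be overridden by passing keyword arguments to `gymnasium.make`:

```
import gymnasium

# Illustrative values only; consult the general article on Atari environments
# for the arguments and legal values supported by each game.
env = gymnasium.make(
    "ALE/Alien-v5",
    obs_type="ram",                  # "rgb", "grayscale" or "ram"
    frameskip=4,                     # repeat each action for a fixed number of frames
    repeat_action_probability=0.25,  # sticky actions
    full_action_space=False,
)
```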
-### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/amidar.md b/docs/environments/atari/amidar.md index 150517593..ced565cac 100644 --- a/docs/environments/atari/amidar.md +++ b/docs/environments/atari/amidar.md @@ -19,12 +19,12 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/Amidar-v5")` | -### Description +## Description This game is similar to Pac-Man: You are trying to visit all places on a 2-dimensional grid while simultaneously avoiding your enemies. You can turn the tables at one point in the game: Your enemies turn into chickens and you can catch them. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=817). -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend @@ -44,8 +44,7 @@ flavor looks like this: | 7 | RIGHTFIRE | | 8 | LEFTFIRE | | 9 | DOWNFIRE | - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -67,8 +66,7 @@ via `gymnasium.make`. ### Rewards You score points by traversing new parts of the grid. Coloring an entire box in the maze or catching chickens gives extra points. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=817). - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/Amidar-v5") @@ -87,7 +85,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Amidar-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/assault.md b/docs/environments/atari/assault.md index c8263cf3b..f4ac1f901 100644 --- a/docs/environments/atari/assault.md +++ b/docs/environments/atari/assault.md @@ -18,11 +18,11 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/Assault-v5")` | -### Description +## Description You control a vehicle that can move sideways. A big mother ship circles overhead and continually deploys smaller drones. You must destroy these enemies and dodge their attacks. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=827). -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced number of actions (those that are meaningful in this game) are available. 
The reduced action space may depend @@ -38,8 +38,7 @@ flavor looks like this: | 4 | LEFT | | 5 | RIGHTFIRE | | 6 | LEFTFIRE | - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -56,8 +55,7 @@ instead. The respective observation spaces are respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments via `gymnasium.make`. - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/Assault-v5") @@ -75,7 +73,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Assault-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/asterix.md b/docs/environments/atari/asterix.md index be138b463..d289fc155 100644 --- a/docs/environments/atari/asterix.md +++ b/docs/environments/atari/asterix.md @@ -18,7 +18,7 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/Asterix-v5")` | -### Description +## Description You are Asterix and can move horizontally (continuously) and vertically (discretely). Objects move horizontally across the screen: lyres and other (more useful) objects. Your goal is to guide Asterix in such a way as to avoid lyres and collect as many other objects as possible. You score points by collecting @@ -26,7 +26,7 @@ objects and lose a life whenever you collect a lyre. You have three lives availa many points, you will be awarded additional points. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=3325). -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend @@ -44,8 +44,7 @@ flavor looks like this: | 6 | UPLEFT | | 7 | DOWNRIGHT | | 8 | DOWNLEFT | - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -65,8 +64,7 @@ via `gymnasium.make`. ### Rewards A table of scores awarded for collecting the different objects is provided on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=3325). - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/Asterix-v5") @@ -84,7 +82,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Asterix-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. 
diff --git a/docs/environments/atari/asteroids.md b/docs/environments/atari/asteroids.md index 0a32736a2..42315e192 100644 --- a/docs/environments/atari/asteroids.md +++ b/docs/environments/atari/asteroids.md @@ -18,13 +18,13 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/Asteroids-v5")` | -### Description +## Description This is a well-known arcade game: You control a spaceship in an asteroid field and must break up asteroids by shooting them. Once all asteroids are destroyed, you enter a new level and new asteroids will appear. You will occasionally be attacked by a flying saucer. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=828&itemTypeID=HTMLMANUAL) -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend @@ -49,8 +49,7 @@ flavor looks like this: | 13 | UPLEFTFIRE | - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -72,8 +71,7 @@ via `gymnasium.make`. You score points for destroying asteroids, satellites and UFOs. The smaller the asteroid, the more points you score for destroying it. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=828&itemTypeID=HTMLMANUAL). - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/Asteroids-v5") @@ -92,7 +90,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Asteroids-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/atlantis.md b/docs/environments/atari/atlantis.md index 9f821bc5e..40249465a 100644 --- a/docs/environments/atari/atlantis.md +++ b/docs/environments/atari/atlantis.md @@ -19,14 +19,14 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/Atlantis-v5")` | -### Description +## Description Your job is to defend the submerged city of Atlantis. Your enemies slowly descend towards the city and you must destroy them before they reach striking distance. To this end, you control three defense posts. You lose if your enemies manage to destroy all seven of Atlantis' installations. You may rebuild installations after you have fought of a wave of enemies and scored a sufficient number of points. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=835). -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced number of actions (those that are meaningful in this game) are available. 
The reduced action space may depend @@ -39,8 +39,7 @@ flavor looks like this: | 1 | FIRE | | 2 | RIGHTFIRE | | 3 | LEFTFIRE | - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -63,8 +62,7 @@ via `gymnasium.make`. You score points for destroying enemies, keeping installations protected during attack waves. You score more points if you manage to destroy your enemies with one of the outer defense posts. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=835). - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/Atlantis-v5") @@ -83,7 +81,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Amidar-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/bank_heist.md b/docs/environments/atari/bank_heist.md index bc998bae4..8d562face 100644 --- a/docs/environments/atari/bank_heist.md +++ b/docs/environments/atari/bank_heist.md @@ -18,7 +18,7 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/BankHeist-v5")` | -### Description +## Description You are a bank robber and (naturally) want to rob as many banks as possible. You control your getaway car and must navigate maze-like cities. The police chases you and will appear whenever you rob a bank. You may destroy police cars by dropping sticks of dynamite. You can fill up your gas tank by entering a new city. @@ -26,12 +26,11 @@ At the beginning of the game you have four lives. Lives are lost if you run out or run over the dynamite you have previously dropped. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=1008). -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions will be available in the default flavor. - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -54,8 +53,7 @@ via `gymnasium.make`. You score points for robbing banks and destroying police cars. If you rob nine or more banks, and then leave the city, you will score extra points. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=1008). - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/BankHeist-v5") @@ -74,7 +72,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("BankHeist-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. 
diff --git a/docs/environments/atari/battle_zone.md b/docs/environments/atari/battle_zone.md index 6cc291b5f..257a53e38 100644 --- a/docs/environments/atari/battle_zone.md +++ b/docs/environments/atari/battle_zone.md @@ -19,20 +19,19 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/BattleZone-v5")` | -### Description +## Description You control a tank and must destroy enemy vehicles. This game is played in a first-person perspective and creates a 3D illusion. A radar screen shows enemies around you. You start with 5 lives and gain up to 2 extra lives if you reach a sufficient score. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=859&itemTypeID=HTMLMANUAL) -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions will be available in the default flavor. - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -54,8 +53,7 @@ via `gymnasium.make`. You receive points for destroying enemies. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=859&itemTypeID=HTMLMANUAL). - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/BattleZone-v5") @@ -74,7 +72,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("BattleZone-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/beam_rider.md b/docs/environments/atari/beam_rider.md index aa7a5cf28..e7852a49d 100644 --- a/docs/environments/atari/beam_rider.md +++ b/docs/environments/atari/beam_rider.md @@ -25,12 +25,12 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/BeamRider-v5")` | -### Description +## Description You control a space-ship that travels forward at a constant speed. You can only steer it sideways between discrete positions. Your goal is to destroy enemy ships, avoid their attacks and dodge space debris. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_thumbs.php?SystemID=2600&SoftwareID=860&itemTypeID=MANUAL) -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend @@ -51,8 +51,7 @@ flavor looks like this: - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -73,8 +72,7 @@ via `gymnasium.make`. ### Rewards You score points for destroying enemies. 
For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_thumbs.php?SystemID=2600&SoftwareID=860&itemTypeID=MANUAL). - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/BeamRider-v5") @@ -93,7 +91,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("BeamRider-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/berzerk.md b/docs/environments/atari/berzerk.md index 75d4141c9..ddff3927c 100644 --- a/docs/environments/atari/berzerk.md +++ b/docs/environments/atari/berzerk.md @@ -18,19 +18,18 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/Berzerk-v5")` | -### Description +## Description You are stuck in a maze with evil robots. You must destroy them and avoid touching the walls of the maze, as this will kill you. You may be awarded extra lives after scoring a sufficient number of points, depending on the game mode. You may also be chased by an undefeatable enemy, Evil Otto, that you must avoid. Evil Otto does not appear in the default mode. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=866&itemTypeID=HTMLMANUAL) -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions will be available in the default flavor. - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -51,8 +50,7 @@ via `gymnasium.make`. ### Rewards You score points for destroying robots. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=866&itemTypeID=HTMLMANUAL). - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/Berzerk-v5") @@ -71,7 +69,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Berzerk-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/bowling.md b/docs/environments/atari/bowling.md index 64626dc10..8ed491a6f 100644 --- a/docs/environments/atari/bowling.md +++ b/docs/environments/atari/bowling.md @@ -18,13 +18,13 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/Bowling-v5")` | -### Description +## Description Your goal is to score as many points as possible in the game of Bowling. A game consists of 10 frames and you have two tries per frame. Knocking down all pins on the first try is called a "strike". Knocking down all pins on the second roll is called a "spar". Otherwise, the frame is called "open". 
Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=879) -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend @@ -41,8 +41,7 @@ flavor looks like this: | 5 | DOWNFIRE | - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -65,8 +64,7 @@ You receive points for knocking down pins. The exact score depends on whether yo frame. Moreover, the points you score for one frame may depend on following frames. You can score up to 300 points in one game (if you manage to do 12 strikes). For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=879). - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/Bowling-v5") @@ -85,7 +83,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Bowling-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/boxing.md b/docs/environments/atari/boxing.md index a41d705e2..f9871790a 100644 --- a/docs/environments/atari/boxing.md +++ b/docs/environments/atari/boxing.md @@ -18,17 +18,16 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/Boxing-v5")` | -### Description +## Description You fight an opponent in a boxing ring. You score points for hitting the opponent. If you score 100 points, your opponent is knocked out. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=882). -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions will be available in the default flavor. - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -50,8 +49,7 @@ via `gymnasium.make`. ### Rewards You score points by landing punches. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=882). - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/Boxing-v5") @@ -70,7 +68,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Boxing-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. 
diff --git a/docs/environments/atari/breakout.md b/docs/environments/atari/breakout.md index d4285a0c2..3f26799a9 100644 --- a/docs/environments/atari/breakout.md +++ b/docs/environments/atari/breakout.md @@ -18,13 +18,13 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/Breakout-v5")` | -### Description +## Description Another famous Atari game. The dynamics are similar to pong: You move a paddle and hit the ball in a brick wall at the top of the screen. Your goal is to destroy the brick wall. You can try to break through the wall and let the ball wreak havoc on the other side, all on its own! You have five lives. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=889). -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend @@ -37,8 +37,7 @@ flavor looks like this: | 1 | FIRE | | 2 | RIGHT | | 3 | LEFT | - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -60,8 +59,7 @@ via `gymnasium.make`. ### Rewards You score points by destroying bricks in the wall. The reward for destroying a brick depends on the color of the brick. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=889). - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/Breakout-v5") @@ -80,7 +78,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Breakout-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/carnival.md b/docs/environments/atari/carnival.md index 1f2f3a059..b3aadc509 100644 --- a/docs/environments/atari/carnival.md +++ b/docs/environments/atari/carnival.md @@ -18,13 +18,13 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/Carnival-v5")` | -### Description +## Description This is a "shoot 'em up" game. Targets move horizontally across the screen and you must shoot them. You are in control of a gun that can be moved horizontally. The supply of ammunition is limited and chickens may steal some bullets from you if you don't hit them in time. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=908). -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced number of actions (those that are meaningful in this game) are available. 
The reduced action space may depend @@ -40,8 +40,7 @@ flavor looks like this: | 3 | LEFT | | 4 | RIGHTFIRE | | 5 | LEFTFIRE | - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -64,8 +63,7 @@ via `gymnasium.make`. You score points by destroying targets. Points (or bullets) may be subtracted if you hit the target when it shows a minus sign. You will score extra points if it shows a plus sign! For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=908). - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/Carnival-v5") @@ -84,7 +82,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Carnival-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/centipede.md b/docs/environments/atari/centipede.md index 45e91629d..210e4f4c4 100644 --- a/docs/environments/atari/centipede.md +++ b/docs/environments/atari/centipede.md @@ -18,21 +18,20 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/Centipede-v5")` | -### Description +## Description You are an elf and must use your magic wands to fend off spiders, fleas and centipedes. Your goal is to protect mushrooms in an enchanted forest. If you are bitten by a spider, flea or centipede, you will be temporally paralyzed and you will lose a magic wand. The game ends once you have lost all wands. You may receive additional wands after scoring a sufficient number of points. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=911). -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions will be available in the default flavor. - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -55,8 +54,7 @@ You score points by hitting centipedes, scorpions, fleas and spiders. Additional (i.e. after you have lost a wand) for mushrooms that were not destroyed. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=911). - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/Centipede-v5") @@ -75,7 +73,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Centipede-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. 
diff --git a/docs/environments/atari/chopper_command.md b/docs/environments/atari/chopper_command.md index c0ebd6a11..143053a6b 100644 --- a/docs/environments/atari/chopper_command.md +++ b/docs/environments/atari/chopper_command.md @@ -18,18 +18,17 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/ChopperCommand-v5")` | -### Description +## Description You control a helicopter and must protect truck convoys. To that end, you need to shoot down enemy aircraft. A mini-map is displayed at the bottom of the screen. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=921). -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions will be available in the default flavor. - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -52,8 +51,7 @@ You score points by destroying planes and other helicopters. You score extra poi of trucks that have survived. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=921). - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/ChopperCommand-v5") @@ -72,7 +70,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("ChopperCommand-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/crazy_climber.md b/docs/environments/atari/crazy_climber.md index bf66cf509..293748f32 100644 --- a/docs/environments/atari/crazy_climber.md +++ b/docs/environments/atari/crazy_climber.md @@ -18,7 +18,7 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/CrazyClimber-v5")` | -### Description +## Description You are a climber trying to reach the top of four builidings, while avoiding obstacles like closing windows and falling objects. When you receive damage (windows closing or objects) you will fall and lose one life; you have a total of 5 lives before the end games. At the top of each building, there's @@ -28,7 +28,7 @@ possible while receiving the least amount of damage. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=113). -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend @@ -46,8 +46,7 @@ flavor looks like this: | 6 | UPLEFT | | 7 | DOWNRIGHT | | 8 | DOWNLEFT | - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. 
However, it is possible to observe - The 128 Bytes of RAM of the console @@ -67,8 +66,7 @@ via `gymnasium.make`. ### Rewards A table of scores awarded for completing each row of a building is provided on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=113). - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/CrazyClimber-v5") @@ -87,7 +85,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("CrazyClimber-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/defender.md b/docs/environments/atari/defender.md index 615f2a463..a3c50f686 100644 --- a/docs/environments/atari/defender.md +++ b/docs/environments/atari/defender.md @@ -19,7 +19,7 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/Defender-v5")` | -### Description +## Description Aliens attack the earth. You control a spaceship and must defend humanity by destroying alien ships and rescuing humanoids. You have three lives and three smart bombs. You lose a live when you are shot down by an alien spaceship. Points are scored by destroying enemies and retrieving humans that are being abducted. You have an unlimited number of @@ -27,13 +27,12 @@ laser missiles. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=128) -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions will be available in the default flavor. - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -55,8 +54,7 @@ via `gymnasium.make`. You receive points for destroying enemies, rescuing abducted humans and keeping humans alive. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=128). - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/Defender-v5") @@ -75,7 +73,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Defender-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/demon_attack.md b/docs/environments/atari/demon_attack.md index b1590d367..a6afadaa0 100644 --- a/docs/environments/atari/demon_attack.md +++ b/docs/environments/atari/demon_attack.md @@ -18,7 +18,7 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/DemonAttack-v5")` | -### Description +## Description You are facing waves of demons in the ice planet of Krybor. Points are accumulated by destroying demons. 
You begin with 3 reserve bunkers, and can increase its number (up to 6) by avoiding enemy attacks. Each attack wave you survive without any hits, grants you a new bunker. Every time an enemy @@ -27,7 +27,7 @@ the game ends. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=135). -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend @@ -45,8 +45,7 @@ flavor looks like this: | 6 | UPLEFT | | 7 | DOWNRIGHT | | 8 | DOWNLEFT | - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -68,8 +67,7 @@ via `gymnasium.make`. Each enemy you slay gives you points. The amount of points depends on the type of demon and which wave you are in. A detailed table of scores is provided on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=135). - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/DemonAttack-v5") @@ -88,7 +86,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("DemonAttack-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/double_dunk.md b/docs/environments/atari/double_dunk.md index d6666146b..6b5d3c10e 100644 --- a/docs/environments/atari/double_dunk.md +++ b/docs/environments/atari/double_dunk.md @@ -18,14 +18,14 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/DoubleDunk-v5")` | -### Description +## Description You are playing a 2v2 game of basketball. At the start of each possession, you select between a set of different plays and then execute them to either score or prevent your rivals from scoring. The game lasts a set amount of time or until one of the teams reaches a certain score Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=153). -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend @@ -43,8 +43,7 @@ flavor looks like this: | 6 | UPLEFT | | 7 | DOWNRIGHT | | 8 | DOWNLEFT | - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -66,8 +65,7 @@ via `gymnasium.make`. Scores follow the rules of basketball. You can get either 3 points, 2 points foul line) depending from where you shoot. After a defensive foul, a successful shot from the foul line gives you 1 point. 
- -### Arguments +## Arguments ``` env = gymnasium.make("ALE/DoubleDunk-v5") @@ -86,7 +84,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("DoubleDunk-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/elevator_action.md b/docs/environments/atari/elevator_action.md index d0b99b148..fc7d7d279 100644 --- a/docs/environments/atari/elevator_action.md +++ b/docs/environments/atari/elevator_action.md @@ -18,7 +18,7 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/ElevatorAction-v5")` | -### Description +## Description You are a secret agent that must retrieve some secret documents and reach the ground level of a building by going down an elevator/stairs. Once you reach the ground level, you are picked up and taken to the next level. You are equipped with a gun to defend yourself against enemy agents waiting @@ -28,7 +28,7 @@ marked with a red door, which contain the secret documents. This is an unreleased prototype based on the arcade game. Limited documentation can be found on [the AtariAge page](https://atariage.com/manual_thumbs.php?SoftwareLabelID=1131). -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend @@ -46,8 +46,7 @@ flavor looks like this: | 6 | UPLEFT | | 7 | DOWNRIGHT | | 8 | DOWNLEFT | - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -69,8 +68,7 @@ via `gymnasium.make`. You start with 4 lives and are awarded 100 points for each enemy shot, and 500 points for each secret document collected (visiting a red door). Each time you get shot you lose one life and the game ends when losing all lives. - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/ElevatorAction-v5") @@ -89,7 +87,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("ElevatorAction-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/enduro.md b/docs/environments/atari/enduro.md index e8f57b81c..05411851c 100644 --- a/docs/environments/atari/enduro.md +++ b/docs/environments/atari/enduro.md @@ -18,14 +18,14 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/Enduro-v5")` | -### Description +## Description You are a racer in the National Enduro, a long-distance endurance race. You must overtake a certain amount of cars each day to stay on the race. 
The first day you need to pass 200 cars, and 300 for each following day. The game ends if you do not meet your overtake quota for the day. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=163). -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend @@ -43,8 +43,7 @@ flavor looks like this: | 6 | UPLEFT | | 7 | DOWNRIGHT | | 8 | DOWNLEFT | - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -64,8 +63,7 @@ via `gymnasium.make`. ### Rewards You get 1 point for each vehicle you overtake. - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/Enduro-v5") @@ -84,7 +82,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Enduro-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/fishing_derby.md b/docs/environments/atari/fishing_derby.md index 3e8a31431..ecd34c0b0 100644 --- a/docs/environments/atari/fishing_derby.md +++ b/docs/environments/atari/fishing_derby.md @@ -18,7 +18,7 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/FishingDerby-v5")` | -### Description +## Description your objective is to catch more sunfish than your opponent. But it's not just between you and the other fisherman, as a big, black shark is lurking just below the surface, waiting to steal your catch! Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=182). ### Rewards @@ -58,8 +58,7 @@ legal space by passing the keyword argument `full_action_space=True` to `make`. The reduced action space of an Atari environment may depend on the flavor of the game. You can specify the flavor by providing the arguments `difficulty` and `mode` when constructing the environment. This documentation only provides details on the action spaces of default flavors. - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -76,8 +75,7 @@ instead. The respective observation spaces are respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments via `gymnasium.make`. - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/FishingDerby-v5") @@ -95,7 +93,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("FishingDerby-v0")`. 
-### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/freeway.md b/docs/environments/atari/freeway.md index 5ec4fe4fc..06ad3eae7 100644 --- a/docs/environments/atari/freeway.md +++ b/docs/environments/atari/freeway.md @@ -18,7 +18,7 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/Freeway-v5")` | -### Description +## Description your objective is to guide your chicken across lane after lane of busy rush hour traffic. You receive a point for every chicken that makes it to the top of the screen after crossing all the lanes of traffic. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_thumbs.php?SoftwareLabelID=192). ### Rewards @@ -58,8 +58,7 @@ legal space by passing the keyword argument `full_action_space=True` to `make`. The reduced action space of an Atari environment may depend on the flavor of the game. You can specify the flavor by providing the arguments `difficulty` and `mode` when constructing the environment. This documentation only provides details on the action spaces of default flavors. - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -76,8 +75,7 @@ instead. The respective observation spaces are respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments via `gymnasium.make`. - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/Freeway-v5") @@ -96,7 +94,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Freeway-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/frostbite.md b/docs/environments/atari/frostbite.md index 4de74296f..b4d8928f3 100644 --- a/docs/environments/atari/frostbite.md +++ b/docs/environments/atari/frostbite.md @@ -18,7 +18,7 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/Frostbite-v5")` | -### Description +## Description In Frostbite, the player controls "Frostbite Bailey" who hops back and forth across across an Arctic river, changing the color of the ice blocks from white to blue. Each time he does so, a block is added to his igloo. [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=199). ### Rewards @@ -58,8 +58,7 @@ legal space by passing the keyword argument `full_action_space=True` to `make`. The reduced action space of an Atari environment may depend on the flavor of the game. You can specify the flavor by providing the arguments `difficulty` and `mode` when constructing the environment. This documentation only provides details on the action spaces of default flavors. - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. 
However, it is possible to observe - The 128 Bytes of RAM of the console @@ -76,8 +75,7 @@ instead. The respective observation spaces are respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments via `gymnasium.make`. - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/Frostbite-v5") @@ -96,7 +94,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Frostbite-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/gopher.md b/docs/environments/atari/gopher.md index ac7017b05..eeda6b949 100644 --- a/docs/environments/atari/gopher.md +++ b/docs/environments/atari/gopher.md @@ -18,7 +18,7 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/Gopher-v5")` | -### Description +## Description The player controls a shovel-wielding farmer who protects a crop of three carrots from a gopher. [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=218). ### Rewards @@ -58,8 +58,7 @@ legal space by passing the keyword argument `full_action_space=True` to `make`. The reduced action space of an Atari environment may depend on the flavor of the game. You can specify the flavor by providing the arguments `difficulty` and `mode` when constructing the environment. This documentation only provides details on the action spaces of default flavors. - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -76,8 +75,7 @@ instead. The respective observation spaces are respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments via `gymnasium.make`. - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/Gopher-v5") @@ -96,7 +94,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Gopher-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/gravitar.md b/docs/environments/atari/gravitar.md index b00073308..9a6aedbb9 100644 --- a/docs/environments/atari/gravitar.md +++ b/docs/environments/atari/gravitar.md @@ -18,7 +18,7 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/Gravitar-v5")` | -### Description +## Description The player controls a small blue spacecraft. The game starts in a fictional solar system with several planets to explore. If the player moves his ship into a planet, he will be taken to a side-view landscape. Player has to destroy red bunkers [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=223). 
### Rewards @@ -58,8 +58,7 @@ legal space by passing the keyword argument `full_action_space=True` to `make`. The reduced action space of an Atari environment may depend on the flavor of the game. You can specify the flavor by providing the arguments `difficulty` and `mode` when constructing the environment. This documentation only provides details on the action spaces of default flavors. - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -76,8 +75,7 @@ instead. The respective observation spaces are respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments via `gymnasium.make`. - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/Gravitar-v5") @@ -96,7 +94,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Gravitar-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/hero.md b/docs/environments/atari/hero.md index f58878ff5..ee5998af5 100644 --- a/docs/environments/atari/hero.md +++ b/docs/environments/atari/hero.md @@ -19,7 +19,7 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/Hero-v5")` | -### Description +## Description You need to rescue miners that are stuck in a mine shaft. You have access to various tools: A propeller backpack that allows you to fly wherever you want, sticks of dynamite that can be used to blast through walls, a laser beam to kill vermin, and a raft to float across stretches of lava. @@ -27,13 +27,12 @@ You have a limited amount of power. Once you run out, you lose a live. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=228) -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions will be available in the default flavor. - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -56,8 +55,7 @@ You score points for shooting critters, rescuing miners, and dynamiting walls. Extra points are rewarded for any power remaining after rescuing a miner. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=228). - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/Hero-v5") @@ -76,7 +74,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Hero-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. 
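The Observations sections above note that the console's 128 bytes of RAM or a grayscale frame can be returned instead of the default RGB image, and that the general Atari article describes how to instantiate such variants through `gymnasium.make`. A minimal sketch of what that can look like, assuming the `obs_type` keyword accepted by the ALE environments (the keyword itself is not named in the hunks above):

```
import gymnasium

# Default: RGB frames, as described in the Observations sections.
rgb_env = gymnasium.make("ALE/Hero-v5")

# Assumed keyword for the alternative observation types (RAM / grayscale).
ram_env = gymnasium.make("ALE/Hero-v5", obs_type="ram")
gray_env = gymnasium.make("ALE/Hero-v5", obs_type="grayscale")

for env in (rgb_env, ram_env, gray_env):
    print(env.observation_space)
    env.close()
```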
diff --git a/docs/environments/atari/ice_hockey.md b/docs/environments/atari/ice_hockey.md index c9f0b7115..b22b9a33c 100644 --- a/docs/environments/atari/ice_hockey.md +++ b/docs/environments/atari/ice_hockey.md @@ -18,19 +18,18 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/IceHockey-v5")` | -### Description +## Description Your goal is to score as many points as possible in a standard game of Ice Hockey over a 3-minute time period. The ball is usually called "the puck". There are 32 shot angles ranging from the extreme left to the extreme right. The angles can only aim towards the opponent's goal. Just as in real hockey, you can pass the puck by shooting it off the sides of the rink. This can be really key when you're in position to score a goal. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=241) -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions will be available in the default flavor. - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -53,8 +52,7 @@ via `gymnasium.make`. You score points by shooting the puck into your opponent's goal. Your opponent scores in the same manner. There are no limits to how many points you can get per game, other than the time limit of 3-minute games. For a more detailed documentation, consult [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=241). - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/IceHockey-v5") @@ -73,7 +71,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Icehockey-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/jamesbond.md b/docs/environments/atari/jamesbond.md index d1d1190b3..3269cf137 100644 --- a/docs/environments/atari/jamesbond.md +++ b/docs/environments/atari/jamesbond.md @@ -18,20 +18,19 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/Jamesbond-v5")` | -### Description +## Description Your mission is to control Mr. Bond's specially designed multipurpose craft to complete a variety of missions. The craft moves forward with a right motion and slightly back with a left motion. An up or down motion causes the craft to jump or dive. You can also fire by either lobbing a bomb to the bottom of the screen or firing a fixed angle shot to the top of the screen. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=250) -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions will be available in the default flavor. 
- -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -54,8 +53,7 @@ via `gymnasium.make`. The game ends when you complete the last mission or when you lose the last craft. In either case, you'll receive your final score. There will be a rating based on your score. The highest rating in NOVICE is 006. The highest rating in AGENT is 007. For a more detailed documentation, consult [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=250). - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/Jamesbond-v5") @@ -74,7 +72,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Jamesbond-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/journey_escape.md b/docs/environments/atari/journey_escape.md index 816818e90..802430059 100644 --- a/docs/environments/atari/journey_escape.md +++ b/docs/environments/atari/journey_escape.md @@ -18,12 +18,12 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/JourneyEscape-v5")` | -### Description +## Description You must lead all 5 members of JOURNEY through waves of pesky characters and backstage obstacles to the Scarab Escape Vehicle before time runs out. You must also protect $50,000 in concert cash from grasping groupies, photographers, and promoters. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=252) -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend @@ -48,8 +48,7 @@ flavor looks like this: | 15 | UPLEFTFIRE | | 16 | DOWNRIGHTFIRE | | 17 | DOWNLEFTFIRE | - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -72,8 +71,7 @@ via `gymnasium.make`. At the start of the game, you will have $50,000 and 60 units of time. Your end game score with be dependent on how much time you have remaining and who you encounter along the way. For a more detailed documentation, consult [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=252). - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/JourneyEscape-v5") @@ -92,7 +90,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("JourneyEscape-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. 
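The Actions sections above distinguish between the reduced, game-specific action set used by default and the full set of legal Atari 2600 actions, which the text says can be enabled by passing `full_action_space=True` to `make`. A short sketch comparing the two constructions:

```
import gymnasium

# Reduced action set for the default flavor of the game.
reduced_env = gymnasium.make("ALE/JourneyEscape-v5")
print(reduced_env.action_space)

# Full legal action set, enabled as described in the text above.
full_env = gymnasium.make("ALE/JourneyEscape-v5", full_action_space=True)
print(full_env.action_space)  # all 18 legal console actions
```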
diff --git a/docs/environments/atari/kangaroo.md b/docs/environments/atari/kangaroo.md index c54af55d4..ad648e0b7 100644 --- a/docs/environments/atari/kangaroo.md +++ b/docs/environments/atari/kangaroo.md @@ -18,18 +18,17 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/Kangaroo-v5")` | -### Description +## Description The object of the game is to score as many points as you can while controlling Mother Kangaroo to rescue her precious baby. You start the game with three lives. During this rescue mission, Mother Kangaroo encounters many obstacles. You need to help her climb ladders, pick bonus fruit, and throw punches at monkeys. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=923) -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions will be available in the default flavor. - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -52,8 +51,7 @@ via `gymnasium.make`. Your score will be shown at the top right corner of the game. Your end game score with be dependent on how much time you have remaining and who you encounter along the way. For a more detailed documentation, consult [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=923). - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/Kangaroo-v5") @@ -72,7 +70,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Kangaroo-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/krull.md b/docs/environments/atari/krull.md index 25af2f6aa..f6b888a56 100644 --- a/docs/environments/atari/krull.md +++ b/docs/environments/atari/krull.md @@ -18,18 +18,17 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/Krull-v5")` | -### Description +## Description Your mission is to find and enter the Beast's Black Fortress, rescue Princess Lyssa, and destroy the Beast. The task is not an easy one, for the location of the Black Fortress changes with each sunrise on Krull. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=267) -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions will be available in the default flavor. - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -52,8 +51,7 @@ via `gymnasium.make`. You will receive various scores for each monster you kill. You can play the game until you have lost all your lives. 
For a more detailed documentation, consult [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=267). - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/Krull-v5") @@ -72,7 +70,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Krull-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/kung_fu_master.md b/docs/environments/atari/kung_fu_master.md index 12c90ddb9..f7db00836 100644 --- a/docs/environments/atari/kung_fu_master.md +++ b/docs/environments/atari/kung_fu_master.md @@ -18,10 +18,10 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/KungFuMaster-v5")` | -### Description +## Description You are a Kung-Fu Master fighting your way through the Evil Wizard's temple. Your goal is to rescue Princess Victoria, defeating various enemies along the way. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_thumbs.php?SoftwareLabelID=268). -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify full_action_space=False during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of mode and difficulty). The reduced action space for the default flavor looks like this: | Num | Action | @@ -40,8 +40,7 @@ By default, all actions that can be performed on an Atari 2600 are available in | 11 | UPLEFTFIRE | | 12 | DOWNRIGHTFIRE | | 13 | DOWNLEFTFIRE | - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -58,8 +57,7 @@ instead. The respective observation spaces are respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments via `gymnasium.make`. - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/KungFuMaster-v5") @@ -76,7 +74,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("KungFuMaster-v0")` -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/montezuma_revenge.md b/docs/environments/atari/montezuma_revenge.md index 4b584f7c9..88a2b9c77 100644 --- a/docs/environments/atari/montezuma_revenge.md +++ b/docs/environments/atari/montezuma_revenge.md @@ -18,15 +18,14 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/MontezumaRevenge-v5")` | -### Description +## Description Your goal is to acquire Montezuma's treasure by making your way through a maze of chambers within the emperor's fortress. 
You must avoid deadly creatures while collecting valuables and tools which can help you escape with the treasure. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=310). -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions will be available in the default flavor. - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -43,8 +42,7 @@ instead. The respective observation spaces are respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments via `gymnasium.make`. - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/MontezumaRevenge-v5") @@ -61,7 +59,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("MontezumaRevenge-v0")` -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/ms_pacman.md b/docs/environments/atari/ms_pacman.md index 8a6808c87..985494835 100644 --- a/docs/environments/atari/ms_pacman.md +++ b/docs/environments/atari/ms_pacman.md @@ -18,10 +18,10 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/MsPacman-v5")` | -### Description +## Description Your goal is to collect all of the pellets on the screen while avoiding the ghosts. -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify full_action_space=False during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of mode and difficulty). The reduced action space for the default flavor looks like this: | Num | Action | @@ -35,8 +35,7 @@ By default, all actions that can be performed on an Atari 2600 are available in | 6 | UPLEFT | | 7 | DOWNRIGHT | | 8 | DOWNLEFT | - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -53,8 +52,7 @@ instead. The respective observation spaces are respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments via `gymnasium.make`. - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/MsPacman-v5") @@ -71,7 +69,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("MsPacman-v0")` -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. 
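Several Actions sections above mention that the reduced action space depends on the flavor of the environment, selected with the `mode` and `difficulty` arguments when constructing it. A sketch with placeholder flavor values (the legal values are game specific and listed in the general Atari article):

```
import gymnasium

# Default flavor.
env = gymnasium.make("ALE/MsPacman-v5")

# Alternative flavor; the mode/difficulty values here are illustrative
# placeholders, not values taken from this page.
flavored_env = gymnasium.make("ALE/MsPacman-v5", mode=1, difficulty=0)
```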
diff --git a/docs/environments/atari/name_this_game.md b/docs/environments/atari/name_this_game.md index 230c7a877..28f683d86 100644 --- a/docs/environments/atari/name_this_game.md +++ b/docs/environments/atari/name_this_game.md @@ -18,10 +18,10 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/NameThisGame-v5")` | -### Description +## Description Your goal is to defend the treasure that you have discovered. You must fight off a shark and an octopus while keeping an eye on your oxygen supply. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=323). -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify full_action_space=False during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of mode and difficulty). The reduced action space for the default flavor looks like this: | Num | Action | @@ -33,8 +33,7 @@ By default, all actions that can be performed on an Atari 2600 are available in | 4 | RIGHTFIRE | | 5 | LEFTFIRE | - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -51,8 +50,7 @@ instead. The respective observation spaces are respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments via `gymnasium.make`. - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/NameThisGame-v5") @@ -69,7 +67,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("NameThisGame-v0")` -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/phoenix.md b/docs/environments/atari/phoenix.md index 30b15a602..1336a0953 100644 --- a/docs/environments/atari/phoenix.md +++ b/docs/environments/atari/phoenix.md @@ -18,10 +18,10 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/Phoenix-v5")` | -### Description +## Description Your goal is to reach and shoot the alien pilot. On your way there, you must eliminate waves of war birds while avoiding their bombs. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_thumbs.php?SoftwareLabelID=355). -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify full_action_space=False during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of mode and difficulty). 
The reduced action space for the default flavor looks like this: | Num | Action | @@ -34,8 +34,7 @@ By default, all actions that can be performed on an Atari 2600 are available in | 5 | RIGHTFIRE | | 6 | LEFTFIRE | | 7 | DOWNFIRE | - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -52,8 +51,7 @@ instead. The respective observation spaces are respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments via `gymnasium.make`. - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/Phoenix-v5") @@ -70,7 +68,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Phoenix-v0")` -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/pitfall.md b/docs/environments/atari/pitfall.md index 28507b04f..02d23eaf9 100644 --- a/docs/environments/atari/pitfall.md +++ b/docs/environments/atari/pitfall.md @@ -18,16 +18,15 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/Pitfall-v5")` | -### Description +## Description You control Pitfall Harry and are tasked with collecting all the treasures in a jungle within 20 minutes. You have three lives. The game is over if you collect all the treasures or if you die or if the time runs out. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=360) -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions will be available in the default flavor. - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console - A grayscale image @@ -47,8 +46,7 @@ via `gymnasium.make`. ### Rewards You get score points for collecting treasure, you lose points through some misfortunes like falling down a hole. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=360). - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/Pitfall-v5") @@ -67,7 +65,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Pitfall-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/pong.md b/docs/environments/atari/pong.md index e80087e4d..ee7b6864a 100644 --- a/docs/environments/atari/pong.md +++ b/docs/environments/atari/pong.md @@ -18,11 +18,11 @@ This environment is part of the Atari environments. 
Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/Pong-v5")` | -### Description +## Description You control the right paddle, you compete against the left paddle controlled by the computer. You each try to keep deflecting the ball away from your goal and into your opponent's goal. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=587) -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of `mode` and `difficulty`). The reduced action space for the default @@ -38,8 +38,7 @@ flavor looks like this: | 5 | LEFTFIRE | - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console - A grayscale image @@ -59,8 +58,7 @@ via `gymnasium.make`. ### Rewards You get score points for getting the ball to pass the opponent's paddle. You lose points if the ball passes your paddle. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=587). - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/Pong-v5") @@ -79,7 +77,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Pong-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/pooyan.md b/docs/environments/atari/pooyan.md index 71e4c377c..5f39596ee 100644 --- a/docs/environments/atari/pooyan.md +++ b/docs/environments/atari/pooyan.md @@ -18,12 +18,12 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/Pooyan-v5")` | -### Description +## Description You are a mother pig protecting her piglets (Pooyans) from wolves. In the first scene, you can move up and down a rope. Try to shoot the worker's balloons, while guarding yourself from attacks. If the wolves reach the ground safely they will get behind and try to eat you. In the second scene, the wolves try to float up. You have to try and stop them using arrows and bait. You die if a wolf eats you, or a stone or rock hits you. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=372) -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of `mode` and `difficulty`). 
The reduced action space for the default @@ -37,8 +37,7 @@ flavor looks like this: | 3 | DOWN | | 4 | UPFIRE | | 5 | DOWNFIRE | - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console - A grayscale image @@ -58,8 +57,7 @@ via `gymnasium.make`. ### Rewards If you hit a balloon, wolf or stone with an arrow you score points. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=372). - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/Pooyan-v5") @@ -78,7 +76,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Pooyan-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/private_eye.md b/docs/environments/atari/private_eye.md index 85863a925..bc3868750 100644 --- a/docs/environments/atari/private_eye.md +++ b/docs/environments/atari/private_eye.md @@ -18,16 +18,15 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/PrivateEye-v5")` | -### Description +## Description You control the French Private Eye Pierre Touche. Navigate the city streets, parks, secret passages, dead-ends and one-ways in search of the ringleader, Henri Le Fiend and his gang. You also need to find evidence and stolen goods that are scattered about. There are five cases, complete each case before its statute of limitations expires. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=376) -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions will be available in the default flavor. - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console - A grayscale image @@ -47,8 +46,7 @@ via `gymnasium.make`. ### Rewards You score points for completing your tasks like gathering evidence, nabbing questionable characters or closing cases etc. You lose points if you get hit or if your auto is on a pothole. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=376). - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/PrivateEye-v5") @@ -67,7 +65,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("PrivateEye-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. 
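The Arguments sections above point out that the v5 environments live in the `ALE` namespace while the legacy v0 and v4 registrations do not. Both ways of instantiating the same game, as stated in the text:

```
import gymnasium

# v5: registered inside the "ALE" namespace.
env_v5 = gymnasium.make("ALE/PrivateEye-v5")

# v0 / v4: legacy registrations without the namespace.
env_v0 = gymnasium.make("PrivateEye-v0")
```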
diff --git a/docs/environments/atari/qbert.md b/docs/environments/atari/qbert.md index 0a3d1292b..848d8dc6e 100644 --- a/docs/environments/atari/qbert.md +++ b/docs/environments/atari/qbert.md @@ -18,11 +18,11 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/Qbert-v5")` | -### Description +## Description You are Q*bert. Your goal is to change the color of all the cubes on the pyramid to the pyramid's 'destination' color. To do this, you must hop on each cube on the pyramid one at a time while avoiding nasty creatures that lurk there. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=1224&itemTypeID=HTMLMANUAL) -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of `mode` and `difficulty`). The reduced action space for the default @@ -37,8 +37,7 @@ flavor looks like this: | 4 | LEFT | | 5 | DOWN | - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console - A grayscale image @@ -58,8 +57,7 @@ via `gymnasium.make`. ### Rewards You score points for changing color of the cubes to their destination colors or by defeating enemies. You also gain points for completing a level. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=1224&itemTypeID=HTMLMANUAL). - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/Qbert-v5") @@ -78,7 +76,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Qbert-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/riverraid.md b/docs/environments/atari/riverraid.md index 25d509bb6..9e24ab24b 100644 --- a/docs/environments/atari/riverraid.md +++ b/docs/environments/atari/riverraid.md @@ -18,7 +18,7 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/Riverraid-v5")` | -### Description +## Description You control a jet that flies over a river: you can move it sideways and fire missiles to destroy enemy objects. Each time an enemy object is destroyed you score points (i.e. rewards). You lose a jet when you run out of fuel: fly over a fuel depot when you begin to run low. 
@@ -29,10 +29,9 @@ The game begins with a squadron of three jets in reserve and you're given an add Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=409) -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment.Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions will be available in the default flavor. - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -62,8 +61,7 @@ Score points are your only reward. You get score points each time you destroy an | Bridge | 500 | For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=409). - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/Riverraid-v5") @@ -82,7 +80,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Riverraid-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/road_runner.md b/docs/environments/atari/road_runner.md index 6275a294e..5709c2939 100644 --- a/docs/environments/atari/road_runner.md +++ b/docs/environments/atari/road_runner.md @@ -18,7 +18,7 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/RoadRunner-v0")` | -### Description +## Description You control the Road Runner(TM) in a race; you can control the direction to run in and times to jumps. The goal is to outrun Wile E. Coyote(TM) while avoiding the hazards of the desert. @@ -32,10 +32,9 @@ destroying the coyote. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=412) -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment.Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions will be available in the default flavor. - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -64,8 +63,7 @@ Score points are your only reward. You get score points each time you: | get the coyote hit by a truck | 1000 | For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=412). - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/RoadRunner-v5") @@ -84,7 +82,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("RoadRunner-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. 
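The Rewards sections above state that the in-game score is the only reward signal. A minimal random-agent loop against the standard `reset`/`step` interface, sketched to show how that score accumulates over one episode:

```
import gymnasium

env = gymnasium.make("ALE/RoadRunner-v5")
obs, info = env.reset(seed=0)

episode_score = 0.0
terminated = truncated = False
while not (terminated or truncated):
    action = env.action_space.sample()  # random policy, purely for illustration
    obs, reward, terminated, truncated, info = env.step(action)
    episode_score += reward             # score points are the only reward

env.close()
print(episode_score)
```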
diff --git a/docs/environments/atari/robotank.md b/docs/environments/atari/robotank.md index 833f51059..c7c4b3219 100644 --- a/docs/environments/atari/robotank.md +++ b/docs/environments/atari/robotank.md @@ -18,7 +18,7 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/Robotank-v0")` | -### Description +## Description You control your Robot Tanks to destroy enemies and avoid enemy fire. Game ends when all of your Robot Tanks are @@ -34,10 +34,9 @@ You earn one bonus Robot Tank for every enemy squadron destroyed. The maximum Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=416) -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment.Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions will be available in the default flavor. - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -63,8 +62,7 @@ A small tank appears at the top of your screen for each enemy destroyed. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=416). - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/Robotank-v5") @@ -83,7 +81,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Robotank-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/seaquest.md b/docs/environments/atari/seaquest.md index 87043e151..d423e3b74 100644 --- a/docs/environments/atari/seaquest.md +++ b/docs/environments/atari/seaquest.md @@ -18,7 +18,7 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/Seaquest-v0")` | -### Description +## Description You control a sub able to move in all directions and fire torpedoes. The goal is to retrieve as many divers as you can, while dodging and blasting enemy subs and killer sharks; points will be awarded accordingly. @@ -37,10 +37,9 @@ to surface, with less than six divers, you lose one diver as well. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=424) -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment.Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions will be available in the default flavor. - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -73,8 +72,7 @@ moment you surface. The more oxygen you have left, the more bonus points you're given. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=424). 
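The Actions paragraphs for Robotank and Seaquest above state that the full Atari 2600 action set is always exposed for these games. A hedged sketch that inspects it; `get_action_meanings()` is assumed to be provided by the underlying ALE environment:

```python
import gymnasium as gym

# Hedged sketch: get_action_meanings() comes from the unwrapped ALE environment.
env = gym.make("ALE/Seaquest-v5")
print(env.action_space)                     # Discrete(18), the full action set
print(env.unwrapped.get_action_meanings())  # e.g. ['NOOP', 'FIRE', 'UP', ...]
env.close()
```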
- -### Arguments +## Arguments ``` env = gymnasium.make("ALE/Seaquest-v5") @@ -93,7 +91,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Seaquest-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/skiing.md b/docs/environments/atari/skiing.md index 391022515..ca58de2cc 100644 --- a/docs/environments/atari/skiing.md +++ b/docs/environments/atari/skiing.md @@ -18,7 +18,7 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/Skiing-v0")` | -### Description +## Description You control a skier who can move sideways. The goal is to run through all gates (between the poles) in the fastest time. @@ -29,7 +29,7 @@ and keep going. But you do lose time, so be careful! Detailed documentation can be found on [the AtariAge page [SLALOM RACING section]](https://atariage.com/manual_html_page.php?SoftwareLabelID=434) -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend @@ -41,8 +41,7 @@ flavor looks like this: | 0 | NOOP | | 1 | RIGHT | | 2 | LEFT | - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -64,8 +63,7 @@ via `gymnasium.make`. Seconds are your only rewards - negative rewards and penalties (e.g. missing a gate) are assigned as additional seconds. For a more detailed documentation, see [the AtariAge page [SLALOM RACING section]](https://atariage.com/manual_html_page.php?SoftwareLabelID=434). - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/Skiing-v5") @@ -84,7 +82,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Skiing-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/solaris.md b/docs/environments/atari/solaris.md index 170b5c9bf..0a41c788e 100644 --- a/docs/environments/atari/solaris.md +++ b/docs/environments/atari/solaris.md @@ -18,14 +18,13 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/Solaris-v5")` | -### Description +## Description You control a spaceship. Blast enemies before they can blast you. You can warp to different sectors. You have to defend Federation planets, and destroy Zylon forces. Keep track of your fuel, if you run out you lose a life. Warp to a Federation planet to refuel. The game ends if all your ships are destroyed or if you reach the Solaris planet. 
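The Actions text above mentions that the reduced action space can depend on the environment flavor, i.e. the combination of `mode` and `difficulty`. A hedged sketch of selecting a flavor (the values 0/0 are only placeholders; valid values are game specific and listed in the general Atari article):

```python
import gymnasium as gym

# Hedged sketch: mode/difficulty pick the game flavor; 0 is only a placeholder value.
env = gym.make("ALE/Skiing-v5", mode=0, difficulty=0)
obs, info = env.reset(seed=42)
print(env.action_space)
env.close()
```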
Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=450) -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions will be available in the default flavor. - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console - A grayscale image @@ -45,8 +44,7 @@ via `gymnasium.make`. ### Rewards You gain points for destroying enemies, rescuing cadets, making it through a corridor, destroying enemy planets etc. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=450). - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/Solaris-v5") @@ -65,7 +63,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Solaris-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/space_invaders.md b/docs/environments/atari/space_invaders.md index 990662673..e82082862 100644 --- a/docs/environments/atari/space_invaders.md +++ b/docs/environments/atari/space_invaders.md @@ -18,11 +18,11 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/SpaceInvaders-v5")` | -### Description +## Description Your objective is to destroy the space invaders by shooting your laser cannon at them before they reach the Earth. The game ends when all your lives are lost after taking enemy fire, or when they reach the earth. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=460) -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of `mode` and `difficulty`). The reduced action space for the default @@ -37,8 +37,7 @@ flavor looks like this: | 4 | RIGHTFIRE | | 5 | LEFTFIRE | - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console - A grayscale image @@ -58,8 +57,7 @@ via `gymnasium.make`. ### Rewards You gain points for destroying space invaders. The invaders in the back rows are worth more points. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=460). - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/SpaceInvaders-v5") @@ -78,7 +76,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. 
they are instantiated via `gymnasium.make("SpaceInvaders-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/star_gunner.md b/docs/environments/atari/star_gunner.md index 9e79ee051..5a311a7e3 100644 --- a/docs/environments/atari/star_gunner.md +++ b/docs/environments/atari/star_gunner.md @@ -18,11 +18,11 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/StarGunner-v5")` | -### Description +## Description Stop the alien invasion by shooting down alien saucers and creatures while avoiding bombs. More details can be found on [the Atari Mania page](http://www.atarimania.com/game-atari-2600-vcs-stargunner_16921.html) -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of `mode` and `difficulty`). The reduced action space for the default @@ -37,8 +37,7 @@ flavor looks like this: | 4 | LEFT | | 5 | DOWN | - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console - A grayscale image @@ -59,8 +58,7 @@ via `gymnasium.make`. You score points for destroying enemies. You get bonus points for clearing a wave and a level. For a more detailed documentation, see [the Atari Mania page](http://www.atarimania.com/game-atari-2600-vcs-stargunner_16921.html). - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/StarGunner-v5") @@ -79,7 +77,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("StarGunner-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/tennis.md b/docs/environments/atari/tennis.md index cd99bb8a5..9232fc76d 100644 --- a/docs/environments/atari/tennis.md +++ b/docs/environments/atari/tennis.md @@ -18,17 +18,16 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/Tennis-v5")` | -### Description +## Description You control the orange player playing against a computer-controlled blue player. The game follows the rules of tennis. The first player to win at least 6 games with a margin of at least two games wins the match. If the score is tied at 6-6, the first player to go 2 games up wins the match. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=555) -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions will be available in the default flavor. 
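As the Tennis Actions paragraph above notes, all actions remain available even when `full_action_space=False` is passed. A hedged check of that behaviour:

```python
import gymnasium as gym

# Hedged sketch: for Tennis the default flavor already uses the full action set,
# so full_action_space=False should make no difference (per the text above).
env = gym.make("ALE/Tennis-v5", full_action_space=False)
print(env.action_space)  # Discrete(18)
env.close()
```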
- -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console - A grayscale image @@ -48,8 +47,7 @@ via `gymnasium.make`. ### Rewards The scoring is as per the sport of tennis, played till one set. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=555). - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/Tennis-v5") @@ -68,7 +66,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Tennis-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/time_pilot.md b/docs/environments/atari/time_pilot.md index 5533df106..17e8e1c7f 100644 --- a/docs/environments/atari/time_pilot.md +++ b/docs/environments/atari/time_pilot.md @@ -18,11 +18,11 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/TimePilot-v5")` | -### Description +## Description You control an aircraft. Use it to destroy your enemies. As you progress in the game, you encounter enemies with technology that is increasingly from the future. More details can be found on [the Atari Mania page](http://www.atarimania.com/game-atari-2600-vcs-time-pilot_8038.html) -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of `mode` and `difficulty`). The reduced action space for the default @@ -41,8 +41,7 @@ flavor looks like this: | 8 | LEFTFIRE | | 9 | DOWNFIRE | - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console - A grayscale image @@ -64,8 +63,7 @@ via `gymnasium.make`. You score points for destroying enemies, gaining more points for difficult enemies. For a more detailed documentation, see [the Atari Mania page](http://www.atarimania.com/game-atari-2600-vcs-time-pilot_8038.html). - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/TimePilot-v5") @@ -83,7 +81,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("TimePilot-v0")`. -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/tutankham.md b/docs/environments/atari/tutankham.md index ca17f3910..5d8452827 100644 --- a/docs/environments/atari/tutankham.md +++ b/docs/environments/atari/tutankham.md @@ -18,10 +18,10 @@ This environment is part of the Atari environments. 
Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/Tutankham-v5")` | -### Description +## Description Your goal is to rack up points by finding treasures in the mazes of the tomb while eliminating its guardians. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_thumbs.php?SoftwareLabelID=572). -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify full_action_space=False during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of mode and difficulty). The reduced action space for the default flavor looks like this: | Num | Action | @@ -34,8 +34,7 @@ By default, all actions that can be performed on an Atari 2600 are available in | 5 | UPFIRE | | 6 | RIGHTFIRE | | 7 | LEFTFIRE | - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -52,8 +51,7 @@ instead. The respective observation spaces are The general article on Atari environments outlines different ways to instantiate corresponding environments via `gymnasium.make`. - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/Tutankham-v5") @@ -70,7 +68,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Tutankham-v0")` -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/up_n_down.md b/docs/environments/atari/up_n_down.md index 9454349e6..36efb1d53 100644 --- a/docs/environments/atari/up_n_down.md +++ b/docs/environments/atari/up_n_down.md @@ -18,10 +18,10 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/UpNDown-v5")` | -### Description +## Description Your goal is to steer your baja bugger to collect prizes and eliminate opponents. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=574). -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify full_action_space=False during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of mode and difficulty). The reduced action space for the default flavor looks like this: | Num | Action | @@ -32,8 +32,7 @@ By default, all actions that can be performed on an Atari 2600 are available in | 3 | DOWN | | 4 | UPFIRE | | 5 | DOWNFIRE | - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -50,8 +49,7 @@ instead. 
The respective observation spaces are The general article on Atari environments outlines different ways to instantiate corresponding environments via `gymnasium.make`. - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/UpNDown-v5") @@ -69,7 +67,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("UpNDown-v0")` -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/venture.md b/docs/environments/atari/venture.md index 8beb523ef..712974fde 100644 --- a/docs/environments/atari/venture.md +++ b/docs/environments/atari/venture.md @@ -18,13 +18,12 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/Venture-v5")` | -### Description +## Description Your goal is to capture the treasure in every chamber of the dungeon while eliminating the monsters. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=576). -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. Even if you use v0 or v4 or specify full_action_space=False during initialization, all actions will be available in the default flavor. - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -41,8 +40,7 @@ instead. The respective observation spaces are The general article on Atari environments outlines different ways to instantiate corresponding environments via `gymnasium.make`. - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/Venture-v5") @@ -61,7 +59,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Venture-v0")` -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/video_pinball.md b/docs/environments/atari/video_pinball.md index e588a1ff0..a74e1b5d7 100644 --- a/docs/environments/atari/video_pinball.md +++ b/docs/environments/atari/video_pinball.md @@ -18,10 +18,10 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/VideoPinball-v5")` | -### Description +## Description Your goal is to keep the ball in play as long as possible and to score as many points as possible. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=588). -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify full_action_space=False during initialization, only a reduced number of actions (those that are meaningful in this game) are available. 
The reduced action space may depend on the flavor of the environment (the combination of mode and difficulty). The reduced action space for the default flavor looks like this: | Num | Action | @@ -35,8 +35,7 @@ By default, all actions that can be performed on an Atari 2600 are available in | 6 | UPFIRE | | 7 | RIGHTFIRE | | 8 | LEFTFIRE | - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -53,8 +52,7 @@ instead. The respective observation spaces are The general article on Atari environments outlines different ways to instantiate corresponding environments via `gymnasium.make`. - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/'VideoPinball-v5") @@ -73,7 +71,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("VideoPinball-v0")` -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. diff --git a/docs/environments/atari/wizard_of_wor.md b/docs/environments/atari/wizard_of_wor.md index 91a828ea4..1dde108d5 100644 --- a/docs/environments/atari/wizard_of_wor.md +++ b/docs/environments/atari/wizard_of_wor.md @@ -18,10 +18,10 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/WizardOfWor-v5")` | -### Description +## Description Your goal is to beat the Wizard using your laser and radar scanner. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=598). -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify full_action_space=False during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of mode and difficulty). The reduced action space for the default flavor looks like this: | Num | Action | @@ -36,8 +36,7 @@ By default, all actions that can be performed on an Atari 2600 are available in | 7 | RIGHTFIRE | | 8 | LEFTFIRE | | 9 | DOWNFIRE | - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -54,8 +53,7 @@ instead. The respective observation spaces are The general article on Atari environments outlines different ways to instantiate corresponding environments via `gymnasium.make`. - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/WizardOfWor-v5") @@ -74,7 +72,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("WizardOfWor-v0")` -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. 
diff --git a/docs/environments/atari/zaxxon.md b/docs/environments/atari/zaxxon.md index 411209006..fa0950a23 100644 --- a/docs/environments/atari/zaxxon.md +++ b/docs/environments/atari/zaxxon.md @@ -19,13 +19,12 @@ This environment is part of the Atari environments. Please read | Observation Low | 0 | | Import | `gymnasium.make("ALE/Zaxxon-v5")` | -### Description +## Description Your goal is to stop the evil robot Zaxxon and its armies from enslaving the galaxy by piloting your fighter and shooting enemies. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=606). -### Actions +## Actions By default, all actions that can be performed on an Atari 2600 are available in this environment. Even if you use v0 or v4 or specify full_action_space=False during initialization, all actions will be available in the default flavor. - -### Observations +## Observations By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe - The 128 Bytes of RAM of the console @@ -42,8 +41,7 @@ instead. The respective observation spaces are The general article on Atari environments outlines different ways to instantiate corresponding environments via `gymnasium.make`. - -### Arguments +## Arguments ``` env = gymnasium.make("ALE/Zaxxon-v5") @@ -60,7 +58,7 @@ are available. These are no longer supported in v5. In order to obtain equivalen the general article on Atari environments. The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Zaxxon-v0")` -### Version History +## Version History A thorough discussion of the intricate differences between the versions and configurations can be found in the general article on Atari environments. 
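The version notes above repeat that the v0/v4 ids live outside the "ALE" namespace while the v5 ids live inside it. A hedged sketch of the two naming schemes (both assume `ale-py` is installed; the behavioural differences are covered in the general Atari article):

```python
import gymnasium as gym

# Hedged sketch: legacy ids have no namespace, the recommended v5 ids use "ALE/".
legacy = gym.make("Zaxxon-v0")        # legacy id, not in the "ALE" namespace
current = gym.make("ALE/Zaxxon-v5")   # recommended id
```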
diff --git a/docs/environments/box2d.md b/docs/environments/box2d.md index 1886996f0..f0bf6c199 100644 --- a/docs/environments/box2d.md +++ b/docs/environments/box2d.md @@ -3,7 +3,7 @@ firstpage: lastpage: --- -## Box2D +# Box2D ```{toctree} :hidden: diff --git a/docs/environments/classic_control.md b/docs/environments/classic_control.md index fa19d87b8..fb005959c 100644 --- a/docs/environments/classic_control.md +++ b/docs/environments/classic_control.md @@ -3,7 +3,7 @@ firstpage: lastpage: --- -## Classic Control +# Classic Control ```{toctree} :hidden: diff --git a/docs/environments/mujoco.md b/docs/environments/mujoco.md index a5deeacf1..243c812af 100644 --- a/docs/environments/mujoco.md +++ b/docs/environments/mujoco.md @@ -3,21 +3,22 @@ firstpage: lastpage: --- -## MuJoCo +# MuJoCo ```{toctree} :hidden: -ant -half_cheetah -hopper -humanoid_standup -humanoid -inverted_double_pendulum -inverted_pendulum -reacher -swimmer -walker2d +mujoco/ant +mujoco/half_cheetah +mujoco/hopper +mujoco/humanoid_standup +mujoco/humanoid +mujoco/inverted_double_pendulum +mujoco/inverted_pendulum +mujoco/reacher +mujoco/swimmer +mujoco/pusher +mujoco/walker2d ``` ```{raw} html diff --git a/docs/environments/toy_text.md b/docs/environments/toy_text.md index dfd4d06b1..2bfab7a96 100644 --- a/docs/environments/toy_text.md +++ b/docs/environments/toy_text.md @@ -3,7 +3,7 @@ firstpage: lastpage: --- -## Toy Text +# Toy Text ```{toctree} :hidden: diff --git a/docs/index.md b/docs/index.md index 405be3369..b118ea9dc 100644 --- a/docs/index.md +++ b/docs/index.md @@ -4,7 +4,7 @@ firstpage: lastpage: --- -## Gymnasium is a standard API for reinforcement learning, and a diverse collection of reference environments +# Gymnasium is a standard API for reinforcement learning, and a diverse collection of reference environments ```{figure} _static/videos/box2d/lunar_lander_continuous.gif @@ -39,7 +39,8 @@ content/basic_usage :hidden: :caption: API -api/core +api/env +api/registry api/spaces api/wrappers api/vector @@ -50,11 +51,11 @@ api/utils :hidden: :caption: Environments -environments/atari -environments/mujoco -environments/toy_text environments/classic_control environments/box2d +environments/toy_text +environments/mujoco +environments/atari environments/third_party_environments ``` @@ -73,5 +74,4 @@ content/handling_timelimits Github Donate - ``` diff --git a/gymnasium/core.py b/gymnasium/core.py index dd9e6fe89..64788adb5 100644 --- a/gymnasium/core.py +++ b/gymnasium/core.py @@ -33,29 +33,32 @@ RenderFrame = TypeVar("RenderFrame") class Env(Generic[ObsType, ActType]): - r"""The main Gymnasium class. + r"""The main Gymnasium class for implementing Reinforcement Learning Agents environments. - It encapsulates an environment with arbitrary behind-the-scenes dynamics. - An environment can be partially or fully observed. + The class encapsulates an environment with arbitrary behind-the-scenes dynamics through the :meth:`step` and :meth:`reset` functions. + An environment can be partially or fully observed by single agents. For multi-agent environments, see PettingZoo. The main API methods that users of this class need to know are: - - :meth:`step` - Takes a step in the environment using an action returning the next observation, reward, - if the environment terminated and observation information. - - :meth:`reset` - Resets the environment to an initial state, returning the initial observation and observation information. 
- - :meth:`render` - Renders the environment observation with modes depending on the output - :meth:`close` - Closes the environment, important for rendering where pygame is imported + - :meth:`step` - Updates an environment with actions returning the next agent observation, the reward for taking that action, + if the environment has terminated or truncated due to the latest action and information from the environment about the step, i.e. metrics, debug info. + - :meth:`reset` - Resets the environment to an initial state, required before calling step. + Returns the first agent observation for an episode and information, i.e. metrics, debug info. + - :meth:`render` - Renders the environment to help visualise what the agent sees; example modes are "human", "rgb_array", "ansi" for text. + - :meth:`close` - Closes the environment, important when external software is used, e.g. pygame for rendering, databases - And set the following attributes: + Environments have additional attributes for users to understand the implementation - - :attr:`action_space` - The Space object corresponding to valid actions - - :attr:`observation_space` - The Space object corresponding to valid observations - - :attr:`reward_range` - A tuple corresponding to the minimum and maximum possible rewards - - :attr:`spec` - An environment spec that contains the information used to initialize the environment from `gymnasium.make` - - :attr:`metadata` - The metadata of the environment, i.e. render modes - - :attr:`np_random` - The random number generator for the environment + - :attr:`action_space` - The Space object corresponding to valid actions, all valid actions should be contained within the space. + - :attr:`observation_space` - The Space object corresponding to valid observations, all valid observations should be contained within the space. + - :attr:`reward_range` - A tuple corresponding to the minimum and maximum possible rewards for an agent over an episode. + The default reward range is set to :math:`(-\infty,+\infty)`. + - :attr:`spec` - An environment spec that contains the information used to initialize the environment from :meth:`gymnasium.make` + - :attr:`metadata` - The metadata of the environment, i.e. render modes, render fps + - :attr:`np_random` - The random number generator for the environment. This is automatically assigned during + ``super().reset(seed=seed)`` and when accessing ``self.np_random``. - Note: a default reward range set to :math:`(-\infty,+\infty)` already exists. Set it if you want a narrower range. + .. seealso:: For modifying or extending environments use the :py:class:`gymnasium.Wrapper` class """ # Set this in SOME subclasses @@ -72,40 +75,41 @@ class Env(Generic[ObsType, ActType]): # Created _np_random: Optional[np.random.Generator] = None - @property - def np_random(self) -> np.random.Generator: - """Returns the environment's internal :attr:`_np_random` that if not set will initialize with a random seed.""" - if self._np_random is None: - self._np_random, seed = seeding.np_random() - return self._np_random - - @np_random.setter - def np_random(self, value: np.random.Generator): - self._np_random = value - def step(self, action: ActType) -> Tuple[ObsType, float, bool, bool, dict]: - """Run one timestep of the environment's dynamics. + """Run one timestep of the environment's dynamics using the agent actions. - When end of episode is reached, you are responsible for calling :meth:`reset` to reset this environment's state.
- Accepts an action and returns either a tuple `(observation, reward, terminated, truncated, info)`. + When the end of an episode is reached (``terminated or truncated``), it is necessary to call :meth:`reset` to + reset this environment's state for the next episode. + + .. versionchanged:: 0.26 + + The Step API was changed removing ``done`` in favor of ``terminated`` and ``truncated`` to make it clearer + to users when the environment had terminated or truncated which is critical for reinforcement learning + bootstrapping algorithms. Args: - action (ActType): an action provided by the agent + action (ActType): an action provided by the agent to update the environment state. Returns: - observation (object): this will be an element of the environment's :attr:`observation_space`. - This may, for instance, be a numpy array containing the positions and velocities of certain objects. - reward (float): The amount of reward returned as a result of taking the action. - terminated (bool): whether a `terminal state` (as defined under the MDP of the task) is reached. - In this case further step() calls could return undefined results. - truncated (bool): whether a truncation condition outside the scope of the MDP is satisfied. - Typically a timelimit, but could also be used to indicate an agent physically going out of bounds. - Can be used to end the episode prematurely before a `terminal state` is reached. - info (dictionary): `info` contains auxiliary diagnostic information (helpful for debugging, learning, and logging). + observation (ObsType): An element of the environment's :attr:`observation_space` as the next observation due to the agent actions. + An example is a numpy array containing the positions and velocities of the pole in CartPole. + reward (float): The reward as a result of taking the action. + terminated (bool): Whether the agent reaches the terminal state (as defined under the MDP of the task) + which can be positive or negative. An example is reaching the goal state or moving into the lava from + the Sutton and Barto Gridworld. If true, the user needs to call :meth:`reset`. + truncated (bool): Whether the truncation condition outside the scope of the MDP is satisfied. + Typically, this is a timelimit, but could also be used to indicate an agent physically going out of bounds. + Can be used to end the episode prematurely before a terminal state is reached. + If true, the user needs to call :meth:`reset`. + info (dict): Contains auxiliary diagnostic information (helpful for debugging, learning, and logging). This might, for instance, contain: metrics that describe the agent's performance state, variables that are hidden from observations, or individual reward terms that are combined to produce the total reward. - It also can contain information that distinguishes truncation and termination, however, this is deprecated in favor - of returning two booleans, and will be removed in a future version. + In OpenAI Gym Tuple[ObsType, dict]: - """Resets the environment to an initial state and returns the initial observation. + """Resets the environment to an initial internal state, returning an initial observation and info. - This method can reset the environment's random number generator(s) if ``seed`` is an integer or - if the environment has not yet initialized a random number generator. - If the environment already has a random number generator and :meth:`reset` is called with ``seed=None``, - the RNG should not be reset.
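The new `step` docstring above is built around the `terminated`/`truncated` pair and the requirement to call `reset` once an episode ends. A minimal interaction loop that follows this contract (CartPole-v1 is only an example environment):

```python
import gymnasium as gym

env = gym.make("CartPole-v1")
observation, info = env.reset(seed=42)  # seed once, right after creation
for _ in range(1000):
    action = env.action_space.sample()  # a real agent would choose the action here
    observation, reward, terminated, truncated, info = env.step(action)
    if terminated or truncated:         # the episode ended: reset before stepping again
        observation, info = env.reset()
env.close()
```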
Moreover, :meth:`reset` should (in the typical use case) be called with an - integer seed right after initialization and then never again. + This method generates a new starting state often with some randomness to ensure that the agent explores the + state space and learns a generalised policy about the environment. This randomness can be controlled + with the ``seed`` parameter; otherwise, if the environment already has a random number generator and + :meth:`reset` is called with ``seed=None``, the RNG is not reset. + + Therefore, :meth:`reset` should (in the typical use case) be called with a seed right after initialization and then never again. + + For custom environments, the first line of :meth:`reset` should be ``super().reset(seed=seed)`` which implements + the seeding correctly. + + .. versionchanged:: v0.25 + + The ``return_info`` parameter was removed and now info is expected to be returned. Args: - seed (optional int): The seed that is used to initialize the environment's PRNG. + seed (optional int): The seed that is used to initialize the environment's PRNG (`np_random`). If the environment does not already have a PRNG and ``seed=None`` (the default option) is passed, a seed will be chosen from some source of entropy (e.g. timestamp or /dev/urandom). However, if the environment already has a PRNG and ``seed=None`` is passed, the PRNG will *not* be reset. @@ -134,9 +146,8 @@ class Env(Generic[ObsType, ActType]): options (optional dict): Additional information to specify how the environment is reset (optional, depending on the specific environment) - Returns: - observation (object): Observation of the initial state. This will be an element of :attr:`observation_space` + observation (ObsType): Observation of the initial state. This will be an element of :attr:`observation_space` (typically a numpy array) and is analogous to the observation returned by :meth:`step`. info (dictionary): This dictionary contains auxiliary information complementing ``observation``. It should be analogous to the ``info`` returned by :meth:`step`. @@ -146,45 +157,76 @@ class Env(Generic[ObsType, ActType]): self._np_random, seed = seeding.np_random(seed) def render(self) -> Optional[Union[RenderFrame, List[RenderFrame]]]: - """Compute the render frames as specified by render_mode attribute during initialization of the environment. + """Compute the render frames as specified by :attr:`render_mode` during the initialization of the environment. - The set of supported modes varies per environment. (And some - third-party environments may not support rendering at all.) - By convention, if render_mode is: - - - None (default): no render is computed. - - human: render return None. - The environment is continuously rendered in the current display or terminal. Usually for human consumption. - - rgb_array: return a single frame representing the current state of the environment. - A frame is a numpy.ndarray with shape (x, y, 3) representing RGB values for an x-by-y pixel image. - - rgb_array_list: return a list of frames representing the states of the environment since the last reset. - Each frame is a numpy.ndarray with shape (x, y, 3), as with `rgb_array`. - - ansi: Return a strings (str) or StringIO.StringIO containing a - terminal-style text representation for each time step. - The text can include newlines and ANSI escape sequences (e.g. for colors). + The environment's :attr:`metadata` render modes (`env.metadata["render_modes"]`) should contain the possible
In addition, list versions for most render modes is achieved through + `gymnasium.make` which automatically applies a wrapper to collect rendered frames. Note: - Make sure that your class's metadata 'render_modes' key includes - the list of supported modes. It's recommended to call super() - in implementations to use the functionality of this method. + As the :attr:`render_mode` is known during ``__init__``, the objects used to render the environment state + should be initialised in ``__init__``. + + By convention, if the :attr:`render_mode` is: + + - None (default): no render is computed. + - "human": The environment is continuously rendered in the current display or terminal, usually for human consumption. + This rendering should occur during :meth:`step` and :meth:`render` doesn't need to be called. Returns ``None``. + - "rgb_array": Return a single frame representing the current state of the environment. + A frame is a ``np.ndarray`` with shape ``(x, y, 3)`` representing RGB values for an x-by-y pixel image. + - "ansi": Return a strings (``str``) or ``StringIO.StringIO`` containing a terminal-style text representation + for each time step. The text can include newlines and ANSI escape sequences (e.g. for colors). + - "rgb_array_list" and "ansi_list": List based version of render modes are possible (except Human) through the + wrapper, :py:class:`gymnasium.wrappers.RenderCollection` that is automatically applied during ``gymnasium.make(..., render_mode="rgb_array_list")``. + The frames collected are popped after :meth:`render` is called or :meth:`reset`. + + Note: + Make sure that your class's :attr:`metadata` ``"render_modes"`` key includes the list of supported modes. + + .. versionchanged:: 0.25.0 + + The render function was changed to no longer accept parameters, rather these parameters should be specified + in the environment initialised, i.e., ``gymnasium.make("CartPole-v1", render_mode="human")`` """ raise NotImplementedError def close(self): - """Override close in your subclass to perform any necessary cleanup.""" + """After the user has finished using the environment, close contains the code necessary to "clean up" the environment. + + This is critical for closing rendering windows, database or HTTP connections. + """ pass @property def unwrapped(self) -> "Env": - """Returns the base non-wrapped environment. + """Returns the base non-wrapped environment (i.e., removes all wrappers). Returns: - Env: The base non-wrapped gymnasium.Env instance + Env: The base non-wrapped :class:`gymnasium.Env` instance """ return self + @property + def np_random(self) -> np.random.Generator: + """Returns the environment's internal :attr:`_np_random` that if not set will initialise with a random seed. + + Returns: + Instances of `np.random.Generator` + """ + if self._np_random is None: + self._np_random, seed = seeding.np_random() + return self._np_random + + @np_random.setter + def np_random(self, value: np.random.Generator): + self._np_random = value + def __str__(self): - """Returns a string of the environment with the spec id if specified.""" + """Returns a string of the environment with :attr:`spec` id's if :attr:`spec. 
+ + Returns: + A string identifying the environment + """ if self.spec is None: return f"<{type(self).__name__} instance>" else: @@ -195,21 +237,67 @@ class Env(Generic[ObsType, ActType]): return self def __exit__(self, *args): - """Support with-statement for the environment.""" + """Support with-statement for the environment and closes the environment.""" self.close() # propagate exception return False class Wrapper(Env[ObsType, ActType]): - """Wraps an environment to allow a modular transformation of the :meth:`step` and :meth:`reset` methods. + """Wraps a :class:`gymnasium.Env` to allow a modular transformation of the :meth:`step` and :meth:`reset` methods. - This class is the base class for all wrappers. The subclass could override - some methods to change the behavior of the original environment without touching the - original code. + This class is the base class of all wrappers used to change the behavior of the underlying environment, allowing + modification of the :attr:`action_space`, :attr:`observation_space`, :attr:`reward_range` and :attr:`metadata` + without changing the underlying environment's attributes. + + In addition, several attributes (:attr:`spec`, :attr:`render_mode`, :attr:`np_random`) will point back to the + wrapped environment. + + Wrappers are a convenient way to modify an existing environment without having to alter the underlying code directly. + Using wrappers will allow you to avoid a lot of boilerplate code and make your environment more modular. Wrappers can + also be chained to combine their effects. Most environments that are generated via `gymnasium.make` will already be wrapped by default. + + In order to wrap an environment, you must first initialize a base environment. Then you can pass this environment along + with (possibly optional) parameters to the wrapper's constructor. + + >>> import gymnasium as gym + >>> from gymnasium.wrappers import RescaleAction + >>> base_env = gym.make("BipedalWalker-v3") + >>> base_env.action_space + Box([-1. -1. -1. -1.], [1. 1. 1. 1.], (4,), float32) + >>> wrapped_env = RescaleAction(base_env, min_action=0, max_action=1) + >>> wrapped_env.action_space + Box([0. 0. 0. 0.], [1. 1. 1. 1.], (4,), float32) + + You can access the environment underneath the **first** wrapper by using the :attr:`env` attribute. + As the :class:`Wrapper` class inherits from :class:`Env` then :attr:`env` can be another wrapper. + + >>> wrapped_env + <RescaleAction<TimeLimit<OrderEnforcing<BipedalWalker<BipedalWalker-v3>>>>> + >>> wrapped_env.env + <TimeLimit<OrderEnforcing<BipedalWalker<BipedalWalker-v3>>>> + + If you want to get to the environment underneath **all** of the layers of wrappers, you can use the `.unwrapped` attribute. + If the environment is already a bare environment, the `.unwrapped` attribute will just return itself. + + >>> wrapped_env + <RescaleAction<TimeLimit<OrderEnforcing<BipedalWalker<BipedalWalker-v3>>>>> + >>> wrapped_env.unwrapped + <gymnasium.envs.box2d.bipedal_walker.BipedalWalker object at 0x...> + + There are three common things you might want a wrapper to do: + + - Transform actions before applying them to the base environment + - Transform observations that are returned by the base environment + - Transform rewards that are returned by the base environment + + Such wrappers can be easily implemented by inheriting from `ActionWrapper`, `ObservationWrapper`, or `RewardWrapper` and implementing the + respective transformation. If you need a wrapper to do more complicated tasks, you can inherit from the `Wrapper` class directly.
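The Wrapper docstring above recommends inheriting from `ActionWrapper`, `ObservationWrapper` or `RewardWrapper` for the three common transformations. A small illustrative sketch (the class name and the rescaling choice are examples, not taken from the docs):

```python
import gymnasium as gym
import numpy as np

class ScaleObservation(gym.ObservationWrapper):
    """Example ObservationWrapper: rescales uint8 image observations to float32 values in [0, 1]."""

    def __init__(self, env):
        super().__init__(env)  # required, as noted above
        self.observation_space = gym.spaces.Box(
            low=0.0, high=1.0, shape=env.observation_space.shape, dtype=np.float32
        )

    def observation(self, observation):
        return observation.astype(np.float32) / 255.0
```

Wrapping is then a single call, e.g. `env = ScaleObservation(gym.make("ALE/Qbert-v5"))`, assuming an environment with image observations.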
+ The code that is presented in the following sections can also be found in + the [gym-examples](https://github.com/Farama-Foundation/gym-examples) repository Note: - Don't forget to call ``super().__init__(env)`` if the subclass overrides :meth:`__init__`. + Don't forget to call ``super().__init__(env)`` """ def __init__(self, env: Env): @@ -233,7 +321,7 @@ class Wrapper(Env[ObsType, ActType]): @property def spec(self): - """Returns the environment specification.""" + """Returns the :attr:`Env` :attr:`spec` attribute.""" return self.env.spec @classmethod @@ -243,7 +331,7 @@ class Wrapper(Env[ObsType, ActType]): @property def action_space(self) -> spaces.Space[ActType]: - """Returns the action space of the environment.""" + """Return the :attr:`Env` :attr:`action_space` unless overwritten then the wrapper :attr:`action_space` is used.""" if self._action_space is None: return self.env.action_space return self._action_space @@ -254,7 +342,7 @@ class Wrapper(Env[ObsType, ActType]): @property def observation_space(self) -> spaces.Space: - """Returns the observation space of the environment.""" + """Return the :attr:`Env` :attr:`observation_space` unless overwritten then the wrapper :attr:`observation_space` is used.""" if self._observation_space is None: return self.env.observation_space return self._observation_space @@ -265,7 +353,7 @@ class Wrapper(Env[ObsType, ActType]): @property def reward_range(self) -> Tuple[SupportsFloat, SupportsFloat]: - """Return the reward range of the environment.""" + """Return the :attr:`Env` :attr:`reward_range` unless overwritten then the wrapper :attr:`reward_range` is used.""" if self._reward_range is None: return self.env.reward_range return self._reward_range @@ -276,7 +364,7 @@ class Wrapper(Env[ObsType, ActType]): @property def metadata(self) -> dict: - """Returns the environment metadata.""" + """Returns the :attr:`Env` :attr:`metadata`.""" if self._metadata is None: return self.env.metadata return self._metadata @@ -287,12 +375,12 @@ class Wrapper(Env[ObsType, ActType]): @property def render_mode(self) -> Optional[str]: - """Returns the environment render_mode.""" + """Returns the :attr:`Env` :attr:`render_mode`.""" return self.env.render_mode @property def np_random(self) -> np.random.Generator: - """Returns the environment np_random.""" + """Returns the :attr:`Env` :attr:`np_random` attribute.""" return self.env.np_random @np_random.setter @@ -306,25 +394,25 @@ class Wrapper(Env[ObsType, ActType]): ) def step(self, action: ActType) -> Tuple[ObsType, float, bool, bool, dict]: - """Steps through the environment with action.""" + """Uses the :meth:`step` of the :attr:`env` that can be overwritten to change the returned data.""" return self.env.step(action) def reset(self, **kwargs) -> Tuple[ObsType, dict]: - """Resets the environment with kwargs.""" + """Uses the :meth:`reset` of the :attr:`env` that can be overwritten to change the returned data.""" return self.env.reset(**kwargs) def render( self, *args, **kwargs ) -> Optional[Union[RenderFrame, List[RenderFrame]]]: - """Renders the environment.""" + """Uses the :meth:`render` of the :attr:`env` that can be overwritten to change the returned data.""" return self.env.render(*args, **kwargs) def close(self): - """Closes the environment.""" + """Closes the wrapper and :attr:`env`.""" return self.env.close() def __str__(self): - """Returns the wrapper name and the unwrapped environment string.""" + """Returns the wrapper name and the :attr:`env` representation string.""" return 
f"<{type(self).__name__}{self.env}>" def __repr__(self): @@ -340,12 +428,11 @@ class Wrapper(Env[ObsType, ActType]): class ObservationWrapper(Wrapper): """Superclass of wrappers that can modify observations using :meth:`observation` for :meth:`reset` and :meth:`step`. - If you would like to apply a function to the observation that is returned by the base environment before + If you would like to apply a function to only the observation before passing it to the learning code, you can simply inherit from :class:`ObservationWrapper` and overwrite the method :meth:`observation` to implement that transformation. The transformation defined in that method must be - defined on the base environment’s observation space. However, it may take values in a different space. - In that case, you need to specify the new observation space of the wrapper by setting :attr:`self.observation_space` - in the :meth:`__init__` method of your wrapper. + reflected by the :attr:`env` observation space. Otherwise, you need to specify the new observation space of the + wrapper by setting :attr:`self.observation_space` in the :meth:`__init__` method of your wrapper. For example, you might have a 2D navigation task where the environment returns dictionaries as observations with keys ``"agent_position"`` and ``"target_position"``. A common thing to do might be to throw away some degrees of @@ -366,17 +453,24 @@ class ObservationWrapper(Wrapper): """ def reset(self, **kwargs): - """Resets the environment, returning a modified observation using :meth:`self.observation`.""" + """Modifies the :attr:`env` after calling :meth:`reset`, returning a modified observation using :meth:`self.observation`.""" obs, info = self.env.reset(**kwargs) return self.observation(obs), info def step(self, action): - """Returns a modified observation using :meth:`self.observation` after calling :meth:`env.step`.""" + """Modifies the :attr:`env` after calling :meth:`step` using :meth:`self.observation` on the returned observations.""" observation, reward, terminated, truncated, info = self.env.step(action) return self.observation(observation), reward, terminated, truncated, info def observation(self, observation): - """Returns a modified observation.""" + """Returns a modified observation. + + Args: + observation: The :attr:`env` observation + + Returns: + The modified observation + """ raise NotImplementedError @@ -386,14 +480,14 @@ class RewardWrapper(Wrapper): If you would like to apply a function to the reward that is returned by the base environment before passing it to learning code, you can simply inherit from :class:`RewardWrapper` and overwrite the method :meth:`reward` to implement that transformation. - This transformation might change the reward range; to specify the reward range of your wrapper, + This transformation might change the :attr:`reward_range`; to specify the :attr:`reward_range` of your wrapper, you can simply define :attr:`self.reward_range` in :meth:`__init__`. Let us look at an example: Sometimes (especially when we do not have control over the reward because it is intrinsic), we want to clip the reward to a range to gain some numerical stability. 
To do that, we could, for instance, implement the following wrapper:: - class ClipReward(gym.RewardWrapper): + class ClipReward(gymnasium.RewardWrapper): def __init__(self, env, min_reward, max_reward): super().__init__(env) self.min_reward = min_reward @@ -405,12 +499,19 @@ class RewardWrapper(Wrapper): """ def step(self, action): - """Modifies the reward using :meth:`self.reward` after the environment :meth:`env.step`.""" + """Modifies the :attr:`env` :meth:`step` reward using :meth:`self.reward`.""" observation, reward, terminated, truncated, info = self.env.step(action) return observation, self.reward(reward), terminated, truncated, info def reward(self, reward): - """Returns a modified ``reward``.""" + """Returns a modified environment ``reward``. + + Args: + reward: The :attr:`env` :meth:`step` reward + + Returns: + The modified `reward` + """ raise NotImplementedError @@ -442,18 +543,20 @@ class ActionWrapper(Wrapper): np.array([0,1]), np.array([0,-1])]) print(wrapped_env.action_space) #Discrete(4) - - Among others, Gymnasium provides the action wrappers :class:`ClipAction` and :class:`RescaleAction`. + Among others, Gymnasium provides the action wrappers :class:`ClipAction` and :class:`RescaleAction` for clipping and rescaling actions. """ def step(self, action): - """Runs the environment :meth:`env.step` using the modified ``action`` from :meth:`self.action`.""" + """Runs the :attr:`env` :meth:`env.step` using the modified ``action`` from :meth:`self.action`.""" return self.env.step(self.action(action)) def action(self, action): - """Returns a modified action before :meth:`env.step` is called.""" - raise NotImplementedError + """Returns a modified action before :meth:`env.step` is called. - def reverse_action(self, action): - """Returns a reversed ``action``.""" + Args: + action: The original :meth:`step` actions + + Returns: + The modified actions + """ raise NotImplementedError diff --git a/gymnasium/envs/box2d/bipedal_walker.py b/gymnasium/envs/box2d/bipedal_walker.py index 1d27ae751..5f078f58f 100644 --- a/gymnasium/envs/box2d/bipedal_walker.py +++ b/gymnasium/envs/box2d/bipedal_walker.py @@ -102,7 +102,7 @@ class ContactDetector(contactListener): class BipedalWalker(gym.Env, EzPickle): """ - ### Description + ## Description This is a simple 4-joint walker robot environment. There are two versions: - Normal, with slightly uneven terrain. @@ -117,30 +117,30 @@ class BipedalWalker(gym.Env, EzPickle): python gymnasium/envs/box2d/bipedal_walker.py ``` - ### Action Space + ## Action Space Actions are motor speed values in the [-1, 1] range for each of the 4 joints at both hips and knees. - ### Observation Space + ## Observation Space State consists of hull angle speed, angular velocity, horizontal speed, vertical speed, position of joints and joints angular speed, legs contact with ground, and 10 lidar rangefinder measurements. There are no coordinates in the state vector. - ### Rewards + ## Rewards Reward is given for moving forward, totaling 300+ points up to the far end. If the robot falls, it gets -100. Applying motor torque costs a small amount of points. A more optimal agent will get a better score. - ### Starting State + ## Starting State The walker starts standing at the left end of the terrain with the hull horizontal, and both legs in the same position with a slight knee angle. - ### Episode Termination + ## Episode Termination The episode will terminate if the hull gets in contact with the ground or if the walker exceeds the right end of the terrain length. 
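A runnable sketch of the discrete-to-continuous `ActionWrapper` described earlier. The `LunarLanderContinuous-v2` id and the four action vectors follow the docstring's example but should be treated as illustrative; the Box2D extras must be installed.

```python
import numpy as np
import gymnasium as gym
from gymnasium import spaces


class DiscreteActions(gym.ActionWrapper):
    """Map a small set of fixed continuous actions onto a Discrete space."""

    def __init__(self, env, disc_to_cont):
        super().__init__(env)
        self.disc_to_cont = disc_to_cont
        self.action_space = spaces.Discrete(len(disc_to_cont))

    def action(self, act):
        # Called by ActionWrapper.step before the action reaches the wrapped env.
        return self.disc_to_cont[act]


env = gym.make("LunarLanderContinuous-v2")
wrapped_env = DiscreteActions(
    env, [np.array([1, 0]), np.array([-1, 0]), np.array([0, 1]), np.array([0, -1])]
)
print(wrapped_env.action_space)  # Discrete(4)
```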
- ### Arguments + ## Arguments To use to the _hardcore_ environment, you need to specify the `hardcore=True` argument like below: ```python @@ -148,7 +148,7 @@ class BipedalWalker(gym.Env, EzPickle): env = gym.make("BipedalWalker-v3", hardcore=True) ``` - ### Version History + ## Version History - v3: Returns the closest lidar trace instead of furthest; faster video recording - v2: Count energy spent @@ -157,9 +157,9 @@ class BipedalWalker(gym.Env, EzPickle): - v0: Initial version - + - ### Credits + ## Credits Created by Oleg Klimov """ diff --git a/gymnasium/envs/box2d/car_racing.py b/gymnasium/envs/box2d/car_racing.py index 8cb9b5210..db6822a86 100644 --- a/gymnasium/envs/box2d/car_racing.py +++ b/gymnasium/envs/box2d/car_racing.py @@ -106,7 +106,7 @@ class FrictionDetector(contactListener): class CarRacing(gym.Env, EzPickle): """ - ### Description + ## Description The easiest control task to learn from pixels - a top-down racing environment. The generated track is random every episode. @@ -120,31 +120,31 @@ class CarRacing(gym.Env, EzPickle): Remember: it's a powerful rear-wheel drive car - don't press the accelerator and turn at the same time. - ### Action Space + ## Action Space If continuous: There are 3 actions: steering (-1 is full left, +1 is full right), gas, and breaking. If discrete: There are 5 actions: do nothing, steer left, steer right, gas, brake. - ### Observation Space + ## Observation Space A top-down 96x96 RGB image of the car and race track. - ### Rewards + ## Rewards The reward is -0.1 every frame and +1000/N for every track tile visited, where N is the total number of tiles visited in the track. For example, if you have finished in 732 frames, your reward is 1000 - 0.1*732 = 926.8 points. - ### Starting State + ## Starting State The car starts at rest in the center of the road. - ### Episode Termination + ## Episode Termination The episode finishes when all the tiles are visited. The car can also go outside the playfield - that is, far off the track, in which case it will receive -100 reward and die. - ### Arguments + ## Arguments `lap_complete_percent` dictates the percentage of tiles that must be visited by the agent before a lap is considered complete. @@ -154,7 +154,7 @@ class CarRacing(gym.Env, EzPickle): Passing `continuous=False` converts the environment to use discrete action space. The discrete action space has 5 actions: [do nothing, left, right, gas, brake]. - ### Reset Arguments + ## Reset Arguments Passing the option `options["randomize"] = True` will change the current colour of the environment on demand. Correspondingly, passing the option `options["randomize"] = False` will not change the current colour of the environment. `domain_randomize` must be `True` on init for this argument to work. @@ -173,14 +173,14 @@ class CarRacing(gym.Env, EzPickle): env.reset(options={"randomize": False}) ``` - ### Version History + ## Version History - v1: Change track completion logic and add domain randomization (0.24.0) - v0: Original version - ### References + ## References - Chris Campbell (2014), http://www.iforce2d.net/b2dtut/top-down-car. 
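A short usage sketch of the CarRacing construction and reset arguments described above. The `CarRacing-v2` id is assumed to be the currently registered version, the parameter values are illustrative, and Box2D is required.

```python
import gymnasium as gym

# domain_randomize must be True at construction for the reset option to take effect
env = gym.make(
    "CarRacing-v2", domain_randomize=True, continuous=False, lap_complete_percent=0.95
)

obs, info = env.reset(options={"randomize": True})   # re-randomise the track colours
obs, info = env.reset(options={"randomize": False})  # keep the current colours
```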
- ### Credits + ## Credits Created by Oleg Klimov """ diff --git a/gymnasium/envs/box2d/lunar_lander.py b/gymnasium/envs/box2d/lunar_lander.py index 85f2d1172..29bc67f32 100644 --- a/gymnasium/envs/box2d/lunar_lander.py +++ b/gymnasium/envs/box2d/lunar_lander.py @@ -76,7 +76,7 @@ class ContactDetector(contactListener): class LunarLander(gym.Env, EzPickle): """ - ### Description + ## Description This environment is a classic rocket trajectory optimization problem. According to Pontryagin's maximum principle, it is optimal to fire the engine at full throttle or turn it off. This is the reason why this @@ -95,16 +95,16 @@ class LunarLander(gym.Env, EzPickle): - ### Action Space + ## Action Space There are four discrete actions available: do nothing, fire left orientation engine, fire main engine, fire right orientation engine. - ### Observation Space + ## Observation Space The state is an 8-dimensional vector: the coordinates of the lander in `x` & `y`, its linear velocities in `x` & `y`, its angle, its angular velocity, and two booleans that represent whether each leg is in contact with the ground or not. - ### Rewards + ## Rewards After every step a reward is granted. The total reward of an episode is the sum of the rewards for all the steps within that episode. @@ -120,11 +120,11 @@ class LunarLander(gym.Env, EzPickle): An episode is considered a solution if it scores at least 200 points. - ### Starting State + ## Starting State The lander starts at the top center of the viewport with a random initial force applied to its center of mass. - ### Episode Termination + ## Episode Termination The episode finishes if: 1) the lander crashes (the lander body gets in contact with the moon); 2) the lander gets outside of the viewport (`x` coordinate is greater than 1); @@ -137,7 +137,7 @@ class LunarLander(gym.Env, EzPickle): > wakes up. Bodies will also wake up if a joint or contact attached to > them is destroyed. - ### Arguments + ## Arguments To use to the _continuous_ environment, you need to specify the `continuous=True` argument like below: ```python @@ -172,16 +172,16 @@ class LunarLander(gym.Env, EzPickle): `wind_power` dictates the maximum magnitude of linear wind applied to the craft. The recommended value for `wind_power` is between 0.0 and 20.0. `turbulence_power` dictates the maximum magnitude of rotational wind applied to the craft. The recommended value for `turbulence_power` is between 0.0 and 2.0. - ### Version History + ## Version History - v2: Count energy spent and in v0.24, added turbulence with wind power and turbulence_power parameters - v1: Legs contact with ground added in state vector; contact with ground give +10 reward points, and -10 if then lose contact; reward renormalized to 200; harder initial random push. 
- v0: Initial version - + - ### Credits + ## Credits Created by Oleg Klimov """ diff --git a/gymnasium/envs/classic_control/acrobot.py b/gymnasium/envs/classic_control/acrobot.py index 147eb6c5f..fc335d068 100644 --- a/gymnasium/envs/classic_control/acrobot.py +++ b/gymnasium/envs/classic_control/acrobot.py @@ -26,7 +26,7 @@ __author__ = "Christoph Dann " class AcrobotEnv(Env): """ - ### Description + ## Description The Acrobot environment is based on Sutton's work in ["Generalization in Reinforcement Learning: Successful Examples Using Sparse Coarse Coding"](https://papers.nips.cc/paper/1995/hash/8f1d43620bc6bb580df6e80b0dc05c48-Abstract.html) @@ -41,7 +41,7 @@ class AcrobotEnv(Env): to reach the target height (black horizontal line above system) by applying torque on the actuator. - ### Action Space + ## Action Space The action is discrete, deterministic, and represents the torque applied on the actuated joint between the two links. @@ -52,7 +52,7 @@ class AcrobotEnv(Env): | 1 | apply 0 torque to the actuated joint | torque (N m) | | 2 | apply 1 torque to the actuated joint | torque (N m) | - ### Observation Space + ## Observation Space The observation is a `ndarray` with shape `(6,)` that provides information about the two rotational joint angles as well as their angular velocities: @@ -75,25 +75,25 @@ class AcrobotEnv(Env): The angular velocities of `theta1` and `theta2` are bounded at ±4π, and ±9π rad/s respectively. A state of `[1, 0, 1, 0, ..., ...]` indicates that both links are pointing downwards. - ### Rewards + ## Rewards The goal is to have the free end reach a designated target height in as few steps as possible, and as such all steps that do not reach the goal incur a reward of -1. Achieving the target height results in termination with a reward of 0. The reward threshold is -100. - ### Starting State + ## Starting State Each parameter in the underlying state (`theta1`, `theta2`, and the two angular velocities) is initialized uniformly between -0.1 and 0.1. This means both links are pointing downwards with some initial stochasticity. - ### Episode End + ## Episode End The episode ends if one of the following occurs: 1. Termination: The free end reaches the target height, which is constructed as: `-cos(theta1) - cos(theta2 + theta1) > 1.0` 2. Truncation: Episode length is greater than 500 (200 for v0) - ### Arguments + ## Arguments No additional arguments are currently supported during construction. @@ -125,14 +125,14 @@ class AcrobotEnv(Env): by setting `book_or_nips = 'nips'` - ### Version History + ## Version History - v1: Maximum number of steps increased from 200 to 500. The observation space for v0 provided direct readings of `theta1` and `theta2` in radians, having a range of `[-pi, pi]`. The v1 observation space as described here provides the sine and cosine of each angle instead. - v0: Initial versions release (1.0.0) (removed from gymnasium for v1) - ### References + ## References - Sutton, R. S. (1996). Generalization in Reinforcement Learning: Successful Examples Using Sparse Coarse Coding. In D. Touretzky, M. C. Mozer, & M. Hasselmo (Eds.), Advances in Neural Information Processing Systems (Vol. 8). MIT Press. 
https://proceedings.neurips.cc/paper/1995/file/8f1d43620bc6bb580df6e80b0dc05c48-Paper.pdf diff --git a/gymnasium/envs/classic_control/cartpole.py b/gymnasium/envs/classic_control/cartpole.py index 68f34677c..c173de7a7 100644 --- a/gymnasium/envs/classic_control/cartpole.py +++ b/gymnasium/envs/classic_control/cartpole.py @@ -16,7 +16,7 @@ from gymnasium.error import DependencyNotInstalled class CartPoleEnv(gym.Env[np.ndarray, Union[int, np.ndarray]]): """ - ### Description + ## Description This environment corresponds to the version of the cart-pole problem described by Barto, Sutton, and Anderson in ["Neuronlike Adaptive Elements That Can Solve Difficult Learning Control Problem"](https://ieeexplore.ieee.org/document/6313077). @@ -24,7 +24,7 @@ class CartPoleEnv(gym.Env[np.ndarray, Union[int, np.ndarray]]): The pendulum is placed upright on the cart and the goal is to balance the pole by applying forces in the left and right direction on the cart. - ### Action Space + ## Action Space The action is a `ndarray` with shape `(1,)` which can take values `{0, 1}` indicating the direction of the fixed force the cart is pushed with. @@ -37,7 +37,7 @@ class CartPoleEnv(gym.Env[np.ndarray, Union[int, np.ndarray]]): **Note**: The velocity that is reduced or increased by the applied force is not fixed and it depends on the angle the pole is pointing. The center of gravity of the pole varies the amount of energy needed to move the cart underneath it - ### Observation Space + ## Observation Space The observation is a `ndarray` with shape `(4,)` with the values corresponding to the following positions and velocities: @@ -55,16 +55,16 @@ class CartPoleEnv(gym.Env[np.ndarray, Union[int, np.ndarray]]): - The pole angle can be observed between `(-.418, .418)` radians (or **±24°**), but the episode terminates if the pole angle is not in the range `(-.2095, .2095)` (or **±12°**) - ### Rewards + ## Rewards Since the goal is to keep the pole upright for as long as possible, a reward of `+1` for every step taken, including the termination step, is allotted. The threshold for rewards is 475 for v1. - ### Starting State + ## Starting State All observations are assigned a uniformly random value in `(-0.05, 0.05)` - ### Episode End + ## Episode End The episode ends if any one of the following occurs: @@ -72,7 +72,7 @@ class CartPoleEnv(gym.Env[np.ndarray, Union[int, np.ndarray]]): 2. Termination: Cart Position is greater than ±2.4 (center of the cart reaches the edge of the display) 3. 
Truncation: Episode length is greater than 500 (200 for v0) - ### Arguments + ## Arguments ```python import gymnasium as gym diff --git a/gymnasium/envs/classic_control/continuous_mountain_car.py b/gymnasium/envs/classic_control/continuous_mountain_car.py index 97357538b..4d9460020 100644 --- a/gymnasium/envs/classic_control/continuous_mountain_car.py +++ b/gymnasium/envs/classic_control/continuous_mountain_car.py @@ -26,7 +26,7 @@ from gymnasium.error import DependencyNotInstalled class Continuous_MountainCarEnv(gym.Env): """ - ### Description + ## Description The Mountain Car MDP is a deterministic MDP that consists of a car placed stochastically at the bottom of a sinusoidal valley, with the only possible actions being the accelerations @@ -46,7 +46,7 @@ class Continuous_MountainCarEnv(gym.Env): } ``` - ### Observation Space + ## Observation Space The observation is a `ndarray` with shape `(2,)` where the elements correspond to the following: @@ -55,12 +55,12 @@ class Continuous_MountainCarEnv(gym.Env): | 0 | position of the car along the x-axis | -Inf | Inf | position (m) | | 1 | velocity of the car | -Inf | Inf | position (m) | - ### Action Space + ## Action Space The action is a `ndarray` with shape `(1,)`, representing the directional force applied on the car. The action is clipped in the range `[-1,1]` and multiplied by a power of 0.0015. - ### Transition Dynamics: + ## Transition Dynamics: Given an action, the mountain car follows the following transition dynamics: @@ -72,24 +72,24 @@ class Continuous_MountainCarEnv(gym.Env): The collisions at either end are inelastic with the velocity set to 0 upon collision with the wall. The position is clipped to the range [-1.2, 0.6] and velocity is clipped to the range [-0.07, 0.07]. - ### Reward + ## Reward A negative reward of *-0.1 * action2* is received at each timestep to penalise for taking actions of large magnitude. If the mountain car reaches the goal then a positive reward of +100 is added to the negative reward for that timestep. - ### Starting State + ## Starting State The position of the car is assigned a uniform random value in `[-0.6 , -0.4]`. The starting velocity of the car is always assigned to 0. - ### Episode End + ## Episode End The episode ends if either of the following happens: 1. Termination: The position of the car is greater than or equal to 0.45 (the goal position on top of the right hill) 2. Truncation: The length of the episode is 999. - ### Arguments + ## Arguments ```python import gymnasium as gym @@ -99,7 +99,7 @@ class Continuous_MountainCarEnv(gym.Env): On reset, the `options` parameter allows the user to change the bounds used to determine the new random state. 
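A worked sketch of the continuous Mountain Car per-step reward described above. This is a hypothetical helper, not the environment's own code: a `-0.1 * action²` penalty every step, plus `+100` once the goal position of 0.45 is reached.

```python
def continuous_mountain_car_reward(action: float, position: float) -> float:
    """Reward described above: quadratic action penalty plus a goal bonus."""
    reward = -0.1 * action ** 2
    if position >= 0.45:  # goal position on top of the right hill
        reward += 100.0
    return reward


print(continuous_mountain_car_reward(1.0, -0.5))  # -0.1, full throttle far from the goal
print(continuous_mountain_car_reward(0.0, 0.45))  # 100.0, reached the goal while coasting
```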
- ### Version History + ## Version History * v0: Initial versions release (1.0.0) """ diff --git a/gymnasium/envs/classic_control/mountain_car.py b/gymnasium/envs/classic_control/mountain_car.py index b725e6d88..3dabf5698 100644 --- a/gymnasium/envs/classic_control/mountain_car.py +++ b/gymnasium/envs/classic_control/mountain_car.py @@ -15,7 +15,7 @@ from gymnasium.error import DependencyNotInstalled class MountainCarEnv(gym.Env): """ - ### Description + ## Description The Mountain Car MDP is a deterministic MDP that consists of a car placed stochastically at the bottom of a sinusoidal valley, with the only possible actions being the accelerations @@ -35,7 +35,7 @@ class MountainCarEnv(gym.Env): } ``` - ### Observation Space + ## Observation Space The observation is a `ndarray` with shape `(2,)` where the elements correspond to the following: @@ -44,7 +44,7 @@ class MountainCarEnv(gym.Env): | 0 | position of the car along the x-axis | -Inf | Inf | position (m) | | 1 | velocity of the car | -Inf | Inf | position (m) | - ### Action Space + ## Action Space There are 3 discrete deterministic actions: @@ -54,7 +54,7 @@ class MountainCarEnv(gym.Env): | 1 | Don't accelerate | Inf | position (m) | | 2 | Accelerate to the right | Inf | position (m) | - ### Transition Dynamics: + ## Transition Dynamics: Given an action, the mountain car follows the following transition dynamics: @@ -66,24 +66,24 @@ class MountainCarEnv(gym.Env): upon collision with the wall. The position is clipped to the range `[-1.2, 0.6]` and velocity is clipped to the range `[-0.07, 0.07]`. - ### Reward: + ## Reward: The goal is to reach the flag placed on top of the right hill as quickly as possible, as such the agent is penalised with a reward of -1 for each timestep. - ### Starting State + ## Starting State The position of the car is assigned a uniform random value in *[-0.6 , -0.4]*. The starting velocity of the car is always assigned to 0. - ### Episode End + ## Episode End The episode ends if either of the following happens: 1. Termination: The position of the car is greater than or equal to 0.5 (the goal position on top of the right hill) 2. Truncation: The length of the episode is 200. - ### Arguments + ## Arguments ```python import gymnasium as gym @@ -93,7 +93,7 @@ class MountainCarEnv(gym.Env): On reset, the `options` parameter allows the user to change the bounds used to determine the new random state. - ### Version History + ## Version History * v0: Initial versions release (1.0.0) """ diff --git a/gymnasium/envs/classic_control/pendulum.py b/gymnasium/envs/classic_control/pendulum.py index aa18222ed..908ce2ec8 100644 --- a/gymnasium/envs/classic_control/pendulum.py +++ b/gymnasium/envs/classic_control/pendulum.py @@ -16,7 +16,7 @@ DEFAULT_Y = 1.0 class PendulumEnv(gym.Env): """ - ### Description + ## Description The inverted pendulum swingup problem is based on the classic problem in control theory. The system consists of a pendulum attached at one end to a fixed point, and the other end being free. @@ -26,13 +26,13 @@ class PendulumEnv(gym.Env): The diagram below specifies the coordinate system used for the implementation of the pendulum's dynamic equations. - ![Pendulum Coordinate System](./diagrams/pendulum.png) + ![Pendulum Coordinate System](/_static/diagrams/pendulum.png) - `x-y`: cartesian coordinates of the pendulum's end in meters. - `theta` : angle in radians. - `tau`: torque in `N m`. Defined as positive _counter-clockwise_. 
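A sketch of the discrete Mountain Car transition dynamics summarised above. The `FORCE` and `GRAVITY` constants are assumptions (0.001 and 0.0025 in the reference implementation); the clipping bounds and the inelastic wall collision come from the text.

```python
import numpy as np

FORCE, GRAVITY = 0.001, 0.0025  # assumed values, not quoted in the text above


def mountain_car_step(position, velocity, action):
    """Actions 0/1/2 accelerate left/none/right; gravity follows the sinusoidal
    valley; position and velocity are clipped to their documented ranges."""
    velocity += (action - 1) * FORCE + np.cos(3 * position) * (-GRAVITY)
    velocity = float(np.clip(velocity, -0.07, 0.07))
    position = float(np.clip(position + velocity, -1.2, 0.6))
    if position <= -1.2 and velocity < 0:
        velocity = 0.0  # inelastic collision with the left wall
    return position, velocity
```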
- ### Action Space + ## Action Space The action is a `ndarray` with shape `(1,)` representing the torque applied to free end of the pendulum. @@ -41,7 +41,7 @@ class PendulumEnv(gym.Env): | 0 | Torque | -2.0 | 2.0 | - ### Observation Space + ## Observation Space The observation is a `ndarray` with shape `(3,)` representing the x-y coordinates of the pendulum's free end and its angular velocity. @@ -52,7 +52,7 @@ class PendulumEnv(gym.Env): | 1 | y = sin(theta) | -1.0 | 1.0 | | 2 | Angular Velocity | -8.0 | 8.0 | - ### Rewards + ## Rewards The reward function is defined as: @@ -63,15 +63,15 @@ class PendulumEnv(gym.Env): *-(pi2 + 0.1 * 82 + 0.001 * 22) = -16.2736044*, while the maximum reward is zero (pendulum is upright with zero velocity and no torque applied). - ### Starting State + ## Starting State The starting state is a random angle in *[-pi, pi]* and a random angular velocity in *[-1,1]*. - ### Episode Truncation + ## Episode Truncation The episode truncates at 200 time steps. - ### Arguments + ## Arguments - `g`: acceleration of gravity measured in *(m s-2)* used to calculate the pendulum dynamics. The default value is g = 10.0 . @@ -84,7 +84,7 @@ class PendulumEnv(gym.Env): On reset, the `options` parameter allows the user to change the bounds used to determine the new random state. - ### Version History + ## Version History * v1: Simplify the math equations, no difference in behavior. * v0: Initial versions release (1.0.0) diff --git a/gymnasium/envs/mujoco/ant_v4.py b/gymnasium/envs/mujoco/ant_v4.py index d6e3f9a2d..64beb9267 100644 --- a/gymnasium/envs/mujoco/ant_v4.py +++ b/gymnasium/envs/mujoco/ant_v4.py @@ -11,7 +11,7 @@ DEFAULT_CAMERA_CONFIG = { class AntEnv(MujocoEnv, utils.EzPickle): """ - ### Description + ## Description This environment is based on the environment introduced by Schulman, Moritz, Levine, Jordan and Abbeel in ["High-Dimensional Continuous Control @@ -22,7 +22,7 @@ class AntEnv(MujocoEnv, utils.EzPickle): torques on the eight hinges connecting the two links of each leg and the torso (nine parts and eight hinges). - ### Action Space + ## Action Space The action space is a `Box(-1, 1, (8,), float32)`. An action represents the torques applied at the hinge joints. | Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit | @@ -36,7 +36,7 @@ class AntEnv(MujocoEnv, utils.EzPickle): | 6 | Torque applied on the rotor between the torso and back right hip | -1 | 1 | hip_4 (right_back_leg) | hinge | torque (N m) | | 7 | Torque applied on the rotor between the back right two links | -1 | 1 | angle_4 (right_back_leg) | hinge | torque (N m) | - ### Observation Space + ## Observation Space Observations consist of positional values of different body parts of the ant, followed by the velocities of those individual parts (their derivatives) with all @@ -98,7 +98,7 @@ class AntEnv(MujocoEnv, utils.EzPickle): when using the Ant environment if you would like to report results with contact forces (if contact forces are not used in your experiments, you can use version > 2.0). 
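Going back to the Pendulum reward described above, a worked sketch. The reward expression is reconstructed here from the quoted worst-case value and the standard formulation, so treat it as illustrative rather than the environment's exact code.

```python
import numpy as np


def pendulum_reward(theta, theta_dot, torque):
    """r = -(theta**2 + 0.1 * theta_dot**2 + 0.001 * torque**2),
    with theta normalised to [-pi, pi]."""
    theta = ((theta + np.pi) % (2 * np.pi)) - np.pi
    return -(theta ** 2 + 0.1 * theta_dot ** 2 + 0.001 * torque ** 2)


# Worst case: hanging straight down at maximum angular velocity and torque
print(pendulum_reward(np.pi, 8.0, 2.0))  # ≈ -16.2736044
# Best case: upright, motionless, no torque applied
print(pendulum_reward(0.0, 0.0, 0.0))    # 0.0
```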
- ### Rewards + ## Rewards The reward consists of three parts: - *healthy_reward*: Every timestep that the ant is healthy (see definition in section "Episode Termination"), it gets a reward of fixed value `healthy_reward` - *forward_reward*: A reward of moving forward which is measured as @@ -115,7 +115,7 @@ class AntEnv(MujocoEnv, utils.EzPickle): The total reward returned is ***reward*** *=* *healthy_reward + forward_reward - ctrl_cost - contact_cost* and `info` will also contain the individual reward terms. - ### Starting State + ## Starting State All observations start in state (0.0, 0.0, 0.75, 1.0, 0.0 ... 0.0) with a uniform noise in the range of [-`reset_noise_scale`, `reset_noise_scale`] added to the positional values and standard normal noise @@ -124,7 +124,7 @@ class AntEnv(MujocoEnv, utils.EzPickle): to be slightly high, thereby indicating a standing up ant. The initial orientation is designed to make it face forward as well. - ### Episode End + ## Episode End The ant is said to be unhealthy if any of the following happens: 1. Any of the state space values is no longer finite @@ -138,7 +138,7 @@ class AntEnv(MujocoEnv, utils.EzPickle): If `terminate_when_unhealthy=False` is passed, the episode is ended only when 1000 timesteps are exceeded. - ### Arguments + ## Arguments No additional arguments are currently supported in v2 and lower. @@ -166,7 +166,7 @@ class AntEnv(MujocoEnv, utils.EzPickle): | `reset_noise_scale` | **float** | `0.1` | Scale of random perturbations of initial position and velocity (see section on Starting State) | | `exclude_current_positions_from_observation`| **bool** | `True`| Whether or not to omit the x- and y-coordinates from observations. Excluding the position can serve as an inductive bias to induce position-agnostic behavior in policies | - ### Version History + ## Version History * v4: all mujoco environments now use the mujoco bindings in mujoco>=2.1.3 * v3: support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen) * v2: All continuous control environments now use mujoco_py >= 1.50 diff --git a/gymnasium/envs/mujoco/half_cheetah_v4.py b/gymnasium/envs/mujoco/half_cheetah_v4.py index d86443907..59652f865 100644 --- a/gymnasium/envs/mujoco/half_cheetah_v4.py +++ b/gymnasium/envs/mujoco/half_cheetah_v4.py @@ -13,7 +13,7 @@ DEFAULT_CAMERA_CONFIG = { class HalfCheetahEnv(MujocoEnv, utils.EzPickle): """ - ### Description + ## Description This environment is based on the work by P. Wawrzyński in ["A Cat-Like Robot Real-Time Learning to Run"](http://staff.elka.pw.edu.pl/~pwawrzyn/pub-s/0812_LSCLRR.pdf). @@ -26,7 +26,7 @@ class HalfCheetahEnv(MujocoEnv, utils.EzPickle): over the front and back thighs (connecting to the torso), shins (connecting to the thighs) and feet (connecting to the shins). - ### Action Space + ## Action Space The action space is a `Box(-1, 1, (6,), float32)`. An action represents the torques applied between *links*. 
| Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit | @@ -39,7 +39,7 @@ class HalfCheetahEnv(MujocoEnv, utils.EzPickle): | 5 | Torque applied on the front foot rotor | -1 | 1 | ffoot | hinge | torque (N m) | - ### Observation Space + ## Observation Space Observations consist of positional values of different body parts of the cheetah, followed by the velocities of those individual parts (their derivatives) with all the positions ordered before all the velocities. @@ -74,7 +74,7 @@ class HalfCheetahEnv(MujocoEnv, utils.EzPickle): | 15 | angular velocity of front tip | -Inf | Inf | fshin | hinge | angular velocity (rad/s) | | 16 | angular velocity of second rotor | -Inf | Inf | ffoot | hinge | angular velocity (rad/s) | - ### Rewards + ## Rewards The reward consists of two parts: - *forward_reward*: A reward of moving forward which is measured as *`forward_reward_weight` * (x-coordinate before action - x-coordinate after action)/dt*. *dt* is @@ -89,7 +89,7 @@ class HalfCheetahEnv(MujocoEnv, utils.EzPickle): The total reward returned is ***reward*** *=* *forward_reward - ctrl_cost* and `info` will also contain the individual reward terms - ### Starting State + ## Starting State All observations start in state (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,) with a noise added to the initial state for stochasticity. As seen before, the first 8 values in the @@ -98,10 +98,10 @@ class HalfCheetahEnv(MujocoEnv, utils.EzPickle): normal noise with a mean of 0 and standard deviation of `reset_noise_scale` is added to the initial velocity values of all zeros. - ### Episode End + ## Episode End The episode truncates when the episode length is greater than 1000. - ### Arguments + ## Arguments No additional arguments are currently supported in v2 and lower. @@ -125,7 +125,7 @@ class HalfCheetahEnv(MujocoEnv, utils.EzPickle): | `reset_noise_scale` | **float** | `0.1` | Scale of random perturbations of initial position and velocity (see section on Starting State) | | `exclude_current_positions_from_observation` | **bool** | `True` | Whether or not to omit the x-coordinate from observations. Excluding the position can serve as an inductive bias to induce position-agnostic behavior in policies | - ### Version History + ## Version History * v4: all mujoco environments now use the mujoco bindings in mujoco>=2.1.3 * v3: support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen) diff --git a/gymnasium/envs/mujoco/hopper_v4.py b/gymnasium/envs/mujoco/hopper_v4.py index 405d89272..e14c7ce3e 100644 --- a/gymnasium/envs/mujoco/hopper_v4.py +++ b/gymnasium/envs/mujoco/hopper_v4.py @@ -14,7 +14,7 @@ DEFAULT_CAMERA_CONFIG = { class HopperEnv(MujocoEnv, utils.EzPickle): """ - ### Description + ## Description This environment is based on the work done by Erez, Tassa, and Todorov in ["Infinite Horizon Model Predictive Control for Nonlinear Periodic Tasks"](http://www.roboticsproceedings.org/rss07/p10.pdf). The environment aims to @@ -26,7 +26,7 @@ class HopperEnv(MujocoEnv, utils.EzPickle): forward (right) direction by applying torques on the three hinges connecting the four body parts. - ### Action Space + ## Action Space The action space is a `Box(-1, 1, (3,), float32)`. 
An action represents the torques applied between *links* | Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit | @@ -35,7 +35,7 @@ class HopperEnv(MujocoEnv, utils.EzPickle): | 1 | Torque applied on the leg rotor | -1 | 1 | leg_joint | hinge | torque (N m) | | 3 | Torque applied on the foot rotor | -1 | 1 | foot_joint | hinge | torque (N m) | - ### Observation Space + ## Observation Space Observations consist of positional values of different body parts of the hopper, followed by the velocities of those individual parts @@ -66,7 +66,7 @@ class HopperEnv(MujocoEnv, utils.EzPickle): | 10 | angular velocity of the foot hinge | -Inf | Inf | foot_joint | hinge | angular velocity (rad/s) | - ### Rewards + ## Rewards The reward consists of three parts: - *healthy_reward*: Every timestep that the hopper is healthy (see definition in section "Episode Termination"), it gets a reward of fixed value `healthy_reward`. - *forward_reward*: A reward of hopping forward which is measured @@ -82,12 +82,12 @@ class HopperEnv(MujocoEnv, utils.EzPickle): The total reward returned is ***reward*** *=* *healthy_reward + forward_reward - ctrl_cost* and `info` will also contain the individual reward terms - ### Starting State + ## Starting State All observations start in state (0.0, 1.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0) with a uniform noise in the range of [-`reset_noise_scale`, `reset_noise_scale`] added to the values for stochasticity. - ### Episode End + ## Episode End The hopper is said to be unhealthy if any of the following happens: 1. An element of `observation[1:]` (if `exclude_current_positions_from_observation=True`, else `observation[2:]`) is no longer contained in the closed interval specified by the argument `healthy_state_range` @@ -102,7 +102,7 @@ class HopperEnv(MujocoEnv, utils.EzPickle): If `terminate_when_unhealthy=False` is passed, the episode is ended only when 1000 timesteps are exceeded. - ### Arguments + ## Arguments No additional arguments are currently supported in v2 and lower. @@ -131,7 +131,7 @@ class HopperEnv(MujocoEnv, utils.EzPickle): | `reset_noise_scale` | **float** | `5e-3` | Scale of random perturbations of initial position and velocity (see section on Starting State) | | `exclude_current_positions_from_observation` | **bool** | `True` | Whether or not to omit the x-coordinate from observations. Excluding the position can serve as an inductive bias to induce position-agnostic behavior in policies | - ### Version History + ## Version History * v4: all mujoco environments now use the mujoco bindings in mujoco>=2.1.3 * v3: support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen) diff --git a/gymnasium/envs/mujoco/humanoid_v4.py b/gymnasium/envs/mujoco/humanoid_v4.py index 7af4487d6..31cec5ba2 100644 --- a/gymnasium/envs/mujoco/humanoid_v4.py +++ b/gymnasium/envs/mujoco/humanoid_v4.py @@ -20,7 +20,7 @@ def mass_center(model, data): class HumanoidEnv(MujocoEnv, utils.EzPickle): """ - ### Description + ## Description This environment is based on the environment introduced by Tassa, Erez and Todorov in ["Synthesis and stabilization of complex behaviors through online trajectory optimization"](https://ieeexplore.ieee.org/document/6386025). @@ -28,7 +28,7 @@ class HumanoidEnv(MujocoEnv, utils.EzPickle): legs and arms. 
The legs each consist of two links, and so the arms (representing the knees and elbows respectively). The goal of the environment is to walk forward as fast as possible without falling over. - ### Action Space + ## Action Space The action space is a `Box(-1, 1, (17,), float32)`. An action represents the torques applied at the hinge joints. | Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit | @@ -51,7 +51,7 @@ class HumanoidEnv(MujocoEnv, utils.EzPickle): | 15 | Torque applied on the rotor between the torso and left upper arm (coordinate -2) | -0.4 | 0.4 | left_shoulder2 | hinge | torque (N m) | | 16 | Torque applied on the rotor between the left upper arm and left lower arm | -0.4 | 0.4 | left_elbow | hinge | torque (N m) | - ### Observation Space + ## Observation Space Observations consist of positional values of different body parts of the Humanoid, followed by the velocities of those individual parts (their derivatives) with all the @@ -139,7 +139,7 @@ class HumanoidEnv(MujocoEnv, utils.EzPickle): with contact forces (if contact forces are not used in your experiments, you can use version > 2.0). - ### Rewards + ## Rewards The reward consists of three parts: - *healthy_reward*: Every timestep that the humanoid is alive (see section Episode Termination for definition), it gets a reward of fixed value `healthy_reward` - *forward_reward*: A reward of walking forward which is measured as *`forward_reward_weight` * @@ -157,7 +157,7 @@ class HumanoidEnv(MujocoEnv, utils.EzPickle): The total reward returned is ***reward*** *=* *healthy_reward + forward_reward - ctrl_cost - contact_cost* and `info` will also contain the individual reward terms - ### Starting State + ## Starting State All observations start in state (0.0, 0.0, 1.4, 1.0, 0.0 ... 0.0) with a uniform noise in the range of [-`reset_noise_scale`, `reset_noise_scale`] added to the positional and velocity values (values in the table) @@ -165,7 +165,7 @@ class HumanoidEnv(MujocoEnv, utils.EzPickle): selected to be high, thereby indicating a standing up humanoid. The initial orientation is designed to make it face forward as well. - ### Episode End + ## Episode End The humanoid is said to be unhealthy if the z-position of the torso is no longer contained in the closed interval specified by the argument `healthy_z_range`. @@ -177,7 +177,7 @@ class HumanoidEnv(MujocoEnv, utils.EzPickle): If `terminate_when_unhealthy=False` is passed, the episode is ended only when 1000 timesteps are exceeded. - ### Arguments + ## Arguments No additional arguments are currently supported in v2 and lower. @@ -205,7 +205,7 @@ class HumanoidEnv(MujocoEnv, utils.EzPickle): | `reset_noise_scale` | **float** | `1e-2` | Scale of random perturbations of initial position and velocity (see section on Starting State) | | `exclude_current_positions_from_observation` | **bool** | `True` | Whether or not to omit the x- and y-coordinates from observations. Excluding the position can serve as an inductive bias to induce position-agnostic behavior in policies | - ### Version History + ## Version History * v4: all mujoco environments now use the mujoco bindings in mujoco>=2.1.3 * v3: support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. 
rgb rendering comes from tracking camera (so agent does not run away from screen) diff --git a/gymnasium/envs/mujoco/humanoidstandup_v4.py b/gymnasium/envs/mujoco/humanoidstandup_v4.py index 1ccb62215..efac91b74 100644 --- a/gymnasium/envs/mujoco/humanoidstandup_v4.py +++ b/gymnasium/envs/mujoco/humanoidstandup_v4.py @@ -7,7 +7,7 @@ from gymnasium.spaces import Box class HumanoidStandupEnv(MujocoEnv, utils.EzPickle): """ - ### Description + ## Description This environment is based on the environment introduced by Tassa, Erez and Todorov in ["Synthesis and stabilization of complex behaviors through online trajectory optimization"](https://ieeexplore.ieee.org/document/6386025). @@ -17,7 +17,7 @@ class HumanoidStandupEnv(MujocoEnv, utils.EzPickle): and then the goal of the environment is to make the humanoid standup and then keep it standing by applying torques on the various hinges. - ### Action Space + ## Action Space The agent take a 17-element vector for actions. The action space is a continuous `(action, ...)` all in `[-1, 1]`, where `action` @@ -43,7 +43,7 @@ class HumanoidStandupEnv(MujocoEnv, utils.EzPickle): | 15 | Torque applied on the rotor between the torso and left upper arm (coordinate -2) | -0.4 | 0.4 | left_shoulder2 | hinge | torque (N m) | | 16 | Torque applied on the rotor between the left upper arm and left lower arm | -0.4 | 0.4 | left_elbow | hinge | torque (N m) | - ### Observation Space + ## Observation Space The state space consists of positional values of different body parts of the Humanoid, followed by the velocities of those individual parts (their derivatives) with all the positions ordered before all the velocities. @@ -125,7 +125,7 @@ class HumanoidStandupEnv(MujocoEnv, utils.EzPickle): when using the Humanoid environment if you would like to report results with contact forces (if contact forces are not used in your experiments, you can use version > 2.0). - ### Rewards + ## Rewards The reward consists of three parts: - *uph_cost*: A reward for moving upward (in an attempt to stand up). This is not a relative reward which measures how much upward it has moved from the last timestep, but it is an @@ -143,7 +143,7 @@ class HumanoidStandupEnv(MujocoEnv, utils.EzPickle): The total reward returned is ***reward*** *=* *uph_cost + 1 - quad_ctrl_cost - quad_impact_cost* - ### Starting State + ## Starting State All observations start in state (0.0, 0.0, 0.105, 1.0, 0.0 ... 0.0) with a uniform noise in the range of [-0.01, 0.01] added to the positional and velocity values (values in the table) @@ -151,13 +151,13 @@ class HumanoidStandupEnv(MujocoEnv, utils.EzPickle): to be low, thereby indicating a laying down humanoid. The initial orientation is designed to make it face forward as well. - ### Episode End + ## Episode End The episode ends when any of the following happens: 1. Truncation: The episode duration reaches a 1000 timesteps 2. Termination: Any of the state space values is no longer finite - ### Arguments + ## Arguments No additional arguments are currently supported. @@ -174,7 +174,7 @@ class HumanoidStandupEnv(MujocoEnv, utils.EzPickle): env = gym.make('HumanoidStandup-v2') ``` - ### Version History + ## Version History * v4: all mujoco environments now use the mujoco bindings in mujoco>=2.1.3 * v3: support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. 
rgb rendering comes from tracking camera (so agent does not run away from screen) diff --git a/gymnasium/envs/mujoco/inverted_double_pendulum_v4.py b/gymnasium/envs/mujoco/inverted_double_pendulum_v4.py index 1834020d1..4b6bd491d 100644 --- a/gymnasium/envs/mujoco/inverted_double_pendulum_v4.py +++ b/gymnasium/envs/mujoco/inverted_double_pendulum_v4.py @@ -7,7 +7,7 @@ from gymnasium.spaces import Box class InvertedDoublePendulumEnv(MujocoEnv, utils.EzPickle): """ - ### Description + ## Description This environment originates from control theory and builds on the cartpole environment based on the work done by Barto, Sutton, and Anderson in @@ -19,7 +19,7 @@ class InvertedDoublePendulumEnv(MujocoEnv, utils.EzPickle): and the goal is to balance the second pole on top of the first pole, which is in turn on top of the cart, by applying continuous forces on the cart. - ### Action Space + ## Action Space The agent take a 1-element vector for actions. The action space is a continuous `(action)` in `[-1, 1]`, where `action` represents the numerical force applied to the cart (with magnitude representing the amount of force and @@ -29,7 +29,7 @@ class InvertedDoublePendulumEnv(MujocoEnv, utils.EzPickle): |-----|---------------------------|-------------|-------------|----------------------------------|-------|-----------| | 0 | Force applied on the cart | -1 | 1 | slider | slide | Force (N) | - ### Observation Space + ## Observation Space The state space consists of positional values of different body parts of the pendulum system, followed by the velocities of those individual parts (their derivatives) with all the @@ -64,7 +64,7 @@ class InvertedDoublePendulumEnv(MujocoEnv, utils.EzPickle): ["Analytically-invertible dynamics with contacts and constraints: Theory and implementation in MuJoCo"](https://homes.cs.washington.edu/~todorov/papers/TodorovICRA14.pdf). - ### Rewards + ## Rewards The reward consists of two parts: - *alive_bonus*: The goal is to make the second inverted pendulum stand upright @@ -79,20 +79,20 @@ class InvertedDoublePendulumEnv(MujocoEnv, utils.EzPickle): The total reward returned is ***reward*** *=* *alive_bonus - distance_penalty - velocity_penalty* - ### Starting State + ## Starting State All observations start in state (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0) with a uniform noise in the range of [-0.1, 0.1] added to the positional values (cart position and pole angles) and standard normal force with a standard deviation of 0.1 added to the velocity values for stochasticity. - ### Episode End + ## Episode End The episode ends when any of the following happens: 1.Truncation: The episode duration reaches 1000 timesteps. 2.Termination: Any of the state space values is no longer finite. 3.Termination: The y_coordinate of the tip of the second pole *is less than or equal* to 1. The maximum standing height of the system is 1.196 m when all the parts are perpendicularly vertical on top of each other). - ### Arguments + ## Arguments No additional arguments are currently supported. @@ -108,7 +108,7 @@ class InvertedDoublePendulumEnv(MujocoEnv, utils.EzPickle): env = gym.make('InvertedDoublePendulum-v2') ``` - ### Version History + ## Version History * v4: all mujoco environments now use the mujoco bindings in mujoco>=2.1.3 * v3: support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. 
rgb rendering comes from tracking camera (so agent does not run away from screen) diff --git a/gymnasium/envs/mujoco/inverted_pendulum_v4.py b/gymnasium/envs/mujoco/inverted_pendulum_v4.py index b4d9f3c12..046c13b12 100644 --- a/gymnasium/envs/mujoco/inverted_pendulum_v4.py +++ b/gymnasium/envs/mujoco/inverted_pendulum_v4.py @@ -7,7 +7,7 @@ from gymnasium.spaces import Box class InvertedPendulumEnv(MujocoEnv, utils.EzPickle): """ - ### Description + ## Description This environment is the cartpole environment based on the work done by Barto, Sutton, and Anderson in ["Neuronlike adaptive elements that can @@ -18,7 +18,7 @@ class InvertedPendulumEnv(MujocoEnv, utils.EzPickle): at one end and having another end free. The cart can be pushed left or right, and the goal is to balance the pole on the top of the cart by applying forces on the cart. - ### Action Space + ## Action Space The agent take a 1-element vector for actions. The action space is a continuous `(action)` in `[-3, 3]`, where `action` represents @@ -29,7 +29,7 @@ class InvertedPendulumEnv(MujocoEnv, utils.EzPickle): |-----|---------------------------|-------------|-------------|----------------------------------|-------|-----------| | 0 | Force applied on the cart | -3 | 3 | slider | slide | Force (N) | - ### Observation Space + ## Observation Space The state space consists of positional values of different body parts of the pendulum system, followed by the velocities of those individual parts (their derivatives) @@ -45,25 +45,25 @@ class InvertedPendulumEnv(MujocoEnv, utils.EzPickle): | 3 | angular velocity of the pole on the cart | -Inf | Inf | hinge | hinge | anglular velocity (rad/s) | - ### Rewards + ## Rewards The goal is to make the inverted pendulum stand upright (within a certain angle limit) as long as possible - as such a reward of +1 is awarded for each timestep that the pole is upright. - ### Starting State + ## Starting State All observations start in state (0.0, 0.0, 0.0, 0.0) with a uniform noise in the range of [-0.01, 0.01] added to the values for stochasticity. - ### Episode End + ## Episode End The episode ends when any of the following happens: 1. Truncation: The episode duration reaches 1000 timesteps. 2. Termination: Any of the state space values is no longer finite. 3. Termination: The absolute value of the vertical angle between the pole and the cart is greater than 0.2 radian. - ### Arguments + ## Arguments No additional arguments are currently supported. @@ -78,7 +78,7 @@ class InvertedPendulumEnv(MujocoEnv, utils.EzPickle): env = gym.make('InvertedPendulum-v2') ``` - ### Version History + ## Version History * v4: all mujoco environments now use the mujoco bindings in mujoco>=2.1.3 * v3: support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen) diff --git a/gymnasium/envs/mujoco/pusher_v4.py b/gymnasium/envs/mujoco/pusher_v4.py index e23288f2a..c94be3673 100644 --- a/gymnasium/envs/mujoco/pusher_v4.py +++ b/gymnasium/envs/mujoco/pusher_v4.py @@ -7,12 +7,12 @@ from gymnasium.spaces import Box class PusherEnv(MujocoEnv, utils.EzPickle): """ - ### Description + ## Description "Pusher" is a multi-jointed robot arm which is very similar to that of a human. The goal is to move a target cylinder (called *object*) to a goal position using the robot's end effector (called *fingertip*). The robot consists of shoulder, elbow, forearm, and wrist joints. 
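For the Inverted Pendulum termination conditions listed above, a small sketch; the index of the vertical pole angle in the observation is an assumption based on the observation table, not a documented guarantee.

```python
import numpy as np


def inverted_pendulum_terminated(observation):
    """Terminate when any state value is non-finite or the vertical pole angle
    (assumed to be observation[1]) exceeds 0.2 radians in magnitude."""
    return bool(not np.isfinite(observation).all() or abs(observation[1]) > 0.2)
```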
- ### Action Space + ## Action Space The action space is a `Box(-2, 2, (7,), float32)`. An action `(a, b)` represents the torques applied at the hinge joints. | Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit | @@ -25,7 +25,7 @@ class PusherEnv(MujocoEnv, utils.EzPickle): | 5 | Rotation of flexing the wrist | -2 | 2 | r_wrist_flex_joint | hinge | torque (N m) | | 6 | Rotation of rolling the wrist | -2 | 2 | r_wrist_roll_joint | hinge | torque (N m) | - ### Observation Space + ## Observation Space Observations consist of @@ -66,7 +66,7 @@ class PusherEnv(MujocoEnv, utils.EzPickle): | 22 | z-coordinate of the goal position of the object | -Inf | Inf | goal | sphere | position (m) | - ### Rewards + ## Rewards The reward consists of two parts: - *reward_near *: This reward is a measure of how far the *fingertip* of the pusher (the unattached end) is from the object, with a more negative @@ -88,7 +88,7 @@ class PusherEnv(MujocoEnv, utils.EzPickle): you should create a wrapper that computes the weighted reward from `info`. - ### Starting State + ## Starting State All pusher (not including object and goal) states start in (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0). A uniform noise in the range [-0.005, 0.005] is added to the velocity attributes only. The velocities of @@ -99,14 +99,14 @@ class PusherEnv(MujocoEnv, utils.EzPickle): The default framerate is 5 with each frame lasting for 0.01, giving rise to a *dt = 5 * 0.01 = 0.05* - ### Episode End + ## Episode End The episode ends when any of the following happens: 1. Truncation: The episode duration reaches a 100 timesteps. 2. Termination: Any of the state space values is no longer finite. - ### Arguments + ## Arguments No additional arguments are currently supported (in v2 and lower), but modifications can be made to the XML file in the assets folder @@ -125,7 +125,7 @@ class PusherEnv(MujocoEnv, utils.EzPickle): env = gym.make('Pusher-v2') ``` - ### Version History + ## Version History * v4: all mujoco environments now use the mujoco bindings in mujoco>=2.1.3 * v2: All continuous control environments now use mujoco_py >= 1.50 diff --git a/gymnasium/envs/mujoco/reacher_v4.py b/gymnasium/envs/mujoco/reacher_v4.py index 91869b737..aa7c74fce 100644 --- a/gymnasium/envs/mujoco/reacher_v4.py +++ b/gymnasium/envs/mujoco/reacher_v4.py @@ -7,11 +7,11 @@ from gymnasium.spaces import Box class ReacherEnv(MujocoEnv, utils.EzPickle): """ - ### Description + ## Description "Reacher" is a two-jointed robot arm. The goal is to move the robot's end effector (called *fingertip*) close to a target that is spawned at a random position. - ### Action Space + ## Action Space The action space is a `Box(-1, 1, (2,), float32)`. An action `(a, b)` represents the torques applied at the hinge joints. 
| Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit | @@ -19,7 +19,7 @@ class ReacherEnv(MujocoEnv, utils.EzPickle): | 0 | Torque applied at the first hinge (connecting the link to the point of fixture) | -1 | 1 | joint0 | hinge | torque (N m) | | 1 | Torque applied at the second hinge (connecting the two links) | -1 | 1 | joint1 | hinge | torque (N m) | - ### Observation Space + ## Observation Space Observations consist of @@ -60,7 +60,7 @@ class ReacherEnv(MujocoEnv, utils.EzPickle): | 3 | y-coordinate of the target | -Inf | Inf | target_y | slide | position (m) | - ### Rewards + ## Rewards The reward consists of two parts: - *reward_distance*: This reward is a measure of how far the *fingertip* of the reacher (the unattached end) is from the target, with a more negative @@ -78,7 +78,7 @@ class ReacherEnv(MujocoEnv, utils.EzPickle): you should create a wrapper that computes the weighted reward from `info`. - ### Starting State + ## Starting State All observations start in state (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0) with a noise added for stochasticity. A uniform noise in the range @@ -89,14 +89,14 @@ class ReacherEnv(MujocoEnv, utils.EzPickle): element ("fingertip" - "target") is calculated at the end once everything is set. The default setting has a framerate of 2 and a *dt = 2 * 0.01 = 0.02* - ### Episode End + ## Episode End The episode ends when any of the following happens: 1. Truncation: The episode duration reaches a 50 timesteps (with a new random target popping up if the reacher's fingertip reaches it before 50 timesteps) 2. Termination: Any of the state space values is no longer finite. - ### Arguments + ## Arguments No additional arguments are currently supported (in v2 and lower), but modifications can be made to the XML file in the assets folder @@ -110,7 +110,7 @@ class ReacherEnv(MujocoEnv, utils.EzPickle): There is no v3 for Reacher, unlike the robot environments where a v3 and beyond take `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. - ### Version History + ## Version History * v4: all mujoco environments now use the mujoco bindings in mujoco>=2.1.3 * v2: All continuous control environments now use mujoco_py >= 1.50 diff --git a/gymnasium/envs/mujoco/swimmer_v4.py b/gymnasium/envs/mujoco/swimmer_v4.py index 820616a48..0d3e62304 100644 --- a/gymnasium/envs/mujoco/swimmer_v4.py +++ b/gymnasium/envs/mujoco/swimmer_v4.py @@ -11,7 +11,7 @@ DEFAULT_CAMERA_CONFIG = {} class SwimmerEnv(MujocoEnv, utils.EzPickle): """ - ### Description + ## Description This environment corresponds to the Swimmer environment described in Rémi Coulom's PhD thesis ["Reinforcement Learning Using Neural Networks, with Applications to Motor Control"](https://tel.archives-ouvertes.fr/tel-00003985/document). @@ -24,7 +24,7 @@ class SwimmerEnv(MujocoEnv, utils.EzPickle): uniform distribution), and the goal is to move as fast as possible towards the right by applying torque on the rotors and using the fluids friction. - ### Notes + ## Notes The problem parameters are: Problem parameters: @@ -37,7 +37,7 @@ class SwimmerEnv(MujocoEnv, utils.EzPickle): and *k* = 0.1. It is possible to pass a custom MuJoCo XML file during construction to increase the number of links, or to tweak any of the parameters. - ### Action Space + ## Action Space The action space is a `Box(-1, 1, (2,), float32)`. 
An action represents the torques applied between *links* | Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit | @@ -45,7 +45,7 @@ class SwimmerEnv(MujocoEnv, utils.EzPickle): | 0 | Torque applied on the first rotor | -1 | 1 | motor1_rot | hinge | torque (N m) | | 1 | Torque applied on the second rotor | -1 | 1 | motor2_rot | hinge | torque (N m) | - ### Observation Space + ## Observation Space By default, observations consists of: * θi: angle of part *i* with respect to the *x* axis @@ -71,7 +71,7 @@ class SwimmerEnv(MujocoEnv, utils.EzPickle): | 6 | angular velocity of first rotor | -Inf | Inf | motor1_rot | hinge | angular velocity (rad/s) | | 7 | angular velocity of second rotor | -Inf | Inf | motor2_rot | hinge | angular velocity (rad/s) | - ### Rewards + ## Rewards The reward consists of two parts: - *forward_reward*: A reward of moving forward which is measured as *`forward_reward_weight` * (x-coordinate before action - x-coordinate after action)/dt*. *dt* is @@ -86,13 +86,13 @@ class SwimmerEnv(MujocoEnv, utils.EzPickle): The total reward returned is ***reward*** *=* *forward_reward - ctrl_cost* and `info` will also contain the individual reward terms - ### Starting State + ## Starting State All observations start in state (0,0,0,0,0,0,0,0) with a Uniform noise in the range of [-`reset_noise_scale`, `reset_noise_scale`] is added to the initial state for stochasticity. - ### Episode End + ## Episode End The episode truncates when the episode length is greater than 1000. - ### Arguments + ## Arguments No additional arguments are currently supported in v2 and lower. @@ -117,7 +117,7 @@ class SwimmerEnv(MujocoEnv, utils.EzPickle): | `exclude_current_positions_from_observation` | **bool** | `True` | Whether or not to omit the x- and y-coordinates from observations. Excluding the position can serve as an inductive bias to induce position-agnostic behavior in policies | - ### Version History + ## Version History * v4: all mujoco environments now use the mujoco bindings in mujoco>=2.1.3 * v3: support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen) diff --git a/gymnasium/envs/mujoco/walker2d_v4.py b/gymnasium/envs/mujoco/walker2d_v4.py index 8a613a707..83d8e81a5 100644 --- a/gymnasium/envs/mujoco/walker2d_v4.py +++ b/gymnasium/envs/mujoco/walker2d_v4.py @@ -14,7 +14,7 @@ DEFAULT_CAMERA_CONFIG = { class Walker2dEnv(MujocoEnv, utils.EzPickle): """ - ### Description + ## Description This environment builds on the hopper environment based on the work done by Erez, Tassa, and Todorov in ["Infinite Horizon Model Predictive Control for Nonlinear Periodic Tasks"](http://www.roboticsproceedings.org/rss07/p10.pdf) @@ -27,7 +27,7 @@ class Walker2dEnv(MujocoEnv, utils.EzPickle): The goal is to make coordinate both sets of feet, legs, and thighs to move in the forward (right) direction by applying torques on the six hinges connecting the six body parts. - ### Action Space + ## Action Space The action space is a `Box(-1, 1, (6,), float32)`. An action represents the torques applied at the hinge joints. 
| Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit | @@ -39,7 +39,7 @@ class Walker2dEnv(MujocoEnv, utils.EzPickle): | 4 | Torque applied on the left leg rotor | -1 | 1 | leg_left_joint | hinge | torque (N m) | | 5 | Torque applied on the left foot rotor | -1 | 1 | foot_left_joint | hinge | torque (N m) | - ### Observation Space + ## Observation Space Observations consist of positional values of different body parts of the walker, followed by the velocities of those individual parts (their derivatives) with all the positions ordered before all the velocities. @@ -72,7 +72,7 @@ class Walker2dEnv(MujocoEnv, utils.EzPickle): | 14 | angular velocity of the thigh hinge | -Inf | Inf | thigh_left_joint | hinge | angular velocity (rad/s) | | 15 | angular velocity of the leg hinge | -Inf | Inf | leg_left_joint | hinge | angular velocity (rad/s) | | 16 | angular velocity of the foot hinge | -Inf | Inf | foot_left_joint | hinge | angular velocity (rad/s) | - ### Rewards + ## Rewards The reward consists of three parts: - *healthy_reward*: Every timestep that the walker is alive, it receives a fixed reward of value `healthy_reward`, - *forward_reward*: A reward of walking forward which is measured as @@ -87,12 +87,12 @@ class Walker2dEnv(MujocoEnv, utils.EzPickle): The total reward returned is ***reward*** *=* *healthy_reward bonus + forward_reward - ctrl_cost* and `info` will also contain the individual reward terms - ### Starting State + ## Starting State All observations start in state (0.0, 1.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0) with a uniform noise in the range of [-`reset_noise_scale`, `reset_noise_scale`] added to the values for stochasticity. - ### Episode End + ## Episode End The walker is said to be unhealthy if any of the following happens: 1. Any of the state space values is no longer finite @@ -107,7 +107,7 @@ class Walker2dEnv(MujocoEnv, utils.EzPickle): If `terminate_when_unhealthy=False` is passed, the episode is ended only when 1000 timesteps are exceeded. - ### Arguments + ## Arguments No additional arguments are currently supported in v2 and lower. @@ -136,7 +136,7 @@ class Walker2dEnv(MujocoEnv, utils.EzPickle): | `exclude_current_positions_from_observation` | **bool** | `True` | Whether or not to omit the x-coordinate from observations. Excluding the position can serve as an inductive bias to induce position-agnostic behavior in policies | - ### Version History + ## Version History * v4: all mujoco environments now use the mujoco bindings in mujoco>=2.1.3 * v3: support for `gymnasium.make` kwargs such as `xml_file`, `ctrl_cost_weight`, `reset_noise_scale`, etc. rgb rendering comes from tracking camera (so agent does not run away from screen) diff --git a/gymnasium/envs/toy_text/blackjack.py b/gymnasium/envs/toy_text/blackjack.py index 2d89119e3..4d15b038e 100644 --- a/gymnasium/envs/toy_text/blackjack.py +++ b/gymnasium/envs/toy_text/blackjack.py @@ -51,7 +51,7 @@ class BlackjackEnv(gym.Env): Blackjack is a card game where the goal is to beat the dealer by obtaining cards that sum to closer to 21 (without going over 21) than the dealers cards. - ### Description + ## Description Card Values: - Face cards (Jack, Queen, King) have a point value of 10. @@ -69,10 +69,10 @@ class BlackjackEnv(gym.Env): If neither the player nor the dealer busts, the outcome (win, lose, draw) is decided by whose sum is closer to 21. - ### Action Space + ## Action Space There are two actions: stick (0), and hit (1). 
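A minimal usage sketch of the stick/hit interface described in the Blackjack docstring above, assuming the standard `Blackjack-v1` registration and default arguments (both are assumptions, not part of this patch):

```python
import gymnasium as gym

# Usage sketch only: the "Blackjack-v1" id and its default arguments are assumptions.
env = gym.make("Blackjack-v1")
obs, info = env.reset(seed=42)

terminated = truncated = False
while not (terminated or truncated):
    # Naive fixed policy: hit (1) while the player's sum is below 17, else stick (0).
    action = 1 if obs[0] < 17 else 0
    obs, reward, terminated, truncated, info = env.step(action)

env.close()
```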
- ### Observation Space + ## Observation Space The observation consists of a 3-tuple containing: the player's current sum, the value of the dealer's one showing card (1-10 where 1 is ace), and whether the player holds a usable ace (0 or 1). @@ -81,7 +81,7 @@ class BlackjackEnv(gym.Env): described in Example 5.1 in Reinforcement Learning: An Introduction by Sutton and Barto (http://incompleteideas.net/book/the-book-2nd.html). - ### Rewards + ## Rewards - win game: +1 - lose game: -1 - draw game: 0 @@ -91,7 +91,7 @@ class BlackjackEnv(gym.Env): +1 (if natural is False) - ### Arguments + ## Arguments ```python import gymnasium as gym @@ -107,7 +107,7 @@ class BlackjackEnv(gym.Env): will win (i.e. get a reward of +1). The reverse rule does not apply. If both the player and the dealer get a natural, it will be a draw (i.e. reward 0). - ### Version History + ## Version History * v0: Initial versions release (1.0.0) """ diff --git a/gymnasium/envs/toy_text/cliffwalking.py b/gymnasium/envs/toy_text/cliffwalking.py index 0476ccc8f..0c2686812 100644 --- a/gymnasium/envs/toy_text/cliffwalking.py +++ b/gymnasium/envs/toy_text/cliffwalking.py @@ -27,7 +27,7 @@ class CliffWalkingEnv(Env): With inspiration from: [https://github.com/dennybritz/reinforcement-learning/blob/master/lib/envs/cliff_walking.py](https://github.com/dennybritz/reinforcement-learning/blob/master/lib/envs/cliff_walking.py) - ### Description + ## Description The board is a 4x12 matrix, with (using NumPy matrix indexing): - [3, 0] as the start at bottom-left - [3, 11] as the goal at bottom-right @@ -36,30 +36,30 @@ class CliffWalkingEnv(Env): If the agent steps on the cliff, it returns to the start. An episode terminates when the agent reaches the goal. - ### Actions + ## Actions There are 4 discrete deterministic actions: - 0: move up - 1: move right - 2: move down - 3: move left - ### Observations + ## Observations There are 3x12 + 1 possible states. In fact, the agent cannot be at the cliff, nor at the goal (as this results in the end of the episode). It remains all the positions of the first 3 rows plus the bottom-left cell. The observation is simply the current position encoded as [flattened index](https://numpy.org/doc/stable/reference/generated/numpy.unravel_index.html). - ### Reward + ## Reward Each time step incurs -1 reward, and stepping into the cliff incurs -100 reward. - ### Arguments + ## Arguments ```python import gymnasium as gym gym.make('CliffWalking-v0') ``` - ### Version History + ## Version History - v0: Initial version release """ diff --git a/gymnasium/envs/toy_text/frozen_lake.py b/gymnasium/envs/toy_text/frozen_lake.py index 60e3a7000..9993330e4 100644 --- a/gymnasium/envs/toy_text/frozen_lake.py +++ b/gymnasium/envs/toy_text/frozen_lake.py @@ -80,7 +80,7 @@ class FrozenLakeEnv(Env): The agent may not always move in the intended direction due to the slippery nature of the frozen lake. - ### Action Space + ## Action Space The agent takes a 1-element vector for actions. The action space is `(dir)`, where `dir` decides direction to move in which can be: @@ -89,21 +89,21 @@ class FrozenLakeEnv(Env): - 2: RIGHT - 3: UP - ### Observation Space + ## Observation Space The observation is a value representing the agent's current position as current_row * nrows + current_col (where both the row and col start at 0). For example, the goal position in the 4x4 map can be calculated as follows: 3 * 4 + 3 = 15. The number of possible observations is dependent on the size of the map. 
For example, the 4x4 map has 16 possible observations. - ### Rewards + ## Rewards Reward schedule: - Reach goal(G): +1 - Reach hole(H): 0 - Reach frozen(F): 0 - ### Arguments + ## Arguments ```python import gymnasium as gym @@ -151,7 +151,7 @@ class FrozenLakeEnv(Env): - P(move up)=1/3 - P(move down)=1/3 - ### Version History + ## Version History * v1: Bug fixes to rewards * v0: Initial versions release (1.0.0) """ diff --git a/gymnasium/envs/toy_text/taxi.py b/gymnasium/envs/toy_text/taxi.py index 66697d059..a8146aafd 100644 --- a/gymnasium/envs/toy_text/taxi.py +++ b/gymnasium/envs/toy_text/taxi.py @@ -29,7 +29,7 @@ class TaxiEnv(Env): from "Hierarchical Reinforcement Learning with the MAXQ Value Function Decomposition" by Tom Dietterich - ### Description + ## Description There are four designated locations in the grid world indicated by R(ed), G(reen), Y(ellow), and B(lue). When the episode starts, the taxi starts off at a random square and the passenger is at a random location. The taxi @@ -47,7 +47,7 @@ class TaxiEnv(Env): |Y| : |B: | +---------+ - ### Actions + ## Actions There are 6 discrete deterministic actions: - 0: move south - 1: move north @@ -56,7 +56,7 @@ class TaxiEnv(Env): - 4: pickup passenger - 5: drop off passenger - ### Observations + ## Observations There are 500 discrete states since there are 25 taxi positions, 5 possible locations of the passenger (including the case when the passenger is in the taxi), and 4 destination locations. @@ -87,7 +87,7 @@ class TaxiEnv(Env): - 2: Y(ellow) - 3: B(lue) - ### Info + ## Info ``step`` and ``reset()`` will return an info dictionary that contains "p" and "action_mask" containing the probability that the state is taken and a mask of what actions will result in a change of state to speed up training. @@ -103,19 +103,19 @@ class TaxiEnv(Env): To sample a modifying action, use ``action = env.action_space.sample(info["action_mask"])`` Or with a Q-value based algorithm ``action = np.argmax(q_values[obs, np.where(info["action_mask"] == 1)[0]])``. - ### Rewards + ## Rewards - -1 per step unless other reward is triggered. - +20 delivering passenger. - -10 executing "pickup" and "drop-off" actions illegally. - ### Arguments + ## Arguments ```python import gymnasium as gym gym.make('Taxi-v3') ``` - ### Version History + ## Version History * v3: Map Correction + Cleaner Domain Description, v0.25.0 action masking added to the reset and step information * v2: Disallow Taxi start location = goal location, Update Taxi observations in the rollout, Update Taxi reward threshold. * v1: Remove (3,2) from locs, add passidx<4 check diff --git a/gymnasium/spaces/graph.py b/gymnasium/spaces/graph.py index 3e8a80b57..cd1d09df8 100644 --- a/gymnasium/spaces/graph.py +++ b/gymnasium/spaces/graph.py @@ -108,10 +108,10 @@ class Graph(Space): (Box spaces don't support sample masks). If no `num_edges` is provided then the `edge_mask` is multiplied by the number of edges num_nodes: The number of nodes that will be sampled, the default is 10 nodes - num_edges: An optional number of edges, otherwise, a random number between 0 and `num_nodes`^2 + num_edges: An optional number of edges, otherwise, a random number between 0 and `num_nodes` ^ 2 Returns: - A NamedTuple representing a graph with attributes .nodes, .edges, and .edge_links. + A :class:`GraphInstance` with attributes `.nodes`, `.edges`, and `.edge_links`. 
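To illustrate the `Graph.sample` return type documented above, a sketch of constructing a `Graph` space and inspecting the resulting `GraphInstance`; the particular node and edge spaces below are illustrative assumptions, not part of the patch:

```python
from gymnasium.spaces import Box, Discrete, Graph

# Illustrative space: 3-dimensional node features, 4 possible edge labels.
space = Graph(node_space=Box(low=-1.0, high=1.0, shape=(3,)), edge_space=Discrete(4))

graph = space.sample(num_nodes=5, num_edges=8)  # returns a GraphInstance
print(graph.nodes.shape)       # expected (5, 3): one Box sample per node
print(graph.edges.shape)       # expected (8,): one Discrete label per edge
print(graph.edge_links.shape)  # expected (8, 2): (from, to) node indices per edge
```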
""" assert ( num_nodes > 0 diff --git a/gymnasium/utils/play.py b/gymnasium/utils/play.py index 126953f2f..c171323db 100644 --- a/gymnasium/utils/play.py +++ b/gymnasium/utils/play.py @@ -161,7 +161,6 @@ def play( ... "as": np.array([-1, 0, 1]), ... }, noop=np.array([0,0,0])) - Above code works also if the environment is wrapped, so it's particularly useful in verifying that the frame-level preprocessing does not render the game unplayable. @@ -176,7 +175,6 @@ def play( >>> plotter = PlayPlot(callback, 150, ["reward"]) >>> play(gym.make("CartPole-v1"), callback=plotter.callback) - Args: env: Environment to use for playing. transpose: If this is ``True``, the output of observation is transposed. Defaults to ``True``. @@ -197,23 +195,29 @@ def play( one key. For example if pressing 'w' and space at the same time is supposed to trigger action number 2 then ``key_to_action`` dict could look like this: + >>> { ... # ... ... (ord('w'), ord(' ')): 2 ... # ... ... } + or like this: + >>> { ... # ... ... ("w", " "): 2 ... # ... ... } + or like this: + >>> { ... # ... ... "w ": 2 ... # ... ... } + If ``None``, default ``key_to_action`` mapping for that environment is used, if provided. seed: Random seed used when resetting the environment. If None, no seed is used. noop: The action used when no key input has been entered, or the entered key combination is unknown. diff --git a/gymnasium/vector/__init__.py b/gymnasium/vector/__init__.py index c28a53b7d..45613a26a 100644 --- a/gymnasium/vector/__init__.py +++ b/gymnasium/vector/__init__.py @@ -32,7 +32,7 @@ def make( Args: id: The environment ID. This must be a valid ID from the registry. num_envs: Number of copies of the environment. - asynchronous: If `True`, wraps the environments in an :class:`AsyncVectorEnv` (which uses `multiprocessing`_ to run the environments in parallel). If ``False``, wraps the environments in a :class:`SyncVectorEnv`. + asynchronous: If `True`, wraps the environments in an :class:`AsyncVectorEnv` (which uses `multiprocessing` to run the environments in parallel). If ``False``, wraps the environments in a :class:`SyncVectorEnv`. wrappers: If not ``None``, then apply the wrappers to each internal environment during creation. disable_env_checker: If to run the env checker for the first environment only. None will default to the environment spec `disable_env_checker` parameter (that is by default False), otherwise will run according to this argument (True = not run, False = run) diff --git a/gymnasium/vector/vector_env.py b/gymnasium/vector/vector_env.py index 344f6bf23..96c142532 100644 --- a/gymnasium/vector/vector_env.py +++ b/gymnasium/vector/vector_env.py @@ -10,14 +10,38 @@ __all__ = ["VectorEnv"] class VectorEnv(gym.Env): - """Base class for vectorized environments. Runs multiple independent copies of the same environment in parallel. + """Base class for vectorized environments to run multiple independent copies of the same environment in parallel. - This is not the same as 1 environment that has multiple subcomponents, but it is many copies of the same base env. + Vector environments can provide a linear speed-up in the steps taken per second through sampling multiple + sub-environments at the same time. To prevent terminated environments waiting until all sub-environments have + terminated or truncated, the vector environments autoreset sub-environments after they terminate or truncated. + As a result, the final step's observation and info are overwritten by the reset's observation and info. 
+ Therefore, the observation and info for the final step of a sub-environment is stored in the info parameter, + using `"final_observation"` and `"final_info"` respectively. See :meth:`step` for more information. - Each observation returned from vectorized environment is a batch of observations for each parallel environment. - And :meth:`step` is also expected to receive a batch of actions for each parallel environment. + The vector environments batch `observations`, `rewards`, `terminations`, `truncations` and `info` for each + parallel environment. In addition, :meth:`step` expects to receive a batch of actions for each parallel environment. - Notes: + Gymnasium contains two types of Vector environments: :class:`AsyncVectorEnv` and :class:`SyncVectorEnv`. + + The Vector Environments have the additional attributes for users to understand the implementation + + - :attr:`num_envs` - The number of sub-environment in the vector environment + - :attr:`observation_space` - The batched observation space of the vector environment + - :attr:`single_observation_space` - The observation space of a single sub-environment + - :attr:`action_space` - The batched action space of the vector environment + - :attr:`single_action_space` - The action space of a single sub-environment + + Note: + The info parameter of :meth:`reset` and :meth:`step` was originally implemented before OpenAI Gym v25 was a list + of dictionary for each sub-environment. However, this was modified in OpenAI Gym v25+ and in Gymnasium to a + dictionary with a NumPy array for each key. To use the old info style using the :class:`VectorListInfo`. + + Note: + To render the sub-environments, use :meth:`call` with "render" arguments. Remember to set the `render_modes` + for all the sub-environments during initialization. + + Note: All parallel environments should share the identical observation and action spaces. In other words, a vector of multiple different environments is not supported. """ @@ -91,14 +115,24 @@ class VectorEnv(gym.Env): seed: Optional[Union[int, List[int]]] = None, options: Optional[dict] = None, ): - """Reset all parallel environments and return a batch of initial observations. + """Reset all parallel environments and return a batch of initial observations and info. Args: seed: The environment reset seeds options: If to return the options Returns: - A batch of observations from the vectorized environment. + A batch of observations and info from the vectorized environment. + + An example:: + + >>> import gymnasium as gym + >>> envs = gym.vector.make("CartPole-v1", num_envs=3) + >>> envs.reset() + (array([[-0.02240574, -0.03439831, -0.03904812, 0.02810693], + [ 0.01586068, 0.01929009, 0.02394426, 0.04016077], + [-0.01314174, 0.03893502, -0.02400815, 0.0038326 ]], + dtype=float32), {}) """ self.reset_async(seed=seed, options=options) return self.reset_wait(seed=seed, options=options) @@ -131,7 +165,33 @@ class VectorEnv(gym.Env): actions: element of :attr:`action_space` Batch of actions. Returns: - Batch of (observations, rewards, terminated, truncated, infos) or (observations, rewards, dones, infos) + Batch of (observations, rewards, terminations, truncations, infos) + + Note: + As the vector environments autoreset for a terminating and truncating sub-environments, + the returned observation and info is not the final step's observation or info which is instead stored in + info as `"final_observation"` and `"final_info"`. 
+ + An example:: + + >>> envs = gym.vector.make("CartPole-v1", num_envs=3) + >>> envs.reset() + >>> actions = np.array([1, 0, 1]) + >>> observations, rewards, termination, truncation, infos = envs.step(actions) + + >>> observations + array([[ 0.00122802, 0.16228443, 0.02521779, -0.23700266], + [ 0.00788269, -0.17490888, 0.03393489, 0.31735462], + [ 0.04918966, 0.19421194, 0.02938497, -0.29495203]], + dtype=float32) + >>> rewards + array([1., 1., 1.]) + >>> termination + array([False, False, False]) + >>> termination + array([False, False, False]) + >>> infos + {} """ self.step_async(actions) return self.step_wait() @@ -192,7 +252,7 @@ class VectorEnv(gym.Env): in :meth:`close_extras`. This is generic for both synchronous and asynchronous vectorized environments. - Notes: + Note: This will be automatically called when garbage collected or program exited. Args: @@ -281,7 +341,7 @@ class VectorEnvWrapper(VectorEnv): could override some methods to change the behavior of the original vectorized environment without touching the original code. - Notes: + Note: Don't forget to call ``super().__init__(env)`` if the subclass overrides :meth:`__init__`. """ diff --git a/gymnasium/wrappers/__init__.py b/gymnasium/wrappers/__init__.py index 1ff067f3c..152dc4a22 100644 --- a/gymnasium/wrappers/__init__.py +++ b/gymnasium/wrappers/__init__.py @@ -1,8 +1,9 @@ """Module of wrapper classes.""" -from gymnasium import error from gymnasium.wrappers.atari_preprocessing import AtariPreprocessing from gymnasium.wrappers.autoreset import AutoResetWrapper from gymnasium.wrappers.clip_action import ClipAction +from gymnasium.wrappers.compatibility import EnvCompatibility +from gymnasium.wrappers.env_checker import PassiveEnvChecker from gymnasium.wrappers.filter_observation import FilterObservation from gymnasium.wrappers.flatten_observation import FlattenObservation from gymnasium.wrappers.frame_stack import FrameStack, LazyFrames @@ -10,6 +11,7 @@ from gymnasium.wrappers.gray_scale_observation import GrayScaleObservation from gymnasium.wrappers.human_rendering import HumanRendering from gymnasium.wrappers.normalize import NormalizeObservation, NormalizeReward from gymnasium.wrappers.order_enforcing import OrderEnforcing +from gymnasium.wrappers.pixel_observation import PixelObservationWrapper from gymnasium.wrappers.record_episode_statistics import RecordEpisodeStatistics from gymnasium.wrappers.record_video import RecordVideo, capped_cubic_video_schedule from gymnasium.wrappers.render_collection import RenderCollection diff --git a/gymnasium/wrappers/atari_preprocessing.py b/gymnasium/wrappers/atari_preprocessing.py index 779b3a25b..d4768c126 100644 --- a/gymnasium/wrappers/atari_preprocessing.py +++ b/gymnasium/wrappers/atari_preprocessing.py @@ -17,6 +17,7 @@ class AtariPreprocessing(gym.Wrapper): "Revisiting the Arcade Learning Environment: Evaluation Protocols and Open Problems for General Agents". Specifically, the following preprocess stages applies to the atari environment: + - Noop Reset: Obtains the initial state by taking a random number of no-ops on reset, default max 30 no-ops. 
- Frame skipping: The number of frames skipped between steps, 4 by default - Max-pooling: Pools over the most recent two observations from the frame skips diff --git a/gymnasium/wrappers/autoreset.py b/gymnasium/wrappers/autoreset.py index 3f5f99ca1..ca7f4b179 100644 --- a/gymnasium/wrappers/autoreset.py +++ b/gymnasium/wrappers/autoreset.py @@ -8,6 +8,7 @@ class AutoResetWrapper(gym.Wrapper): When calling step causes :meth:`Env.step` to return `terminated=True` or `truncated=True`, :meth:`Env.reset` is called, and the return format of :meth:`self.step` is as follows: ``(new_obs, final_reward, final_terminated, final_truncated, info)`` with new step API and ``(new_obs, final_reward, final_done, info)`` with the old step API. + - ``new_obs`` is the first observation after calling :meth:`self.env.reset` - ``final_reward`` is the reward after calling :meth:`self.env.step`, prior to calling :meth:`self.env.reset`. - ``final_terminated`` is the terminated value before calling :meth:`self.env.reset`. diff --git a/gymnasium/wrappers/frame_stack.py b/gymnasium/wrappers/frame_stack.py index c0c48f0f0..14b039c0e 100644 --- a/gymnasium/wrappers/frame_stack.py +++ b/gymnasium/wrappers/frame_stack.py @@ -109,7 +109,8 @@ class FrameStack(gym.ObservationWrapper): - To be memory efficient, the stacked observations are wrapped by :class:`LazyFrame`. - The observation space must be :class:`Box` type. If one uses :class:`Dict` as observation space, it should apply :class:`FlattenObservation` wrapper first. - - After :meth:`reset` is called, the frame buffer will be filled with the initial observation. I.e. the observation returned by :meth:`reset` will consist of ``num_stack`-many identical frames, + - After :meth:`reset` is called, the frame buffer will be filled with the initial observation. + I.e. the observation returned by :meth:`reset` will consist of `num_stack` many identical frames. Example: >>> import gymnasium as gym diff --git a/gymnasium/wrappers/pixel_observation.py b/gymnasium/wrappers/pixel_observation.py index cf0d43cb5..0447486ce 100644 --- a/gymnasium/wrappers/pixel_observation.py +++ b/gymnasium/wrappers/pixel_observation.py @@ -57,16 +57,16 @@ class PixelObservationWrapper(gym.ObservationWrapper): Args: env: The environment to wrap. - pixels_only (bool): If ``True`` (default), the original observation returned + pixels_only (bool): If `True` (default), the original observation returned by the wrapped environment will be discarded, and a dictionary - observation will only include pixels. If ``False``, the + observation will only include pixels. If `False`, the observation dictionary will contain both the original observations and the pixel observations. - render_kwargs (dict): Optional dictionary containing that maps elements of ``pixel_keys``to + render_kwargs (dict): Optional dictionary containing that maps elements of `pixel_keys` to keyword arguments passed to the :meth:`self.render` method. pixel_keys: Optional custom string specifying the pixel - observation's key in the ``OrderedDict`` of observations. - Defaults to ``(pixels,)``. + observation's key in the `OrderedDict` of observations. + Defaults to `(pixels,)`. Raises: AssertionError: If any of the keys in ``render_kwargs``do not show up in ``pixel_keys``. 
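A usage sketch for the `PixelObservationWrapper` arguments described above; the `CartPole-v1` base environment and its `render_mode="rgb_array"` setting are assumptions made for illustration:

```python
import gymnasium as gym
from gymnasium.wrappers import PixelObservationWrapper

# Sketch only: any render-capable environment works; CartPole-v1 is an assumption.
env = gym.make("CartPole-v1", render_mode="rgb_array")

# With pixels_only=False the original observation is kept alongside the pixels,
# so observations become a dict (expected keys: "state" and "pixels").
env = PixelObservationWrapper(env, pixels_only=False)

obs, info = env.reset(seed=0)
print(sorted(obs.keys()))
print(obs["pixels"].shape)  # an (H, W, 3) RGB frame produced via env.render()
```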
diff --git a/gymnasium/wrappers/transform_observation.py b/gymnasium/wrappers/transform_observation.py index 5bd402d77..3ab27bf8c 100644 --- a/gymnasium/wrappers/transform_observation.py +++ b/gymnasium/wrappers/transform_observation.py @@ -21,7 +21,7 @@ class TransformObservation(gym.ObservationWrapper): """ def __init__(self, env: gym.Env, f: Callable[[Any], Any]): - """Initialize the :class:`TransformObservation` wrapper with an environment and a transform function :param:`f`. + """Initialize the :class:`TransformObservation` wrapper with an environment and a transform function :attr:`f`. Args: env: The environment to apply the wrapper diff --git a/gymnasium/wrappers/transform_reward.py b/gymnasium/wrappers/transform_reward.py index 8c9f0ed56..67e7b29a5 100644 --- a/gymnasium/wrappers/transform_reward.py +++ b/gymnasium/wrappers/transform_reward.py @@ -22,7 +22,7 @@ class TransformReward(RewardWrapper): """ def __init__(self, env: gym.Env, f: Callable[[float], float]): - """Initialize the :class:`TransformReward` wrapper with an environment and reward transform function :param:`f`. + """Initialize the :class:`TransformReward` wrapper with an environment and reward transform function :attr:`f`. Args: env: The environment to apply the wrapper diff --git a/gymnasium/wrappers/vector_list_info.py b/gymnasium/wrappers/vector_list_info.py index 18cf919be..cb2982da1 100644 --- a/gymnasium/wrappers/vector_list_info.py +++ b/gymnasium/wrappers/vector_list_info.py @@ -15,18 +15,17 @@ class VectorListInfo(gym.Wrapper): operation on info like `RecordEpisodeStatistics` this need to be the outermost wrapper. - i.e. VectorListInfo(RecordEpisodeStatistics(envs)) + i.e. `VectorListInfo(RecordEpisodeStatistics(envs))` Example:: - >>> # actual - >>> { - ... "k": np.array[0., 0., 0.5, 0.3], - ... "_k": np.array[False, False, True, True] - ... } - >>> # classic - >>> [{}, {}, {k: 0.5}, {k: 0.3}] - + >>> # actual + >>> { + ... "k": np.array[0., 0., 0.5, 0.3], + ... "_k": np.array[False, False, True, True] + ... } + >>> # classic + >>> [{}, {}, {k: 0.5}, {k: 0.3}] """ def __init__(self, env):
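A sketch of how the `VectorListInfo` wrapper documented above might be used to recover the classic list-of-dicts info format from a vector environment; the `CartPole-v1` vector env is an assumption chosen to match the earlier examples:

```python
import gymnasium as gym
from gymnasium.wrappers.vector_list_info import VectorListInfo

# Sketch only: converts the vectorised dict-style info shown in the "actual"
# example above back into the classic per-sub-environment list of dicts.
envs = VectorListInfo(gym.vector.make("CartPole-v1", num_envs=3))

obs, infos = envs.reset(seed=123)
assert isinstance(infos, list) and len(infos) == 3  # one info dict per sub-environment
```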