diff --git a/docs/conf.py b/docs/conf.py
index 8f987add0..b54dd4890 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -12,11 +12,11 @@
 # -- Project information -----------------------------------------------------
 
 import os
-import re
 import sys
 import time
 
 import sphinx_gallery.gen_rst
+import sphinx_gallery.sorting
 from furo.gen_tutorials import generate_tutorials
 
 
@@ -123,10 +123,30 @@
 sphinx_gallery.gen_rst.EXAMPLE_HEADER = """
 .. rst-class:: sphx-glr-example-title
 
+.. note::
+    This example is compatible with Gymnasium version |release|.
+
 .. _sphx_glr_{1}:
 """
 
 
+tutorial_sorting = {
+    "tutorials/gymnasium_basics": [
+        "environment_creation",
+        "implementing_custom_wrappers",
+        "handling_time_limits",
+        "load_quadruped_model",
+        "*",
+    ],
+    "tutorials/training_agents": [
+        "blackjack_q_learning",
+        "frozenlake_q_learning",
+        "mujoco_reinforce",
+        "vector_a2c",
+        "*",
+    ],
+}
+
 sphinx_gallery_conf = {
     "ignore_pattern": r"__init__\.py",
     "examples_dirs": "./tutorials",
@@ -135,10 +155,13 @@ sphinx_gallery_conf = {
     "show_signature": False,
     "show_memory": False,
     "min_reported_time": float("inf"),
-    "filename_pattern": f"{re.escape(os.sep)}run_",
+    # "filename_pattern": f"{re.escape(os.sep)}run_",
     "default_thumb_file": os.path.join(
         os.path.dirname(__file__), "_static/img/gymnasium-github.png"
     ),
+    # control the presentation order of the tutorials
+    "within_subsection_order": sphinx_gallery.sorting.FileNameSortKey,
+    "subsection_order": lambda folder: tutorial_sorting[folder],
 }
 
 # All tutorials in the tutorials directory will be generated automatically
diff --git a/docs/tutorials/README.rst b/docs/tutorials/README.rst
index 7a4df0ff8..0c7e28c3b 100644
--- a/docs/tutorials/README.rst
+++ b/docs/tutorials/README.rst
@@ -1,9 +1,2 @@
 Tutorials
 =========
-
-We provide two sets of tutorials: basics and training.
-
-* The aim of the basics tutorials is to showcase the fundamental API of Gymnasium to help users implement it
-* The most common application of Gymnasium is for training RL agents, the training tutorials aim to show a range of example implementations for different environments
-
-Additionally, we provide the third party tutorials as a link for external projects that utilise Gymnasium that could help users.
diff --git a/docs/tutorials/gymnasium_basics/README.rst b/docs/tutorials/gymnasium_basics/README.rst
index c5d9619d4..08bdeb191 100644
--- a/docs/tutorials/gymnasium_basics/README.rst
+++ b/docs/tutorials/gymnasium_basics/README.rst
@@ -1,10 +1,6 @@
 Gymnasium Basics
-----------------
+================
 
-.. toctree::
-   :hidden:
+.. _gallery_section_name:
 
-   environment_creation
-   implementing_custom_wrappers
-   handling_time_limits
-   load_quadruped_model
+The aim of these tutorials is to showcase the fundamental API of Gymnasium and to help users implement it in their own projects.
diff --git a/docs/tutorials/gymnasium_basics/environment_creation.py b/docs/tutorials/gymnasium_basics/environment_creation.py
index 621dce046..699f291e8 100644
--- a/docs/tutorials/gymnasium_basics/environment_creation.py
+++ b/docs/tutorials/gymnasium_basics/environment_creation.py
@@ -3,10 +3,7 @@
 Make your own custom environment
 ================================
 
-This documentation overviews creating new environments and relevant
-useful wrappers, utilities and tests included in Gymnasium designed for
-the creation of new environments.
-
+This tutorial shows how to create a new environment and links to the relevant wrappers, utilities and tests included in Gymnasium.
 
 Setup
 ------
diff --git a/docs/tutorials/gymnasium_basics/handling_time_limits.py b/docs/tutorials/gymnasium_basics/handling_time_limits.py
index 268b91de1..35c793ad0 100644
--- a/docs/tutorials/gymnasium_basics/handling_time_limits.py
+++ b/docs/tutorials/gymnasium_basics/handling_time_limits.py
@@ -2,7 +2,10 @@
 Handling Time Limits
 ====================
 
-In using Gymnasium environments with reinforcement learning code, a common problem observed is how time limits are incorrectly handled. The ``done`` signal received (in previous versions of OpenAI Gym < 0.26) from ``env.step`` indicated whether an episode has ended. However, this signal did not distinguish whether the episode ended due to ``termination`` or ``truncation``.
+This tutorial explains how time limits should be handled correctly using the ``termination`` and ``truncation`` signals.
+
+The ``done`` signal received (in previous versions of OpenAI Gym < 0.26) from ``env.step`` indicated whether an episode had ended.
+However, this signal did not distinguish whether the episode ended due to ``termination`` or ``truncation``.
 
 Termination
 -----------
diff --git a/docs/tutorials/gymnasium_basics/implementing_custom_wrappers.py b/docs/tutorials/gymnasium_basics/implementing_custom_wrappers.py
index b3a7f653d..c2667ca9f 100644
--- a/docs/tutorials/gymnasium_basics/implementing_custom_wrappers.py
+++ b/docs/tutorials/gymnasium_basics/implementing_custom_wrappers.py
@@ -3,6 +3,7 @@
 Implementing Custom Wrappers
 ============================
 In this tutorial we will describe how to implement your own custom wrappers.
+
 Wrappers are a great way to add functionality to your environments in a modular way.
 This will save you a lot of boilerplate code.
 
diff --git a/docs/tutorials/gymnasium_basics/load_quadruped_model.py b/docs/tutorials/gymnasium_basics/load_quadruped_model.py
index 76f432fb8..aba417d2d 100644
--- a/docs/tutorials/gymnasium_basics/load_quadruped_model.py
+++ b/docs/tutorials/gymnasium_basics/load_quadruped_model.py
@@ -2,8 +2,7 @@
 Load custom quadruped robot environments
 ========================================
 
-In this tutorial we will see how to use the `MuJoCo/Ant-v5` framework to create a quadruped walking environment,
-using a model file (ending in `.xml`) without having to create a new class.
+In this tutorial we use the `MuJoCo/Ant-v5` framework to create a quadruped walking environment from a model file (ending in `.xml`), without having to create a new class.
 
 Steps:
 
diff --git a/docs/tutorials/training_agents/README.rst b/docs/tutorials/training_agents/README.rst
index f73d49f36..de7aa222f 100644
--- a/docs/tutorials/training_agents/README.rst
+++ b/docs/tutorials/training_agents/README.rst
@@ -1,10 +1,6 @@
 Training Agents
----------------
+===============
 
-.. toctree::
-   :hidden:
+.. _gallery_section_name:
 
-   blackjack_q_learning
-   frozenlake_q_learning
-   mujoco_reinforce
-   vector_a2c
+The most common application of Gymnasium is for training RL agents. Therefore, these tutorials aim to show a range of example implementations for different environments.
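
Before the remaining per-tutorial changes, an aside for reviewers of the handling_time_limits hunk above: the following minimal sketch (not part of this diff) shows the pattern that tutorial describes, assuming the standard Gymnasium >= 0.26 step API. The environment id and the commented-out bootstrapping target are illustrative only:

    import gymnasium as gym

    env = gym.make("CartPole-v1")  # illustrative environment choice

    obs, info = env.reset(seed=0)
    episode_over = False
    while not episode_over:
        action = env.action_space.sample()  # stand-in for a trained policy
        next_obs, reward, terminated, truncated, info = env.step(action)

        # A value-based update would bootstrap from next_obs unless the episode
        # truly terminated; truncation (e.g. hitting a TimeLimit) is not a real
        # terminal state, so the target would look like:
        # target = reward + gamma * (1 - terminated) * value(next_obs)

        obs = next_obs
        episode_over = terminated or truncated

    env.close()

The key point mirrored from the tutorial is that only ``terminated`` should zero out the bootstrap term; a truncated episode still carries value beyond the last observed state.
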
diff --git a/docs/tutorials/training_agents/blackjack_q_learning.py b/docs/tutorials/training_agents/blackjack_q_learning.py
index da150df90..88d7e0ea1 100644
--- a/docs/tutorials/training_agents/blackjack_q_learning.py
+++ b/docs/tutorials/training_agents/blackjack_q_learning.py
@@ -1,7 +1,8 @@
 """
-Solving Blackjack with Q-Learning
-=================================
+Solving Blackjack with Tabular Q-Learning
+=========================================
+This tutorial trains an agent for Blackjack using tabular Q-learning.
 
 """
 
 # %%
diff --git a/docs/tutorials/training_agents/frozenlake_q_learning.py b/docs/tutorials/training_agents/frozenlake_q_learning.py
index 5986bde8e..f1e643b18 100644
--- a/docs/tutorials/training_agents/frozenlake_q_learning.py
+++ b/docs/tutorials/training_agents/frozenlake_q_learning.py
@@ -1,7 +1,8 @@
 """
-Frozenlake benchmark
-====================
+Solving FrozenLake with Tabular Q-Learning
+==========================================
+This tutorial trains an agent for FrozenLake using tabular Q-learning.
 
 """
 
 # %%
diff --git a/docs/tutorials/training_agents/mujoco_reinforce.py b/docs/tutorials/training_agents/mujoco_reinforce.py
index f7dca007e..64f92339a 100644
--- a/docs/tutorials/training_agents/mujoco_reinforce.py
+++ b/docs/tutorials/training_agents/mujoco_reinforce.py
@@ -7,9 +7,7 @@ Training using REINFORCE for Mujoco
    :width: 400
    :alt: agent-environment-diagram
 
-This tutorial serves 2 purposes:
- 1. To understand how to implement REINFORCE [1] from scratch to solve Mujoco's InvertedPendulum-v4
- 2. Implementation a deep reinforcement learning algorithm with Gymnasium's v0.26+ `step()` function
+This tutorial implements REINFORCE with neural networks for a MuJoCo environment.
 
 We will be using **REINFORCE**, one of the earliest policy gradient methods. Unlike going under the burden of learning a value function first and then deriving a policy out of it, REINFORCE optimizes the policy directly. In other words, it is trained to maximize the probability of Monte-Carlo returns. More on that later.
 
diff --git a/docs/tutorials/training_agents/vector_a2c.py b/docs/tutorials/training_agents/vector_a2c.py
index 5c282e92a..2c1aeb661 100644
--- a/docs/tutorials/training_agents/vector_a2c.py
+++ b/docs/tutorials/training_agents/vector_a2c.py
@@ -1,14 +1,15 @@
 """
-Training A2C with Vector Envs and Domain Randomization
-======================================================
+Speeding up A2C Training with Vector Envs
+=========================================
+This tutorial demonstrates how to use vector environments to speed up A2C training.
 
 """
 
 # %%
 # Notice
 # ------
 #
-# If you encounter an RuntimeError like the following comment raised on multiprocessing/spawn.py, wrap up the code from ``gym.vector.make=`` or ``gym.vector.AsyncVectorEnv`` to the end of the code by ``if__name__ == '__main__'``.
+# If you encounter a RuntimeError like the one below, raised from multiprocessing/spawn.py, wrap the code from ``gym.make_vec`` or ``gym.vector.AsyncVectorEnv`` to the end of the script in ``if __name__ == '__main__'``.
 #
 # ``An attempt has been made to start a new process before the current process has finished its bootstrapping phase.``
 #
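
As a companion to the vector_a2c notice above, here is a minimal sketch (again not part of the diff) of the ``__main__`` guard it recommends, using Gymnasium's AsyncVectorEnv. The environment id, number of environments and the placeholder train() loop are illustrative:

    import gymnasium as gym


    def train(envs: gym.vector.VectorEnv) -> None:
        # Placeholder for the tutorial's actual A2C training loop.
        obs, info = envs.reset(seed=0)
        for _ in range(100):
            actions = envs.action_space.sample()  # stand-in for the policy
            obs, rewards, terminations, truncations, infos = envs.step(actions)
        envs.close()


    if __name__ == "__main__":
        # AsyncVectorEnv starts worker processes; with the "spawn" start method
        # this module is re-imported, so env creation and training must sit
        # behind the __main__ guard.
        envs = gym.vector.AsyncVectorEnv(
            [lambda: gym.make("CartPole-v1") for _ in range(8)]
        )
        train(envs)

The same guard applies when the vector environment is created through ``gym.make_vec``, as named in the notice.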