mirror of
https://github.com/Farama-Foundation/Gymnasium.git
synced 2025-08-01 06:07:08 +00:00
Improve the tutorial rendering (#1353)
This commit is contained in:
27
docs/conf.py
27
docs/conf.py
@@ -12,11 +12,11 @@
|
|||||||
|
|
||||||
# -- Project information -----------------------------------------------------
|
# -- Project information -----------------------------------------------------
|
||||||
import os
|
import os
|
||||||
import re
|
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
|
|
||||||
import sphinx_gallery.gen_rst
|
import sphinx_gallery.gen_rst
|
||||||
|
import sphinx_gallery.sorting
|
||||||
from furo.gen_tutorials import generate_tutorials
|
from furo.gen_tutorials import generate_tutorials
|
||||||
|
|
||||||
|
|
||||||
@@ -123,10 +123,30 @@ sphinx_gallery.gen_rst.EXAMPLE_HEADER = """
|
|||||||
|
|
||||||
.. rst-class:: sphx-glr-example-title
|
.. rst-class:: sphx-glr-example-title
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
This example is compatible with Gymnasium version |release|.
|
||||||
|
|
||||||
.. _sphx_glr_{1}:
|
.. _sphx_glr_{1}:
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
tutorial_sorting = {
|
||||||
|
"tutorials/gymnasium_basics": [
|
||||||
|
"environment_creation",
|
||||||
|
"implementing_custom_wrappers",
|
||||||
|
"handling_time_limits",
|
||||||
|
"load_quadruped_model",
|
||||||
|
"*",
|
||||||
|
],
|
||||||
|
"tutorials/training_agents": [
|
||||||
|
"blackjack_q_learning",
|
||||||
|
"frozenlake_q_learning",
|
||||||
|
"mujoco_reinforce",
|
||||||
|
"vector_a2c",
|
||||||
|
"*",
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
sphinx_gallery_conf = {
|
sphinx_gallery_conf = {
|
||||||
"ignore_pattern": r"__init__\.py",
|
"ignore_pattern": r"__init__\.py",
|
||||||
"examples_dirs": "./tutorials",
|
"examples_dirs": "./tutorials",
|
||||||
@@ -135,10 +155,13 @@ sphinx_gallery_conf = {
|
|||||||
"show_signature": False,
|
"show_signature": False,
|
||||||
"show_memory": False,
|
"show_memory": False,
|
||||||
"min_reported_time": float("inf"),
|
"min_reported_time": float("inf"),
|
||||||
"filename_pattern": f"{re.escape(os.sep)}run_",
|
# "filename_pattern": f"{re.escape(os.sep)}run_",
|
||||||
"default_thumb_file": os.path.join(
|
"default_thumb_file": os.path.join(
|
||||||
os.path.dirname(__file__), "_static/img/gymnasium-github.png"
|
os.path.dirname(__file__), "_static/img/gymnasium-github.png"
|
||||||
),
|
),
|
||||||
|
# order the tutorial presentation order
|
||||||
|
"within_subsection_order": sphinx_gallery.sorting.FileNameSortKey,
|
||||||
|
"subsection_order": lambda folder: tutorial_sorting[folder],
|
||||||
}
|
}
|
||||||
|
|
||||||
# All tutorials in the tutorials directory will be generated automatically
|
# All tutorials in the tutorials directory will be generated automatically
|
||||||
|
@@ -1,9 +1,2 @@
|
|||||||
Tutorials
|
Tutorials
|
||||||
=========
|
=========
|
||||||
|
|
||||||
We provide two sets of tutorials: basics and training.
|
|
||||||
|
|
||||||
* The aim of the basics tutorials is to showcase the fundamental API of Gymnasium to help users implement it
|
|
||||||
* The most common application of Gymnasium is for training RL agents, the training tutorials aim to show a range of example implementations for different environments
|
|
||||||
|
|
||||||
Additionally, we provide third-party tutorials as links to external projects that utilise Gymnasium and could help users.
|
|
||||||
|
@@ -1,10 +1,6 @@
|
|||||||
Gymnasium Basics
|
Gymnasium Basics
|
||||||
----------------
|
================
|
||||||
|
|
||||||
.. toctree::
|
.. _gallery_section_name:
|
||||||
:hidden:
|
|
||||||
|
|
||||||
environment_creation
|
The aim of these tutorials is to showcase the fundamental API of Gymnasium to help users implement it
|
||||||
implementing_custom_wrappers
|
|
||||||
handling_time_limits
|
|
||||||
load_quadruped_model
|
|
||||||
|
@@ -3,10 +3,7 @@
|
|||||||
Make your own custom environment
|
Make your own custom environment
|
||||||
================================
|
================================
|
||||||
|
|
||||||
This documentation overviews creating new environments and relevant
|
This tutorial shows how to create a new environment and links to relevant useful wrappers, utilities and tests included in Gymnasium.
|
||||||
useful wrappers, utilities and tests included in Gymnasium designed for
|
|
||||||
the creation of new environments.
|
|
||||||
|
|
||||||
|
|
||||||
Setup
|
Setup
|
||||||
------
|
------
|
||||||
|
@@ -2,7 +2,10 @@
|
|||||||
Handling Time Limits
|
Handling Time Limits
|
||||||
====================
|
====================
|
||||||
|
|
||||||
In using Gymnasium environments with reinforcement learning code, a common problem observed is how time limits are incorrectly handled. The ``done`` signal received (in previous versions of OpenAI Gym < 0.26) from ``env.step`` indicated whether an episode has ended. However, this signal did not distinguish whether the episode ended due to ``termination`` or ``truncation``.
|
This tutorial explains how time limits should be correctly handled with `termination` and `truncation` signals.
|
||||||
|
|
||||||
|
The ``done`` signal received (in previous versions of OpenAI Gym < 0.26) from ``env.step`` indicated whether an episode has ended.
|
||||||
|
However, this signal did not distinguish whether the episode ended due to ``termination`` or ``truncation``.
|
||||||
|
|
||||||
Termination
|
Termination
|
||||||
-----------
|
-----------
|
||||||
|
@@ -3,6 +3,7 @@ Implementing Custom Wrappers
|
|||||||
============================
|
============================
|
||||||
|
|
||||||
In this tutorial we will describe how to implement your own custom wrappers.
|
In this tutorial we will describe how to implement your own custom wrappers.
|
||||||
|
|
||||||
Wrappers are a great way to add functionality to your environments in a modular way.
|
Wrappers are a great way to add functionality to your environments in a modular way.
|
||||||
This will save you a lot of boilerplate code.
|
This will save you a lot of boilerplate code.
|
||||||
|
|
||||||
|
@@ -2,8 +2,7 @@
|
|||||||
Load custom quadruped robot environments
|
Load custom quadruped robot environments
|
||||||
========================================
|
========================================
|
||||||
|
|
||||||
In this tutorial we will see how to use the `MuJoCo/Ant-v5` framework to create a quadruped walking environment,
|
In this tutorial we create a MuJoCo quadruped walking environment using a model file (ending in `.xml`) without having to create a new class.
|
||||||
using a model file (ending in `.xml`) without having to create a new class.
|
|
||||||
|
|
||||||
Steps:
|
Steps:
|
||||||
|
|
||||||
|
@@ -1,10 +1,6 @@
|
|||||||
Training Agents
|
Training Agents
|
||||||
---------------
|
===============
|
||||||
|
|
||||||
.. toctree::
|
.. _gallery_section_name:
|
||||||
:hidden:
|
|
||||||
|
|
||||||
blackjack_q_learning
|
The most common application of Gymnasium is for training RL agents. Therefore, these tutorials aim to show a range of example implementations for different environments.
|
||||||
frozenlake_q_learning
|
|
||||||
mujoco_reinforce
|
|
||||||
vector_a2c
|
|
||||||
|
@@ -1,7 +1,8 @@
|
|||||||
"""
|
"""
|
||||||
Solving Blackjack with Q-Learning
|
Solving Blackjack with Tabular Q-Learning
|
||||||
=================================
|
=========================================
|
||||||
|
|
||||||
|
This tutorial trains an agent for BlackJack using tabular Q-learning.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
|
@@ -1,7 +1,8 @@
|
|||||||
"""
|
"""
|
||||||
Frozenlake benchmark
|
Solving Frozenlake with Tabular Q-Learning
|
||||||
====================
|
==========================================
|
||||||
|
|
||||||
|
This tutorial trains an agent for FrozenLake using tabular Q-learning.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
|
@@ -7,9 +7,7 @@ Training using REINFORCE for Mujoco
|
|||||||
:width: 400
|
:width: 400
|
||||||
:alt: agent-environment-diagram
|
:alt: agent-environment-diagram
|
||||||
|
|
||||||
This tutorial serves 2 purposes:
|
This tutorial implements REINFORCE with neural networks for a MuJoCo environment.
|
||||||
1. To understand how to implement REINFORCE [1] from scratch to solve Mujoco's InvertedPendulum-v4
|
|
||||||
2. Implementation a deep reinforcement learning algorithm with Gymnasium's v0.26+ `step()` function
|
|
||||||
|
|
||||||
We will be using **REINFORCE**, one of the earliest policy gradient methods. Unlike going under the burden of learning a value function first and then deriving a policy out of it,
|
We will be using **REINFORCE**, one of the earliest policy gradient methods. Unlike going under the burden of learning a value function first and then deriving a policy out of it,
|
||||||
REINFORCE optimizes the policy directly. In other words, it is trained to maximize the probability of Monte-Carlo returns. More on that later.
|
REINFORCE optimizes the policy directly. In other words, it is trained to maximize the probability of Monte-Carlo returns. More on that later.
|
||||||
|
@@ -1,14 +1,15 @@
|
|||||||
"""
|
"""
|
||||||
Training A2C with Vector Envs and Domain Randomization
|
Speeding up A2C Training with Vector Envs
|
||||||
======================================================
|
=========================================
|
||||||
|
|
||||||
|
This tutorial demonstrates how to speed up A2C training by using vector environments.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
# Notice
|
# Notice
|
||||||
# ------
|
# ------
|
||||||
#
|
#
|
||||||
# If you encounter an RuntimeError like the following comment raised on multiprocessing/spawn.py, wrap up the code from ``gym.vector.make=`` or ``gym.vector.AsyncVectorEnv`` to the end of the code by ``if__name__ == '__main__'``.
|
# If you encounter a RuntimeError like the following, raised in multiprocessing/spawn.py, wrap the code from ``gym.make_vec`` or ``gym.vector.AsyncVectorEnv`` to the end of the file in ``if __name__ == '__main__'``.
|
||||||
#
|
#
|
||||||
# ``An attempt has been made to start a new process before the current process has finished its bootstrapping phase.``
|
# ``An attempt has been made to start a new process before the current process has finished its bootstrapping phase.``
|
||||||
#
|
#
|
||||||
|
Reference in New Issue
Block a user