Mirror of https://github.com/Farama-Foundation/Gymnasium.git (synced 2025-08-01 06:07:08 +00:00)
Improve the tutorial rendering (#1353)
docs/conf.py (27 changed lines)
@@ -12,11 +12,11 @@

# -- Project information -----------------------------------------------------
import os
import re
import sys
import time

import sphinx_gallery.gen_rst
import sphinx_gallery.sorting
from furo.gen_tutorials import generate_tutorials

@@ -123,10 +123,30 @@ sphinx_gallery.gen_rst.EXAMPLE_HEADER = """

.. rst-class:: sphx-glr-example-title

.. note::
    This example is compatible with Gymnasium version |release|.

.. _sphx_glr_{1}:

"""

tutorial_sorting = {
    "tutorials/gymnasium_basics": [
        "environment_creation",
        "implementing_custom_wrappers",
        "handling_time_limits",
        "load_quadruped_model",
        "*",
    ],
    "tutorials/training_agents": [
        "blackjack_q_learning",
        "frozenlake_q_learning",
        "mujoco_reinforce",
        "vector_a2c",
        "*",
    ],
}

sphinx_gallery_conf = {
    "ignore_pattern": r"__init__\.py",
    "examples_dirs": "./tutorials",
@@ -135,10 +155,13 @@ sphinx_gallery_conf = {
    "show_signature": False,
    "show_memory": False,
    "min_reported_time": float("inf"),
    "filename_pattern": f"{re.escape(os.sep)}run_",
    # "filename_pattern": f"{re.escape(os.sep)}run_",
    "default_thumb_file": os.path.join(
        os.path.dirname(__file__), "_static/img/gymnasium-github.png"
    ),
    # control the order in which the tutorials are presented
    "within_subsection_order": sphinx_gallery.sorting.FileNameSortKey,
    "subsection_order": lambda folder: tutorial_sorting[folder],
}

# All tutorials in the tutorials directory will be generated automatically

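The intended effect of ``tutorial_sorting`` together with the ``subsection_order`` callable is roughly the following (a simplified sketch of the behaviour, not sphinx-gallery's actual internals; ``sort_tutorials`` is a hypothetical helper)::

    # Simplified illustration of how the configuration above orders tutorials.
    def sort_tutorials(folder, filenames, tutorial_sorting):
        explicit = tutorial_sorting[folder]  # e.g. ["environment_creation", ..., "*"]

        def key(name):
            stem = name.removesuffix(".py")
            # Explicitly listed tutorials keep their list position;
            # everything else falls into the "*" wildcard slot.
            return explicit.index(stem) if stem in explicit else explicit.index("*")

        return sorted(filenames, key=key)

    # sort_tutorials("tutorials/training_agents",
    #                ["vector_a2c.py", "blackjack_q_learning.py"], tutorial_sorting)
    # -> ["blackjack_q_learning.py", "vector_a2c.py"]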
@@ -1,9 +1,2 @@
Tutorials
=========

We provide two sets of tutorials: basics and training.

* The aim of the basics tutorials is to showcase the fundamental API of Gymnasium to help users implement it.
* The most common application of Gymnasium is training RL agents; the training tutorials therefore aim to show a range of example implementations for different environments.

Additionally, we link third-party tutorials from external projects that utilise Gymnasium and could help users.

@@ -1,10 +1,6 @@
Gymnasium Basics
----------------
================

.. toctree::
   :hidden:
.. _gallery_section_name:

   environment_creation
   implementing_custom_wrappers
   handling_time_limits
   load_quadruped_model
The aim of these tutorials is to showcase the fundamental API of Gymnasium to help users implement it.

@@ -3,10 +3,7 @@
Make your own custom environment
================================

This documentation overviews creating new environments and relevant
useful wrappers, utilities and tests included in Gymnasium designed for
the creation of new environments.
This tutorial shows how to create a new environment, and links to relevant useful wrappers, utilities and tests included in Gymnasium.

Setup
------

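As a preview of what the tutorial builds up to, a new environment is at its core a ``gymnasium.Env`` subclass; a minimal sketch (a toy task invented here purely for illustration) looks like::

    import numpy as np
    import gymnasium as gym
    from gymnasium import spaces

    class MinimalEnv(gym.Env):
        """A toy environment: reach x = 10 by stepping +1 or -1."""

        def __init__(self):
            self.observation_space = spaces.Box(-np.inf, np.inf, shape=(1,), dtype=np.float64)
            self.action_space = spaces.Discrete(2)
            self._x = 0.0

        def reset(self, seed=None, options=None):
            super().reset(seed=seed)  # seeds self.np_random
            self._x = 0.0
            return np.array([self._x]), {}

        def step(self, action):
            self._x += 1.0 if action == 1 else -1.0
            terminated = self._x >= 10.0
            reward = 1.0 if terminated else 0.0
            # obs, reward, terminated, truncated, info
            return np.array([self._x]), reward, terminated, False, {}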
@@ -2,7 +2,10 @@
Handling Time Limits
====================

When using Gymnasium environments with reinforcement learning code, a common problem is that time limits are handled incorrectly. The ``done`` signal received (in previous versions of OpenAI Gym < 0.26) from ``env.step`` indicated whether an episode had ended. However, this signal did not distinguish whether the episode ended due to ``termination`` or ``truncation``.
This tutorial explains how time limits should be correctly handled with `termination` and `truncation` signals.

The ``done`` signal received (in previous versions of OpenAI Gym < 0.26) from ``env.step`` indicated whether an episode had ended.
However, this signal did not distinguish whether the episode ended due to ``termination`` or ``truncation``.

Termination
-----------

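The practical consequence shows up when computing value targets; a minimal sketch of a loop that respects the distinction (``value_fn`` is a hypothetical state-value estimator, not part of the tutorial)::

    import gymnasium as gym

    env = gym.make("CartPole-v1")
    obs, info = env.reset(seed=0)

    for _ in range(500):
        action = env.action_space.sample()
        next_obs, reward, terminated, truncated, info = env.step(action)
        # Bootstrap the value target only on truncation: a truncated episode
        # was cut short by the time limit, so the next state still has value,
        # whereas a terminated episode has genuinely ended.
        # target = reward if terminated else reward + gamma * value_fn(next_obs)
        if terminated or truncated:
            next_obs, info = env.reset()
        obs = next_obs
    env.close()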
@@ -3,6 +3,7 @@ Implementing Custom Wrappers
============================

In this tutorial we will describe how to implement your own custom wrappers.

Wrappers are a great way to add functionality to your environments in a modular way.
This will save you a lot of boilerplate code.

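As a flavour of what such a wrapper looks like, here is a minimal observation wrapper (a generic sketch, not code from this commit)::

    import numpy as np
    import gymnasium as gym

    class ClipObservation(gym.ObservationWrapper):
        """Clip every observation elementwise into [-1, 1]."""

        def observation(self, observation):
            # Called automatically on the output of reset() and step().
            return np.clip(observation, -1.0, 1.0)
            # (A production wrapper would also narrow self.observation_space.)

    # Usage: env = ClipObservation(gym.make("CartPole-v1"))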
@@ -2,8 +2,7 @@
Load custom quadruped robot environments
========================================

In this tutorial we will see how to use the `MuJoCo/Ant-v5` framework to create a quadruped walking environment,
using a model file (ending in `.xml`) without having to create a new class.
In this tutorial, we create a MuJoCo quadruped walking environment using a model file (ending in `.xml`) without having to create a new class, as sketched below.

Steps:

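The core of the approach is passing a custom model file to the existing `Ant-v5` entry point (the path below is a placeholder; the keyword arguments are real `Ant-v5` options, with illustrative values)::

    import gymnasium as gym

    # "./my_quadruped.xml" stands in for your own MuJoCo model file;
    # because Ant-v5 accepts an ``xml_file`` argument, no new class is needed.
    env = gym.make(
        "Ant-v5",
        xml_file="./my_quadruped.xml",
        frame_skip=5,  # illustrative value; tune for your robot
    )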
@@ -1,10 +1,6 @@
Training Agents
---------------
===============

.. toctree::
   :hidden:
.. _gallery_section_name:

   blackjack_q_learning
   frozenlake_q_learning
   mujoco_reinforce
   vector_a2c
The most common application of Gymnasium is for training RL agents. Therefore, these tutorials aim to show a range of example implementations for different environments.

@@ -1,7 +1,8 @@
"""
Solving Blackjack with Q-Learning
=================================
Solving Blackjack with Tabular Q-Learning
=========================================

This tutorial trains an agent for BlackJack using tabular Q-learning.
"""

# %%

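The heart of tabular Q-learning is a one-line temporal-difference update; a sketch of the rule the tutorial builds on (hyperparameter values are illustrative)::

    from collections import defaultdict
    import numpy as np

    n_actions = 2  # Blackjack-v1: 0 = stick, 1 = hit
    q_values = defaultdict(lambda: np.zeros(n_actions))
    alpha, gamma = 0.01, 0.95  # illustrative learning rate and discount factor

    def q_update(obs, action, reward, terminated, next_obs):
        # TD target: never bootstrap from a terminal state.
        future = (not terminated) * np.max(q_values[next_obs])
        td_error = reward + gamma * future - q_values[obs][action]
        q_values[obs][action] += alpha * td_error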
@@ -1,7 +1,8 @@
"""
Frozenlake benchmark
====================
Solving Frozenlake with Tabular Q-Learning
==========================================

This tutorial trains an agent for FrozenLake using tabular Q-learning.
"""

# %%

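For context, the environment itself is a single ``gym.make`` call; its stochastic dynamics are controlled by a flag (a sketch with illustrative values)::

    import gymnasium as gym

    # map_name and is_slippery are real FrozenLake-v1 arguments; with
    # is_slippery=True the agent's moves are stochastic, which is what
    # makes the tabular learning problem interesting.
    env = gym.make("FrozenLake-v1", map_name="4x4", is_slippery=True)
    obs, info = env.reset(seed=0)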
@@ -7,9 +7,7 @@ Training using REINFORCE for Mujoco
    :width: 400
    :alt: agent-environment-diagram

This tutorial serves 2 purposes:
1. To understand how to implement REINFORCE [1] from scratch to solve Mujoco's InvertedPendulum-v4
2. To implement a deep reinforcement learning algorithm with Gymnasium's v0.26+ `step()` function
This tutorial implements REINFORCE with neural networks for a MuJoCo environment.

We will be using **REINFORCE**, one of the earliest policy gradient methods. Instead of taking on the burden of learning a value function first and then deriving a policy from it,
REINFORCE optimizes the policy directly. In other words, it is trained to increase the probability of actions that led to high Monte-Carlo returns. More on that later.

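Concretely, the policy gradient objective reduces to a simple loss over one episode; a PyTorch-flavoured sketch (assuming ``log_probs`` and ``rewards`` were collected during a rollout)::

    import torch

    def reinforce_loss(log_probs, rewards, gamma=0.99):
        """log_probs: list of log pi(a_t|s_t) tensors; rewards: list of floats."""
        returns, g = [], 0.0
        for r in reversed(rewards):  # Monte-Carlo return G_t, computed backwards
            g = r + gamma * g
            returns.append(g)
        returns.reverse()
        weights = torch.tensor(returns)
        # Gradient ascent on expected return == descent on the negative
        # return-weighted log-probabilities of the actions actually taken.
        return -(torch.stack(log_probs) * weights).sum()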
@@ -1,14 +1,15 @@
"""
Training A2C with Vector Envs and Domain Randomization
======================================================
Speeding up A2C Training with Vector Envs
=========================================

This tutorial demonstrates training with vector environments to speed it up.
"""

# %%
# Notice
# ------
#
# If you encounter a RuntimeError like the following, raised from multiprocessing/spawn.py, wrap the code from ``gym.vector.make`` or ``gym.vector.AsyncVectorEnv`` to the end of the script in ``if __name__ == '__main__'``.
# If you encounter a RuntimeError like the following, raised from multiprocessing/spawn.py, wrap the code from ``gym.make_vec`` or ``gym.vector.AsyncVectorEnv`` to the end of the script in ``if __name__ == '__main__'``.
#
# ``An attempt has been made to start a new process before the current process has finished its bootstrapping phase.``
#

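A sketch of the recommended guard (the environment id and count are illustrative)::

    import gymnasium as gym

    if __name__ == "__main__":
        # All vector-env creation and training code lives under this guard so
        # that worker processes spawned by the async vectorizer can re-import
        # the module safely.
        envs = gym.make_vec("CartPole-v1", num_envs=4, vectorization_mode="async")
        observations, infos = envs.reset(seed=0)
        # ... training loop would go here ...
        envs.close()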