Mirror of https://github.com/Farama-Foundation/Gymnasium.git, synced 2025-08-01 06:07:08 +00:00
Tutorials galleries (#258)

docs/.gitignore (vendored, 9 lines changed)

@@ -4,9 +4,12 @@ __pycache__
 build/
 _build/
 
-tutorials/*
-!tutorials/*.md
-!tutorials/*.py
+tutorials/**/*.pickle
+tutorials/**/images/
+tutorials/**/*.rst
+tutorials/**/*.ipynb
+tutorials/**/*.zip
+!tutorials/**/README.rst
 
 environments/**/list.html
 environments/**/complete_list.html

docs/conf.py (33 lines changed)

@@ -16,9 +16,10 @@
 
 # -- Project information -----------------------------------------------------
 import os
+import re
 from typing import Any, Dict
 
-from furo import gen_tutorials
+import sphinx_gallery.gen_rst
 
 import gymnasium
 

@@ -43,6 +44,7 @@ extensions = [
     "sphinx.ext.githubpages",
     "myst_parser",
     "furo.gen_tutorials",
+    "sphinx_gallery.gen_gallery",
     "sphinx_github_changelog",
 ]
 

@@ -52,7 +54,7 @@ templates_path = ["_templates"]
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 # This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = ["tutorials/demo.rst"]
+exclude_patterns = ["tutorials/README.rst"]
 
 # Napoleon settings
 napoleon_use_ivar = True

@@ -95,10 +97,29 @@ html_css_files = []
 
 # -- Generate Tutorials -------------------------------------------------
 
-gen_tutorials.generate(
-    os.path.dirname(__file__),
-    os.path.join(os.path.dirname(__file__), "tutorials"),
-)
+sphinx_gallery.gen_rst.EXAMPLE_HEADER = """
+.. DO NOT EDIT.
+.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
+.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
+.. "{0}"
+.. LINE NUMBERS ARE GIVEN BELOW.
+
+.. rst-class:: sphx-glr-example-title
+
+.. _sphx_glr_{1}:
+
+"""
+
+sphinx_gallery_conf = {
+    "ignore_pattern": r"__init__\.py",
+    "examples_dirs": "./tutorials",
+    "gallery_dirs": "./tutorials",
+    "show_signature": False,
+    "show_memory": False,
+    "min_reported_time": float("inf"),
+    "filename_pattern": f"{re.escape(os.sep)}run_",
+    "default_thumb_file": "_static/img/gymnasium-github.png",
+}
 
 # -- Generate Changelog -------------------------------------------------
 
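
With this configuration, sphinx-gallery executes only tutorial scripts whose file name starts with run_ (the filename_pattern above); other tutorial files are rendered without being run, and __init__.py files are skipped entirely via ignore_pattern. A minimal sketch of a script in the sphinx-gallery cell format that would match, at a hypothetical path such as docs/tutorials/gymnasium_basics/run_example.py:

    """
    A hypothetical tutorial
    =======================

    Text in the module docstring becomes the page introduction.
    """

    # %%
    # A section header
    # ----------------
    # Comment cells opened by ``# %%`` are rendered as reStructuredText.

    # Code between comment cells is executed at build time and its output captured.
    total = sum(range(10))
    print(f"the total is {total}")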

@@ -67,7 +67,7 @@ environments/third_party_environments
 :glob:
 :caption: Tutorials
 
-tutorials/*
+tutorials/**/index
 ```
 
 ```{toctree}

@@ -1,7 +1,7 @@
 sphinx
 sphinx-autobuild
 myst-parser
-sphinx_gallery
+git+https://github.com/sphinx-gallery/sphinx-gallery.git@4006662c8c1984453a247dc6d3df6260e5b00f4b#egg=sphinx_gallery
 git+https://github.com/Farama-Foundation/Celshast#egg=furo
 moviepy
 pygame

docs/tutorials/README.rst (new file, 2 lines)

@@ -0,0 +1,2 @@
+Tutorials
+=========

@@ -1,29 +0,0 @@
-"""
-Demo tutorial script
-=========================
-
-This file is not listed in the website and serves only to give an example of a tutorial file. And is mostly a copy-paste from sphinx-gallery.
-"""
-
-# %%
-# This is a section header
-# ------------------------
-# This is the first section!
-# The `#%%` signifies to Sphinx-Gallery that this text should be rendered as
-# rST and if using one of the above IDE/plugin's, also signifies the start of a
-# 'code block'.
-
-# This line won't be rendered as rST because there's a space after the last block.
-myvariable = 2
-print(f"my variable is {myvariable}")
-# This is the end of the 'code block' (if using an above IDE). All code within
-# this block can be easily executed all at once.
-
-# %%
-# This is another section header
-# ------------------------------
-#
-# In the built documentation, it will be rendered as rST after the code above!
-# This is also another code block.
-
-print(f"my variable plus 2 is {myvariable + 2}")

docs/tutorials/gymnasium_basics/README.rst (new file, 2 lines)

@@ -0,0 +1,2 @@
+Gymnasium Basics
+----------------

@@ -1,13 +1,13 @@
 """
 Training A2C with Vector Envs and Domain Randomization
-=================================
+======================================================
 
 """
 
 
 # %%
 # Introduction
-# ------------------------------
+# ------------
 #
 # In this tutorial, you'll learn how to use vectorized environments to train an Advantage Actor-Critic agent.
 # We are going to use A2C, which is the synchronous version of the A3C algorithm [1].

@@ -56,7 +56,7 @@ import gymnasium as gym
 
 # %%
 # Advantage Actor-Critic (A2C)
-# ------------------------------
+# ----------------------------
 #
 # The Actor-Critic combines elements of value-based and policy-based methods. In A2C, the agent has two separate neural networks:
 # a critic network that estimates the state-value function, and an actor network that outputs logits for a categorical probability distribution over all actions.
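
For orientation, a minimal sketch of the two-network idea described in the context lines above (layer sizes and names are illustrative, not the tutorial's exact architecture):

    import torch
    import torch.nn as nn

    n_features, n_actions, hidden = 8, 4, 64  # illustrative sizes, e.g. LunarLander-v2

    # critic: maps a state to a scalar estimate of the state-value V(s)
    critic = nn.Sequential(nn.Linear(n_features, hidden), nn.ReLU(), nn.Linear(hidden, 1))

    # actor: maps a state to logits for a categorical distribution over all actions
    actor = nn.Sequential(nn.Linear(n_features, hidden), nn.ReLU(), nn.Linear(hidden, n_actions))

    state = torch.randn(1, n_features)
    value = critic(state)                                         # shape (1, 1)
    dist = torch.distributions.Categorical(logits=actor(state))
    action = dist.sample()                                        # sampled action index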

@@ -241,7 +241,7 @@ class A2C(nn.Module):
 
 # %%
 # Using Vectorized Environments
-# ------------------------------
+# -----------------------------
 #
 # When you calculate the losses for the two Neural Networks over only one epoch, it might have a high variance. With vectorized environments,
 # we can play with `n_envs` in parallel and thus get up to a linear speedup (meaning that in theory, we collect samples `n_envs` times quicker)
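
As a usage sketch (assuming the Gymnasium vector API of this release, as in the gym.vector.make call that appears in the next hunk's context), a vectorized environment batches observations, rewards, and termination flags across the sub-environments:

    import gymnasium as gym

    envs = gym.vector.make("LunarLander-v2", num_envs=3, max_episode_steps=600)

    observations, infos = envs.reset(seed=42)  # observations is batched: shape (3, 8)
    for _ in range(10):
        actions = envs.action_space.sample()   # one action per sub-environment
        observations, rewards, terminateds, truncateds, infos = envs.step(actions)
        # sub-environments reset automatically when they terminate or truncate
    envs.close()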

@@ -259,7 +259,7 @@ envs = gym.vector.make("LunarLander-v2", num_envs=3, max_episode_steps=600)
 
 # %%
 # Domain Randomization
-# ------------------------------
+# --------------------
 #
 # If we want to randomize the environment for training to get more robust agents (that can deal with different parameterizations of an environment
 # and therefore might have a higher degree of generalization), we can set the desired parameters manually or use a pseudo-random number generator to generate them.
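
A sketch of that idea, building each sub-environment with randomly drawn physics parameters (gravity, enable_wind, and wind_power are documented LunarLander-v2 arguments; the sampling ranges here are illustrative):

    import numpy as np
    import gymnasium as gym

    # each factory builds a differently-parameterized LunarLander-v2
    envs = gym.vector.AsyncVectorEnv(
        [
            lambda: gym.make(
                "LunarLander-v2",
                gravity=np.clip(np.random.normal(loc=-10.0, scale=1.0), -11.99, -0.01),
                enable_wind=np.random.choice([True, False]),
                wind_power=np.clip(np.random.normal(loc=15.0, scale=1.0), 0.01, 19.99),
                max_episode_steps=600,
            )
            for _ in range(3)
        ]
    )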

@@ -337,7 +337,7 @@ envs = gym.vector.AsyncVectorEnv(
 
 # %%
 # Setup
-# ------------------------------
+# -----
 #
 
 # environment hyperparams

@@ -398,7 +398,7 @@ agent = A2C(obs_shape, action_shape, device, critic_lr, actor_lr, n_envs)
 
 # %%
 # Training the A2C Agent
-# ------------------------------
+# ----------------------
 #
 # For our training loop, we are using the `RecordEpisodeStatistics` wrapper to record the episode lengths and returns and we are also saving
 # the losses and entropies to plot them after the agent finished training.
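
For reference, a minimal single-environment sketch of the RecordEpisodeStatistics wrapper (the tutorial applies it to the vector env instead), assuming the Gymnasium API of this release:

    import gymnasium as gym

    env = gym.wrappers.RecordEpisodeStatistics(gym.make("LunarLander-v2"))
    obs, info = env.reset(seed=42)
    done = False
    while not done:
        obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
        done = terminated or truncated

    # on episode end the wrapper reports cumulative return, length, and elapsed time
    print(info["episode"]["r"], info["episode"]["l"], info["episode"]["t"])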

@@ -478,7 +478,7 @@ for sample_phase in tqdm(range(n_updates)):
 
 # %%
 # Plotting
-# ------------------------------
+# --------
 #
 
 """ plot the results """

@@ -550,7 +550,7 @@ plt.show()
 
 # %%
 # Performance Analysis of Synchronous and Asynchronous Vectorized Environments
-# ------------------------------
+# ----------------------------------------------------------------------------
 #
 
 # %%

@@ -608,7 +608,7 @@ plt.show()
 
 # %%
 # Saving/ Loading Weights
-# ------------------------------
+# -----------------------
 #
 
 save_weights = False
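
The flag above gates the standard PyTorch state_dict pattern; a sketch assuming the A2C module exposes its two networks as agent.actor and agent.critic (attribute and file names are illustrative, not the tutorial's exact code):

    import torch

    if save_weights:
        torch.save(agent.actor.state_dict(), "actor_weights.pt")
        torch.save(agent.critic.state_dict(), "critic_weights.pt")

    if load_weights:
        # the freshly constructed agent must have the same architecture
        agent = A2C(obs_shape, action_shape, device, critic_lr, actor_lr, n_envs)
        agent.actor.load_state_dict(torch.load("actor_weights.pt"))
        agent.critic.load_state_dict(torch.load("critic_weights.pt"))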

@@ -638,7 +638,7 @@ if load_weights:
 
 # %%
 # Showcase the Agent
-# ------------------------------
+# ------------------
 #
 
 """ play a couple of showcase episodes """

@@ -690,7 +690,7 @@ env.close()
 
 # %%
 # Try playing the environment yourself
-# ------------------------------
+# ------------------------------------
 #
 
 # from gymnasium.utils.play import play
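
A sketch of what that commented import enables: gymnasium.utils.play runs a keyboard-controlled loop over an rgb_array-rendered environment (the key bindings below are an illustrative guess for LunarLander-v2's four discrete actions, not taken from this commit):

    import gymnasium as gym
    from gymnasium.utils.play import play

    # keys_to_action maps pressed keys to discrete actions:
    # 0 = do nothing, 1 = fire left engine, 2 = fire main engine, 3 = fire right engine
    play(
        gym.make("LunarLander-v2", render_mode="rgb_array"),
        keys_to_action={"a": 1, "w": 2, "d": 3},
        noop=0,
    )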

@@ -701,7 +701,7 @@ env.close()
 
 # %%
 # References
-# ------------------------------
+# ----------
 #
 # [1] V. Mnih, A. P. Badia, M. Mirza, A. Graves, T. P. Lillicrap, T. Harley, D. Silver, K. Kavukcuoglu. "Asynchronous Methods for Deep Reinforcement Learning" ICML (2016).
 #

docs/tutorials/training_agents/README.rst (new file, 2 lines)

@@ -0,0 +1,2 @@
+Training Agents
+---------------