Tutorials galleries (#258)

Manuel Goulão
2023-01-11 14:00:51 -06:00
committed by GitHub
parent 35fe9b0f13
commit 4e6dc3e420
14 changed files with 54 additions and 53 deletions

docs/.gitignore

@@ -4,9 +4,12 @@ __pycache__
 build/
 _build/
 
-tutorials/*
-!tutorials/*.md
-!tutorials/*.py
+tutorials/**/*.pickle
+tutorials/**/images/
+tutorials/**/*.rst
+tutorials/**/*.ipynb
+tutorials/**/*.zip
+!tutorials/**/README.rst
 
 environments/**/list.html
 environments/**/complete_list.html


@@ -16,9 +16,10 @@
 # -- Project information -----------------------------------------------------
 import os
+import re
 from typing import Any, Dict
-from furo import gen_tutorials
+import sphinx_gallery.gen_rst
 import gymnasium
@@ -43,6 +44,7 @@ extensions = [
     "sphinx.ext.githubpages",
     "myst_parser",
     "furo.gen_tutorials",
+    "sphinx_gallery.gen_gallery",
     "sphinx_github_changelog",
 ]
@@ -52,7 +54,7 @@ templates_path = ["_templates"]
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 # This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = ["tutorials/demo.rst"]
+exclude_patterns = ["tutorials/README.rst"]
 
 # Napoleon settings
 napoleon_use_ivar = True
@@ -95,10 +97,29 @@ html_css_files = []
 # -- Generate Tutorials -------------------------------------------------
-gen_tutorials.generate(
-    os.path.dirname(__file__),
-    os.path.join(os.path.dirname(__file__), "tutorials"),
-)
+sphinx_gallery.gen_rst.EXAMPLE_HEADER = """
+.. DO NOT EDIT.
+.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
+.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
+.. "{0}"
+.. LINE NUMBERS ARE GIVEN BELOW.
+
+.. rst-class:: sphx-glr-example-title
+
+.. _sphx_glr_{1}:
+"""
+
+sphinx_gallery_conf = {
+    "ignore_pattern": r"__init__\.py",
+    "examples_dirs": "./tutorials",
+    "gallery_dirs": "./tutorials",
+    "show_signature": False,
+    "show_memory": False,
+    "min_reported_time": float("inf"),
+    "filename_pattern": f"{re.escape(os.sep)}run_",
+    "default_thumb_file": "_static/img/gymnasium-github.png",
+}
 
 # -- Generate Changelog -------------------------------------------------
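Two of the new options do most of the work for build times: `filename_pattern` decides which tutorial scripts sphinx-gallery actually executes (the rest are rendered without being run), and `min_reported_time` set to infinity should keep run-time footers out of the generated pages. A rough, self-contained illustration of the pattern matching (the file names below are invented for the demo, not files in this commit):

```python
# Illustration only (not part of the commit): with the pattern above, a script is
# executed at build time only if its path contains os.sep + "run_"; other tutorials
# are still rendered in the gallery, just not run.
import os
import re

pattern = f"{re.escape(os.sep)}run_"
for path in [
    "tutorials/training_agents/vector_envs_tutorial.py",
    "tutorials/training_agents/run_smoke_test.py",
]:
    executed = re.search(pattern, path.replace("/", os.sep)) is not None
    print(f"{path}: {'executed' if executed else 'rendered only'}")
```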


@@ -67,7 +67,7 @@ environments/third_party_environments
 :glob:
 :caption: Tutorials
 
-tutorials/*
+tutorials/**/index
 ```
 
 ```{toctree}


@@ -1,7 +1,7 @@
 sphinx
 sphinx-autobuild
 myst-parser
-sphinx_gallery
+git+https://github.com/sphinx-gallery/sphinx-gallery.git@4006662c8c1984453a247dc6d3df6260e5b00f4b#egg=sphinx_gallery
 git+https://github.com/Farama-Foundation/Celshast#egg=furo
 moviepy
 pygame


@@ -0,0 +1,2 @@
+Tutorials
+=========


@@ -1,29 +0,0 @@
-"""
-Demo tutorial script
-=========================
-
-This file is not listed in the website and serves only to give an example of a tutorial file. And is mostly a copy-paste from sphinx-gallery.
-"""
-
-# %%
-# This is a section header
-# ------------------------
-# This is the first section!
-# The `#%%` signifies to Sphinx-Gallery that this text should be rendered as
-# rST and if using one of the above IDE/plugin's, also signifies the start of a
-# 'code block'.
-
-# This line won't be rendered as rST because there's a space after the last block.
-myvariable = 2
-print(f"my variable is {myvariable}")
-# This is the end of the 'code block' (if using an above IDE). All code within
-# this block can be easily executed all at once.
-
-# %%
-# This is another section header
-# ------------------------------
-#
-# In the built documentation, it will be rendered as rST after the code above!
-# This is also another code block.
-
-print(f"my variable plus 2 is {myvariable + 2}")


@@ -0,0 +1,2 @@
+Gymnasium Basics
+----------------


@@ -1,13 +1,13 @@
 """
 Training A2C with Vector Envs and Domain Randomization
-=================================
+======================================================
 """
 
 # %%
 # Introduction
-# ------------------------------
+# ------------
 #
 # In this tutorial, you'll learn how to use vectorized environments to train an Advantage Actor-Critic agent.
 # We are going to use A2C, which is the synchronous version of the A3C algorithm [1].
@@ -56,7 +56,7 @@ import gymnasium as gym
 
 # %%
 # Advantage Actor-Critic (A2C)
-# ------------------------------
+# ----------------------------
 #
 # The Actor-Critic combines elements of value-based and policy-based methods. In A2C, the agent has two separate neural networks:
 # a critic network that estimates the state-value function, and an actor network that outputs logits for a categorical probability distribution over all actions.
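The paragraph in this hunk describes the two networks, but the diff does not show them; a minimal PyTorch sketch of that critic/actor split (illustrative only, not the tutorial's actual `A2C` class) looks like this:

```python
import torch
import torch.nn as nn


class TinyActorCritic(nn.Module):
    """Illustrative sketch: a critic head for state values, an actor head for action logits."""

    def __init__(self, obs_dim: int, n_actions: int, hidden: int = 64) -> None:
        super().__init__()
        self.critic = nn.Sequential(nn.Linear(obs_dim, hidden), nn.ReLU(), nn.Linear(hidden, 1))
        self.actor = nn.Sequential(nn.Linear(obs_dim, hidden), nn.ReLU(), nn.Linear(hidden, n_actions))

    def forward(self, obs: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
        # Returns (state_values, action_logits) for a batch of observations.
        return self.critic(obs), self.actor(obs)


values, logits = TinyActorCritic(obs_dim=8, n_actions=4)(torch.zeros(3, 8))
print(values.shape, logits.shape)  # torch.Size([3, 1]) torch.Size([3, 4])
```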
@@ -241,7 +241,7 @@ class A2C(nn.Module):
 
 # %%
 # Using Vectorized Environments
-# ------------------------------
+# -----------------------------
 #
 # When you calculate the losses for the two Neural Networks over only one epoch, it might have a high variance. With vectorized environments,
 # we can play with `n_envs` in parallel and thus get up to a linear speedup (meaning that in theory, we collect samples `n_envs` times quicker)
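To make the `n_envs` point concrete, here is a rough, self-contained example of stepping a vectorized environment; it uses `CartPole-v1` rather than the tutorial's `LunarLander-v2` so it runs without the Box2D extra:

```python
import gymnasium as gym

# Three sub-environments are stepped together; observations and rewards come back batched.
envs = gym.vector.make("CartPole-v1", num_envs=3)
obs, info = envs.reset(seed=42)
actions = envs.action_space.sample()  # one action per sub-environment
obs, rewards, terminations, truncations, infos = envs.step(actions)
print(obs.shape, rewards.shape)  # (3, 4) (3,)
envs.close()
```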
@@ -259,7 +259,7 @@ envs = gym.vector.make("LunarLander-v2", num_envs=3, max_episode_steps=600)
 
 # %%
 # Domain Randomization
-# ------------------------------
+# --------------------
 #
 # If we want to randomize the environment for training to get more robust agents (that can deal with different parameterizations of an environment
 # and theirfore might have a higher degree of generalization), we can set the desired parameters manually or use a pseudo-random number generator to generate them.
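As a sketch of what that randomization can look like in practice (parameter ranges are illustrative, and `LunarLander-v2` needs the Box2D extra installed), each sub-environment can be built with its own gravity and wind:

```python
import gymnasium as gym
import numpy as np

rng = np.random.default_rng(seed=0)


def make_randomized_env(gravity: float, wind_power: float):
    # Bind the sampled parameters now so each closure keeps its own values.
    return lambda: gym.make(
        "LunarLander-v2", gravity=gravity, enable_wind=True, wind_power=wind_power
    )


envs = gym.vector.AsyncVectorEnv(
    [
        make_randomized_env(g, w)
        for g, w in zip(rng.uniform(-11.9, -0.1, size=3), rng.uniform(0.0, 15.0, size=3))
    ]
)
obs, info = envs.reset(seed=0)
envs.close()
```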
@@ -337,7 +337,7 @@ envs = gym.vector.AsyncVectorEnv(
 
 # %%
 # Setup
-# ------------------------------
+# -----
 #
 
 # environment hyperparams
@@ -398,7 +398,7 @@ agent = A2C(obs_shape, action_shape, device, critic_lr, actor_lr, n_envs)
 
 # %%
 # Training the A2C Agent
-# ------------------------------
+# ----------------------
 #
 # For our training loop, we are using the `RecordEpisodeStatistics` wrapper to record the episode lengths and returns and we are also saving
 # the losses and entropies to plot them after the agent finished training.
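For readers unfamiliar with the wrapper mentioned here, a small self-contained sketch (again on `CartPole-v1` to stay dependency-free) of how `RecordEpisodeStatistics` exposes episode returns and lengths:

```python
import gymnasium as gym

envs = gym.vector.make("CartPole-v1", num_envs=3)
envs = gym.wrappers.RecordEpisodeStatistics(envs, deque_size=100)

obs, info = envs.reset(seed=1)
for _ in range(500):
    obs, rewards, terminations, truncations, info = envs.step(envs.action_space.sample())

# The wrapper keeps the most recent episode returns and lengths in deques.
print(list(envs.return_queue)[:5])
print(list(envs.length_queue)[:5])
envs.close()
```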
@@ -478,7 +478,7 @@ for sample_phase in tqdm(range(n_updates)):
 
 # %%
 # Plotting
-# ------------------------------
+# --------
 #
 
 """ plot the results """
@@ -550,7 +550,7 @@ plt.show()
 
 # %%
 # Performance Analysis of Synchronous and Asynchronous Vectorized Environments
-# ------------------------------
+# ----------------------------------------------------------------------------
 #
 
 # %%
@@ -608,7 +608,7 @@ plt.show()
 
 # %%
 # Saving/ Loading Weights
-# ------------------------------
+# -----------------------
 #
 
 save_weights = False
@@ -638,7 +638,7 @@ if load_weights:
 
 # %%
 # Showcase the Agent
-# ------------------------------
+# ------------------
 #
 
 """ play a couple of showcase episodes """
@@ -690,7 +690,7 @@ env.close()
 
 # %%
 # Try playing the environment yourself
-# ------------------------------
+# ------------------------------------
 #
 # from gymnasium.utils.play import play
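The commented-out import hints at Gymnasium's interactive play utility; a hedged usage sketch (the key bindings are illustrative, and it needs pygame plus the Box2D extra):

```python
import gymnasium as gym
from gymnasium.utils.play import play

# Illustrative key map for LunarLander-v2: w fires the main engine, a/d fire the side engines.
play(
    gym.make("LunarLander-v2", render_mode="rgb_array"),
    keys_to_action={"w": 2, "a": 1, "d": 3},
    noop=0,
)
```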
@@ -701,7 +701,7 @@ env.close()
 
 # %%
 # References
-# ------------------------------
+# ----------
 #
 # [1] V. Mnih, A. P. Badia, M. Mirza, A. Graves, T. P. Lillicrap, T. Harley, D. Silver, K. Kavukcuoglu. "Asynchronous Methods for Deep Reinforcement Learning" ICML (2016).
 #


@@ -0,0 +1,2 @@
+Training Agents
+---------------