Tutorials galleries (#258)

2025-07-31 22:04:31 +00:00 · 2023-01-11 14:00:51 -06:00
parent 35fe9b0f13
commit 4e6dc3e420
14 changed files with 54 additions and 53 deletions
--- a/docs/tutorials/README.rst
+++ b/docs/tutorials/README.rst
@@ -0,0 +1,2 @@
+Tutorials
+=========
--- a/docs/tutorials/demo.py
+++ b/docs/tutorials/demo.py
@@ -1,29 +0,0 @@
-"""
-Demo tutorial script
-=========================
-
-This file is not listed in the website and serves only to give an example of a tutorial file. And is mostly a copy-paste from sphinx-gallery.
-"""
-
-# %%
-# This is a section header
-# ------------------------
-# This is the first section!
-# The `#%%` signifies to Sphinx-Gallery that this text should be rendered as
-# rST and if using one of the above IDE/plugin's, also signifies the start of a
-# 'code block'.
-
-# This line won't be rendered as rST because there's a space after the last block.
-myvariable = 2
-print(f"my variable is {myvariable}")
-# This is the end of the 'code block' (if using an above IDE). All code within
-# this block can be easily executed all at once.
-
-# %%
-# This is another section header
-# ------------------------------
-#
-# In the built documentation, it will be rendered as rST after the code above!
-# This is also another code block.
-
-print(f"my variable plus 2 is {myvariable + 2}")
--- a/docs/tutorials/gymnasium_basics/README.rst
+++ b/docs/tutorials/gymnasium_basics/README.rst
@@ -0,0 +1,2 @@
+Gymnasium Basics
+----------------
--- a/docs/tutorials/gymnasium_basics/environment_creation.py
+++ b/docs/tutorials/gymnasium_basics/environment_creation.py
--- a/docs/tutorials/gymnasium_basics/handling_time_limits.py
+++ b/docs/tutorials/gymnasium_basics/handling_time_limits.py
--- a/docs/tutorials/gymnasium_basics/implementing_custom_wrappers.py
+++ b/docs/tutorials/gymnasium_basics/implementing_custom_wrappers.py
--- a/docs/tutorials/gymnasium_basics/vector_envs_tutorial.py
+++ b/docs/tutorials/gymnasium_basics/vector_envs_tutorial.py
@@ -1,13 +1,13 @@
 """
 Training A2C with Vector Envs and Domain Randomization
-=================================
+======================================================

 """


 # %%
 # Introduction
-# ------------------------------
+# ------------
 #
 # In this tutorial, you'll learn how to use vectorized environments to train an Advantage Actor-Critic agent.
 # We are going to use A2C, which is the synchronous version of the A3C algorithm [1].
@@ -56,7 +56,7 @@ import gymnasium as gym

 # %%
 # Advantage Actor-Critic (A2C)
-# ------------------------------
+# ----------------------------
 #
 # The Actor-Critic combines elements of value-based and policy-based methods. In A2C, the agent has two separate neural networks:
 # a critic network that estimates the state-value function, and an actor network that outputs logits for a categorical probability distribution over all actions.
@@ -241,7 +241,7 @@ class A2C(nn.Module):

 # %%
 # Using Vectorized Environments
-# ------------------------------
+# -----------------------------
 #
 # When you calculate the losses for the two Neural Networks over only one epoch, it might have a high variance. With vectorized environments,
 # we can play with `n_envs` in parallel and thus get up to a linear speedup (meaning that in theory, we collect samples `n_envs` times quicker)
@@ -259,7 +259,7 @@ envs = gym.vector.make("LunarLander-v2", num_envs=3, max_episode_steps=600)

 # %%
 # Domain Randomization
-# ------------------------------
+# --------------------
 #
 # If we want to randomize the environment for training to get more robust agents (that can deal with different parameterizations of an environment
 # and therefore might have a higher degree of generalization), we can set the desired parameters manually or use a pseudo-random number generator to generate them.
@@ -337,7 +337,7 @@ envs = gym.vector.AsyncVectorEnv(

 # %%
 # Setup
-# ------------------------------
+# -----
 #

 # environment hyperparams
@@ -398,7 +398,7 @@ agent = A2C(obs_shape, action_shape, device, critic_lr, actor_lr, n_envs)

 # %%
 # Training the A2C Agent
-# ------------------------------
+# ----------------------
 #
 # For our training loop, we are using the `RecordEpisodeStatistics` wrapper to record the episode lengths and returns and we are also saving
 # the losses and entropies to plot them after the agent finished training.
@@ -478,7 +478,7 @@ for sample_phase in tqdm(range(n_updates)):

 # %%
 # Plotting
-# ------------------------------
+# --------
 #

 """ plot the results """
@@ -550,7 +550,7 @@ plt.show()

 # %%
 # Performance Analysis of Synchronous and Asynchronous Vectorized Environments
-# ------------------------------
+# ----------------------------------------------------------------------------
 #

 # %%
@@ -608,7 +608,7 @@ plt.show()

 # %%
 # Saving/ Loading Weights
-# ------------------------------
+# -----------------------
 #

 save_weights = False
@@ -638,7 +638,7 @@ if load_weights:

 # %%
 # Showcase the Agent
-# ------------------------------
+# ------------------
 #

 """ play a couple of showcase episodes """
@@ -690,7 +690,7 @@ env.close()

 # %%
 # Try playing the environment yourself
-# ------------------------------
+# ------------------------------------
 #

 # from gymnasium.utils.play import play
@@ -701,7 +701,7 @@ env.close()

 # %%
 # References
-# ------------------------------
+# ----------
 #
 # [1] V. Mnih, A. P. Badia, M. Mirza, A. Graves, T. P. Lillicrap, T. Harley, D. Silver, K. Kavukcuoglu. "Asynchronous Methods for Deep Reinforcement Learning" ICML (2016).
 #
--- a/docs/tutorials/training_agents/README.rst
+++ b/docs/tutorials/training_agents/README.rst
@@ -0,0 +1,2 @@
+Training Agents
+---------------
--- a/docs/tutorials/training_agents/blackjack_tutorial.py
+++ b/docs/tutorials/training_agents/blackjack_tutorial.py
--- a/docs/tutorials/training_agents/reinforce_invpend_gym_v26.py
+++ b/docs/tutorials/training_agents/reinforce_invpend_gym_v26.py