This commit is contained in:
pseudo-rnd-thoughts
2025-06-27 08:24:19 +00:00
parent 8f1358e398
commit 34fc9d1961
529 changed files with 227268 additions and 0 deletions

4
v1.2.0/.buildinfo Normal file
View File

@@ -0,0 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: cf2f46ba41b047a4e64ef75619dcaa75
tags: d77d1c0d9ca2f4c8421862c7c5a0d620

0
v1.2.0/.nojekyll Normal file
View File

695
v1.2.0/404.html Normal file
View File

@@ -0,0 +1,695 @@
<!doctype html>
<html class="no-js" lang="en" data-content_root="../">
<head><meta charset="utf-8"/>
<!-- The viewport is declared exactly once for this document. -->
<meta name="viewport" content="width=device-width,initial-scale=1"/>
<meta name="color-scheme" content="light dark">
<meta name="description" content="A standard API for reinforcement learning and a diverse set of reference environments (formerly Gym)">
<!-- Open Graph / Twitter card metadata used for link previews. -->
<meta property="og:title" content="Gymnasium Documentation" />
<meta property="og:type" content="website" />
<meta property="og:description" content="A standard API for reinforcement learning and a diverse set of reference environments (formerly Gym)" />
<meta property="og:url" content="https://gymnasium.farama.org/404.html" /><meta property="og:image" content="https://gymnasium.farama.org/_static/img/gymnasium-github.png" /><meta name="twitter:card" content="summary_large_image">
<link rel="index" title="Index" href="/genindex/" /><link rel="search" title="Search" href="/search/" />
<link rel="canonical" href="https://gymnasium.farama.org/404.html" />
<link rel="shortcut icon" href="/_static/favicon.png"/><!-- Generated with Sphinx 7.4.7 and Furo 2023.08.19.dev1 -->
<title>404 - Page Not Found - Gymnasium Documentation</title>
<link rel="stylesheet" type="text/css" href="/_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="/_static/styles/furo.css?v=3e7f4c72" />
<link rel="stylesheet" type="text/css" href="/_static/sg_gallery.css?v=61a4c737" />
<link rel="stylesheet" type="text/css" href="/_static/sg_gallery-binder.css?v=f4aeca0c" />
<link rel="stylesheet" type="text/css" href="/_static/sg_gallery-dataframe.css?v=2082cf3c" />
<link rel="stylesheet" type="text/css" href="/_static/sg_gallery-rendered-html.css?v=1277b6f3" />
<link rel="stylesheet" type="text/css" href="/_static/styles/furo-extensions.css?v=82c8b628" />
<style>
/* Code block colours: light values by default. */
body {
  --color-code-background: #f8f8f8;
  --color-code-foreground: black;
}
@media not print {
  /* Dark values when the theme is explicitly set to dark... */
  body[data-theme="dark"] {
    --color-code-background: #202020;
    --color-code-foreground: #d0d0d0;
  }
  /* ...or when the OS prefers dark and the theme is not explicitly light. */
  @media (prefers-color-scheme: dark) {
    body:not([data-theme="light"]) {
      --color-code-background: #202020;
      --color-code-foreground: #d0d0d0;
    }
  }
}
</style></head>
<body>
<header class="farama-header" aria-label="Farama header">
  <!-- Site header: mobile sidebar toggle, project logo/title, and the Farama projects menu. -->
  <div class="farama-header__container">
    <div class="farama-header__left--mobile">
      <label class="nav-overlay-icon" for="__navigation">
        <div class="visually-hidden">Toggle site navigation sidebar</div>
        <svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
          <defs></defs>
          <line x1="0.5" y1="4" x2="23.5" y2="4"></line>
          <line x1="0.232" y1="12" x2="23.5" y2="12"></line>
          <line x1="0.232" y1="20" x2="23.5" y2="20"></line>
        </svg>
      </label>
    </div>
    <div class="farama-header__left farama-header__center--mobile">
      <a href="/">
        <img class="farama-header__logo only-light" src="/_static/img/gymnasium_black.svg" alt="Light Logo"/>
        <img class="farama-header__logo only-dark" src="/_static/img/gymnasium_white.svg" alt="Dark Logo"/>
        <span class="farama-header__title">Gymnasium Documentation</span>
      </a>
    </div>
    <div class="farama-header__right">
      <div class="farama-header-menu">
        <!-- The button's accessible name comes from aria-label, so the logo inside it is decorative (empty alt). -->
        <button class="farama-header-menu__btn" type="button" aria-label="Open Farama Menu" aria-expanded="false" aria-haspopup="true" aria-controls="farama-menu">
          <img class="farama-black-logo-invert" src="/_static/img/farama-logo-header.svg" alt="">
          <svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
            <polyline style="stroke-linecap: round; stroke-linejoin: round; fill: none; stroke-width: 2px;" points="1 7 12 18 23 7"></polyline>
          </svg>
        </button>
        <div class="farama-header-menu-container farama-hidden" aria-hidden="true" id="farama-menu">
          <div class="farama-header-menu__header">
            <a href="https://farama.org">
              <img class="farama-header-menu__logo farama-white-logo-invert" src="/_static/img/farama_solid_white.svg" alt="Farama Foundation logo">
              <span>Farama Foundation</span>
            </a>
            <div class="farama-header-menu-header__right">
              <!-- Icon-only button: aria-label supplies the accessible name. -->
              <button id="farama-close-menu" type="button" aria-label="Close Farama Menu">
                <svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg" fill="none" stroke="currentColor"
                  stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="icon-close">
                  <line x1="3" y1="21" x2="21" y2="3"></line>
                  <line x1="3" y1="3" x2="21" y2="21"></line>
                </svg>
              </button>
            </div>
          </div>
          <div class="farama-header-menu__body">
            <!-- Response from farama.org/api/projects.json -->
          </div>
        </div>
      </div>
    </div>
  </div>
</header>
<script>
{
  // Copy the "theme" value from localStorage onto <body data-theme>;
  // use "auto" when the stored value is missing or empty.
  const storedTheme = localStorage.getItem("theme");
  document.body.dataset.theme = storedTheme || "auto";
}
</script>
<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
<symbol id="svg-toc" viewBox="0 0 24 24">
<title>Contents</title>
<svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 1024 1024">
<path d="M408 442h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8zm-8 204c0 4.4 3.6 8 8 8h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56zm504-486H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zm0 632H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zM115.4 518.9L271.7 642c5.8 4.6 14.4.5 14.4-6.9V388.9c0-7.4-8.5-11.5-14.4-6.9L115.4 505.1a8.74 8.74 0 0 0 0 13.8z"/>
</svg>
</symbol>
<symbol id="svg-menu" viewBox="0 0 24 24">
<title>Menu</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-menu">
<line x1="3" y1="12" x2="21" y2="12"></line>
<line x1="3" y1="6" x2="21" y2="6"></line>
<line x1="3" y1="18" x2="21" y2="18"></line>
</svg>
</symbol>
<symbol id="svg-arrow-right" viewBox="0 0 24 24">
<title>Expand</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-chevron-right">
<polyline points="9 18 15 12 9 6"></polyline>
</svg>
</symbol>
<symbol id="svg-sun" viewBox="0 0 24 24">
<title>Light mode</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="feather-sun">
<circle cx="12" cy="12" r="5"></circle>
<line x1="12" y1="1" x2="12" y2="3"></line>
<line x1="12" y1="21" x2="12" y2="23"></line>
<line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
<line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
<line x1="1" y1="12" x2="3" y2="12"></line>
<line x1="21" y1="12" x2="23" y2="12"></line>
<line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
<line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
</svg>
</symbol>
<symbol id="svg-moon" viewBox="0 0 24 24">
<title>Dark mode</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-moon">
<path stroke="none" d="M0 0h24v24H0z" fill="none" />
<path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z" />
</svg>
</symbol>
<symbol id="svg-sun-half" viewBox="0 0 24 24">
<title>Auto light/dark mode</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-shadow">
<path stroke="none" d="M0 0h24v24H0z" fill="none"/>
<circle cx="12" cy="12" r="9" />
<path d="M13 12h5" />
<path d="M13 15h4" />
<path d="M13 18h1" />
<path d="M13 9h4" />
<path d="M13 6h1" />
</svg>
</symbol>
</svg>
<input type="checkbox" class="sidebar-toggle" name="__navigation" id="__navigation">
<input type="checkbox" class="sidebar-toggle" name="__toc" id="__toc">
<label class="overlay sidebar-overlay" for="__navigation">
<div class="visually-hidden">Hide navigation sidebar</div>
</label>
<label class="overlay toc-overlay" for="__toc">
<div class="visually-hidden">Hide table of contents sidebar</div>
</label>
<div class="page">
<!--<header class="mobile-header">
<div class="header-left">
<label class="nav-overlay-icon" for="__navigation">
<div class="visually-hidden">Toggle site navigation sidebar</div>
<i class="icon"><svg><use href="#svg-menu"></use></svg></i>
</label>
</div>
<div class="header-center">
<a href="/"><div class="brand">Gymnasium Documentation</div></a>
</div>
<div class="header-right">
<div class="theme-toggle-container theme-toggle-header">
<button class="theme-toggle">
<div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
<svg class="theme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
<svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg>
<svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
</button>
</div>
<label class="toc-overlay-icon toc-header-icon no-toc" for="__toc">
<div class="visually-hidden">Toggle table of contents sidebar</div>
<i class="icon"><svg><use href="#svg-toc"></use></svg></i>
</label>
</div>
</header>-->
<aside class="sidebar-drawer">
<div class="sidebar-container">
<div class="sidebar-sticky"><a class="farama-sidebar__title" href="/">
<img class="farama-header__logo only-light" src="/_static/img/gymnasium_black.svg" alt="Light Logo"/>
<img class="farama-header__logo only-dark" src="/_static/img/gymnasium_white.svg" alt="Dark Logo"/>
<span class="farama-header__title">Gymnasium Documentation</span>
</a><form class="sidebar-search-container" method="get" action="/search/" role="search">
<!-- Root-relative action, matching <link rel="search" href="/search/"> in <head>: a 404 page can be served for a missing path at any depth, so a page-relative action would resolve differently per URL. -->
<input class="sidebar-search" placeholder="Search" name="q" aria-label="Search">
<input type="hidden" name="check_keywords" value="yes">
<input type="hidden" name="area" value="default">
</form>
<div id="searchbox"></div><div class="sidebar-scroll"><div class="sidebar-tree">
<p class="caption" role="heading"><span class="caption-text">Introduction</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="/introduction/basic_usage/">Basic Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="/introduction/train_agent/">Training an Agent</a></li>
<li class="toctree-l1"><a class="reference internal" href="/introduction/create_custom_env/">Create a Custom Environment</a></li>
<li class="toctree-l1"><a class="reference internal" href="/introduction/record_agent/">Recording Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="/introduction/speed_up_env/">Speeding Up Training</a></li>
<li class="toctree-l1"><a class="reference internal" href="/introduction/gym_compatibility/">Compatibility with Gym</a></li>
<li class="toctree-l1"><a class="reference internal" href="/introduction/migration_guide/">Migration Guide - v0.21 to v1.0.0</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">API</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="/api/env/">Env</a></li>
<li class="toctree-l1"><a class="reference internal" href="/api/registry/">Make and register</a></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="/api/spaces/">Spaces</a><input class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" role="switch" type="checkbox"/><label for="toctree-checkbox-1"><div class="visually-hidden">Toggle navigation of Spaces</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="/api/spaces/fundamental/">Fundamental Spaces</a></li>
<li class="toctree-l2"><a class="reference internal" href="/api/spaces/composite/">Composite Spaces</a></li>
<li class="toctree-l2"><a class="reference internal" href="/api/spaces/utils/">Spaces Utils</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="/api/wrappers/">Wrappers</a><input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" role="switch" type="checkbox"/><label for="toctree-checkbox-2"><div class="visually-hidden">Toggle navigation of Wrappers</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="/api/wrappers/table/">List of Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="/api/wrappers/misc_wrappers/">Misc Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="/api/wrappers/action_wrappers/">Action Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="/api/wrappers/observation_wrappers/">Observation Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="/api/wrappers/reward_wrappers/">Reward Wrappers</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="/api/vector/">Vectorize</a><input class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" role="switch" type="checkbox"/><label for="toctree-checkbox-3"><div class="visually-hidden">Toggle navigation of Vectorize</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="/api/vector/wrappers/">Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="/api/vector/async_vector_env/">AsyncVectorEnv</a></li>
<li class="toctree-l2"><a class="reference internal" href="/api/vector/sync_vector_env/">SyncVectorEnv</a></li>
<li class="toctree-l2"><a class="reference internal" href="/api/vector/utils/">Utility functions</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="/api/utils/">Utility functions</a></li>
<li class="toctree-l1"><a class="reference internal" href="/api/functional/">Functional Env</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Environments</span></p>
<ul>
<li class="toctree-l1 has-children"><a class="reference internal" href="/environments/classic_control/">Classic Control</a><input class="toctree-checkbox" id="toctree-checkbox-4" name="toctree-checkbox-4" role="switch" type="checkbox"/><label for="toctree-checkbox-4"><div class="visually-hidden">Toggle navigation of Classic Control</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="/environments/classic_control/acrobot/">Acrobot</a></li>
<li class="toctree-l2"><a class="reference internal" href="/environments/classic_control/cart_pole/">Cart Pole</a></li>
<li class="toctree-l2"><a class="reference internal" href="/environments/classic_control/mountain_car_continuous/">Mountain Car Continuous</a></li>
<li class="toctree-l2"><a class="reference internal" href="/environments/classic_control/mountain_car/">Mountain Car</a></li>
<li class="toctree-l2"><a class="reference internal" href="/environments/classic_control/pendulum/">Pendulum</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="/environments/box2d/">Box2D</a><input class="toctree-checkbox" id="toctree-checkbox-5" name="toctree-checkbox-5" role="switch" type="checkbox"/><label for="toctree-checkbox-5"><div class="visually-hidden">Toggle navigation of Box2D</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="/environments/box2d/bipedal_walker/">Bipedal Walker</a></li>
<li class="toctree-l2"><a class="reference internal" href="/environments/box2d/car_racing/">Car Racing</a></li>
<li class="toctree-l2"><a class="reference internal" href="/environments/box2d/lunar_lander/">Lunar Lander</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="/environments/toy_text/">Toy Text</a><input class="toctree-checkbox" id="toctree-checkbox-6" name="toctree-checkbox-6" role="switch" type="checkbox"/><label for="toctree-checkbox-6"><div class="visually-hidden">Toggle navigation of Toy Text</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="/environments/toy_text/blackjack/">Blackjack</a></li>
<li class="toctree-l2"><a class="reference internal" href="/environments/toy_text/taxi/">Taxi</a></li>
<li class="toctree-l2"><a class="reference internal" href="/environments/toy_text/cliff_walking/">Cliff Walking</a></li>
<li class="toctree-l2"><a class="reference internal" href="/environments/toy_text/frozen_lake/">Frozen Lake</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="/environments/mujoco/">MuJoCo</a><input class="toctree-checkbox" id="toctree-checkbox-7" name="toctree-checkbox-7" role="switch" type="checkbox"/><label for="toctree-checkbox-7"><div class="visually-hidden">Toggle navigation of MuJoCo</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="/environments/mujoco/ant/">Ant</a></li>
<li class="toctree-l2"><a class="reference internal" href="/environments/mujoco/half_cheetah/">Half Cheetah</a></li>
<li class="toctree-l2"><a class="reference internal" href="/environments/mujoco/hopper/">Hopper</a></li>
<li class="toctree-l2"><a class="reference internal" href="/environments/mujoco/humanoid/">Humanoid</a></li>
<li class="toctree-l2"><a class="reference internal" href="/environments/mujoco/humanoid_standup/">Humanoid Standup</a></li>
<li class="toctree-l2"><a class="reference internal" href="/environments/mujoco/inverted_double_pendulum/">Inverted Double Pendulum</a></li>
<li class="toctree-l2"><a class="reference internal" href="/environments/mujoco/inverted_pendulum/">Inverted Pendulum</a></li>
<li class="toctree-l2"><a class="reference internal" href="/environments/mujoco/pusher/">Pusher</a></li>
<li class="toctree-l2"><a class="reference internal" href="/environments/mujoco/reacher/">Reacher</a></li>
<li class="toctree-l2"><a class="reference internal" href="/environments/mujoco/swimmer/">Swimmer</a></li>
<li class="toctree-l2"><a class="reference internal" href="/environments/mujoco/walker2d/">Walker2D</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="/environments/atari/">Atari</a></li>
<li class="toctree-l1"><a class="reference internal" href="/environments/third_party_environments/">External Environments</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Tutorials</span></p>
<ul>
<li class="toctree-l1 has-children"><a class="reference internal" href="/tutorials/gymnasium_basics/">Gymnasium Basics</a><input class="toctree-checkbox" id="toctree-checkbox-8" name="toctree-checkbox-8" role="switch" type="checkbox"/><label for="toctree-checkbox-8"><div class="visually-hidden">Toggle navigation of Gymnasium Basics</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="/tutorials/gymnasium_basics/environment_creation/">Make your own custom environment</a></li>
<li class="toctree-l2"><a class="reference internal" href="/tutorials/gymnasium_basics/handling_time_limits/">Handling Time Limits</a></li>
<li class="toctree-l2"><a class="reference internal" href="/tutorials/gymnasium_basics/implementing_custom_wrappers/">Implementing Custom Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="/tutorials/gymnasium_basics/load_quadruped_model/">Load custom quadruped robot environments</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="/tutorials/training_agents/">Training Agents</a><input class="toctree-checkbox" id="toctree-checkbox-9" name="toctree-checkbox-9" role="switch" type="checkbox"/><label for="toctree-checkbox-9"><div class="visually-hidden">Toggle navigation of Training Agents</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="/tutorials/training_agents/blackjack_q_learning/">Solving Blackjack with Tabular Q-Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="/tutorials/training_agents/frozenlake_q_learning/">Solving Frozenlake with Tabular Q-Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="/tutorials/training_agents/mujoco_reinforce/">Training using REINFORCE for Mujoco</a></li>
<li class="toctree-l2"><a class="reference internal" href="/tutorials/training_agents/vector_a2c/">Speeding up A2C Training with Vector Envs</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="/tutorials/third-party-tutorials/">Third-Party Tutorials</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Development</span></p>
<ul>
<li class="toctree-l1"><a class="reference external" href="https://github.com/Farama-Foundation/Gymnasium">Github</a></li>
<li class="toctree-l1"><a class="reference external" href="https://arxiv.org/abs/2407.17032">Paper</a></li>
<li class="toctree-l1"><a class="reference internal" href="/gymnasium_release_notes/">Gymnasium Release Notes</a></li>
<li class="toctree-l1"><a class="reference internal" href="/gym_release_notes/">Gym Release Notes</a></li>
<li class="toctree-l1"><a class="reference external" href="https://github.com/Farama-Foundation/Gymnasium/blob/main/docs/README.md">Contribute to the Docs</a></li>
</ul>
</div>
</div>
</div>
</div>
</aside>
<div class="main-container">
<div class="main">
<div class="content">
<div class="article-container">
<a href="#" class="back-to-top muted-link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
<path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12z"></path>
</svg>
<span>Back to top</span>
</a>
<div class="content-icon-container"><div class="edit-this-page">
<a class="muted-link" href="https://github.com/Farama-Foundation/Gymnasium/edit/main/docs/404.md" title="Edit this page">
<svg aria-hidden="true" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" fill="none" stroke-linecap="round" stroke-linejoin="round">
<path stroke="none" d="M0 0h24v24H0z" fill="none"/>
<path d="M4 20h4l10.5 -10.5a1.5 1.5 0 0 0 -4 -4l-10.5 10.5v4" />
<line x1="13.5" y1="6.5" x2="17.5" y2="10.5" />
</svg>
<span class="visually-hidden">Edit this page</span>
</a>
</div><div class="theme-toggle-container theme-toggle-content">
<button class="theme-toggle" title="Toggle color theme">
<div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
<svg class="theme-icon-when-auto">
<use href="#svg-sun-half"></use>
</svg>
<svg class="theme-icon-when-dark">
<use href="#svg-moon"></use>
</svg>
<svg class="theme-icon-when-light">
<use href="#svg-sun"></use>
</svg>
</button>
</div>
<label class="toc-overlay-icon toc-content-icon no-toc" for="__toc">
<div class="visually-hidden">Toggle table of contents sidebar</div>
<i class="icon"><svg>
<use href="#svg-toc"></use>
</svg></i>
</label>
</div>
<article role="main">
<section class="tex2jax_ignore mathjax_ignore" id="page-not-found">
<h1>404 - Page Not Found<a class="headerlink" href="#page-not-found" title="Link to this heading"></a></h1>
<section id="the-requested-page-could-not-be-found">
<h2>The requested page could not be found.<a class="headerlink" href="#the-requested-page-could-not-be-found" title="Link to this heading"></a></h2>
</section>
</section>
</article>
</div>
<footer>
<div class="related-pages">
</div>
<div class="bottom-of-page">
<div class="left-details">
<div class="copyright">
Copyright &#169; 2025 Farama Foundation
</div>
<!--
Made with <a href="https://www.sphinx-doc.org/">Sphinx</a> and <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s
<a href="https://github.com/pradyunsg/furo">Furo</a>
-->
</div>
<div class="right-details">
<div class="icons">
<a class="muted-link" href="https://github.com/Farama-Foundation/Gymnasium/"
aria-label="On GitHub">
<svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 16 16">
<path fill-rule="evenodd"
d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0 0 16 8c0-4.42-3.58-8-8-8z">
</path>
</svg>
</a>
</div>
</div>
</div>
</footer>
</div>
<aside class="toc-drawer no-toc">
</aside>
</div>
</div>
</div>
<script>
/**
 * Flip the Farama projects menu between open and closed.
 *
 * The "active" class on `.farama-header-menu` is the source of truth for the
 * current state; `aria-expanded` on the trigger button and `aria-hidden` on
 * the menu container are updated to describe the state after the flip.
 */
const toggleMenu = () => {
  const menuBtn = document.querySelector(".farama-header-menu__btn");
  const menuContainer = document.querySelector(".farama-header-menu-container");
  const menuRoot = document.querySelector(".farama-header-menu");
  const wasOpen = menuRoot.classList.contains("active");

  menuBtn.setAttribute("aria-expanded", wasOpen ? "false" : "true");
  menuContainer.setAttribute("aria-hidden", wasOpen ? "true" : "false");
  menuRoot.classList.toggle("active");
};

// Both the header button and the close button inside the menu toggle it.
[
  document.querySelector(".farama-header-menu__btn"),
  document.getElementById("farama-close-menu"),
].forEach((trigger) => trigger.addEventListener("click", toggleMenu));
</script>
<script async src="https://www.googletagmanager.com/gtag/js?id=G-6H9C8TWXZ8"></script>
<script>
/**
 * Initialise the Google tag queue and record the initial events.
 *
 * Ensures `window.dataLayer` exists, then pushes a "js" entry carrying the
 * current Date and a "config" entry for the G-6H9C8TWXZ8 property. Each entry
 * is pushed as the call's `arguments` object, not as a plain array.
 */
const enableGtag = () => {
  if (!window.dataLayer) {
    window.dataLayer = [];
  }
  const gtag = function () {
    window.dataLayer.push(arguments);
  };
  gtag("js", new Date());
  gtag("config", "G-6H9C8TWXZ8");
};
// Cookie consent banner.
// - No stored answer under "acceptedCookieAlert": show the banner with Deny / Allow buttons.
// - Stored answer is the string "true": enable Google Analytics straight away.
// - Any other stored answer: do nothing.
(() => {
  const consentKey = "acceptedCookieAlert";
  const storedAnswer = localStorage.getItem(consentKey);

  if (storedAnswer) {
    if (storedAnswer === "true") {
      enableGtag();
    }
    return;
  }

  const banner = document.createElement("div");
  banner.classList.add("cookie-alert");

  const bannerBody = document.createElement("div");
  bannerBody.classList.add("cookie-alert__container");

  const notice = document.createElement("p");
  notice.innerHTML =
    "This page uses <a href=\"https://analytics.google.com/\">\n" +
    "Google Analytics</a> to collect statistics.";
  bannerBody.appendChild(notice);

  // Build one banner button; the click handler stores the answer, removes the
  // banner, and runs the optional follow-up.
  const makeButton = (label, elementId, answer, afterStore) => {
    const button = document.createElement("button");
    button.innerText = label;
    button.className = "farama-btn cookie-alert__button";
    button.id = elementId;
    button.addEventListener("click", () => {
      localStorage.setItem(consentKey, answer);
      banner.remove();
      if (afterStore) {
        afterStore();
      }
    });
    return button;
  };

  const denyButton = makeButton("Deny", "cookie-alert__decline", false, null);
  const allowButton = makeButton("Allow", "cookie-alert__accept", true, () => enableGtag());

  bannerBody.appendChild(denyButton);
  bannerBody.appendChild(allowButton);
  banner.appendChild(bannerBody);
  document.body.appendChild(banner);
})()
</script>
<script src="/_static/documentation_options.js?v=151cd43d"></script>
<script src="/_static/doctools.js?v=9a2dae69"></script>
<script src="/_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="/_static/scripts/furo.js?v=7660844c"></script>
<script>
/**
 * Build the <ul class="farama-header-menu-list"> for one group of projects.
 *
 * @param projects      Iterable of objects read for `link`, `name` and (optionally) `image`.
 * @param displayImages When truthy, each link is prefixed with a logo <img>; projects
 *                      without an `image` use "/farama_black.svg" under `imagesBasepath`.
 * @returns The populated <ul> element (not yet attached to the document).
 */
const createProjectsList = (projects, displayImages) => {
  const listElem = document.createElement("ul");
  listElem.className = "farama-header-menu-list";

  for (const project of projects) {
    const itemElem = document.createElement("li");
    const linkElem = document.createElement("a");
    linkElem.href = project.link;
    itemElem.appendChild(linkElem);

    if (displayImages) {
      const logoPath = project.image ? project.image : "/farama_black.svg";
      const logoElem = document.createElement("img");
      logoElem.src = imagesBasepath + logoPath;
      logoElem.alt = `${project.name} logo`;
      logoElem.className = "farama-black-logo-invert";
      linkElem.appendChild(logoElem);
    }

    // The project name always follows the (optional) logo inside the link.
    linkElem.appendChild(document.createTextNode(project.name));
    listElem.appendChild(itemElem);
  }

  return listElem;
};
// Create menu with Farama projects by using the API at farama.org/api/projects.json
/**
 * Create and open (but do not send) a cross-origin request.
 *
 * @param method HTTP method passed to `open`.
 * @param url    Target URL passed to `open`.
 * @returns An opened XMLHttpRequest with `responseType` set to "json" when the
 *          object exposes `withCredentials`; otherwise an opened XDomainRequest
 *          when that constructor exists (IE8 & IE9); otherwise `null`.
 */
const createCORSRequest = (method, url) => {
  const request = new XMLHttpRequest();
  request.responseType = 'json';

  if ("withCredentials" in request) {
    request.open(method, url, true);
    return request;
  }

  if (typeof XDomainRequest === "undefined") {
    // CORS not supported.
    return null;
  }

  // IE8 & IE9
  const legacyRequest = new XDomainRequest();
  legacyRequest.open(method, url);
  return legacyRequest;
};
// Endpoint for the Farama projects list and base URL for project logos.
// `imagesBasepath` is read by createProjectsList.
const url = 'https://farama.org/api/projects.json';
const imagesBasepath = "https://farama.org/assets/images"
const method = 'GET';
let xhr = createCORSRequest(method, url);

if (!xhr) {
  // createCORSRequest returns null when no CORS-capable request object exists.
  console.error("Unable to load projects: CORS requests are not supported");
} else {
  // On success, sort the projects into menu sections and render them into
  // `.farama-header-menu__body`.
  xhr.onload = () => {
    const jsonResponse = xhr.response;
    // Without a usable JSON object, Object.keys() below would throw; report and stop.
    if (jsonResponse === null || typeof jsonResponse !== "object") {
      console.error("Unable to load projects");
      return;
    }

    // Array-valued sections render as one list; object-valued sections render
    // as titled sub-lists laid out side by side.
    const sections = {
      "Core Projects": [],
      "Mature Projects": {
        "Documentation": [],
        "Repositories": [],
      },
      "Incubating Projects": {
        "Documentation": [],
        "Repositories": [],
      },
      "Foundation": [
        {
          name: "About",
          link: "https://farama.org/about"
        },
        {
          name: "Standards",
          link: "https://farama.org/project_standards",
        },
        {
          name: "Donate",
          link: "https://farama.org/donations"
        }
      ]
    }

    // Categorize projects
    Object.keys(jsonResponse).forEach(key => {
      // Declared locally so the loop does not create an implicit global.
      const projectJson = jsonResponse[key];
      const hasWebsite = projectJson.website !== null;
      // Link to the project website when there is one, otherwise to its GitHub repository.
      projectJson.link = hasWebsite ? projectJson.website : projectJson.github;

      if (projectJson.type === "core") {
        sections["Core Projects"].push(projectJson)
      } else {
        // Every non-core, non-mature type is treated as incubating.
        const group = projectJson.type === "mature" ? "Mature Projects" : "Incubating Projects";
        const bucket = hasWebsite ? "Documentation" : "Repositories";
        sections[group][bucket].push(projectJson)
      }
    })

    const menuContainer = document.querySelector(".farama-header-menu__body");
    Object.keys(sections).forEach(key => {
      const sectionElem = Object.assign(
        document.createElement('div'), {
          className:'farama-header-menu__section',
        }
      )
      sectionElem.appendChild(Object.assign(document.createElement('span'),
        {
          className:'farama-header-menu__section-title' ,
          innerText: key
        }
      ))

      if (!Array.isArray(sections[key])) {
        const subSections = sections[key];
        const subSectionContainerElem = Object.assign(
          document.createElement('div'), {
            className:'farama-header-menu__subsections-container',
            style: 'display: flex'
          }
        )
        Object.keys(subSections).forEach(subKey => {
          const subSectionElem = Object.assign(
            document.createElement('div'), {
              className:'farama-header-menu__subsection',
            }
          )
          subSectionElem.appendChild(Object.assign(document.createElement('span'),
            {
              className:'farama-header-menu__subsection-title' ,
              innerText: subKey
            }
          ))
          // NOTE(review): only object-valued sections reach this branch, so
          // `key !== 'Foundation'` is always true here; the "Foundation" list is
          // rendered by the array branch below with images enabled. Confirm intent.
          const ulElem = createProjectsList(subSections[subKey], key !== 'Foundation');
          subSectionElem.appendChild(ulElem);
          subSectionContainerElem.appendChild(subSectionElem);
        })
        sectionElem.appendChild(subSectionContainerElem);
      } else {
        const projects = sections[key];
        const ulElem = createProjectsList(projects, true);
        sectionElem.appendChild(ulElem);
      }
      menuContainer.appendChild(sectionElem)
    });
  }

  xhr.onerror = function() {
    console.error("Unable to load projects");
  };

  xhr.send();
}
</script>
<script>
// Repository coordinates for the versioning menu.
// NOTE(review): not read in this script; presumably consumed by the scripts
// inside the fetched versioning_menu.html. Confirm before renaming.
const versioningConfig = {
  githubUser: 'Farama-Foundation',
  githubRepo: 'Gymnasium',
};

// Fetch the shared versioning menu fragment and append it to <body>.
fetch('/main/_static/versioning/versioning_menu.html').then(response => {
  if (response.status !== 200) {
    console.warn("Unable to load versioning menu", response);
    return undefined;
  }
  // Return the inner promise so a failure while reading the body reaches the catch below.
  return response.text().then(text => {
    const container = document.createElement("div");
    container.innerHTML = text;
    document.querySelector("body").appendChild(container);
    // innerHTML doesn't evaluate scripts, so each <script> is replaced with a
    // freshly created copy (same attributes and source text) that will run.
    Array.from(container.querySelectorAll("script")).forEach(oldScript => {
      const newScript = document.createElement("script");
      Array.from(oldScript.attributes).forEach(attr => newScript.setAttribute(attr.name, attr.value));
      newScript.appendChild(document.createTextNode(oldScript.innerHTML));
      oldScript.parentNode.replaceChild(newScript, oldScript);
    });
  });
}).catch(error => {
  // fetch() rejects on network failure; report it instead of leaving an unhandled rejection.
  console.warn("Unable to load versioning menu", error);
});
</script>
</body>
</html>

1
v1.2.0/CNAME Normal file
View File

@@ -0,0 +1 @@
gymnasium.farama.org

764
v1.2.0/README/index.html Normal file
View File

@@ -0,0 +1,764 @@
<!doctype html>
<html class="no-js" lang="en" data-content_root="../">
<head><meta charset="utf-8"/>
<meta name="viewport" content="width=device-width,initial-scale=1"/>
<meta name="color-scheme" content="light dark">
<meta name="description" content="A standard API for reinforcement learning and a diverse set of reference environments (formerly Gym)">
<meta property="og:title" content="Gymnasium Documentation" />
<meta property="og:type" content="website" />
<meta property="og:description" content="A standard API for reinforcement learning and a diverse set of reference environments (formerly Gym)" />
<meta property="og:url" content="https://gymnasium.farama.org/README.html" /><meta property="og:image" content="https://gymnasium.farama.org/_static/img/gymnasium-github.png" /><meta name="twitter:card" content="summary_large_image"><meta name="viewport" content="width=device-width, initial-scale=1" />
<link rel="index" title="Index" href="../genindex/" /><link rel="search" title="Search" href="../search/" />
<link rel="canonical" href="https://gymnasium.farama.org/README.html" />
<link rel="shortcut icon" href="../_static/favicon.png"/><!-- Generated with Sphinx 7.4.7 and Furo 2023.08.19.dev1 -->
<title>Gymnasium-docs - Gymnasium Documentation</title>
<link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../_static/styles/furo.css?v=3e7f4c72" />
<link rel="stylesheet" type="text/css" href="../_static/sg_gallery.css?v=61a4c737" />
<link rel="stylesheet" type="text/css" href="../_static/sg_gallery-binder.css?v=f4aeca0c" />
<link rel="stylesheet" type="text/css" href="../_static/sg_gallery-dataframe.css?v=2082cf3c" />
<link rel="stylesheet" type="text/css" href="../_static/sg_gallery-rendered-html.css?v=1277b6f3" />
<link rel="stylesheet" type="text/css" href="../_static/styles/furo-extensions.css?v=82c8b628" />
<style>
body {
--color-code-background: #f8f8f8;
--color-code-foreground: black;
}
@media not print {
body[data-theme="dark"] {
--color-code-background: #202020;
--color-code-foreground: #d0d0d0;
}
@media (prefers-color-scheme: dark) {
body:not([data-theme="light"]) {
--color-code-background: #202020;
--color-code-foreground: #d0d0d0;
}
}
}
</style></head>
<body>
<header class="farama-header" aria-label="Farama header">
<div class="farama-header__container">
<div class="farama-header__left--mobile">
<label class="nav-overlay-icon" for="__navigation">
<div class="visually-hidden">Toggle site navigation sidebar</div>
<svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
<defs></defs>
<line x1="0.5" y1="4" x2="23.5" y2="4"></line>
<line x1="0.232" y1="12" x2="23.5" y2="12"></line>
<line x1="0.232" y1="20" x2="23.5" y2="20"></line>
</svg>
</label>
</div>
<div class="farama-header__left farama-header__center--mobile">
<a href="../">
<img class="farama-header__logo only-light" src="../_static/img/gymnasium_black.svg" alt="Light Logo"/>
<img class="farama-header__logo only-dark" src="../_static/img/gymnasium_white.svg" alt="Dark Logo"/>
<span class="farama-header__title">Gymnasium Documentation</span>
</a>
</div>
<div class="farama-header__right">
<div class="farama-header-menu">
<button class="farama-header-menu__btn" aria-label="Open Farama Menu" aria-expanded="false" aria-haspopup="true" aria-controls="farama-menu">
<img class="farama-black-logo-invert" src="../_static/img/farama-logo-header.svg" alt="">
<svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
<polyline style="stroke-linecap: round; stroke-linejoin: round; fill: none; stroke-width: 2px;" points="1 7 12 18 23 7"></polyline>
</svg>
</button>
<div class="farama-header-menu-container farama-hidden" aria-hidden="true" id="farama-menu">
<div class="farama-header-menu__header">
<a href="https://farama.org">
<img class="farama-header-menu__logo farama-white-logo-invert" src="../_static/img/farama_solid_white.svg" alt="Farama Foundation logo">
<span>Farama Foundation</span>
</a>
<div class="farama-header-menu-header__right">
<button id="farama-close-menu" aria-label="Close Farama Menu">
<svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg" fill="none" stroke="currentColor"
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="icon-close">
<line x1="3" y1="21" x2="21" y2="3"></line>
<line x1="3" y1="3" x2="21" y2="21"></line>
</svg>
</button>
</div>
</div>
<div class="farama-header-menu__body">
<!-- Response from farama.org/api/projects.json -->
</div>
</div>
</div>
</div>
</div>
</header>
<script>
// Copy the theme saved under the "theme" localStorage key onto <body data-theme>,
// falling back to "auto" when nothing is stored.
document.body.dataset.theme = localStorage.getItem("theme") || "auto";
</script>
<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
<symbol id="svg-toc" viewBox="0 0 24 24">
<title>Contents</title>
<svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 1024 1024">
<path d="M408 442h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8zm-8 204c0 4.4 3.6 8 8 8h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56zm504-486H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zm0 632H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zM115.4 518.9L271.7 642c5.8 4.6 14.4.5 14.4-6.9V388.9c0-7.4-8.5-11.5-14.4-6.9L115.4 505.1a8.74 8.74 0 0 0 0 13.8z"/>
</svg>
</symbol>
<symbol id="svg-menu" viewBox="0 0 24 24">
<title>Menu</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-menu">
<line x1="3" y1="12" x2="21" y2="12"></line>
<line x1="3" y1="6" x2="21" y2="6"></line>
<line x1="3" y1="18" x2="21" y2="18"></line>
</svg>
</symbol>
<symbol id="svg-arrow-right" viewBox="0 0 24 24">
<title>Expand</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-chevron-right">
<polyline points="9 18 15 12 9 6"></polyline>
</svg>
</symbol>
<symbol id="svg-sun" viewBox="0 0 24 24">
<title>Light mode</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="feather-sun">
<circle cx="12" cy="12" r="5"></circle>
<line x1="12" y1="1" x2="12" y2="3"></line>
<line x1="12" y1="21" x2="12" y2="23"></line>
<line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
<line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
<line x1="1" y1="12" x2="3" y2="12"></line>
<line x1="21" y1="12" x2="23" y2="12"></line>
<line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
<line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
</svg>
</symbol>
<symbol id="svg-moon" viewBox="0 0 24 24">
<title>Dark mode</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-moon">
<path stroke="none" d="M0 0h24v24H0z" fill="none" />
<path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z" />
</svg>
</symbol>
<symbol id="svg-sun-half" viewBox="0 0 24 24">
<title>Auto light/dark mode</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-shadow">
<path stroke="none" d="M0 0h24v24H0z" fill="none"/>
<circle cx="12" cy="12" r="9" />
<path d="M13 12h5" />
<path d="M13 15h4" />
<path d="M13 18h1" />
<path d="M13 9h4" />
<path d="M13 6h1" />
</svg>
</symbol>
</svg>
<input type="checkbox" class="sidebar-toggle" name="__navigation" id="__navigation">
<input type="checkbox" class="sidebar-toggle" name="__toc" id="__toc">
<label class="overlay sidebar-overlay" for="__navigation">
<div class="visually-hidden">Hide navigation sidebar</div>
</label>
<label class="overlay toc-overlay" for="__toc">
<div class="visually-hidden">Hide table of contents sidebar</div>
</label>
<div class="page">
<!--<header class="mobile-header">
<div class="header-left">
<label class="nav-overlay-icon" for="__navigation">
<div class="visually-hidden">Toggle site navigation sidebar</div>
<i class="icon"><svg><use href="#svg-menu"></use></svg></i>
</label>
</div>
<div class="header-center">
<a href="../"><div class="brand">Gymnasium Documentation</div></a>
</div>
<div class="header-right">
<div class="theme-toggle-container theme-toggle-header">
<button class="theme-toggle">
<div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
<svg class="theme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
<svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg>
<svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
</button>
</div>
<label class="toc-overlay-icon toc-header-icon" for="__toc">
<div class="visually-hidden">Toggle table of contents sidebar</div>
<i class="icon"><svg><use href="#svg-toc"></use></svg></i>
</label>
</div>
</header>-->
<aside class="sidebar-drawer">
<div class="sidebar-container">
<div class="sidebar-sticky"><a class="farama-sidebar__title" href="../">
<img class="farama-header__logo only-light" src="../_static/img/gymnasium_black.svg" alt="Light Logo"/>
<img class="farama-header__logo only-dark" src="../_static/img/gymnasium_white.svg" alt="Dark Logo"/>
<span class="farama-header__title">Gymnasium Documentation</span>
</a><form class="sidebar-search-container" method="get" action="../search/" role="search">
<input class="sidebar-search" placeholder="Search" name="q" aria-label="Search">
<input type="hidden" name="check_keywords" value="yes">
<input type="hidden" name="area" value="default">
</form>
<div id="searchbox"></div><div class="sidebar-scroll"><div class="sidebar-tree">
<p class="caption" role="heading"><span class="caption-text">Introduction</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../introduction/basic_usage/">Basic Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../introduction/train_agent/">Training an Agent</a></li>
<li class="toctree-l1"><a class="reference internal" href="../introduction/create_custom_env/">Create a Custom Environment</a></li>
<li class="toctree-l1"><a class="reference internal" href="../introduction/record_agent/">Recording Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../introduction/speed_up_env/">Speeding Up Training</a></li>
<li class="toctree-l1"><a class="reference internal" href="../introduction/gym_compatibility/">Compatibility with Gym</a></li>
<li class="toctree-l1"><a class="reference internal" href="../introduction/migration_guide/">Migration Guide - v0.21 to v1.0.0</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">API</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../api/env/">Env</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/registry/">Make and register</a></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../api/spaces/">Spaces</a><input class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" role="switch" type="checkbox"/><label for="toctree-checkbox-1"><div class="visually-hidden">Toggle navigation of Spaces</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../api/spaces/fundamental/">Fundamental Spaces</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/spaces/composite/">Composite Spaces</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/spaces/utils/">Spaces Utils</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../api/wrappers/">Wrappers</a><input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" role="switch" type="checkbox"/><label for="toctree-checkbox-2"><div class="visually-hidden">Toggle navigation of Wrappers</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../api/wrappers/table/">List of Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/wrappers/misc_wrappers/">Misc Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/wrappers/action_wrappers/">Action Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/wrappers/observation_wrappers/">Observation Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/wrappers/reward_wrappers/">Reward Wrappers</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../api/vector/">Vectorize</a><input class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" role="switch" type="checkbox"/><label for="toctree-checkbox-3"><div class="visually-hidden">Toggle navigation of Vectorize</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../api/vector/wrappers/">Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/vector/async_vector_env/">AsyncVectorEnv</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/vector/sync_vector_env/">SyncVectorEnv</a></li>
<li class="toctree-l2"><a class="reference internal" href="../api/vector/utils/">Utility functions</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../api/utils/">Utility functions</a></li>
<li class="toctree-l1"><a class="reference internal" href="../api/functional/">Functional Env</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Environments</span></p>
<ul>
<li class="toctree-l1 has-children"><a class="reference internal" href="../environments/classic_control/">Classic Control</a><input class="toctree-checkbox" id="toctree-checkbox-4" name="toctree-checkbox-4" role="switch" type="checkbox"/><label for="toctree-checkbox-4"><div class="visually-hidden">Toggle navigation of Classic Control</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../environments/classic_control/acrobot/">Acrobot</a></li>
<li class="toctree-l2"><a class="reference internal" href="../environments/classic_control/cart_pole/">Cart Pole</a></li>
<li class="toctree-l2"><a class="reference internal" href="../environments/classic_control/mountain_car_continuous/">Mountain Car Continuous</a></li>
<li class="toctree-l2"><a class="reference internal" href="../environments/classic_control/mountain_car/">Mountain Car</a></li>
<li class="toctree-l2"><a class="reference internal" href="../environments/classic_control/pendulum/">Pendulum</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../environments/box2d/">Box2D</a><input class="toctree-checkbox" id="toctree-checkbox-5" name="toctree-checkbox-5" role="switch" type="checkbox"/><label for="toctree-checkbox-5"><div class="visually-hidden">Toggle navigation of Box2D</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../environments/box2d/bipedal_walker/">Bipedal Walker</a></li>
<li class="toctree-l2"><a class="reference internal" href="../environments/box2d/car_racing/">Car Racing</a></li>
<li class="toctree-l2"><a class="reference internal" href="../environments/box2d/lunar_lander/">Lunar Lander</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../environments/toy_text/">Toy Text</a><input class="toctree-checkbox" id="toctree-checkbox-6" name="toctree-checkbox-6" role="switch" type="checkbox"/><label for="toctree-checkbox-6"><div class="visually-hidden">Toggle navigation of Toy Text</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../environments/toy_text/blackjack/">Blackjack</a></li>
<li class="toctree-l2"><a class="reference internal" href="../environments/toy_text/taxi/">Taxi</a></li>
<li class="toctree-l2"><a class="reference internal" href="../environments/toy_text/cliff_walking/">Cliff Walking</a></li>
<li class="toctree-l2"><a class="reference internal" href="../environments/toy_text/frozen_lake/">Frozen Lake</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../environments/mujoco/">MuJoCo</a><input class="toctree-checkbox" id="toctree-checkbox-7" name="toctree-checkbox-7" role="switch" type="checkbox"/><label for="toctree-checkbox-7"><div class="visually-hidden">Toggle navigation of MuJoCo</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../environments/mujoco/ant/">Ant</a></li>
<li class="toctree-l2"><a class="reference internal" href="../environments/mujoco/half_cheetah/">Half Cheetah</a></li>
<li class="toctree-l2"><a class="reference internal" href="../environments/mujoco/hopper/">Hopper</a></li>
<li class="toctree-l2"><a class="reference internal" href="../environments/mujoco/humanoid/">Humanoid</a></li>
<li class="toctree-l2"><a class="reference internal" href="../environments/mujoco/humanoid_standup/">Humanoid Standup</a></li>
<li class="toctree-l2"><a class="reference internal" href="../environments/mujoco/inverted_double_pendulum/">Inverted Double Pendulum</a></li>
<li class="toctree-l2"><a class="reference internal" href="../environments/mujoco/inverted_pendulum/">Inverted Pendulum</a></li>
<li class="toctree-l2"><a class="reference internal" href="../environments/mujoco/pusher/">Pusher</a></li>
<li class="toctree-l2"><a class="reference internal" href="../environments/mujoco/reacher/">Reacher</a></li>
<li class="toctree-l2"><a class="reference internal" href="../environments/mujoco/swimmer/">Swimmer</a></li>
<li class="toctree-l2"><a class="reference internal" href="../environments/mujoco/walker2d/">Walker2D</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../environments/atari/">Atari</a></li>
<li class="toctree-l1"><a class="reference internal" href="../environments/third_party_environments/">External Environments</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Tutorials</span></p>
<ul>
<li class="toctree-l1 has-children"><a class="reference internal" href="../tutorials/gymnasium_basics/">Gymnasium Basics</a><input class="toctree-checkbox" id="toctree-checkbox-8" name="toctree-checkbox-8" role="switch" type="checkbox"/><label for="toctree-checkbox-8"><div class="visually-hidden">Toggle navigation of Gymnasium Basics</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../tutorials/gymnasium_basics/environment_creation/">Make your own custom environment</a></li>
<li class="toctree-l2"><a class="reference internal" href="../tutorials/gymnasium_basics/handling_time_limits/">Handling Time Limits</a></li>
<li class="toctree-l2"><a class="reference internal" href="../tutorials/gymnasium_basics/implementing_custom_wrappers/">Implementing Custom Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../tutorials/gymnasium_basics/load_quadruped_model/">Load custom quadruped robot environments</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../tutorials/training_agents/">Training Agents</a><input class="toctree-checkbox" id="toctree-checkbox-9" name="toctree-checkbox-9" role="switch" type="checkbox"/><label for="toctree-checkbox-9"><div class="visually-hidden">Toggle navigation of Training Agents</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../tutorials/training_agents/blackjack_q_learning/">Solving Blackjack with Tabular Q-Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../tutorials/training_agents/frozenlake_q_learning/">Solving Frozenlake with Tabular Q-Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../tutorials/training_agents/mujoco_reinforce/">Training using REINFORCE for Mujoco</a></li>
<li class="toctree-l2"><a class="reference internal" href="../tutorials/training_agents/vector_a2c/">Speeding up A2C Training with Vector Envs</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/third-party-tutorials/">Third-Party Tutorials</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Development</span></p>
<ul>
<li class="toctree-l1"><a class="reference external" href="https://github.com/Farama-Foundation/Gymnasium">Github</a></li>
<li class="toctree-l1"><a class="reference external" href="https://arxiv.org/abs/2407.17032">Paper</a></li>
<li class="toctree-l1"><a class="reference internal" href="../gymnasium_release_notes/">Gymnasium Release Notes</a></li>
<li class="toctree-l1"><a class="reference internal" href="../gym_release_notes/">Gym Release Notes</a></li>
<li class="toctree-l1"><a class="reference external" href="https://github.com/Farama-Foundation/Gymnasium/blob/main/docs/README.md">Contribute to the Docs</a></li>
</ul>
</div>
</div>
</div>
</div>
</aside>
<div class="main-container">
<div class="main">
<div class="content">
<div class="article-container">
<a href="#" class="back-to-top muted-link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
<path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12z"></path>
</svg>
<span>Back to top</span>
</a>
<div class="content-icon-container"><div class="edit-this-page">
<a class="muted-link" href="https://github.com/Farama-Foundation/Gymnasium/edit/main/docs/README.md" title="Edit this page">
<svg aria-hidden="true" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" fill="none" stroke-linecap="round" stroke-linejoin="round">
<path stroke="none" d="M0 0h24v24H0z" fill="none"/>
<path d="M4 20h4l10.5 -10.5a1.5 1.5 0 0 0 -4 -4l-10.5 10.5v4" />
<line x1="13.5" y1="6.5" x2="17.5" y2="10.5" />
</svg>
<span class="visually-hidden">Edit this page</span>
</a>
</div><div class="theme-toggle-container theme-toggle-content">
<button class="theme-toggle" title="Toggle color theme">
<div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
<svg class="theme-icon-when-auto">
<use href="#svg-sun-half"></use>
</svg>
<svg class="theme-icon-when-dark">
<use href="#svg-moon"></use>
</svg>
<svg class="theme-icon-when-light">
<use href="#svg-sun"></use>
</svg>
</button>
</div>
<label class="toc-overlay-icon toc-content-icon" for="__toc">
<div class="visually-hidden">Toggle table of contents sidebar</div>
<i class="icon"><svg>
<use href="#svg-toc"></use>
</svg></i>
</label>
</div>
<article role="main">
<section class="tex2jax_ignore mathjax_ignore" id="gymnasium-docs">
<h1>Gymnasium-docs<a class="headerlink" href="#gymnasium-docs" title="Link to this heading"></a></h1>
<p>This folder contains the documentation for <a class="reference external" href="https://github.com/Farama-Foundation/Gymnasium">Gymnasium</a>.</p>
<section id="instructions-for-modifying-environment-pages">
<h2>Instructions for modifying environment pages<a class="headerlink" href="#instructions-for-modifying-environment-pages" title="Link to this heading"></a></h2>
<section id="editing-an-environment-page">
<h3>Editing an environment page<a class="headerlink" href="#editing-an-environment-page" title="Link to this heading"></a></h3>
<p>Fork Gymnasium and edit the docstring in the environment's Python file. Then, pip install your Gymnasium fork and run <code class="docutils literal notranslate"><span class="pre">docs/_scripts/gen_mds.py</span></code> in this repo. This will automatically generate a Markdown documentation file for the environment.</p>
</section>
<section id="adding-a-new-environment">
<h3>Adding a new environment<a class="headerlink" href="#adding-a-new-environment" title="Link to this heading"></a></h3>
<p>Ensure the environment is in Gymnasium (or your fork). Ensure that the environment's Python file has a properly formatted markdown docstring. Install using <code class="docutils literal notranslate"><span class="pre">pip</span> <span class="pre">install</span> <span class="pre">-e</span> <span class="pre">.</span></code> and then run <code class="docutils literal notranslate"><span class="pre">docs/_scripts/gen_mds.py</span></code>. This will automatically generate a md page for the environment. Then complete the <a class="reference internal" href="#other-steps"><span class="xref myst">other steps</span></a>.</p>
<section id="other-steps">
<h4>Other steps<a class="headerlink" href="#other-steps" title="Link to this heading"></a></h4>
<ul class="simple">
<li><p>Add the corresponding gif into the <code class="docutils literal notranslate"><span class="pre">docs/_static/videos/{ENV_TYPE}</span></code> folder, where <code class="docutils literal notranslate"><span class="pre">ENV_TYPE</span></code> is the category of your new environment (e.g. mujoco). Follow snake_case naming convention. Alternatively, run <code class="docutils literal notranslate"><span class="pre">docs/_scripts/gen_gifs.py</span></code>.</p></li>
<li><p>Edit <code class="docutils literal notranslate"><span class="pre">docs/environments/{ENV_TYPE}/index.md</span></code>, and add the name of the file corresponding to your new environment to the <code class="docutils literal notranslate"><span class="pre">toctree</span></code>.</p></li>
</ul>
</section>
</section>
</section>
<section id="build-the-documentation">
<h2>Build the Documentation<a class="headerlink" href="#build-the-documentation" title="Link to this heading"></a></h2>
<p>Install the required packages and Gymnasium (or your fork):</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">pip</span> <span class="n">install</span> <span class="n">gymnasium</span>
<span class="n">pip</span> <span class="n">install</span> <span class="o">-</span><span class="n">r</span> <span class="n">docs</span><span class="o">/</span><span class="n">requirements</span><span class="o">.</span><span class="n">txt</span>
</pre></div>
</div>
<p>To build the documentation once:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">cd</span> <span class="n">docs</span>
<span class="n">make</span> <span class="n">dirhtml</span>
</pre></div>
</div>
<p>To rebuild the documentation automatically every time a change is made:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">cd</span> <span class="n">docs</span>
<span class="n">sphinx</span><span class="o">-</span><span class="n">autobuild</span> <span class="o">-</span><span class="n">b</span> <span class="n">dirhtml</span> <span class="o">--</span><span class="n">watch</span> <span class="o">../</span><span class="n">gymnasium</span> <span class="o">--</span><span class="n">re</span><span class="o">-</span><span class="n">ignore</span> <span class="s2">&quot;pickle$&quot;</span> <span class="o">.</span> <span class="n">_build</span>
</pre></div>
</div>
<p>You can then open http://localhost:8000 in your browser to watch a live updated version of the documentation.</p>
</section>
<section id="writing-tutorials">
<h2>Writing Tutorials<a class="headerlink" href="#writing-tutorials" title="Link to this heading"></a></h2>
<p>We use Sphinx-Gallery to build the tutorials inside the <code class="docutils literal notranslate"><span class="pre">docs/tutorials</span></code> directory. Check <code class="docutils literal notranslate"><span class="pre">docs/tutorials/demo.py</span></code> to see an example of a tutorial and <a class="reference external" href="https://sphinx-gallery.github.io/stable/syntax.html">Sphinx-Gallery documentation</a> for more information.</p>
<p>To convert Jupyter Notebooks to the python tutorials you can use <a class="reference external" href="https://gist.github.com/mgoulao/f07f5f79f6cd9a721db8a34bba0a19a7">this script</a>.</p>
<p>If you want Sphinx-Gallery to execute the tutorial (which adds outputs and plots) then the file name should start with <code class="docutils literal notranslate"><span class="pre">run_</span></code>. Note that this adds to the build time so make sure the script doesn't take more than a few seconds to execute.</p>
</section>
</section>
</article>
</div>
<footer>
<div class="related-pages">
</div>
<div class="bottom-of-page">
<div class="left-details">
<div class="copyright">
Copyright &#169; 2025 Farama Foundation
</div>
<!--
Made with <a href="https://www.sphinx-doc.org/">Sphinx</a> and <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s
<a href="https://github.com/pradyunsg/furo">Furo</a>
-->
</div>
<div class="right-details">
<div class="icons">
<a class="muted-link" href="https://github.com/Farama-Foundation/Gymnasium/"
aria-label="On GitHub">
<svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 16 16">
<path fill-rule="evenodd"
d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0 0 16 8c0-4.42-3.58-8-8-8z">
</path>
</svg>
</a>
</div>
</div>
</div>
</footer>
</div>
<aside class="toc-drawer">
<div class="toc-sticky toc-scroll">
<div class="toc-title-container">
<span class="toc-title">
On this page
</span>
</div>
<div class="toc-tree-container">
<div class="toc-tree">
<ul>
<li><a class="reference internal" href="#">Gymnasium-docs</a><ul>
<li><a class="reference internal" href="#instructions-for-modifying-environment-pages">Instructions for modifying environment pages</a><ul>
<li><a class="reference internal" href="#editing-an-environment-page">Editing an environment page</a></li>
<li><a class="reference internal" href="#adding-a-new-environment">Adding a new environment</a><ul>
<li><a class="reference internal" href="#other-steps">Other steps</a></li>
</ul>
</li>
</ul>
</li>
<li><a class="reference internal" href="#build-the-documentation">Build the Documentation</a></li>
<li><a class="reference internal" href="#writing-tutorials">Writing Tutorials</a></li>
</ul>
</li>
</ul>
</div>
</div>
</div>
</aside>
</div>
</div>
</div>
<script>
// Open or close the Farama projects menu and keep its ARIA state in sync:
// the trigger button's aria-expanded and the panel's aria-hidden are updated
// before the "active" class on the menu wrapper is flipped.
const toggleMenu = () => {
    const trigger = document.querySelector(".farama-header-menu__btn");
    const panel = document.querySelector(".farama-header-menu-container");
    const menu = document.querySelector(".farama-header-menu");
    const wasOpen = menu.classList.contains("active");
    trigger.setAttribute("aria-expanded", wasOpen ? "false" : "true");
    panel.setAttribute("aria-hidden", wasOpen ? "true" : "false");
    menu.classList.toggle("active");
}
// Both the header button and the in-menu close button toggle the menu.
for (const control of [
    document.querySelector(".farama-header-menu__btn"),
    document.getElementById("farama-close-menu"),
]) {
    control.addEventListener("click", toggleMenu);
}
</script>
<script async src="https://www.googletagmanager.com/gtag/js?id=G-6H9C8TWXZ8"></script>
<script>
// Start Google Analytics for the measurement ID G-6H9C8TWXZ8.
// Invoked by the cookie-consent logic in this script once the visitor has
// allowed analytics (either just now or on an earlier visit).
const enableGtag = () => {
// Reuse the global dataLayer array if it already exists, otherwise create it.
window.dataLayer = window.dataLayer || [];
// Pushes the raw `arguments` object (not an array copy) onto dataLayer.
// NOTE(review): this matches the vendor snippet; keep the form unchanged.
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());
gtag('config', 'G-6H9C8TWXZ8');
}
// Cookie-consent banner: ask once whether Google Analytics may be used and
// remember the answer in localStorage under "acceptedCookieAlert".
(() => {
  const consentKey = "acceptedCookieAlert";
  const savedChoice = localStorage.getItem(consentKey);

  // A choice was already stored: only a stored "true" turns analytics on.
  if (savedChoice) {
    if (savedChoice === "true") {
      enableGtag();
    }
    return;
  }

  const boxElem = document.createElement("div");
  boxElem.classList.add("cookie-alert");
  const containerElem = document.createElement("div");
  containerElem.classList.add("cookie-alert__container");

  const textElem = document.createElement("p");
  textElem.innerHTML = `This page uses <a href="https://analytics.google.com/">
Google Analytics</a> to collect statistics.`;
  containerElem.appendChild(textElem);

  // Both buttons store the decision and dismiss the banner; only "Allow"
  // additionally starts analytics.
  const makeChoiceButton = (label, id, accepted) => {
    const button = document.createElement("button");
    button.innerText = label;
    button.className = "farama-btn cookie-alert__button";
    button.id = id;
    button.addEventListener("click", () => {
      localStorage.setItem(consentKey, accepted);
      boxElem.remove();
      if (accepted) {
        enableGtag();
      }
    });
    return button;
  };

  const declineBtn = makeChoiceButton("Deny", "cookie-alert__decline", false);
  const acceptBtn = makeChoiceButton("Allow", "cookie-alert__accept", true);
  containerElem.appendChild(declineBtn);
  containerElem.appendChild(acceptBtn);
  boxElem.appendChild(containerElem);
  document.body.appendChild(boxElem);
})()
</script>
<script src="../_static/documentation_options.js?v=151cd43d"></script>
<script src="../_static/doctools.js?v=9a2dae69"></script>
<script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../_static/scripts/furo.js?v=7660844c"></script>
<script>
// Build the <ul> of links for one menu (sub)section.
//   projects      - iterable of objects read for `link`, `name` and, optionally, `image`
//   displayImages - when truthy, each link gets a logo <img> before its name;
//                   entries without `image` fall back to "/farama_black.svg"
// Returns the populated <ul> element (not yet attached to the document).
const createProjectsList = (projects, displayImages) => {
  const listElem = document.createElement('ul');
  listElem.className = 'farama-header-menu-list';

  for (const project of projects) {
    const itemElem = document.createElement("li");
    const linkElem = document.createElement("a");
    linkElem.href = project.link;
    itemElem.appendChild(linkElem);

    if (displayImages) {
      const logoElem = document.createElement("img");
      // Logo paths are relative to the shared images base path.
      logoElem.src = imagesBasepath + (project.image ? project.image : "/farama_black.svg");
      logoElem.alt = `${project.name} logo`;
      logoElem.className = "farama-black-logo-invert";
      linkElem.appendChild(logoElem);
    }

    linkElem.appendChild(document.createTextNode(project.name));
    listElem.appendChild(itemElem);
  }

  return listElem;
}
// Create menu with Farama projects by using the API at farama.org/api/projects.json
// Open (but do not send) a cross-origin request for `url` using `method`.
// Returns an XMLHttpRequest expecting a JSON response when the browser's XHR
// supports CORS, an XDomainRequest on IE8/IE9, or null when neither is available.
const createCORSRequest = (method, url) => {
  const request = new XMLHttpRequest();
  request.responseType = 'json';

  if ("withCredentials" in request) {
    request.open(method, url, true);
    return request;
  }

  if (typeof XDomainRequest != "undefined") {
    // IE8 & IE9
    const legacyRequest = new XDomainRequest();
    legacyRequest.open(method, url);
    return legacyRequest;
  }

  // CORS not supported.
  return null;
};
// Endpoint listing the Farama projects, and the base URL that project logo
// paths are appended to when the menu is rendered.
const url = 'https://farama.org/api/projects.json';
const imagesBasepath = "https://farama.org/assets/images";
const method = 'GET';
// createCORSRequest returns null when the browser cannot make CORS requests,
// so the handlers are only attached when a request object actually exists.
let xhr = createCORSRequest(method, url);
if (xhr === null) {
  console.error("Unable to load projects");
} else {
  xhr.onload = () => {
    const jsonResponse = xhr.response;
    // Guard against a missing or non-object payload (for example a body that
    // could not be parsed as JSON) instead of throwing inside Object.keys.
    if (jsonResponse === null || typeof jsonResponse !== "object") {
      console.error("Unable to load projects");
      return;
    }

    // Menu layout: array values render as a single list, object values as
    // side-by-side subsections.
    const sections = {
      "Core Projects": [],
      "Mature Projects": {
        "Documentation": [],
        "Repositories": [],
      },
      "Incubating Projects": {
        "Documentation": [],
        "Repositories": [],
      },
      "Foundation": [
        {
          name: "About",
          link: "https://farama.org/about"
        },
        {
          name: "Standards",
          link: "https://farama.org/project_standards",
        },
        {
          name: "Donate",
          link: "https://farama.org/donations"
        }
      ]
    };

    // Categorize projects
    Object.keys(jsonResponse).forEach(key => {
      // Declared locally: the undeclared assignment used previously leaked a
      // global and would throw in strict-mode or module code.
      const projectJson = jsonResponse[key];
      const hasWebsite = projectJson.website !== null;
      // Link to the documentation site when there is one, else to the repository.
      projectJson.link = hasWebsite ? projectJson.website : projectJson.github;

      if (projectJson.type === "core") {
        sections["Core Projects"].push(projectJson);
        return;
      }
      // Every type other than "core" and "mature" is listed as incubating.
      const sectionName = projectJson.type === "mature" ? "Mature Projects" : "Incubating Projects";
      const subSectionName = hasWebsite ? "Documentation" : "Repositories";
      sections[sectionName][subSectionName].push(projectJson);
    });

    const menuContainer = document.querySelector(".farama-header-menu__body");
    Object.keys(sections).forEach(key => {
      const sectionElem = Object.assign(
        document.createElement('div'), {
          className: 'farama-header-menu__section',
        }
      );
      sectionElem.appendChild(Object.assign(document.createElement('span'),
        {
          className: 'farama-header-menu__section-title',
          innerText: key
        }
      ));

      if (!Array.isArray(sections[key])) {
        // Section made of named subsections, each with its own project list.
        const subSections = sections[key];
        const subSectionContainerElem = Object.assign(
          document.createElement('div'), {
            className: 'farama-header-menu__subsections-container',
            style: 'display: flex'
          }
        );
        Object.keys(subSections).forEach(subKey => {
          const subSectionElem = Object.assign(
            document.createElement('div'), {
              className: 'farama-header-menu__subsection',
            }
          );
          subSectionElem.appendChild(Object.assign(document.createElement('span'),
            {
              className: 'farama-header-menu__subsection-title',
              innerText: subKey
            }
          ));
          const ulElem = createProjectsList(subSections[subKey], key !== 'Foundation');
          subSectionElem.appendChild(ulElem);
          subSectionContainerElem.appendChild(subSectionElem);
        });
        sectionElem.appendChild(subSectionContainerElem);
      } else {
        // Flat section: a single list of projects.
        const projects = sections[key];
        const ulElem = createProjectsList(projects, true);
        sectionElem.appendChild(ulElem);
      }
      menuContainer.appendChild(sectionElem);
    });
  };
  xhr.onerror = function() {
    console.error("Unable to load projects");
  };
  xhr.send();
}
</script>
<script>
// Repository coordinates for the versioning menu.
// NOTE(review): not read in this script; presumably consumed by the scripts
// inside the fetched versioning_menu.html. Confirm before renaming.
const versioningConfig = {
  githubUser: 'Farama-Foundation',
  githubRepo: 'Gymnasium',
};
// Fetch the shared versioning menu markup and append it to the page.
fetch('/main/_static/versioning/versioning_menu.html')
  .then(response => {
    if (response.status !== 200) {
      console.warn("Unable to load versioning menu", response);
      return undefined;
    }
    // Return the inner promise so failures while reading or injecting the
    // markup reach the catch handler below.
    return response.text().then(text => {
      const container = document.createElement("div");
      container.innerHTML = text;
      document.querySelector("body").appendChild(container);
      // innerHTML doesn't evaluate scripts, so each one is replaced by a
      // freshly created <script> element that the browser will execute.
      Array.from(container.querySelectorAll("script")).forEach(oldScript => {
        const newScript = document.createElement("script");
        Array.from(oldScript.attributes).forEach(attr => newScript.setAttribute(attr.name, attr.value));
        newScript.appendChild(document.createTextNode(oldScript.innerHTML));
        oldScript.parentNode.replaceChild(newScript, oldScript);
      });
    });
  })
  .catch(error => {
    // Network failures reject the fetch promise; report them the same way as
    // a non-200 response instead of leaving an unhandled rejection.
    console.warn("Unable to load versioning menu", error);
  });
</script>
</body>
</html>

View File

@@ -0,0 +1,193 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n# Make your own custom environment\n\nThis tutorial shows how to create new environment and links to relevant useful wrappers, utilities and tests included in Gymnasium.\n\n## Setup\n\n### Recommended solution\n\n1. Install ``pipx`` following the [pipx documentation](https://pypa.github.io/pipx/installation/).\n2. Then install Copier:\n\n.. code:: console\n\n pipx install copier\n\n### Alternative solutions\n\nInstall Copier with Pip or Conda:\n\n.. code:: console\n\n pip install copier\n\nor\n\n.. code:: console\n\n conda install -c conda-forge copier\n\n\n## Generate your environment\n\nYou can check that ``Copier`` has been correctly installed by running the following command, which should output a version number:\n\n.. code:: console\n\n copier --version\n\nThen you can just run the following command and replace the string ``path/to/directory`` by the path to the directory where you want to create your new project.\n\n.. code:: console\n\n copier copy https://github.com/Farama-Foundation/gymnasium-env-template.git \"path/to/directory\"\n\nAnswer the questions, and when it's finished you should get a project structure like the following:\n\n.. 
code:: sh\n\n .\n \u251c\u2500\u2500 gymnasium_env\n \u2502 \u251c\u2500\u2500 envs\n \u2502 \u2502 \u251c\u2500\u2500 grid_world.py\n \u2502 \u2502 \u2514\u2500\u2500 __init__.py\n \u2502 \u251c\u2500\u2500 __init__.py\n \u2502 \u2514\u2500\u2500 wrappers\n \u2502 \u251c\u2500\u2500 clip_reward.py\n \u2502 \u251c\u2500\u2500 discrete_actions.py\n \u2502 \u251c\u2500\u2500 __init__.py\n \u2502 \u251c\u2500\u2500 reacher_weighted_reward.py\n \u2502 \u2514\u2500\u2500 relative_position.py\n \u251c\u2500\u2500 LICENSE\n \u251c\u2500\u2500 pyproject.toml\n \u2514\u2500\u2500 README.md\n\n## Subclassing gymnasium.Env\n\nBefore learning how to create your own environment you should check out\n[the documentation of Gymnasium\u2019s API](/api/env)_.\n\nTo illustrate the process of subclassing ``gymnasium.Env``, we will\nimplement a very simplistic game, called ``GridWorldEnv``. We will write\nthe code for our custom environment in\n``gymnasium_env/envs/grid_world.py``. The environment\nconsists of a 2-dimensional square grid of fixed size (specified via the\n``size`` parameter during construction). The agent can move vertically\nor horizontally between grid cells in each timestep. The goal of the\nagent is to navigate to a target on the grid that has been placed\nrandomly at the beginning of the episode.\n\n- Observations provide the location of the target and agent.\n- There are 4 actions in our environment, corresponding to the\n movements \u201cright\u201d, \u201cup\u201d, \u201cleft\u201d, and \u201cdown\u201d.\n- A done signal is issued as soon as the agent has navigated to the\n grid cell where the target is located.\n- Rewards are binary and sparse, meaning that the immediate reward is\n always zero, unless the agent has reached the target, then it is 1.\n\nAn episode in this environment (with ``size=5``) might look like this:\n\n .. 
image:: /_static/videos/tutorials/environment-creation-example-episode.gif\n :width: 400\n :alt: Example episode of the custom environment\n\nwhere the blue dot is the agent and the red square represents the\ntarget.\n\nLet us look at the source code of ``GridWorldEnv`` piece by piece:\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Declaration and Initialization\n\nOur custom environment will inherit from the abstract class\n``gymnasium.Env``. You shouldn\u2019t forget to add the ``metadata``\nattribute to your class. There, you should specify the render-modes that\nare supported by your environment (e.g., ``\"human\"``, ``\"rgb_array\"``,\n``\"ansi\"``) and the framerate at which your environment should be\nrendered. Every environment should support ``None`` as render-mode; you\ndon\u2019t need to add it in the metadata. In ``GridWorldEnv``, we will\nsupport the modes \u201crgb_array\u201d and \u201chuman\u201d and render at 4 FPS.\n\nThe ``__init__`` method of our environment will accept the integer\n``size``, that determines the size of the square grid. We will set up\nsome variables for rendering and define ``self.observation_space`` and\n``self.action_space``. In our case, observations should provide\ninformation about the location of the agent and target on the\n2-dimensional grid. We will choose to represent observations in the form\nof dictionaries with keys ``\"agent\"`` and ``\"target\"``. An observation\nmay look like ``{\"agent\": array([1, 0]), \"target\": array([0, 3])}``.\nSince we have 4 actions in our environment (\u201cright\u201d, \u201cup\u201d, \u201cleft\u201d,\n\u201cdown\u201d), we will use ``Discrete(4)`` as an action space. Here is the\ndeclaration of ``GridWorldEnv`` and the implementation of ``__init__``:\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# gymnasium_env/envs/grid_world.py\nfrom enum import Enum\n\nimport numpy as np\nimport pygame\n\nimport gymnasium as gym\nfrom gymnasium import spaces\n\n\nclass Actions(Enum):\n RIGHT = 0\n UP = 1\n LEFT = 2\n DOWN = 3\n\n\nclass GridWorldEnv(gym.Env):\n metadata = {\"render_modes\": [\"human\", \"rgb_array\"], \"render_fps\": 4}\n\n def __init__(self, render_mode=None, size=5):\n self.size = size # The size of the square grid\n self.window_size = 512 # The size of the PyGame window\n\n # Observations are dictionaries with the agent's and the target's location.\n # Each location is encoded as an element of {0, ..., `size`}^2, i.e. MultiDiscrete([size, size]).\n self.observation_space = spaces.Dict(\n {\n \"agent\": spaces.Box(0, size - 1, shape=(2,), dtype=int),\n \"target\": spaces.Box(0, size - 1, shape=(2,), dtype=int),\n }\n )\n self._agent_location = np.array([-1, -1], dtype=int)\n self._target_location = np.array([-1, -1], dtype=int)\n\n # We have 4 actions, corresponding to \"right\", \"up\", \"left\", \"down\"\n self.action_space = spaces.Discrete(4)\n\n \"\"\"\n The following dictionary maps abstract actions from `self.action_space` to\n the direction we will walk in if that action is taken.\n i.e. 0 corresponds to \"right\", 1 to \"up\" etc.\n \"\"\"\n self._action_to_direction = {\n Actions.RIGHT.value: np.array([1, 0]),\n Actions.UP.value: np.array([0, 1]),\n Actions.LEFT.value: np.array([-1, 0]),\n Actions.DOWN.value: np.array([0, -1]),\n }\n\n assert render_mode is None or render_mode in self.metadata[\"render_modes\"]\n self.render_mode = render_mode\n\n \"\"\"\n If human-rendering is used, `self.window` will be a reference\n to the window that we draw to. `self.clock` will be a clock that is used\n to ensure that the environment is rendered at the correct framerate in\n human-mode. They will remain `None` until human-mode is used for the\n first time.\n \"\"\"\n self.window = None\n self.clock = None"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Constructing Observations From Environment States\n\nSince we will need to compute observations both in ``reset`` and\n``step``, it is often convenient to have a (private) method ``_get_obs``\nthat translates the environment\u2019s state into an observation. However,\nthis is not mandatory and you may as well compute observations in\n``reset`` and ``step`` separately:\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def _get_obs(self):\n return {\"agent\": self._agent_location, \"target\": self._target_location}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can also implement a similar method for the auxiliary information\nthat is returned by ``step`` and ``reset``. In our case, we would like\nto provide the manhattan distance between the agent and the target:\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def _get_info(self):\n return {\n \"distance\": np.linalg.norm(\n self._agent_location - self._target_location, ord=1\n )\n }"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Oftentimes, info will also contain some data that is only available\ninside the ``step`` method (e.g., individual reward terms). In that case,\nwe would have to update the dictionary that is returned by ``_get_info``\nin ``step``.\n\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Reset\n\nThe ``reset`` method will be called to initiate a new episode. You may\nassume that the ``step`` method will not be called before ``reset`` has\nbeen called. Moreover, ``reset`` should be called whenever a done signal\nhas been issued. Users may pass the ``seed`` keyword to ``reset`` to\ninitialize any random number generator that is used by the environment\nto a deterministic state. It is recommended to use the random number\ngenerator ``self.np_random`` that is provided by the environment\u2019s base\nclass, ``gymnasium.Env``. If you only use this RNG, you do not need to\nworry much about seeding, *but you need to remember to call\n``super().reset(seed=seed)``* to make sure that ``gymnasium.Env``\ncorrectly seeds the RNG. Once this is done, we can randomly set the\nstate of our environment. In our case, we randomly choose the agent\u2019s\nlocation and then randomly sample target positions until one does not\ncoincide with the agent\u2019s position.\n\nThe ``reset`` method should return a tuple of the initial observation\nand some auxiliary information. We can use the methods ``_get_obs`` and\n``_get_info`` that we implemented earlier for that:\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def reset(self, seed=None, options=None):\n # We need the following line to seed self.np_random\n super().reset(seed=seed)\n\n # Choose the agent's location uniformly at random\n self._agent_location = self.np_random.integers(0, self.size, size=2, dtype=int)\n\n # We will sample the target's location randomly until it does not coincide with the agent's location\n self._target_location = self._agent_location\n while np.array_equal(self._target_location, self._agent_location):\n self._target_location = self.np_random.integers(\n 0, self.size, size=2, dtype=int\n )\n\n observation = self._get_obs()\n info = self._get_info()\n\n if self.render_mode == \"human\":\n self._render_frame()\n\n return observation, info"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Step\n\nThe ``step`` method usually contains most of the logic of your\nenvironment. It accepts an ``action``, computes the state of the\nenvironment after applying that action and returns the 5-tuple\n``(observation, reward, terminated, truncated, info)``. See\n:meth:`gymnasium.Env.step`. Once the new state of the environment has\nbeen computed, we can check whether it is a terminal state and we set\n``terminated`` accordingly. Since we are using sparse binary rewards in\n``GridWorldEnv``, computing ``reward`` is trivial once we know\n``terminated``. To gather ``observation`` and ``info``, we can again make\nuse of ``_get_obs`` and ``_get_info``:\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def step(self, action):\n # Map the action (element of {0,1,2,3}) to the direction we walk in\n direction = self._action_to_direction[action]\n # We use `np.clip` to make sure we don't leave the grid\n self._agent_location = np.clip(\n self._agent_location + direction, 0, self.size - 1\n )\n # An episode is done iff the agent has reached the target\n terminated = np.array_equal(self._agent_location, self._target_location)\n reward = 1 if terminated else 0 # Binary sparse rewards\n observation = self._get_obs()\n info = self._get_info()\n\n if self.render_mode == \"human\":\n self._render_frame()\n\n return observation, reward, terminated, False, info"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Rendering\n\nHere, we are using PyGame for rendering. A similar approach to rendering\nis used in many environments that are included with Gymnasium and you\ncan use it as a skeleton for your own environments:\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def render(self):\n if self.render_mode == \"rgb_array\":\n return self._render_frame()\n\n def _render_frame(self):\n if self.window is None and self.render_mode == \"human\":\n pygame.init()\n pygame.display.init()\n self.window = pygame.display.set_mode(\n (self.window_size, self.window_size)\n )\n if self.clock is None and self.render_mode == \"human\":\n self.clock = pygame.time.Clock()\n\n canvas = pygame.Surface((self.window_size, self.window_size))\n canvas.fill((255, 255, 255))\n pix_square_size = (\n self.window_size / self.size\n ) # The size of a single grid square in pixels\n\n # First we draw the target\n pygame.draw.rect(\n canvas,\n (255, 0, 0),\n pygame.Rect(\n pix_square_size * self._target_location,\n (pix_square_size, pix_square_size),\n ),\n )\n # Now we draw the agent\n pygame.draw.circle(\n canvas,\n (0, 0, 255),\n (self._agent_location + 0.5) * pix_square_size,\n pix_square_size / 3,\n )\n\n # Finally, add some gridlines\n for x in range(self.size + 1):\n pygame.draw.line(\n canvas,\n 0,\n (0, pix_square_size * x),\n (self.window_size, pix_square_size * x),\n width=3,\n )\n pygame.draw.line(\n canvas,\n 0,\n (pix_square_size * x, 0),\n (pix_square_size * x, self.window_size),\n width=3,\n )\n\n if self.render_mode == \"human\":\n # The following line copies our drawings from `canvas` to the visible window\n self.window.blit(canvas, canvas.get_rect())\n pygame.event.pump()\n pygame.display.update()\n\n # We need to ensure that human-rendering occurs at the predefined framerate.\n # The following line will automatically add a delay to keep the framerate stable.\n self.clock.tick(self.metadata[\"render_fps\"])\n else: # rgb_array\n return np.transpose(\n np.array(pygame.surfarray.pixels3d(canvas)), axes=(1, 0, 2)\n )"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Close\n\nThe ``close`` method should close any open resources that were used by\nthe environment. In many cases, you don\u2019t actually have to bother to\nimplement this method. However, in our example ``render_mode`` may be\n``\"human\"`` and we might need to close the window that has been opened:\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def close(self):\n if self.window is not None:\n pygame.display.quit()\n pygame.quit()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In other environments ``close`` might also close files that were opened\nor release other resources. You shouldn\u2019t interact with the environment\nafter having called ``close``.\n\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Registering Envs\n\nIn order for the custom environments to be detected by Gymnasium, they\nmust be registered as follows. We will choose to put this code in\n``gymnasium_env/__init__.py``.\n\n.. code:: python\n\n from gymnasium.envs.registration import register\n\n register(\n id=\"gymnasium_env/GridWorld-v0\",\n entry_point=\"gymnasium_env.envs:GridWorldEnv\",\n )\n\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The environment ID consists of three components, two of which are\noptional: an optional namespace (here: ``gymnasium_env``), a mandatory\nname (here: ``GridWorld``) and an optional but recommended version\n(here: v0). It might have also been registered as ``GridWorld-v0`` (the\nrecommended approach), ``GridWorld`` or ``gymnasium_env/GridWorld``, and\nthe appropriate ID should then be used during environment creation.\n\nThe keyword argument ``max_episode_steps=300`` will ensure that\nGridWorld environments that are instantiated via ``gymnasium.make`` will\nbe wrapped in a ``TimeLimit`` wrapper (see [the wrapper\ndocumentation](/api/wrappers)_ for more information). A done signal\nwill then be produced if the agent has reached the target *or* 300 steps\nhave been executed in the current episode. To distinguish truncation and\ntermination, you can check ``info[\"TimeLimit.truncated\"]``.\n\nApart from ``id`` and ``entrypoint``, you may pass the following\nadditional keyword arguments to ``register``:\n\n+----------------------+-----------+-----------+---------------------------------------------------------------------------------------------------------------+\n| Name | Type | Default | Description |\n+======================+===========+===========+===============================================================================================================+\n| ``reward_threshold`` | ``float`` | ``None`` | The reward threshold before the task is considered solved |\n+----------------------+-----------+-----------+---------------------------------------------------------------------------------------------------------------+\n| ``nondeterministic`` | ``bool`` | ``False`` | Whether this environment is non-deterministic even after seeding |\n+----------------------+-----------+-----------+---------------------------------------------------------------------------------------------------------------+\n| ``max_episode_steps``| ``int`` | ``None`` | The maximum number 
of steps that an episode can consist of. If not ``None``, a ``TimeLimit`` wrapper is added |\n+----------------------+-----------+-----------+---------------------------------------------------------------------------------------------------------------+\n| ``order_enforce`` | ``bool`` | ``True`` | Whether to wrap the environment in an ``OrderEnforcing`` wrapper |\n+----------------------+-----------+-----------+---------------------------------------------------------------------------------------------------------------+\n| ``kwargs`` | ``dict`` | ``{}`` | The default kwargs to pass to the environment class |\n+----------------------+-----------+-----------+---------------------------------------------------------------------------------------------------------------+\n\nMost of these keywords (except for ``max_episode_steps``,\n``order_enforce`` and ``kwargs``) do not alter the behavior of\nenvironment instances but merely provide some extra information about\nyour environment. After registration, our custom ``GridWorldEnv``\nenvironment can be created with\n``env = gymnasium.make('gymnasium_env/GridWorld-v0')``.\n\n``gymnasium_env/envs/__init__.py`` should have:\n\n.. code:: python\n\n from gymnasium_env.envs.grid_world import GridWorldEnv\n\nIf your environment is not registered, you may optionally pass a module\nto import, that would register your environment before creating it like\nthis - ``env = gymnasium.make('module:Env-v0')``, where ``module``\ncontains the registration code. For the GridWorld env, the registration\ncode is run by importing ``gymnasium_env`` so if it were not possible to\nimport gymnasium_env explicitly, you could register while making by\n``env = gymnasium.make('gymnasium_env:gymnasium_env/GridWorld-v0')``. This\nis especially useful when you\u2019re allowed to pass only the environment ID\ninto a third-party codebase (eg. learning library). 
This lets you\nregister your environment without needing to edit the library\u2019s source\ncode.\n\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Creating a Package\n\nThe last step is to structure our code as a Python package. This\ninvolves configuring ``pyproject.toml``. A minimal example of how\nto do so is as follows:\n\n.. code:: toml\n\n [build-system]\n requires = [\"hatchling\"]\n build-backend = \"hatchling.build\"\n\n [project]\n name = \"gymnasium_env\"\n version = \"0.0.1\"\n dependencies = [\n \"gymnasium\",\n \"pygame==2.1.3\",\n \"pre-commit\",\n ]\n\n## Creating Environment Instances\n\nNow you can install your package locally with:\n\n.. code:: console\n\n pip install -e .\n\nAnd you can create an instance of the environment via:\n\n.. code:: python\n\n # run_gymnasium_env.py\n\n import gymnasium\n import gymnasium_env\n env = gymnasium.make('gymnasium_env/GridWorld-v0')\n\nYou can also pass keyword arguments of your environment\u2019s constructor to\n``gymnasium.make`` to customize the environment. In our case, we could\ndo:\n\n.. code:: python\n\n env = gymnasium.make('gymnasium_env/GridWorld-v0', size=10)\n\nSometimes, you may find it more convenient to skip registration and call\nthe environment\u2019s constructor yourself. Some may find this approach more\npythonic and environments that are instantiated like this are also\nperfectly fine (but remember to add wrappers as well!).\n\n## Using Wrappers\n\nOftentimes, we want to use different variants of a custom environment,\nor we want to modify the behavior of an environment that is provided by\nGymnasium or some other party. Wrappers allow us to do this without\nchanging the environment implementation or adding any boilerplate code.\nCheck out the [wrapper documentation](/api/wrappers/)_ for details on\nhow to use wrappers and instructions for implementing your own. In our\nexample, observations cannot be used directly in learning code because\nthey are dictionaries. However, we don\u2019t actually need to touch our\nenvironment implementation to fix this! 
We can simply add a wrapper on\ntop of environment instances to flatten observations into a single\narray:\n\n.. code:: python\n\n import gymnasium\n import gymnasium_env\n from gymnasium.wrappers import FlattenObservation\n\n env = gymnasium.make('gymnasium_env/GridWorld-v0')\n wrapped_env = FlattenObservation(env)\n print(wrapped_env.reset()) # E.g. [3 0 3 3], {}\n\nWrappers have the big advantage that they make environments highly\nmodular. For instance, instead of flattening the observations from\nGridWorld, you might only want to look at the relative position of the\ntarget and the agent. In the section on\n[ObservationWrappers](/api/wrappers/observation_wrappers/#observation-wrappers)_ we have\nimplemented a wrapper that does this job. This wrapper is also available\nin ``gymnasium_env/wrappers/relative_position.py``:\n\n.. code:: python\n\n import gymnasium\n import gymnasium_env\n from gymnasium_env.wrappers import RelativePosition\n\n env = gymnasium.make('gymnasium_env/GridWorld-v0')\n wrapped_env = RelativePosition(env)\n print(wrapped_env.reset()) # E.g. [-3 3], {}\n\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.11"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

View File

@@ -0,0 +1,551 @@
"""
Solving FrozenLake with Tabular Q-Learning
==========================================
This tutorial trains an agent for FrozenLake using tabular Q-learning.
"""
# %%
# In this post we'll compare a bunch of different map sizes on the
# `FrozenLake <https://gymnasium.farama.org/environments/toy_text/frozen_lake/>`__
# environment from the reinforcement learning
# `Gymnasium <https://gymnasium.farama.org/>`__ package using the
# Q-learning algorithm.
# %%
# Let's first import a few dependencies we'll need.
#
# Author: Andrea Pierré
# License: MIT License
from typing import NamedTuple
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from tqdm import tqdm
import gymnasium as gym
from gymnasium.envs.toy_text.frozen_lake import generate_random_map
sns.set_theme()
# %load_ext lab_black
# %%
# Parameters we'll use
# --------------------
#
class Params(NamedTuple):
    """Immutable bundle of the hyperparameters and environment settings used in this tutorial."""

    # Total number of training episodes
    total_episodes: int
    # Learning rate applied to each Q-value update
    learning_rate: float
    # Discounting rate for future rewards
    gamma: float
    # Exploration probability of the epsilon-greedy policy
    epsilon: float
    # Number of tiles along one side of the square environment
    map_size: int
    # Seed used so that we get reproducible results
    seed: int
    # If true, the player moves in the intended direction with probability 1/3,
    # and otherwise moves in one of the two perpendicular directions, each with
    # probability 1/3
    is_slippery: bool
    # Number of independent runs
    n_runs: int
    # Number of possible actions (passed as None at construction and filled in
    # later with `_replace`, once an environment exists)
    action_size: int
    # Number of possible states (passed as None at construction and filled in
    # later with `_replace`, once an environment exists)
    state_size: int
    # Probability that a tile is frozen
    proba_frozen: float
# Settings for the experiments below. ``action_size`` and ``state_size`` are
# not known yet, so they start as ``None`` and are filled in with ``_replace``
# after an environment has been created.
params = Params(
    total_episodes=2000,
    learning_rate=0.8,
    gamma=0.95,
    epsilon=0.1,
    map_size=5,
    seed=123,
    is_slippery=False,
    n_runs=20,
    action_size=None,
    state_size=None,
    proba_frozen=0.9,
)
# Bare expression: shows the value when this cell is run in a notebook.
params

# Set the seed
# `rng` is the module-level generator used by `EpsilonGreedy.choose_action`.
rng = np.random.default_rng(params.seed)
# %%
# The FrozenLake environment
# --------------------------
#
# Create a FrozenLake environment on a randomly generated square map of side
# ``params.map_size``. The map generator is given the experiment seed, and
# ``rgb_array`` rendering is requested so that `plot_q_values_map` can show
# the frame returned by ``env.render()``.
env = gym.make(
    "FrozenLake-v1",
    is_slippery=params.is_slippery,
    render_mode="rgb_array",
    desc=generate_random_map(
        size=params.map_size, p=params.proba_frozen, seed=params.seed
    ),
)
# %%
# Creating the Q-table
# ~~~~~~~~~~~~~~~~~~~~
#
# In this tutorial we'll be using Q-learning as our learning algorithm and
# :math:`\epsilon`-greedy to decide which action to pick at each step. You
# can have a look at the `References section <#References>`__ for some
# refreshers on the theory. Now, let's create our Q-table initialized at
# zero with the states number as rows and the actions number as columns.
#
# Now that an environment exists, record the sizes of its action and
# observation spaces in the parameters.
params = params._replace(
    action_size=env.action_space.n,
    state_size=env.observation_space.n,
)
print(f"Action size: {params.action_size}")
print(f"State size: {params.state_size}")
class Qlearning:
    """Tabular Q-learning: keeps a ``(state_size, action_size)`` table of Q-values."""

    def __init__(self, learning_rate, gamma, state_size, action_size):
        self.learning_rate = learning_rate
        self.gamma = gamma
        self.state_size = state_size
        self.action_size = action_size
        # Allocate the zero-filled table (needs the two sizes set above)
        self.reset_qtable()

    def update(self, state, action, reward, new_state):
        """Compute Q(s,a) + lr [R(s,a) + gamma * max Q(s',a') - Q(s,a)].

        The new value is returned; the table is not modified here, so the
        caller is responsible for storing the result.
        """
        current_q = self.qtable[state, action]
        best_next_q = np.max(self.qtable[new_state, :])
        td_error = reward + self.gamma * best_next_q - current_q
        return current_q + self.learning_rate * td_error

    def reset_qtable(self):
        """Replace the Q-table with a fresh table of zeros."""
        table_shape = (self.state_size, self.action_size)
        self.qtable = np.zeros(table_shape)
class EpsilonGreedy:
    """Epsilon-greedy action selection on top of a tabular Q-function."""

    def __init__(self, epsilon):
        # Probability of picking an exploratory (sampled) action
        self.epsilon = epsilon

    def choose_action(self, action_space, state, qtable):
        """Choose an action `a` in the current world state (s).

        Args:
            action_space: object whose ``sample()`` method supplies the
                exploratory action.
            state: row index of the current state in ``qtable``.
            qtable: 2-D array of Q-values indexed as ``qtable[state, action]``.

        Returns:
            The sampled action when exploring, otherwise the index of a
            maximal Q-value for ``state`` (ties are broken at random).
        """
        # First we randomize a number (drawn from the module-level `rng`)
        explor_exploit_tradeoff = rng.uniform(0, 1)

        # Exploration
        if explor_exploit_tradeoff < self.epsilon:
            return action_space.sample()

        # Exploitation (taking the biggest Q-value for this state).
        # Break ties randomly: collect every index whose Q-value equals the
        # row maximum and let `rng` pick one of them. `np.max` is used instead
        # of the builtin `max` so the reduction stays inside NumPy.
        q_row = qtable[state, :]
        max_ids = np.flatnonzero(q_row == np.max(q_row))
        return rng.choice(max_ids)
# %%
# Running the environment
# ~~~~~~~~~~~~~~~~~~~~~~~
#
# Let's instantiate the learner and the explorer.
#
# The learner owns the Q-table and computes the Q-value updates; the explorer
# implements the epsilon-greedy choice between exploring and exploiting.
# Both are module-level objects that `run_env` reads directly.
learner = Qlearning(
    learning_rate=params.learning_rate,
    gamma=params.gamma,
    state_size=params.state_size,
    action_size=params.action_size,
)
explorer = EpsilonGreedy(
    epsilon=params.epsilon,
)
# %%
# This will be our main function to run our environment until the maximum
# number of episodes ``params.total_episodes``. To account for
# stochasticity, we will also run our environment a few times.
#
def run_env():
    """Train the module-level `learner` on `env` for `params.n_runs` independent runs.

    Reads the module-level `params`, `env`, `learner` and `explorer`.

    Returns:
        rewards: ``(total_episodes, n_runs)`` array, summed reward of each episode.
        steps: ``(total_episodes, n_runs)`` array, number of steps of each episode.
        episodes: ``np.arange(total_episodes)``.
        qtables: ``(n_runs, state_size, action_size)`` array holding the
            Q-table at the end of each run.
        all_states: list of every state in which an action was chosen.
        all_actions: list of every chosen action, aligned with `all_states`.
    """
    per_episode_shape = (params.total_episodes, params.n_runs)
    rewards = np.zeros(per_episode_shape)
    steps = np.zeros(per_episode_shape)
    episodes = np.arange(params.total_episodes)
    qtables = np.zeros((params.n_runs, params.state_size, params.action_size))
    all_states, all_actions = [], []

    # Several runs are performed to account for stochasticity
    for run in range(params.n_runs):
        # Every run starts learning from scratch
        learner.reset_qtable()

        progress = tqdm(
            episodes, desc=f"Run {run}/{params.n_runs} - Episodes", leave=False
        )
        for episode in progress:
            # Reset the environment and keep only the observation
            state = env.reset(seed=params.seed)[0]
            episode_reward = 0
            n_steps = 0

            while True:
                action = explorer.choose_action(
                    action_space=env.action_space, state=state, qtable=learner.qtable
                )

                # Keep a trace of every visited state and chosen action
                all_states.append(state)
                all_actions.append(action)

                # Apply the action (a); observe the next state (s') and the reward (r)
                new_state, reward, terminated, truncated, _ = env.step(action)

                # `update` only returns the new value, so store it here
                learner.qtable[state, action] = learner.update(
                    state, action, reward, new_state
                )

                episode_reward += reward
                n_steps += 1

                # The next state becomes the current state
                state = new_state

                if terminated or truncated:
                    break

            # Record the outcome of this episode
            rewards[episode, run] = episode_reward
            steps[episode, run] = n_steps

        # Snapshot of the table learned during this run
        qtables[run, :, :] = learner.qtable

    return rewards, steps, episodes, qtables, all_states, all_actions
# %%
# Visualization
# ~~~~~~~~~~~~~
#
# %%
# To make it easy to plot the results with Seaborn, we'll save the main
# results of the simulation in Pandas dataframes.
#
def postprocess(episodes, params, rewards, steps, map_size):
    """Gather the simulation results into two dataframes.

    `rewards` and `steps` are indexed as ``[episode, run]``. They are
    flattened run after run (column-major order), so the episode index is
    tiled once per run to stay aligned with them.

    Returns:
        res: one row per (run, episode) with the columns ``Episodes``,
            ``Rewards``, ``Steps``, ``cum_rewards`` (running sum of the
            rewards within each run) and ``map_size``.
        st: one row per episode with the columns ``Episodes``, ``Steps``
            (mean over the runs) and ``map_size``.
    """
    size_label = f"{map_size}x{map_size}"
    per_run_cumulative = rewards.cumsum(axis=0)

    res = pd.DataFrame(
        data={
            "Episodes": np.tile(episodes, reps=params.n_runs),
            "Rewards": rewards.flatten(order="F"),
            "Steps": steps.flatten(order="F"),
            "cum_rewards": per_run_cumulative.flatten(order="F"),
        }
    )
    res["map_size"] = np.repeat(size_label, len(res))

    mean_steps = steps.mean(axis=1)
    st = pd.DataFrame(data={"Episodes": episodes, "Steps": mean_steps})
    st["map_size"] = np.repeat(size_label, len(st))
    return res, st
# %%
# We want to plot the policy the agent has learned in the end. To do that
# we will: 1. extract the best Q-values from the Q-table for each state,
# 2. get the corresponding best action for those Q-values, 3. map each
# action to an arrow so we can visualize it.
#
def qtable_directions_map(qtable, map_size):
    """Get the best learned action & map it to arrows.

    Args:
        qtable: 2-D array of Q-values indexed as ``[state, action]`` with
            ``map_size * map_size`` rows.
        map_size: number of tiles along one side of the square map.

    Returns:
        qtable_val_max: ``(map_size, map_size)`` array with the largest
            Q-value of each state.
        qtable_directions: ``(map_size, map_size)`` array of one-character
            strings: the arrow of the best action, or ``""`` for states whose
            best Q-value is not above machine epsilon.
    """
    # Arrow for each action index (0: left, 1: down, 2: right, 3: up). They are
    # written as escapes so the glyphs cannot be lost by non-UTF-8 tooling.
    arrows = np.array(["\u2190", "\u2193", "\u2192", "\u2191"])
    # Machine epsilon: the gap between 1.0 and the next representable float
    eps = np.finfo(float).eps

    best_values = qtable.max(axis=1)
    best_actions = np.argmax(qtable, axis=1)

    # Assign an arrow only if a minimal Q-value has been learned as best action;
    # otherwise, since 0 is a direction, it would also get mapped on the tiles
    # where nothing was actually learned. Tiles without an arrow are explicitly
    # set to "" instead of relying on the contents of an uninitialised array.
    qtable_directions = np.where(best_values > eps, arrows[best_actions], "")

    qtable_val_max = best_values.reshape(map_size, map_size)
    qtable_directions = qtable_directions.reshape(map_size, map_size)
    return qtable_val_max, qtable_directions
# %%
# With the following function, we'll plot on the left the last frame of
# the simulation. If the agent learned a good policy to solve the task, we
# expect to see it on the tile of the treasure in the last frame of the
# video. On the right we'll plot the policy the agent has learned. Each
# arrow will represent the best action to choose for each tile/state.
#
def plot_q_values_map(qtable, env, map_size):
    """Show the current rendered frame of `env` next to a heatmap of the learned policy.

    The left panel displays ``env.render()``. The right panel is a heatmap of
    the best Q-value of every tile, annotated with the arrow of the best
    action as computed by `qtable_directions_map`.
    """
    best_values, arrows = qtable_directions_map(qtable, map_size)

    fig, (frame_ax, policy_ax) = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))

    # Left panel: the last frame
    frame_ax.imshow(env.render())
    frame_ax.axis("off")
    frame_ax.set_title("Last frame")

    # Right panel: the policy
    heatmap_ax = sns.heatmap(
        best_values,
        annot=arrows,
        fmt="",
        ax=policy_ax,
        cmap=sns.color_palette("Blues", as_cmap=True),
        linewidths=0.7,
        linecolor="black",
        xticklabels=[],
        yticklabels=[],
        annot_kws={"fontsize": "xx-large"},
    )
    heatmap_ax.set(title="Learned Q-values\nArrows represent best action")

    # Draw a thin black frame around the heatmap
    for spine in policy_ax.spines.values():
        spine.set_visible(True)
        spine.set_linewidth(0.7)
        spine.set_color("black")

    plt.show()
# %%
# As a sanity check, we will plot the distributions of states and actions
# with the following function:
#
def plot_states_actions_distribution(states, actions, map_size):
    """Draw side-by-side histograms of the visited states and of the chosen actions.

    `map_size` is not used by this function.
    """
    labels = {"LEFT": 0, "DOWN": 1, "RIGHT": 2, "UP": 3}
    tick_positions = list(labels.values())
    tick_names = list(labels.keys())

    fig, (states_ax, actions_ax) = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))

    # States, with a kernel density estimate on top of the histogram
    sns.histplot(data=states, ax=states_ax, kde=True)
    states_ax.set_title("States")

    # Actions, with the numeric ticks replaced by the action names
    sns.histplot(data=actions, ax=actions_ax)
    actions_ax.set_xticks(tick_positions, labels=tick_names)
    actions_ax.set_title("Actions")

    fig.tight_layout()
    plt.show()
# %%
# Now we'll run our agent on maps of increasing size:
# :math:`4 \times 4`, :math:`7 \times 7`, :math:`9 \times 9` and
# :math:`11 \times 11`.
#
# Putting it all together:
#
map_sizes = [4, 7, 9, 11]
res_all = pd.DataFrame()
st_all = pd.DataFrame()

for map_size in map_sizes:
    # A fresh environment on a random map of the requested size. `env`,
    # `params`, `learner` and `explorer` are rebound at module level because
    # `run_env` reads them as globals.
    env = gym.make(
        "FrozenLake-v1",
        is_slippery=params.is_slippery,
        render_mode="rgb_array",
        desc=generate_random_map(
            size=map_size, p=params.proba_frozen, seed=params.seed
        ),
    )

    params = params._replace(
        action_size=env.action_space.n,
        state_size=env.observation_space.n,
    )
    # Set the seed to get reproducible results when sampling the action space
    env.action_space.seed(params.seed)

    learner = Qlearning(
        learning_rate=params.learning_rate,
        gamma=params.gamma,
        state_size=params.state_size,
        action_size=params.action_size,
    )
    explorer = EpsilonGreedy(epsilon=params.epsilon)

    print(f"Map size: {map_size}x{map_size}")
    rewards, steps, episodes, qtables, all_states, all_actions = run_env()

    # Save the results in dataframes
    res, st = postprocess(episodes, params, rewards, steps, map_size)
    res_all = pd.concat([res_all, res])
    st_all = pd.concat([st_all, st])

    # Average the Q-table between runs
    qtable = qtables.mean(axis=0)

    # Sanity check
    plot_states_actions_distribution(
        states=all_states, actions=all_actions, map_size=map_size
    )
    plot_q_values_map(qtable, env, map_size)

    env.close()
# %%
# Map size: :math:`4 \times 4`
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# %%
# |States actions histogram 4x4 map| |Q-values 4x4 map|
#
# .. |States actions histogram 4x4 map| image:: ../../_static/img/tutorials/frozenlake_states_actions_distrib_4x4.png
# .. |Q-values 4x4 map| image:: ../../_static/img/tutorials/frozenlake_q_values_4x4.png
#
#
# %%
# Map size: :math:`7 \times 7`
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# %%
# |States actions histogram 7x7 map| |Q-values 7x7 map|
#
# .. |States actions histogram 7x7 map| image:: ../../_static/img/tutorials/frozenlake_states_actions_distrib_7x7.png
# .. |Q-values 7x7 map| image:: ../../_static/img/tutorials/frozenlake_q_values_7x7.png
#
#
# %%
# Map size: :math:`9 \times 9`
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# %%
# |States actions histogram 9x9 map| |Q-values 9x9 map|
#
# .. |States actions histogram 9x9 map| image:: ../../_static/img/tutorials/frozenlake_states_actions_distrib_9x9.png
# .. |Q-values 9x9 map| image:: ../../_static/img/tutorials/frozenlake_q_values_9x9.png
#
#
# %%
# Map size: :math:`11 \times 11`
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# %%
# |States actions histogram 11x11 map| |Q-values 11x11 map|
#
# .. |States actions histogram 11x11 map| image:: ../../_static/img/tutorials/frozenlake_states_actions_distrib_11x11.png
# .. |Q-values 11x11 map| image:: ../../_static/img/tutorials/frozenlake_q_values_11x11.png
#
#
# %%
# The ``DOWN`` and ``RIGHT`` actions get chosen more often, which makes
# sense as the agent starts at the top left of the map and needs to find
# its way down to the bottom right. Also, the bigger the map, the fewer
# states/tiles far away from the starting state get visited.
#
# %%
# To check if our agent is learning, we want to plot the cumulative sum of
# rewards, as well as the number of steps needed until the end of the
# episode. If our agent is learning, we expect the cumulative sum of
# rewards to increase and the number of steps needed to solve the task to
# decrease.
#
def plot_steps_and_rewards(rewards_df, steps_df):
    """Plot, per map size, the cumulated rewards (left) and the averaged steps (right).

    `rewards_df` must provide the columns ``Episodes``, ``cum_rewards`` and
    ``map_size``; `steps_df` must provide ``Episodes``, ``Steps`` and
    ``map_size``.
    """
    fig, (rewards_ax, steps_ax) = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))

    sns.lineplot(
        data=rewards_df,
        x="Episodes",
        y="cum_rewards",
        hue="map_size",
        ax=rewards_ax,
    )
    rewards_ax.set(ylabel="Cumulated rewards")

    sns.lineplot(
        data=steps_df,
        x="Episodes",
        y="Steps",
        hue="map_size",
        ax=steps_ax,
    )
    steps_ax.set(ylabel="Averaged steps number")

    # Same legend title on both panels
    for panel in (rewards_ax, steps_ax):
        panel.legend(title="map size")

    fig.tight_layout()
    plt.show()


plot_steps_and_rewards(res_all, st_all)
# %%
# |Steps and rewards|
#
# .. |Steps and rewards| image:: ../../_static/img/tutorials/frozenlake_steps_and_rewards.png
#
#
# %%
# On the :math:`4 \times 4` map, learning converges pretty quickly,
# whereas on the :math:`7 \times 7` map, the agent needs :math:`\sim 300`
# episodes, on the :math:`9 \times 9` map it needs :math:`\sim 800`
# episodes, and on the :math:`11 \times 11` map it needs :math:`\sim 1800`
# episodes to converge. Interestingly, the agent seems to be getting more
# rewards on the :math:`9 \times 9` map than on the :math:`7 \times 7`
# map, which could mean it didn't reach an optimal policy on the
# :math:`7 \times 7` map.
#
# %%
# In the end, if the agent doesn't get any rewards, rewards don't get
# propagated in the Q-values, and the agent doesn't learn anything. In my
# experience on this environment using :math:`\epsilon`-greedy and those
# hyperparameters and environment settings, maps having more than
# :math:`11 \times 11` tiles start to be difficult to solve. Maybe using a
# different exploration algorithm could overcome this. The other parameter
# having a big impact is ``proba_frozen``, the probability of a tile
# being frozen. With too many holes, i.e. :math:`p<0.9`, Q-learning has
# a hard time avoiding the holes and getting a reward
# signal.
#
# %%
# References
# ----------
#
# - Code inspired by `Deep Reinforcement Learning
# Course <https://simoninithomas.github.io/Deep_reinforcement_learning_Course/>`__
# by Thomas Simonini (http://simoninithomas.com/)
# - `Dissecting Reinforcement
# Learning-Part.2 <https://mpatacchiola.github.io/blog/2017/01/15/dissecting-reinforcement-learning-2.html>`__
# - `David Silver's course <https://www.davidsilver.uk/teaching/>`__ in
# particular lesson 4 and lesson 5
# - `Q-learning article on
# Wikipedia <https://en.wikipedia.org/wiki/Q-learning>`__
# - `Q-Learning: Off-Policy TD
# Control <http://incompleteideas.net/book/ebook/node65.html>`__ in
# `Reinforcement Learning: An Introduction, by Richard S. Sutton and
# Andrew G. Barto <http://incompleteideas.net/book/ebook/>`__
# - `Epsilon-Greedy
# Q-learning <https://www.baeldung.com/cs/epsilon-greedy-q-learning>`__
# - `Introduction to Reinforcement
# Learning <https://gibberblot.github.io/rl-notes/index.html>`__ by Tim
# Miller (University of Melbourne)
#

View File

@@ -0,0 +1,353 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n# Solving Frozenlake with Tabular Q-Learning\n\nThis tutorial trains an agent for FrozenLake using tabular Q-learning.\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In this post we'll compare a bunch of different map sizes on the\n[FrozenLake](https://gymnasium.farama.org/environments/toy_text/frozen_lake/)\nenvironment from the reinforcement learning\n[Gymnasium](https://gymnasium.farama.org/) package using the\nQ-learning algorithm.\n\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's first import a few dependencies we'll need.\n\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Author: Andrea Pierr\u00e9\n# License: MIT License\n\nfrom typing import NamedTuple\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\nimport seaborn as sns\nfrom tqdm import tqdm\n\nimport gymnasium as gym\nfrom gymnasium.envs.toy_text.frozen_lake import generate_random_map\n\n\nsns.set_theme()\n\n# %load_ext lab_black"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Parameters we'll use\n\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"class Params(NamedTuple):\n total_episodes: int # Total episodes\n learning_rate: float # Learning rate\n gamma: float # Discounting rate\n epsilon: float # Exploration probability\n map_size: int # Number of tiles of one side of the squared environment\n seed: int # Define a seed so that we get reproducible results\n is_slippery: bool # If true the player will move in intended direction with probability of 1/3 else will move in either perpendicular direction with equal probability of 1/3 in both directions\n n_runs: int # Number of runs\n action_size: int # Number of possible actions\n state_size: int # Number of possible states\n proba_frozen: float # Probability that a tile is frozen\n\n\nparams = Params(\n total_episodes=2000,\n learning_rate=0.8,\n gamma=0.95,\n epsilon=0.1,\n map_size=5,\n seed=123,\n is_slippery=False,\n n_runs=20,\n action_size=None,\n state_size=None,\n proba_frozen=0.9,\n)\nparams\n\n# Set the seed\nrng = np.random.default_rng(params.seed)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## The FrozenLake environment\n\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"env = gym.make(\n \"FrozenLake-v1\",\n is_slippery=params.is_slippery,\n render_mode=\"rgb_array\",\n desc=generate_random_map(\n size=params.map_size, p=params.proba_frozen, seed=params.seed\n ),\n)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Creating the Q-table\n\nIn this tutorial we'll be using Q-learning as our learning algorithm and\n$\\epsilon$-greedy to decide which action to pick at each step. You\ncan have a look at the [References section](#References) for some\nrefreshers on the theory. Now, let's create our Q-table initialized at\nzero with the number of states as rows and the number of actions as columns.\n\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"params = params._replace(action_size=env.action_space.n)\nparams = params._replace(state_size=env.observation_space.n)\nprint(f\"Action size: {params.action_size}\")\nprint(f\"State size: {params.state_size}\")\n\n\nclass Qlearning:\n def __init__(self, learning_rate, gamma, state_size, action_size):\n self.state_size = state_size\n self.action_size = action_size\n self.learning_rate = learning_rate\n self.gamma = gamma\n self.reset_qtable()\n\n def update(self, state, action, reward, new_state):\n \"\"\"Update Q(s,a):= Q(s,a) + lr [R(s,a) + gamma * max Q(s',a') - Q(s,a)]\"\"\"\n delta = (\n reward\n + self.gamma * np.max(self.qtable[new_state, :])\n - self.qtable[state, action]\n )\n q_update = self.qtable[state, action] + self.learning_rate * delta\n return q_update\n\n def reset_qtable(self):\n \"\"\"Reset the Q-table.\"\"\"\n self.qtable = np.zeros((self.state_size, self.action_size))\n\n\nclass EpsilonGreedy:\n def __init__(self, epsilon):\n self.epsilon = epsilon\n\n def choose_action(self, action_space, state, qtable):\n \"\"\"Choose an action `a` in the current world state (s).\"\"\"\n # First we randomize a number\n explor_exploit_tradeoff = rng.uniform(0, 1)\n\n # Exploration\n if explor_exploit_tradeoff < self.epsilon:\n action = action_space.sample()\n\n # Exploitation (taking the biggest Q-value for this state)\n else:\n # Break ties randomly\n # Find the indices where the Q-value equals the maximum value\n # Choose a random action from the indices where the Q-value is maximum\n max_ids = np.where(qtable[state, :] == max(qtable[state, :]))[0]\n action = rng.choice(max_ids)\n return action"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Running the environment\n\nLet's instantiate the learner and the explorer.\n\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"learner = Qlearning(\n learning_rate=params.learning_rate,\n gamma=params.gamma,\n state_size=params.state_size,\n action_size=params.action_size,\n)\nexplorer = EpsilonGreedy(\n epsilon=params.epsilon,\n)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This will be our main function to run our environment until the maximum\nnumber of episodes ``params.total_episodes``. To account for\nstochasticity, we will also run our environment a few times.\n\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def run_env():\n rewards = np.zeros((params.total_episodes, params.n_runs))\n steps = np.zeros((params.total_episodes, params.n_runs))\n episodes = np.arange(params.total_episodes)\n qtables = np.zeros((params.n_runs, params.state_size, params.action_size))\n all_states = []\n all_actions = []\n\n for run in range(params.n_runs): # Run several times to account for stochasticity\n learner.reset_qtable() # Reset the Q-table between runs\n\n for episode in tqdm(\n episodes, desc=f\"Run {run}/{params.n_runs} - Episodes\", leave=False\n ):\n state = env.reset(seed=params.seed)[0] # Reset the environment\n step = 0\n done = False\n total_rewards = 0\n\n while not done:\n action = explorer.choose_action(\n action_space=env.action_space, state=state, qtable=learner.qtable\n )\n\n # Log all states and actions\n all_states.append(state)\n all_actions.append(action)\n\n # Take the action (a) and observe the outcome state(s') and reward (r)\n new_state, reward, terminated, truncated, info = env.step(action)\n\n done = terminated or truncated\n\n learner.qtable[state, action] = learner.update(\n state, action, reward, new_state\n )\n\n total_rewards += reward\n step += 1\n\n # Our new state is state\n state = new_state\n\n # Log all rewards and steps\n rewards[episode, run] = total_rewards\n steps[episode, run] = step\n qtables[run, :, :] = learner.qtable\n\n return rewards, steps, episodes, qtables, all_states, all_actions"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Visualization\n\n\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To make it easy to plot the results with Seaborn, we'll save the main\nresults of the simulation in Pandas dataframes.\n\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def postprocess(episodes, params, rewards, steps, map_size):\n \"\"\"Convert the results of the simulation in dataframes.\"\"\"\n res = pd.DataFrame(\n data={\n \"Episodes\": np.tile(episodes, reps=params.n_runs),\n \"Rewards\": rewards.flatten(order=\"F\"),\n \"Steps\": steps.flatten(order=\"F\"),\n }\n )\n res[\"cum_rewards\"] = rewards.cumsum(axis=0).flatten(order=\"F\")\n res[\"map_size\"] = np.repeat(f\"{map_size}x{map_size}\", res.shape[0])\n\n st = pd.DataFrame(data={\"Episodes\": episodes, \"Steps\": steps.mean(axis=1)})\n st[\"map_size\"] = np.repeat(f\"{map_size}x{map_size}\", st.shape[0])\n return res, st"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We want to plot the policy the agent has learned in the end. To do that\nwe will: 1. extract the best Q-values from the Q-table for each state,\n2. get the corresponding best action for those Q-values, 3. map each\naction to an arrow so we can visualize it.\n\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def qtable_directions_map(qtable, map_size):\n \"\"\"Get the best learned action & map it to arrows.\"\"\"\n qtable_val_max = qtable.max(axis=1).reshape(map_size, map_size)\n qtable_best_action = np.argmax(qtable, axis=1).reshape(map_size, map_size)\n directions = {0: \"\u2190\", 1: \"\u2193\", 2: \"\u2192\", 3: \"\u2191\"}\n qtable_directions = np.empty(qtable_best_action.flatten().shape, dtype=str)\n eps = np.finfo(float).eps # Minimum float number on the machine\n for idx, val in enumerate(qtable_best_action.flatten()):\n if qtable_val_max.flatten()[idx] > eps:\n # Assign an arrow only if a minimal Q-value has been learned as best action\n # otherwise since 0 is a direction, it also gets mapped on the tiles where\n # it didn't actually learn anything\n qtable_directions[idx] = directions[val]\n qtable_directions = qtable_directions.reshape(map_size, map_size)\n return qtable_val_max, qtable_directions"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"With the following function, we'll plot on the left the last frame of\nthe simulation. If the agent learned a good policy to solve the task, we\nexpect to see it on the tile of the treasure in the last frame of the\nvideo. On the right we'll plot the policy the agent has learned. Each\narrow will represent the best action to choose for each tile/state.\n\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def plot_q_values_map(qtable, env, map_size):\n \"\"\"Plot the last frame of the simulation and the policy learned.\"\"\"\n qtable_val_max, qtable_directions = qtable_directions_map(qtable, map_size)\n\n # Plot the last frame\n fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))\n ax[0].imshow(env.render())\n ax[0].axis(\"off\")\n ax[0].set_title(\"Last frame\")\n\n # Plot the policy\n sns.heatmap(\n qtable_val_max,\n annot=qtable_directions,\n fmt=\"\",\n ax=ax[1],\n cmap=sns.color_palette(\"Blues\", as_cmap=True),\n linewidths=0.7,\n linecolor=\"black\",\n xticklabels=[],\n yticklabels=[],\n annot_kws={\"fontsize\": \"xx-large\"},\n ).set(title=\"Learned Q-values\\nArrows represent best action\")\n for _, spine in ax[1].spines.items():\n spine.set_visible(True)\n spine.set_linewidth(0.7)\n spine.set_color(\"black\")\n plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a sanity check, we will plot the distributions of states and actions\nwith the following function:\n\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def plot_states_actions_distribution(states, actions, map_size):\n \"\"\"Plot the distributions of states and actions.\"\"\"\n labels = {\"LEFT\": 0, \"DOWN\": 1, \"RIGHT\": 2, \"UP\": 3}\n\n fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))\n sns.histplot(data=states, ax=ax[0], kde=True)\n ax[0].set_title(\"States\")\n sns.histplot(data=actions, ax=ax[1])\n ax[1].set_xticks(list(labels.values()), labels=labels.keys())\n ax[1].set_title(\"Actions\")\n fig.tight_layout()\n plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we'll run our agent on maps of increasing size:\n$4 \\times 4$, $7 \\times 7$, $9 \\times 9$ and\n$11 \\times 11$.\n\nPutting it all together:\n\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"map_sizes = [4, 7, 9, 11]\nres_all = pd.DataFrame()\nst_all = pd.DataFrame()\n\nfor map_size in map_sizes:\n env = gym.make(\n \"FrozenLake-v1\",\n is_slippery=params.is_slippery,\n render_mode=\"rgb_array\",\n desc=generate_random_map(\n size=map_size, p=params.proba_frozen, seed=params.seed\n ),\n )\n\n params = params._replace(action_size=env.action_space.n)\n params = params._replace(state_size=env.observation_space.n)\n env.action_space.seed(\n params.seed\n ) # Set the seed to get reproducible results when sampling the action space\n learner = Qlearning(\n learning_rate=params.learning_rate,\n gamma=params.gamma,\n state_size=params.state_size,\n action_size=params.action_size,\n )\n explorer = EpsilonGreedy(\n epsilon=params.epsilon,\n )\n\n print(f\"Map size: {map_size}x{map_size}\")\n rewards, steps, episodes, qtables, all_states, all_actions = run_env()\n\n # Save the results in dataframes\n res, st = postprocess(episodes, params, rewards, steps, map_size)\n res_all = pd.concat([res_all, res])\n st_all = pd.concat([st_all, st])\n qtable = qtables.mean(axis=0) # Average the Q-table between runs\n\n plot_states_actions_distribution(\n states=all_states, actions=all_actions, map_size=map_size\n ) # Sanity check\n plot_q_values_map(qtable, env, map_size)\n\n env.close()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Map size: $4 \\times 4$\n\n\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"|States actions histogram 4x4 map| |Q-values 4x4 map|\n\n.. |States actions histogram 4x4 map| image:: ../../_static/img/tutorials/frozenlake_states_actions_distrib_4x4.png\n.. |Q-values 4x4 map| image:: ../../_static/img/tutorials/frozenlake_q_values_4x4.png\n\n\n\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Map size: $7 \\times 7$\n\n\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"|States actions histogram 7x7 map| |Q-values 7x7 map|\n\n.. |States actions histogram 7x7 map| image:: ../../_static/img/tutorials/frozenlake_states_actions_distrib_7x7.png\n.. |Q-values 7x7 map| image:: ../../_static/img/tutorials/frozenlake_q_values_7x7.png\n\n\n\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Map size: $9 \\times 9$\n\n\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"|States actions histogram 9x9 map| |Q-values 9x9 map|\n\n.. |States actions histogram 9x9 map| image:: ../../_static/img/tutorials/frozenlake_states_actions_distrib_9x9.png\n.. |Q-values 9x9 map| image:: ../../_static/img/tutorials/frozenlake_q_values_9x9.png\n\n\n\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Map size: $11 \\times 11$\n\n\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"|States actions histogram 11x11 map| |Q-values 11x11 map|\n\n.. |States actions histogram 11x11 map| image:: ../../_static/img/tutorials/frozenlake_states_actions_distrib_11x11.png\n.. |Q-values 11x11 map| image:: ../../_static/img/tutorials/frozenlake_q_values_11x11.png\n\n\n\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The ``DOWN`` and ``RIGHT`` actions get chosen more often, which makes\nsense as the agent starts at the top left of the map and needs to find\nits way down to the bottom right. Also, the bigger the map, the fewer\nstates/tiles far away from the starting state get visited.\n\n\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To check if our agent is learning, we want to plot the cumulative sum of\nrewards, as well as the number of steps needed until the end of the\nepisode. If our agent is learning, we expect the cumulative sum of\nrewards to increase and the number of steps needed to solve the task to\ndecrease.\n\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def plot_steps_and_rewards(rewards_df, steps_df):\n \"\"\"Plot the steps and rewards from dataframes.\"\"\"\n fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))\n sns.lineplot(\n data=rewards_df, x=\"Episodes\", y=\"cum_rewards\", hue=\"map_size\", ax=ax[0]\n )\n ax[0].set(ylabel=\"Cumulated rewards\")\n\n sns.lineplot(data=steps_df, x=\"Episodes\", y=\"Steps\", hue=\"map_size\", ax=ax[1])\n ax[1].set(ylabel=\"Averaged steps number\")\n\n for axi in ax:\n axi.legend(title=\"map size\")\n fig.tight_layout()\n plt.show()\n\n\nplot_steps_and_rewards(res_all, st_all)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"|Steps and rewards|\n\n.. |Steps and rewards| image:: ../../_static/img/tutorials/frozenlake_steps_and_rewards.png\n\n\n\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"On the $4 \\times 4$ map, learning converges pretty quickly,\nwhereas on the $7 \\times 7$ map, the agent needs $\\sim 300$\nepisodes, on the $9 \\times 9$ map it needs $\\sim 800$\nepisodes, and on the $11 \\times 11$ map it needs $\\sim 1800$\nepisodes to converge. Interestingly, the agent seems to be getting more\nrewards on the $9 \\times 9$ map than on the $7 \\times 7$\nmap, which could mean it didn't reach an optimal policy on the\n$7 \\times 7$ map.\n\n\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In the end, if the agent doesn't get any rewards, rewards don't get\npropagated in the Q-values, and the agent doesn't learn anything. In my\nexperience on this environment using $\\epsilon$-greedy and those\nhyperparameters and environment settings, maps having more than\n$11 \\times 11$ tiles start to be difficult to solve. Maybe using a\ndifferent exploration algorithm could overcome this. The other parameter\nhaving a big impact is ``proba_frozen``, the probability of a tile\nbeing frozen. With too many holes, i.e. $p<0.9$, Q-learning has\na hard time avoiding the holes and getting a reward\nsignal.\n\n\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## References\n\n- Code inspired by [Deep Reinforcement Learning\n Course](https://simoninithomas.github.io/Deep_reinforcement_learning_Course/)_\n by Thomas Simonini (http://simoninithomas.com/)\n- [Dissecting Reinforcement\n Learning-Part.2](https://mpatacchiola.github.io/blog/2017/01/15/dissecting-reinforcement-learning-2.html)_\n- [David Silver\u2019s course](https://www.davidsilver.uk/teaching/)_ in\n particular lesson 4 and lesson 5\n- [Q-learning article on\n Wikipedia](https://en.wikipedia.org/wiki/Q-learning)_\n- [Q-Learning: Off-Policy TD\n Control](http://incompleteideas.net/book/ebook/node65.html)_ in\n [Reinforcement Learning: An Introduction, by Richard S. Sutton and\n Andrew G. Barto](http://incompleteideas.net/book/ebook/)_\n- [Epsilon-Greedy\n Q-learning](https://www.baeldung.com/cs/epsilon-greedy-q-learning)_\n- [Introduction to Reinforcement\n Learning](https://gibberblot.github.io/rl-notes/index.html)_ by Tim\n Miller (University of Melbourne)\n\n\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.11"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

View File

@@ -0,0 +1,46 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n# Handling Time Limits\n\nThis tutorial explains how time limits should be correctly handled with `termination` and `truncation` signals.\n\nThe ``done`` signal received (in previous versions of OpenAI Gym < 0.26) from ``env.step`` indicated whether an episode has ended.\nHowever, this signal did not distinguish whether the episode ended due to ``termination`` or ``truncation``.\n\n## Termination\n\nTermination refers to the episode ending after reaching a terminal state that is defined as part of the environment\ndefinition. Examples are - task success, task failure, robot falling down etc. Notably, this also includes episodes\nending in finite-horizon environments due to a time-limit inherent to the environment. Note that to preserve Markov\nproperty, a representation of the remaining time must be present in the agent's observation in finite-horizon environments.\n[(Reference)](https://arxiv.org/abs/1712.00378)\n\n## Truncation\n\nTruncation refers to the episode ending after an externally defined condition (that is outside the scope of the Markov\nDecision Process). This could be a time-limit, a robot going out of bounds etc.\n\nAn infinite-horizon environment is an obvious example of where this is needed. We cannot wait forever for the episode\nto complete, so we set a practical time-limit after which we forcibly halt the episode. The last state in this case is\nnot a terminal state since it has a non-zero transition probability of moving to another state as per the Markov\nDecision Process that defines the RL problem. This is also different from time-limits in finite horizon environments\nas the agent in this case has no idea about this time-limit.\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Importance in learning code\n\nBootstrapping (using one or more estimated values of a variable to update estimates of the same variable) is a key\naspect of Reinforcement Learning. A value function will tell you how much discounted reward you will get from a\nparticular state if you follow a given policy. When an episode stops at any given point, by looking at the value of\nthe final state, the agent is able to estimate how much discounted reward could have been obtained if the episode had\ncontinued. This is an example of handling truncation.\n\nMore formally, a common example of bootstrapping in RL is updating the estimate of the Q-value function,\n\n\\begin{align}Q_{target}(o_t, a_t) = r_t + \\gamma \\cdot \\max_{a} Q(o_{t+1}, a)\\end{align}\n\n\nIn classical RL, the new ``Q`` estimate is a weighted average of the previous ``Q`` estimate and ``Q_target`` while in Deep\nQ-Learning, the error between ``Q_target`` and the previous ``Q`` estimate is minimized.\n\nHowever, at the terminal state, bootstrapping is not done,\n\n\\begin{align}Q_{target}(o_t, a_t) = r_t\\end{align}\n\nThis is where the distinction between termination and truncation becomes important. When an episode ends due to\ntermination we don't bootstrap; when it ends due to truncation, we bootstrap.\n\nWhile using gymnasium environments, the ``done`` signal (default for < v0.26) is frequently used to determine whether to\nbootstrap or not. However, this is incorrect since it does not differentiate between termination and truncation.\n\nA simple example of value functions is shown below. This is an illustrative example and not part of any specific algorithm.\n\n```python\n# INCORRECT\nvf_target = rew + gamma * (1 - done) * vf_next_state\n```\n\nThis is incorrect in the case of an episode ending due to truncation, where bootstrapping needs to happen but doesn't.\n\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Solution\n\nFrom v0.26 onwards, Gymnasium's ``env.step`` API returns both termination and truncation information explicitly.\nIn previous versions, truncation information was supplied through the info key ``TimeLimit.truncated``.\nThe correct way to handle terminations and truncations now is,\n\n```python\n# terminated = done and 'TimeLimit.truncated' not in info\n# This was needed in previous versions.\n\nvf_target = rew + gamma * (1 - terminated) * vf_next_state\n```\n\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.11"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

View File

@@ -0,0 +1,573 @@
# fmt: off
"""
Make your own custom environment
================================
This tutorial shows how to create a new environment and links to relevant wrappers, utilities and tests included in Gymnasium.
Setup
------
Recommended solution
~~~~~~~~~~~~~~~~~~~~
1. Install ``pipx`` following the `pipx documentation <https://pypa.github.io/pipx/installation/>`_.
2. Then install Copier:
.. code:: console
pipx install copier
Alternative solutions
~~~~~~~~~~~~~~~~~~~~~
Install Copier with Pip or Conda:
.. code:: console
pip install copier
or
.. code:: console
conda install -c conda-forge copier
Generate your environment
------------------------------
You can check that ``Copier`` has been correctly installed by running the following command, which should output a version number:
.. code:: console
copier --version
Then you can just run the following command and replace the string ``path/to/directory`` by the path to the directory where you want to create your new project.
.. code:: console
copier copy https://github.com/Farama-Foundation/gymnasium-env-template.git "path/to/directory"
Answer the questions, and when it's finished you should get a project structure like the following:
.. code:: sh
.
├── gymnasium_env
│ ├── envs
│ │ ├── grid_world.py
│ │ └── __init__.py
│ ├── __init__.py
│ └── wrappers
│ ├── clip_reward.py
│ ├── discrete_actions.py
│ ├── __init__.py
│ ├── reacher_weighted_reward.py
│ └── relative_position.py
├── LICENSE
├── pyproject.toml
└── README.md
Subclassing gymnasium.Env
-------------------------
Before learning how to create your own environment you should check out
`the documentation of Gymnasium's API </api/env>`__.
To illustrate the process of subclassing ``gymnasium.Env``, we will
implement a very simplistic game, called ``GridWorldEnv``. We will write
the code for our custom environment in
``gymnasium_env/envs/grid_world.py``. The environment
consists of a 2-dimensional square grid of fixed size (specified via the
``size`` parameter during construction). The agent can move vertically
or horizontally between grid cells in each timestep. The goal of the
agent is to navigate to a target on the grid that has been placed
randomly at the beginning of the episode.
- Observations provide the location of the target and agent.
- There are 4 actions in our environment, corresponding to the
movements “right”, “up”, “left”, and “down”.
- A done signal is issued as soon as the agent has navigated to the
grid cell where the target is located.
- Rewards are binary and sparse, meaning that the immediate reward is
always zero, unless the agent has reached the target, then it is 1.
An episode in this environment (with ``size=5``) might look like this:
.. image:: /_static/videos/tutorials/environment-creation-example-episode.gif
:width: 400
:alt: Example episode of the custom environment
where the blue dot is the agent and the red square represents the
target.
Let us look at the source code of ``GridWorldEnv`` piece by piece:
"""
# %%
# Declaration and Initialization
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Our custom environment will inherit from the abstract class
# ``gymnasium.Env``. You shouldn't forget to add the ``metadata``
# attribute to your class. There, you should specify the render-modes that
# are supported by your environment (e.g., ``"human"``, ``"rgb_array"``,
# ``"ansi"``) and the framerate at which your environment should be
# rendered. Every environment should support ``None`` as render-mode; you
# don't need to add it in the metadata. In ``GridWorldEnv``, we will
# support the modes “rgb_array” and “human” and render at 4 FPS.
#
# The ``__init__`` method of our environment will accept the integer
# ``size``, that determines the size of the square grid. We will set up
# some variables for rendering and define ``self.observation_space`` and
# ``self.action_space``. In our case, observations should provide
# information about the location of the agent and target on the
# 2-dimensional grid. We will choose to represent observations in the form
# of dictionaries with keys ``"agent"`` and ``"target"``. An observation
# may look like ``{"agent": array([1, 0]), "target": array([0, 3])}``.
# Since we have 4 actions in our environment (“right”, “up”, “left”,
# “down”), we will use ``Discrete(4)`` as an action space. Here is the
# declaration of ``GridWorldEnv`` and the implementation of ``__init__``:
# gymnasium_env/envs/grid_world.py
from enum import Enum
import numpy as np
import pygame
import gymnasium as gym
from gymnasium import spaces
class Actions(Enum):
    """Discrete moves available to the agent; each value is the action index."""

    RIGHT = 0  # +1 along the first grid axis
    UP = 1  # +1 along the second grid axis
    LEFT = 2  # -1 along the first grid axis
    DOWN = 3  # -1 along the second grid axis
class GridWorldEnv(gym.Env):
    """Square grid world in which an agent walks to a randomly placed target."""

    # Supported render modes and the frame rate used for human rendering.
    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 4}

    def __init__(self, render_mode=None, size=5):
        """Set up the spaces, the action-to-direction table and rendering state.

        Args:
            render_mode: ``None`` or one of ``metadata["render_modes"]``.
            size: Side length of the square grid, in cells. Must be at least 2
                so that the agent and the target can occupy different cells.

        Raises:
            ValueError: If ``size`` is smaller than 2.
            AssertionError: If ``render_mode`` is not a supported mode.
        """
        if size < 2:
            # ``reset`` re-samples the target until it differs from the agent's
            # cell; on a grid with a single cell that loop would never finish.
            raise ValueError(f"size must be at least 2, got {size}")

        self.size = size  # The size of the square grid
        self.window_size = 512  # The size of the PyGame window

        # Observations are dictionaries with the agent's and the target's location.
        # Each location is an element of {0, ..., size - 1}^2, i.e. the same set of
        # values as MultiDiscrete([size, size]).
        self.observation_space = spaces.Dict(
            {
                "agent": spaces.Box(0, size - 1, shape=(2,), dtype=int),
                "target": spaces.Box(0, size - 1, shape=(2,), dtype=int),
            }
        )

        # Placeholder locations (outside the grid) until ``reset`` samples real ones.
        self._agent_location = np.array([-1, -1], dtype=int)
        self._target_location = np.array([-1, -1], dtype=int)

        # We have 4 actions, corresponding to "right", "up", "left", "down"
        self.action_space = spaces.Discrete(4)

        # Maps abstract actions from `self.action_space` to the direction we will
        # walk in if that action is taken, i.e. 0 corresponds to "right", 1 to "up" etc.
        self._action_to_direction = {
            Actions.RIGHT.value: np.array([1, 0]),
            Actions.UP.value: np.array([0, 1]),
            Actions.LEFT.value: np.array([-1, 0]),
            Actions.DOWN.value: np.array([0, -1]),
        }

        assert (
            render_mode is None or render_mode in self.metadata["render_modes"]
        ), f"unsupported render_mode: {render_mode!r}"
        self.render_mode = render_mode

        # If human-rendering is used, `self.window` will be a reference to the
        # window that we draw to and `self.clock` will be a clock that keeps the
        # environment rendering at the correct framerate. Both remain `None`
        # until human-mode is used for the first time.
        self.window = None
        self.clock = None
# %%
# Constructing Observations From Environment States
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Since we will need to compute observations both in ``reset`` and
# ``step``, it is often convenient to have a (private) method ``_get_obs``
# that translates the environment's state into an observation. However,
# this is not mandatory and you may as well compute observations in
# ``reset`` and ``step`` separately:
def _get_obs(self):
return {"agent": self._agent_location, "target": self._target_location}
# %%
# We can also implement a similar method for the auxiliary information
# that is returned by ``step`` and ``reset``. In our case, we would like
# to provide the Manhattan distance between the agent and the target:
def _get_info(self):
return {
"distance": np.linalg.norm(
self._agent_location - self._target_location, ord=1
)
}
# %%
# Oftentimes, info will also contain some data that is only available
# inside the ``step`` method (e.g., individual reward terms). In that case,
# we would have to update the dictionary that is returned by ``_get_info``
# in ``step``.
# %%
# Reset
# ~~~~~
#
# The ``reset`` method will be called to initiate a new episode. You may
# assume that the ``step`` method will not be called before ``reset`` has
# been called. Moreover, ``reset`` should be called whenever a done signal
# has been issued. Users may pass the ``seed`` keyword to ``reset`` to
# initialize any random number generator that is used by the environment
# to a deterministic state. It is recommended to use the random number
# generator ``self.np_random`` that is provided by the environment's base
# class, ``gymnasium.Env``. If you only use this RNG, you do not need to
# worry much about seeding, *but you need to remember to call
# ``super().reset(seed=seed)``* to make sure that ``gymnasium.Env``
# correctly seeds the RNG. Once this is done, we can randomly set the
# state of our environment. In our case, we randomly choose the agent's
# location and then repeatedly sample a random target position until it
# does not coincide with the agent's position.
#
# The ``reset`` method should return a tuple of the initial observation
# and some auxiliary information. We can use the methods ``_get_obs`` and
# ``_get_info`` that we implemented earlier for that:
def reset(self, seed=None, options=None):
    """Start a new episode and return ``(observation, info)``.

    Args:
        seed: Optional seed, forwarded to ``super().reset`` which seeds
            ``self.np_random``.
        options: Accepted for API compatibility; not used here.
    """
    # The base class takes care of seeding self.np_random
    super().reset(seed=seed)

    def sample_cell():
        # One uniformly random grid cell as an integer (x, y) pair
        return self.np_random.integers(0, self.size, size=2, dtype=int)

    self._agent_location = sample_cell()

    # Keep drawing target cells until one differs from the agent's cell
    while True:
        self._target_location = sample_cell()
        if not np.array_equal(self._target_location, self._agent_location):
            break

    observation = self._get_obs()
    info = self._get_info()

    if self.render_mode == "human":
        self._render_frame()

    return observation, info
# %%
# Step
# ~~~~
#
# The ``step`` method usually contains most of the logic of your
# environment. It accepts an ``action``, computes the state of the
# environment after applying that action and returns the 5-tuple
# ``(observation, reward, terminated, truncated, info)``. See
# :meth:`gymnasium.Env.step`. Once the new state of the environment has
# been computed, we can check whether it is a terminal state and we set
# ``terminated`` accordingly. Since we are using sparse binary rewards in
# ``GridWorldEnv``, computing ``reward`` is trivial once we know
# ``terminated``. To gather ``observation`` and ``info``, we can again make
# use of ``_get_obs`` and ``_get_info``:
def step(self, action):
    """Apply ``action`` and return ``(observation, reward, terminated, truncated, info)``.

    Args:
        action: Key into ``self._action_to_direction`` (an element of {0,1,2,3}).
    """
    # Look up the displacement that belongs to this action
    move = self._action_to_direction[action]

    # Clamp the new position to the grid so the agent cannot walk off it
    last_index = self.size - 1
    self._agent_location = np.clip(self._agent_location + move, 0, last_index)

    # The episode terminates exactly when the agent stands on the target
    reached_target = np.array_equal(self._agent_location, self._target_location)
    if reached_target:
        reward = 1  # Binary sparse rewards
    else:
        reward = 0

    observation = self._get_obs()
    info = self._get_info()

    if self.render_mode == "human":
        self._render_frame()

    truncated = False  # this method never reports truncation
    return observation, reward, reached_target, truncated, info
# %%
# Rendering
# ~~~~~~~~~
#
# Here, we are using PyGame for rendering. A similar approach to rendering
# is used in many environments that are included with Gymnasium and you
# can use it as a skeleton for your own environments:
def render(self):
    """Return the rendered frame in ``"rgb_array"`` mode; otherwise return ``None``."""
    if self.render_mode != "rgb_array":
        return None
    return self._render_frame()
def _render_frame(self):
    """Draw the current state; display it in human mode, otherwise return it as an array."""
    human_mode = self.render_mode == "human"

    # The window and the clock are created on first use in human mode only
    if human_mode and self.window is None:
        pygame.init()
        pygame.display.init()
        self.window = pygame.display.set_mode((self.window_size, self.window_size))
    if human_mode and self.clock is None:
        self.clock = pygame.time.Clock()

    # Everything is drawn onto an off-screen surface with a white background
    surface = pygame.Surface((self.window_size, self.window_size))
    surface.fill((255, 255, 255))

    # The size of a single grid square in pixels
    cell_px = self.window_size / self.size

    # Target: a red square filling its grid cell
    target_rect = pygame.Rect(cell_px * self._target_location, (cell_px, cell_px))
    pygame.draw.rect(surface, (255, 0, 0), target_rect)

    # Agent: a blue circle centred in its grid cell
    agent_center = (self._agent_location + 0.5) * cell_px
    pygame.draw.circle(surface, (0, 0, 255), agent_center, cell_px / 3)

    # Gridlines: one horizontal and one vertical line per cell boundary
    for k in range(self.size + 1):
        offset = cell_px * k
        pygame.draw.line(
            surface, 0, (0, offset), (self.window_size, offset), width=3
        )
        pygame.draw.line(
            surface, 0, (offset, 0), (offset, self.window_size), width=3
        )

    if not human_mode:
        # rgb_array: copy the pixels and swap the first two axes
        pixels = np.array(pygame.surfarray.pixels3d(surface))
        return np.transpose(pixels, axes=(1, 0, 2))

    # Copy the drawing from the off-screen surface to the visible window
    self.window.blit(surface, surface.get_rect())
    pygame.event.pump()
    pygame.display.update()

    # Human rendering must run at the predefined framerate; `tick` adds the
    # delay needed to keep it stable.
    self.clock.tick(self.metadata["render_fps"])
# %%
# Close
# ~~~~~
#
# The ``close`` method should close any open resources that were used by
# the environment. In many cases, you don't actually have to bother to
# implement this method. However, in our example ``render_mode`` may be
# ``"human"`` and we might need to close the window that has been opened:
def close(self):
    """Shut down PyGame if a window was opened and drop the stale handles.

    Does nothing when no window exists, so calling it repeatedly is harmless.
    """
    if self.window is not None:
        pygame.display.quit()
        pygame.quit()
        # Forget the handles of the closed display: a second ``close`` becomes
        # a no-op and no dead window is left behind for later code to draw on.
        self.window = None
        self.clock = None
# %%
# In other environments ``close`` might also close files that were opened
# or release other resources. You shouldn't interact with the environment
# after having called ``close``.
# %%
# Registering Envs
# ----------------
#
# In order for the custom environments to be detected by Gymnasium, they
# must be registered as follows. We will choose to put this code in
# ``gymnasium_env/__init__.py``.
#
# .. code:: python
#
# from gymnasium.envs.registration import register
#
# register(
# id="gymnasium_env/GridWorld-v0",
# entry_point="gymnasium_env.envs:GridWorldEnv",
# )
# %%
# The environment ID consists of three components, two of which are
# optional: an optional namespace (here: ``gymnasium_env``), a mandatory
# name (here: ``GridWorld``) and an optional but recommended version
# (here: v0). It might have also been registered as ``GridWorld-v0`` (the
# recommended approach), ``GridWorld`` or ``gymnasium_env/GridWorld``, and
# the appropriate ID should then be used during environment creation.
#
# Passing the keyword argument ``max_episode_steps=300`` to ``register``
# will ensure that GridWorld environments that are instantiated via
# ``gymnasium.make`` will be wrapped in a ``TimeLimit`` wrapper (see `the
# wrapper documentation </api/wrappers>`__ for more information). An
# episode will then end if the agent has reached the target *or* 300 steps
# have been executed in the current episode. To distinguish the two cases,
# check the ``terminated`` and ``truncated`` values returned by ``step``.
#
# Apart from ``id`` and ``entry_point``, you may pass the following
# additional keyword arguments to ``register``:
#
# +----------------------+-----------+-----------+---------------------------------------------------------------------------------------------------------------+
# | Name | Type | Default | Description |
# +======================+===========+===========+===============================================================================================================+
# | ``reward_threshold`` | ``float`` | ``None`` | The reward threshold before the task is considered solved |
# +----------------------+-----------+-----------+---------------------------------------------------------------------------------------------------------------+
# | ``nondeterministic`` | ``bool`` | ``False`` | Whether this environment is non-deterministic even after seeding |
# +----------------------+-----------+-----------+---------------------------------------------------------------------------------------------------------------+
# | ``max_episode_steps``| ``int`` | ``None`` | The maximum number of steps that an episode can consist of. If not ``None``, a ``TimeLimit`` wrapper is added |
# +----------------------+-----------+-----------+---------------------------------------------------------------------------------------------------------------+
# | ``order_enforce`` | ``bool`` | ``True`` | Whether to wrap the environment in an ``OrderEnforcing`` wrapper |
# +----------------------+-----------+-----------+---------------------------------------------------------------------------------------------------------------+
# | ``kwargs`` | ``dict`` | ``{}`` | The default kwargs to pass to the environment class |
# +----------------------+-----------+-----------+---------------------------------------------------------------------------------------------------------------+
#
# Most of these keywords (except for ``max_episode_steps``,
# ``order_enforce`` and ``kwargs``) do not alter the behavior of
# environment instances but merely provide some extra information about
# your environment. After registration, our custom ``GridWorldEnv``
# environment can be created with
# ``env = gymnasium.make('gymnasium_env/GridWorld-v0')``.
#
# ``gymnasium_env/envs/__init__.py`` should have:
#
# .. code:: python
#
# from gymnasium_env.envs.grid_world import GridWorldEnv
#
# If your environment is not registered, you may optionally pass a module
# to import, that would register your environment before creating it like
# this - ``env = gymnasium.make('module:Env-v0')``, where ``module``
# contains the registration code. For the GridWorld env, the registration
# code is run by importing ``gymnasium_env`` so if it were not possible to
# import gymnasium_env explicitly, you could register while making by
# ``env = gymnasium.make('gymnasium_env:gymnasium_env/GridWorld-v0')``. This
# is especially useful when you're allowed to pass only the environment ID
# into a third-party codebase (e.g. a learning library). This lets you
# register your environment without needing to edit the library's source
# code.
# %%
# Creating a Package
# ------------------
#
# The last step is to structure our code as a Python package. This
# involves configuring ``pyproject.toml``. A minimal example of how
# to do so is as follows:
#
# .. code:: toml
#
# [build-system]
# requires = ["hatchling"]
# build-backend = "hatchling.build"
#
# [project]
# name = "gymnasium_env"
# version = "0.0.1"
# dependencies = [
# "gymnasium",
# "pygame==2.1.3",
# "pre-commit",
# ]
#
# Creating Environment Instances
# ------------------------------
#
# Now you can install your package locally with:
#
# .. code:: console
#
# pip install -e .
#
# And you can create an instance of the environment via:
#
# .. code:: python
#
# # run_gymnasium_env.py
#
# import gymnasium
# import gymnasium_env
# env = gymnasium.make('gymnasium_env/GridWorld-v0')
#
# You can also pass keyword arguments of your environment's constructor to
# ``gymnasium.make`` to customize the environment. In our case, we could
# do:
#
# .. code:: python
#
# env = gymnasium.make('gymnasium_env/GridWorld-v0', size=10)
#
# Sometimes, you may find it more convenient to skip registration and call
# the environment's constructor yourself. Some may find this approach more
# pythonic and environments that are instantiated like this are also
# perfectly fine (but remember to add wrappers as well!).
#
# Using Wrappers
# --------------
#
# Oftentimes, we want to use different variants of a custom environment,
# or we want to modify the behavior of an environment that is provided by
# Gymnasium or some other party. Wrappers allow us to do this without
# changing the environment implementation or adding any boilerplate code.
# Check out the `wrapper documentation </api/wrappers/>`__ for details on
# how to use wrappers and instructions for implementing your own. In our
# example, observations cannot be used directly in learning code because
# they are dictionaries. However, we don't actually need to touch our
# environment implementation to fix this! We can simply add a wrapper on
# top of environment instances to flatten observations into a single
# array:
#
# .. code:: python
#
# import gymnasium
# import gymnasium_env
# from gymnasium.wrappers import FlattenObservation
#
# env = gymnasium.make('gymnasium_env/GridWorld-v0')
# wrapped_env = FlattenObservation(env)
# print(wrapped_env.reset()) # E.g. [3 0 3 3], {}
#
# Wrappers have the big advantage that they make environments highly
# modular. For instance, instead of flattening the observations from
# GridWorld, you might only want to look at the relative position of the
# target and the agent. In the section on
# `ObservationWrappers </api/wrappers/observation_wrappers/#observation-wrappers>`__ we have
# implemented a wrapper that does this job. This wrapper is also available
# in ``gymnasium_env/wrappers/relative_position.py``:
#
# .. code:: python
#
# import gymnasium
# import gymnasium_env
# from gymnasium_env.wrappers import RelativePosition
#
# env = gymnasium.make('gymnasium_env/GridWorld-v0')
# wrapped_env = RelativePosition(env)
# print(wrapped_env.reset()) # E.g. [-3 3], {}

View File

@@ -0,0 +1,122 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n# Training using REINFORCE for Mujoco\n\n<img src=\"file://_static/img/tutorials/mujoco_reinforce_fig1.gif\" width=\"400\" alt=\"agent-environment-diagram\">\n\nThis tutorial implements REINFORCE with neural networks for a MuJoCo environment.\n\nWe will be using **REINFORCE**, one of the earliest policy gradient methods. Unlike going under the burden of learning a value function first and then deriving a policy out of it,\nREINFORCE optimizes the policy directly. In other words, it is trained to maximize the probability of Monte-Carlo returns. More on that later.\n\n**Inverted Pendulum** is Mujoco's cartpole but now powered by the Mujoco physics simulator -\nwhich allows more complex experiments (such as varying the effects of gravity).\nThis environment involves a cart that can moved linearly, with a pole fixed on it at one end and having another end free.\nThe cart can be pushed left or right, and the goal is to balance the pole on the top of the cart by applying forces on the cart.\nMore information on the environment could be found at https://gymnasium.farama.org/environments/mujoco/inverted_pendulum/\n\n**Training Objectives**: To balance the pole (inverted pendulum) on top of the cart\n\n**Actions**: The agent takes a 1D vector for actions. The action space is a continuous ``(action)`` in ``[-3, 3]``,\nwhere action represents the numerical force applied to the cart\n(with magnitude representing the amount of force and sign representing the direction)\n\n**Approach**: We use PyTorch to code REINFORCE from scratch to train a Neural Network policy to master Inverted Pendulum.\n\nAn explanation of the Gymnasium v0.26+ `Env.step()` function\n\n``env.step(A)`` allows us to take an action 'A' in the current environment 'env'. 
The environment then executes the action\nand returns five variables:\n\n - ``next_obs``: This is the observation that the agent will receive after taking the action.\n - ``reward``: This is the reward that the agent will receive after taking the action.\n - ``terminated``: This is a boolean variable that indicates whether or not the environment has terminated.\n - ``truncated``: This is a boolean variable that also indicates whether the episode ended by early truncation, i.e., a time limit is reached.\n - ``info``: This is a dictionary that might contain additional information about the environment.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from __future__ import annotations\n\nimport random\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\nimport seaborn as sns\nimport torch\nimport torch.nn as nn\nfrom torch.distributions.normal import Normal\n\nimport gymnasium as gym\n\n\nplt.rcParams[\"figure.figsize\"] = (10, 5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Policy Network\n\n<img src=\"file://_static/img/tutorials/mujoco_reinforce_fig2.png\">\n\nWe start by building a policy that the agent will learn using REINFORCE.\nA policy is a mapping from the current environment observation to a probability distribution of the actions to be taken.\nThe policy used in the tutorial is parameterized by a neural network. It consists of 2 linear layers that are shared between both the predicted mean and standard deviation.\nFurther, the single individual linear layers are used to estimate the mean and the standard deviation. ``nn.Tanh`` is used as a non-linearity between the hidden layers.\nThe following function estimates a mean and standard deviation of a normal distribution from which an action is sampled. Hence it is expected for the policy to learn\nappropriate weights to output means and standard deviation based on the current observation.\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"class Policy_Network(nn.Module):\n \"\"\"Parametrized Policy Network.\"\"\"\n\n def __init__(self, obs_space_dims: int, action_space_dims: int):\n \"\"\"Initializes a neural network that estimates the mean and standard deviation\n of a normal distribution from which an action is sampled from.\n\n Args:\n obs_space_dims: Dimension of the observation space\n action_space_dims: Dimension of the action space\n \"\"\"\n super().__init__()\n\n hidden_space1 = 16 # Nothing special with 16, feel free to change\n hidden_space2 = 32 # Nothing special with 32, feel free to change\n\n # Shared Network\n self.shared_net = nn.Sequential(\n nn.Linear(obs_space_dims, hidden_space1),\n nn.Tanh(),\n nn.Linear(hidden_space1, hidden_space2),\n nn.Tanh(),\n )\n\n # Policy Mean specific Linear Layer\n self.policy_mean_net = nn.Sequential(\n nn.Linear(hidden_space2, action_space_dims)\n )\n\n # Policy Std Dev specific Linear Layer\n self.policy_stddev_net = nn.Sequential(\n nn.Linear(hidden_space2, action_space_dims)\n )\n\n def forward(self, x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:\n \"\"\"Conditioned on the observation, returns the mean and standard deviation\n of a normal distribution from which an action is sampled from.\n\n Args:\n x: Observation from the environment\n\n Returns:\n action_means: predicted mean of the normal distribution\n action_stddevs: predicted standard deviation of the normal distribution\n \"\"\"\n shared_features = self.shared_net(x.float())\n\n action_means = self.policy_mean_net(shared_features)\n action_stddevs = torch.log(\n 1 + torch.exp(self.policy_stddev_net(shared_features))\n )\n\n return action_means, action_stddevs"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Building an agent\n\n<img src=\"file://_static/img/tutorials/mujoco_reinforce_fig3.jpeg\">\n\nNow that we are done building the policy, let us develop **REINFORCE** which gives life to the policy network.\nThe algorithm of REINFORCE could be found above. As mentioned before, REINFORCE aims to maximize the Monte-Carlo returns.\n\nFun Fact: REINFORCE is an acronym for \" 'RE'ward 'I'ncrement 'N'on-negative 'F'actor times 'O'ffset 'R'einforcement times 'C'haracteristic 'E'ligibility\n\nNote: The choice of hyperparameters is to train a decently performing agent. No extensive hyperparameter\ntuning was done.\n\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"class REINFORCE:\n \"\"\"REINFORCE algorithm.\"\"\"\n\n def __init__(self, obs_space_dims: int, action_space_dims: int):\n \"\"\"Initializes an agent that learns a policy via REINFORCE algorithm [1]\n to solve the task at hand (Inverted Pendulum v4).\n\n Args:\n obs_space_dims: Dimension of the observation space\n action_space_dims: Dimension of the action space\n \"\"\"\n\n # Hyperparameters\n self.learning_rate = 1e-4 # Learning rate for policy optimization\n self.gamma = 0.99 # Discount factor\n self.eps = 1e-6 # small number for mathematical stability\n\n self.probs = [] # Stores probability values of the sampled action\n self.rewards = [] # Stores the corresponding rewards\n\n self.net = Policy_Network(obs_space_dims, action_space_dims)\n self.optimizer = torch.optim.AdamW(self.net.parameters(), lr=self.learning_rate)\n\n def sample_action(self, state: np.ndarray) -> float:\n \"\"\"Returns an action, conditioned on the policy and observation.\n\n Args:\n state: Observation from the environment\n\n Returns:\n action: Action to be performed\n \"\"\"\n state = torch.tensor(np.array([state]))\n action_means, action_stddevs = self.net(state)\n\n # create a normal distribution from the predicted\n # mean and standard deviation and sample an action\n distrib = Normal(action_means[0] + self.eps, action_stddevs[0] + self.eps)\n action = distrib.sample()\n prob = distrib.log_prob(action)\n\n action = action.numpy()\n\n self.probs.append(prob)\n\n return action\n\n def update(self):\n \"\"\"Updates the policy network's weights.\"\"\"\n running_g = 0\n gs = []\n\n # Discounted return (backwards) - [::-1] will return an array in reverse\n for R in self.rewards[::-1]:\n running_g = R + self.gamma * running_g\n gs.insert(0, running_g)\n\n deltas = torch.tensor(gs)\n\n log_probs = torch.stack(self.probs).squeeze()\n\n # Update the loss with the mean log probability and deltas\n # Now, we compute the correct total loss by taking the sum of the element-wise products.\n loss = 
-torch.sum(log_probs * deltas)\n\n # Update the policy network\n self.optimizer.zero_grad()\n loss.backward()\n self.optimizer.step()\n\n # Empty / zero out all episode-centric/related variables\n self.probs = []\n self.rewards = []"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now lets train the policy using REINFORCE to master the task of Inverted Pendulum.\n\nFollowing is the overview of the training procedure\n\n for seed in random seeds\n reinitialize agent\n\n for episode in range of max number of episodes\n until episode is done\n sample action based on current observation\n\n take action and receive reward and next observation\n\n store action take, its probability, and the observed reward\n update the policy\n\nNote: Deep RL is fairly brittle concerning random seed in a lot of common use cases (https://spinningup.openai.com/en/latest/spinningup/spinningup.html).\nHence it is important to test out various seeds, which we will be doing.\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Create and wrap the environment\nenv = gym.make(\"InvertedPendulum-v4\")\nwrapped_env = gym.wrappers.RecordEpisodeStatistics(env, 50) # Records episode-reward\n\ntotal_num_episodes = int(5e3) # Total number of episodes\n# Observation-space of InvertedPendulum-v4 (4)\nobs_space_dims = env.observation_space.shape[0]\n# Action-space of InvertedPendulum-v4 (1)\naction_space_dims = env.action_space.shape[0]\nrewards_over_seeds = []\n\nfor seed in [1, 2, 3, 5, 8]: # Fibonacci seeds\n # set seed\n torch.manual_seed(seed)\n random.seed(seed)\n np.random.seed(seed)\n\n # Reinitialize agent every seed\n agent = REINFORCE(obs_space_dims, action_space_dims)\n reward_over_episodes = []\n\n for episode in range(total_num_episodes):\n # gymnasium v26 requires users to set seed while resetting the environment\n obs, info = wrapped_env.reset(seed=seed)\n\n done = False\n while not done:\n action = agent.sample_action(obs)\n\n # Step return type - `tuple[ObsType, SupportsFloat, bool, bool, dict[str, Any]]`\n # These represent the next observation, the reward from the step,\n # if the episode is terminated, if the episode is truncated and\n # additional info from the step\n obs, reward, terminated, truncated, info = wrapped_env.step(action)\n agent.rewards.append(reward)\n\n # End the episode when either truncated or terminated is true\n # - truncated: The episode duration reaches max number of timesteps\n # - terminated: Any of the state space values is no longer finite.\n #\n done = terminated or truncated\n\n reward_over_episodes.append(wrapped_env.return_queue[-1])\n agent.update()\n\n if episode % 1000 == 0:\n avg_reward = int(np.mean(wrapped_env.return_queue))\n print(\"Episode:\", episode, \"Average Reward:\", avg_reward)\n\n rewards_over_seeds.append(reward_over_episodes)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Plot learning curve\n\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df1 = pd.DataFrame(rewards_over_seeds).melt()\ndf1.rename(columns={\"variable\": \"episodes\", \"value\": \"reward\"}, inplace=True)\nsns.set(style=\"darkgrid\", context=\"talk\", palette=\"rainbow\")\nsns.lineplot(x=\"episodes\", y=\"reward\", data=df1).set(\n title=\"REINFORCE for InvertedPendulum-v4\"\n)\nplt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<img src=\"file://_static/img/tutorials/mujoco_reinforce_fig4.png\">\n\nAuthor: Siddarth Chandrasekar\n\nLicense: MIT License\n\n## References\n\n[1] Williams, Ronald J.. \u201cSimple statistical gradient-following\nalgorithms for connectionist reinforcement learning.\u201d Machine Learning 8\n(2004): 229-256.\n\n\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.11"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

View File

@@ -0,0 +1,318 @@
# fmt: off
"""
Training using REINFORCE for Mujoco
===================================
.. image:: /_static/img/tutorials/mujoco_reinforce_fig1.gif
:width: 400
:alt: agent-environment-diagram
This tutorial implements REINFORCE with neural networks for a MuJoCo environment.
We will be using **REINFORCE**, one of the earliest policy gradient methods. Rather than first learning a value function and then deriving a policy from it,
REINFORCE optimizes the policy directly. In other words, it is trained to maximize the expected Monte-Carlo return. More on that later.
**Inverted Pendulum** is Mujoco's cartpole but now powered by the Mujoco physics simulator -
which allows more complex experiments (such as varying the effects of gravity).
This environment involves a cart that can be moved linearly, with a pole fixed on it at one end and having another end free.
The cart can be pushed left or right, and the goal is to balance the pole on the top of the cart by applying forces on the cart.
More information on the environment could be found at https://gymnasium.farama.org/environments/mujoco/inverted_pendulum/
**Training Objectives**: To balance the pole (inverted pendulum) on top of the cart
**Actions**: The agent takes a 1D vector for actions. The action space is a continuous ``(action)`` in ``[-3, 3]``,
where action represents the numerical force applied to the cart
(with magnitude representing the amount of force and sign representing the direction)
**Approach**: We use PyTorch to code REINFORCE from scratch to train a Neural Network policy to master Inverted Pendulum.
An explanation of the Gymnasium v0.26+ `Env.step()` function
``env.step(A)`` allows us to take an action 'A' in the current environment 'env'. The environment then executes the action
and returns five variables:
- ``next_obs``: This is the observation that the agent will receive after taking the action.
- ``reward``: This is the reward that the agent will receive after taking the action.
- ``terminated``: This is a boolean variable that indicates whether or not the environment has terminated.
- ``truncated``: This is a boolean variable that also indicates whether the episode ended by early truncation, i.e., a time limit is reached.
- ``info``: This is a dictionary that might contain additional information about the environment.
"""
from __future__ import annotations
import random
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
from torch.distributions.normal import Normal
import gymnasium as gym
plt.rcParams["figure.figsize"] = (10, 5)
# %%
# Policy Network
# ~~~~~~~~~~~~~~
#
# .. image:: /_static/img/tutorials/mujoco_reinforce_fig2.png
#
# We start by building a policy that the agent will learn using REINFORCE.
# A policy is a mapping from the current environment observation to a probability distribution of the actions to be taken.
# The policy used in the tutorial is parameterized by a neural network. It consists of 2 linear layers that are shared between both the predicted mean and standard deviation.
# On top of these, two separate linear layers estimate the mean and the standard deviation, respectively. ``nn.Tanh`` is used as a non-linearity between the hidden layers.
# The following function estimates a mean and standard deviation of a normal distribution from which an action is sampled. Hence it is expected for the policy to learn
# appropriate weights to output means and standard deviation based on the current observation.
class Policy_Network(nn.Module):
    """Parametrized Gaussian policy network.

    Maps an observation to the mean and standard deviation of a normal
    distribution over actions. A small shared trunk feeds two separate
    linear heads, one per distribution parameter.
    """

    def __init__(self, obs_space_dims: int, action_space_dims: int):
        """Initializes a neural network that estimates the mean and standard deviation
        of a normal distribution from which an action is sampled.

        Args:
            obs_space_dims: Dimension of the observation space
            action_space_dims: Dimension of the action space
        """
        super().__init__()

        hidden_space1 = 16  # Nothing special with 16, feel free to change
        hidden_space2 = 32  # Nothing special with 32, feel free to change

        # Shared Network
        self.shared_net = nn.Sequential(
            nn.Linear(obs_space_dims, hidden_space1),
            nn.Tanh(),
            nn.Linear(hidden_space1, hidden_space2),
            nn.Tanh(),
        )

        # Policy Mean specific Linear Layer
        self.policy_mean_net = nn.Sequential(
            nn.Linear(hidden_space2, action_space_dims)
        )

        # Policy Std Dev specific Linear Layer
        self.policy_stddev_net = nn.Sequential(
            nn.Linear(hidden_space2, action_space_dims)
        )

    def forward(self, x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
        """Conditioned on the observation, returns the mean and standard deviation
        of a normal distribution from which an action is sampled.

        Args:
            x: Observation from the environment (cast to float before use)

        Returns:
            action_means: predicted mean of the normal distribution
            action_stddevs: predicted standard deviation of the normal
                distribution (non-negative)
        """
        shared_features = self.shared_net(x.float())

        action_means = self.policy_mean_net(shared_features)
        # softplus(z) = log(1 + exp(z)) keeps the standard deviation
        # non-negative. The library implementation is used instead of the
        # literal formula because exp(z) overflows to inf for large z, which
        # would turn the standard deviation (and then the loss) into inf/nan.
        action_stddevs = nn.functional.softplus(
            self.policy_stddev_net(shared_features)
        )

        return action_means, action_stddevs
# %%
# Building an agent
# ~~~~~~~~~~~~~~~~~
#
# .. image:: /_static/img/tutorials/mujoco_reinforce_fig3.jpeg
#
# Now that we are done building the policy, let us develop **REINFORCE** which gives life to the policy network.
# The algorithm of REINFORCE could be found above. As mentioned before, REINFORCE aims to maximize the Monte-Carlo returns.
#
# Fun Fact: REINFORCE is an acronym for "'RE'ward 'I'ncrement 'N'on-negative 'F'actor times 'O'ffset 'R'einforcement times 'C'haracteristic 'E'ligibility".
#
# Note: The choice of hyperparameters is to train a decently performing agent. No extensive hyperparameter
# tuning was done.
#
class REINFORCE:
    """REINFORCE (Monte-Carlo policy gradient) agent.

    The agent collects the log-probabilities of the actions it samples and the
    rewards it is given during one episode, then performs a single gradient
    step on the policy network in ``update()``.
    """

    def __init__(self, obs_space_dims: int, action_space_dims: int):
        """Initializes an agent that learns a policy via REINFORCE algorithm [1]
        to solve the task at hand (Inverted Pendulum v4).

        Args:
            obs_space_dims: Dimension of the observation space
            action_space_dims: Dimension of the action space
        """

        # Hyperparameters
        self.learning_rate = 1e-4  # Learning rate for policy optimization
        self.gamma = 0.99  # Discount factor
        self.eps = 1e-6  # small number for mathematical stability

        self.probs = []  # Stores log-probabilities of the sampled actions
        self.rewards = []  # Stores the corresponding rewards

        self.net = Policy_Network(obs_space_dims, action_space_dims)
        self.optimizer = torch.optim.AdamW(self.net.parameters(), lr=self.learning_rate)

    def sample_action(self, state: np.ndarray) -> np.ndarray:
        """Returns an action, conditioned on the policy and observation.

        The log-probability of the sampled action is appended to
        ``self.probs`` so that ``update()`` can use it later.

        Args:
            state: Observation from the environment

        Returns:
            action: Action to be performed, as a NumPy array with one entry
                per action dimension
        """
        # Wrap the observation in a batch of size one for the network.
        state = torch.tensor(np.array([state]))
        action_means, action_stddevs = self.net(state)

        # create a normal distribution from the predicted
        # mean and standard deviation and sample an action.
        # eps is added to both parameters; it matters for the scale, which
        # must be strictly positive for the distribution to be valid.
        distrib = Normal(action_means[0] + self.eps, action_stddevs[0] + self.eps)
        action = distrib.sample()
        prob = distrib.log_prob(action)

        action = action.numpy()

        self.probs.append(prob)

        return action

    def update(self):
        """Updates the policy network's weights from the episode collected so far.

        Uses the rewards in ``self.rewards`` and the log-probabilities in
        ``self.probs`` (one entry per step), then clears both lists.
        """
        running_g = 0
        gs = []

        # Discounted return, accumulated backwards from the last step.
        # Appending and reversing once is linear in the episode length.
        for R in reversed(self.rewards):
            running_g = R + self.gamma * running_g
            gs.append(running_g)
        gs.reverse()

        deltas = torch.tensor(gs)

        # One log-probability per step: the action dimensions are independent
        # normals, so their log-probabilities add up. Flattening to
        # (steps, -1) first keeps this correct for scalar entries and for
        # action spaces with more than one dimension.
        log_probs = torch.stack(self.probs).reshape(len(self.probs), -1).sum(dim=1)

        # Total loss: negative sum over steps of log-probability times return.
        loss = -torch.sum(log_probs * deltas)

        # Update the policy network
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        # Empty / zero out all episode-centric/related variables
        self.probs = []
        self.rewards = []
# %%
# Now let's train the policy using REINFORCE to master the task of Inverted Pendulum.
#
# Following is the overview of the training procedure
#
# for seed in random seeds
# reinitialize agent
#
# for episode in range of max number of episodes
# until episode is done
# sample action based on current observation
#
# take action and receive reward and next observation
#
# store action taken, its probability, and the observed reward
# update the policy
#
# Note: Deep RL is fairly brittle concerning random seed in a lot of common use cases (https://spinningup.openai.com/en/latest/spinningup/spinningup.html).
# Hence it is important to test out various seeds, which we will be doing.
# Create and wrap the environment
env = gym.make("InvertedPendulum-v4")
wrapped_env = gym.wrappers.RecordEpisodeStatistics(env, 50)  # Records episode-reward

total_num_episodes = int(5e3)  # Total number of episodes
# Observation-space of InvertedPendulum-v4 (4)
obs_space_dims = env.observation_space.shape[0]
# Action-space of InvertedPendulum-v4 (1)
action_space_dims = env.action_space.shape[0]
rewards_over_seeds = []

for seed in [1, 2, 3, 5, 8]:  # Fibonacci seeds
    # set seed
    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)

    # Reinitialize agent every seed
    agent = REINFORCE(obs_space_dims, action_space_dims)
    reward_over_episodes = []

    for episode in range(total_num_episodes):
        # Seed the environment only on the first reset of each run.
        # Passing the same seed on every reset would re-seed the
        # environment's random number generator each time, so every episode
        # would start from the same initial state.
        if episode == 0:
            obs, info = wrapped_env.reset(seed=seed)
        else:
            obs, info = wrapped_env.reset()

        done = False
        while not done:
            action = agent.sample_action(obs)

            # Step return type - `tuple[ObsType, SupportsFloat, bool, bool, dict[str, Any]]`
            # These represent the next observation, the reward from the step,
            # if the episode is terminated, if the episode is truncated and
            # additional info from the step
            obs, reward, terminated, truncated, info = wrapped_env.step(action)
            agent.rewards.append(reward)

            # End the episode when either truncated or terminated is true
            #  - truncated: The episode duration reaches max number of timesteps
            #  - terminated: The environment reached a terminal state
            done = terminated or truncated

        # The wrapper stores the return of the episode that just finished
        # as the newest entry of its return queue.
        reward_over_episodes.append(wrapped_env.return_queue[-1])
        agent.update()

        if episode % 1000 == 0:
            # Average over the returns currently held in the wrapper's queue
            avg_reward = int(np.mean(wrapped_env.return_queue))
            print("Episode:", episode, "Average Reward:", avg_reward)

    rewards_over_seeds.append(reward_over_episodes)
# %%
# Plot learning curve
# ~~~~~~~~~~~~~~~~~~~
#
# Reshape to long format: one row per (seed, episode) pair, with the episode
# index in "episodes" and the episode return in "reward".
df1 = pd.DataFrame(rewards_over_seeds).melt()
df1 = df1.rename(columns={"variable": "episodes", "value": "reward"})

# Draw the reward per episode, aggregated over the seeds.
sns.set(style="darkgrid", context="talk", palette="rainbow")
learning_curve_ax = sns.lineplot(x="episodes", y="reward", data=df1)
learning_curve_ax.set(title="REINFORCE for InvertedPendulum-v4")
plt.show()
# %%
# .. image:: /_static/img/tutorials/mujoco_reinforce_fig4.png
#
# Author: Siddarth Chandrasekar
#
# License: MIT License
#
# References
# ~~~~~~~~~~
#
# [1] Williams, Ronald J. “Simple statistical gradient-following
# algorithms for connectionist reinforcement learning.” Machine Learning 8
# (1992): 229-256.
#

View File

@@ -0,0 +1,461 @@
"""
Solving Blackjack with Tabular Q-Learning
=========================================
This tutorial trains an agent for BlackJack using tabular Q-learning.
"""
# %%
# .. image:: /_static/img/tutorials/blackjack_AE_loop.jpg
# :width: 650
# :alt: agent-environment-diagram
# :class: only-light
# .. image:: /_static/img/tutorials/blackjack_AE_loop_dark.png
# :width: 650
# :alt: agent-environment-diagram
# :class: only-dark
#
# In this tutorial, we'll explore and solve the *Blackjack-v1*
# environment.
#
# **Blackjack** is one of the most popular casino card games that is also
# infamous for being beatable under certain conditions. This version of
# the game uses an infinite deck (we draw the cards with replacement), so
# counting cards won't be a viable strategy in our simulated game.
# Full documentation can be found at https://gymnasium.farama.org/environments/toy_text/blackjack
#
# **Objective**: To win, your card sum should be greater than the
# dealer's without exceeding 21.
#
# **Actions**: Agents can pick between two actions:
# - stand (0): the player takes no more cards
# - hit (1): the player will be given another card, however the player could get over 21 and bust
#
# **Approach**: To solve this environment by yourself, you can pick your
# favorite discrete RL algorithm. The presented solution uses *Q-learning*
# (a model-free RL algorithm).
#
# %%
# Imports and Environment Setup
# ------------------------------
#
# Author: Till Zemann
# License: MIT License
from __future__ import annotations
from collections import defaultdict
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from matplotlib.patches import Patch
from tqdm import tqdm
import gymnasium as gym
# Let's start by creating the blackjack environment.
# Note: We are going to follow the rules from Sutton & Barto.
# Other versions of the game can be found below for you to experiment.
env = gym.make("Blackjack-v1", sab=True)
# %%
# .. code:: py
#
# # Other possible environment configurations are:
#
# env = gym.make('Blackjack-v1', natural=True, sab=False)
# # Whether to give an additional reward for starting with a natural blackjack, i.e. starting with an ace and ten (sum is 21).
#
# env = gym.make('Blackjack-v1', natural=False, sab=False)
# # Whether to follow the exact rules outlined in the book by Sutton and Barto. If `sab` is `True`, the keyword argument `natural` will be ignored.
#
# %%
# Observing the environment
# ------------------------------
#
# First of all, we call ``env.reset()`` to start an episode. This function
# resets the environment to a starting position and returns an initial
# ``observation``. We usually also set ``done = False``. This variable
# will be useful later to check if a game is terminated (i.e., the player wins or loses).
#
# reset the environment to get the first observation
done = False
observation, info = env.reset()
# observation = (16, 9, False)
# %%
# Note that our observation is a 3-tuple consisting of 3 values:
#
# - The player's current sum
# - Value of the dealer's face-up card
# - Boolean whether the player holds a usable ace (An ace is usable if it
# counts as 11 without busting)
#
# %%
# Executing an action
# ------------------------------
#
# After receiving our first observation, we are only going to use the
# ``env.step(action)`` function to interact with the environment. This
# function takes an action as input and executes it in the environment.
# Because that action changes the state of the environment, it returns
# five useful variables to us. These are:
#
# - ``next_state``: This is the observation that the agent will receive
# after taking the action.
# - ``reward``: This is the reward that the agent will receive after
# taking the action.
# - ``terminated``: This is a boolean variable that indicates whether or
# not the environment has terminated.
# - ``truncated``: This is a boolean variable that also indicates whether
# the episode ended by early truncation, i.e., a time limit is reached.
# - ``info``: This is a dictionary that might contain additional
# information about the environment.
#
# The ``next_state``, ``reward``, ``terminated`` and ``truncated`` variables are
# self-explanatory, but the ``info`` variable requires some additional
# explanation. This variable contains a dictionary that might have some
# extra information about the environment, but in the Blackjack-v1
# environment you can ignore it. For example in Atari environments the
# info dictionary has a ``ale.lives`` key that tells us how many lives the
# agent has left. If the agent has 0 lives, then the episode is over.
#
# Note that it is not a good idea to call ``env.render()`` in your training
# loop because rendering slows down training by a lot. Rather try to build
# an extra loop to evaluate and showcase the agent after training.
#
# sample a random action from all valid actions
action = env.action_space.sample()
# action=1
# execute the action in our environment and receive infos from the environment
observation, reward, terminated, truncated, info = env.step(action)
# observation=(24, 10, False)
# reward=-1.0
# terminated=True
# truncated=False
# info={}
# %%
# Once ``terminated = True`` or ``truncated=True``, we should stop the
# current episode and begin a new one with ``env.reset()``. If you
# continue executing actions without resetting the environment, it still
# responds but the output won't be useful for training (it might even be
# harmful if the agent learns on invalid data).
#
# %%
# Building an agent
# ------------------------------
#
# Let's build a ``Q-learning agent`` to solve *Blackjack-v1*! We'll need
# some functions for picking an action and updating the agent's action
# values. To ensure that the agent explores the environment, one possible
# solution is the ``epsilon-greedy`` strategy, where we pick a random
# action with probability ``epsilon`` and the greedy action (currently
# valued as the best) with probability ``1 - epsilon``.
#
class BlackjackAgent:
    """Tabular Q-learning agent that acts epsilon-greedily."""

    def __init__(
        self,
        env,
        learning_rate: float,
        initial_epsilon: float,
        epsilon_decay: float,
        final_epsilon: float,
        discount_factor: float = 0.95,
    ):
        """Set up an empty table of state-action values (q_values) together
        with the learning and exploration settings.

        Args:
            env: Environment whose ``action_space.n`` sizes each table row
            learning_rate: The learning rate
            initial_epsilon: The initial epsilon value
            epsilon_decay: The decay for epsilon
            final_epsilon: The final epsilon value
            discount_factor: The discount factor for computing the Q-value
        """

        def fresh_action_values():
            # One zero per action; built the first time a state is looked up.
            return np.zeros(env.action_space.n)

        self.q_values = defaultdict(fresh_action_values)
        self.training_error = []

        self.lr = learning_rate
        self.discount_factor = discount_factor

        self.epsilon = initial_epsilon
        self.epsilon_decay = epsilon_decay
        self.final_epsilon = final_epsilon

    def get_action(self, env, obs: tuple[int, int, bool]) -> int:
        """
        Picks a random action with probability epsilon (exploration),
        otherwise the action with the highest stored value (exploitation).
        """
        exploring = np.random.random() < self.epsilon
        if not exploring:
            # exploit: best action according to the current table
            return int(np.argmax(self.q_values[obs]))
        # explore: any valid action, chosen by the action space itself
        return env.action_space.sample()

    def update(
        self,
        obs: tuple[int, int, bool],
        action: int,
        reward: float,
        terminated: bool,
        next_obs: tuple[int, int, bool],
    ):
        """Moves the Q-value of ``(obs, action)`` towards its one-step target."""
        # Value of the best follow-up action; zeroed out once the episode
        # has terminated, because nothing can be gained after that.
        best_next_value = np.max(self.q_values[next_obs])
        bootstrap = (not terminated) * best_next_value

        current_estimate = self.q_values[obs][action]
        td_error = reward + self.discount_factor * bootstrap - current_estimate

        self.q_values[obs][action] = current_estimate + self.lr * td_error
        self.training_error.append(td_error)

    def decay_epsilon(self):
        """Lowers epsilon by one decay step, never going below ``final_epsilon``."""
        decayed = self.epsilon - self.epsilon_decay
        self.epsilon = decayed if decayed > self.final_epsilon else self.final_epsilon
# %%
# To train the agent, we will let the agent play one episode (one complete
# game is called an episode) at a time and then update its Q-values after
# each step (one single action in a game is called a step).
#
# The agent will have to experience a lot of episodes to explore the
# environment sufficiently.
#
# Now we should be ready to build the training loop.
#
# hyperparameters
n_episodes = 100_000  # number of complete games to play
learning_rate = 0.01  # step size of every Q-value update
start_epsilon = 1.0  # start out fully exploratory
final_epsilon = 0.1  # lower bound for the exploration rate
# linear schedule: the amount subtracted from epsilon after each decay step
epsilon_decay = start_epsilon / (n_episodes / 2)  # reduce the exploration over time

agent = BlackjackAgent(
    env=env,
    learning_rate=learning_rate,
    initial_epsilon=start_epsilon,
    epsilon_decay=epsilon_decay,
    final_epsilon=final_epsilon,
)
# %%
# Great, let's train!
#
# Info: The current hyperparameters are set to quickly train a decent agent.
# If you want to converge to the optimal policy, try increasing
# the n_episodes by 10x and lower the learning_rate (e.g. to 0.001).
#
# keep per-episode statistics (returns, lengths) for every training episode
env = gym.wrappers.RecordEpisodeStatistics(env, buffer_length=n_episodes)

for episode in tqdm(range(n_episodes)):
    obs, info = env.reset()
    done = False

    # play one episode
    while not done:
        action = agent.get_action(env, obs)
        next_obs, reward, terminated, truncated, info = env.step(action)

        # update the agent
        agent.update(obs, action, reward, terminated, next_obs)

        # update if the environment is done and the current obs
        done = terminated or truncated
        obs = next_obs

    # the decay schedule is defined per episode (see ``epsilon_decay``),
    # so epsilon is reduced once after each finished episode
    agent.decay_epsilon()
# %%
# Visualizing the training
# ------------------------------
#
rolling_length = 500


def _moving_average(values, window):
    """Return the rolling mean of ``values`` over ``window`` consecutive samples.

    ``mode="valid"`` keeps only the positions where the window fully overlaps
    the data. ``mode="same"`` would zero-pad both ends and make the start and
    end of every curve ramp artificially towards zero.
    """
    return (
        np.convolve(np.array(values).flatten(), np.ones(window), mode="valid")
        / window
    )


fig, axs = plt.subplots(ncols=3, figsize=(12, 5))

# compute and assign a rolling average of the data to provide a smoother graph
axs[0].set_title("Episode rewards")
reward_moving_average = _moving_average(env.return_queue, rolling_length)
axs[0].plot(range(len(reward_moving_average)), reward_moving_average)

axs[1].set_title("Episode lengths")
length_moving_average = _moving_average(env.length_queue, rolling_length)
axs[1].plot(range(len(length_moving_average)), length_moving_average)

# note: the training error has one entry per step, not per episode
axs[2].set_title("Training Error")
training_error_moving_average = _moving_average(agent.training_error, rolling_length)
axs[2].plot(range(len(training_error_moving_average)), training_error_moving_average)

plt.tight_layout()
plt.show()
# %%
# .. image:: /_static/img/tutorials/blackjack_training_plots.png
#
# %%
# Visualising the policy
# ------------------------------
def create_grids(agent, usable_ace=False):
    """Create value and policy grid given an agent.

    Args:
        agent: Object exposing ``q_values``, a mapping from
            ``(player_sum, dealer_card, usable_ace)`` to an array of action values
        usable_ace: Which slice of the observation space to build the grids for

    Returns:
        A pair ``(value_grid, policy_grid)``. ``value_grid`` is the tuple
        ``(player_count, dealer_count, value)`` of equally shaped 2-D arrays;
        ``policy_grid`` is a 2-D array with the greedy action of every cell.
        Cells the agent never visited get value ``0.0`` and action ``0``.
    """
    # convert our state-action values to state values
    # and build a policy dictionary that maps observations to actions
    state_value = defaultdict(float)
    policy = defaultdict(int)
    for obs, action_values in agent.q_values.items():
        state_value[obs] = float(np.max(action_values))
        policy[obs] = int(np.argmax(action_values))

    player_count, dealer_count = np.meshgrid(
        # players count, dealers face-up card
        np.arange(12, 22),
        np.arange(1, 11),
    )

    # last axis holds the (player_sum, dealer_card) pair of each grid cell;
    # built once and shared by both lookups below
    cells = np.dstack([player_count, dealer_count])

    # create the value grid for plotting
    value = np.apply_along_axis(
        lambda obs: state_value[(obs[0], obs[1], usable_ace)],
        axis=2,
        arr=cells,
    )
    value_grid = player_count, dealer_count, value

    # create the policy grid for plotting
    policy_grid = np.apply_along_axis(
        lambda obs: policy[(obs[0], obs[1], usable_ace)],
        axis=2,
        arr=cells,
    )
    return value_grid, policy_grid
def create_plots(value_grid, policy_grid, title: str):
    """Creates a plot using a value and policy grid.

    Args:
        value_grid: Tuple ``(player_count, dealer_count, value)`` of 2-D arrays,
            drawn as a 3-D surface in the left subplot
        policy_grid: 2-D array of actions, drawn as an annotated heatmap in
            the right subplot
        title: Figure title, also embedded in both subplot titles

    Returns:
        The matplotlib figure holding both subplots.
    """
    # create a new figure with 2 subplots (left: state values, right: policy)
    player_count, dealer_count, value = value_grid
    fig = plt.figure(figsize=plt.figaspect(0.4))
    fig.suptitle(title, fontsize=16)

    # plot the state values
    ax1 = fig.add_subplot(1, 2, 1, projection="3d")
    ax1.plot_surface(
        player_count,
        dealer_count,
        value,
        rstride=1,
        cstride=1,
        cmap="viridis",
        edgecolor="none",
    )
    # pyplot tick helpers act on the current axes, which is still ``ax1`` here
    plt.xticks(range(12, 22), range(12, 22))
    plt.yticks(range(1, 11), ["A"] + list(range(2, 11)))
    ax1.set_title(f"State values: {title}")
    ax1.set_xlabel("Player sum")
    ax1.set_ylabel("Dealer showing")
    ax1.zaxis.set_rotate_label(False)
    ax1.set_zlabel("Value", fontsize=14, rotation=90)
    ax1.view_init(20, 220)

    # plot the policy
    # the new subplot becomes the current axes, which the heatmap draws into
    fig.add_subplot(1, 2, 2)
    ax2 = sns.heatmap(policy_grid, linewidth=0, annot=True, cmap="Accent_r", cbar=False)
    ax2.set_title(f"Policy: {title}")
    ax2.set_xlabel("Player sum")
    ax2.set_ylabel("Dealer showing")
    ax2.set_xticklabels(range(12, 22))
    ax2.set_yticklabels(["A"] + list(range(2, 11)), fontsize=12)

    # add a legend
    legend_elements = [
        Patch(facecolor="lightgreen", edgecolor="black", label="Hit"),
        Patch(facecolor="grey", edgecolor="black", label="Stick"),
    ]
    ax2.legend(handles=legend_elements, bbox_to_anchor=(1.3, 1))
    return fig
# state values & policy with usable ace (ace counts as 11)
# build both grids from the trained agent's Q-values, then draw and show them
value_grid, policy_grid = create_grids(agent, usable_ace=True)
fig1 = create_plots(value_grid, policy_grid, title="With usable ace")
plt.show()
# %%
# .. image:: /_static/img/tutorials/blackjack_with_usable_ace.png
#
# state values & policy without usable ace (ace counts as 1)
# same pipeline as above, for the observations whose usable-ace flag is False
value_grid, policy_grid = create_grids(agent, usable_ace=False)
fig2 = create_plots(value_grid, policy_grid, title="Without usable ace")
plt.show()
# %%
# .. image:: /_static/img/tutorials/blackjack_without_usable_ace.png
#
# It's good practice to call ``env.close()`` at the end of your script,
# so that any resources used by the environment are released.
#
# %%
# Think you can do better?
# ------------------------------
# You can visualize the environment using the play function
# and try to win a few games.
# %%
# Hopefully this tutorial helped you get a grip on how to interact with
# Gymnasium environments and sets you on a journey to solve many more RL
# challenges.
#
# It is recommended that you solve this environment by yourself (project
# based learning is really effective!). You can apply your favorite
# discrete RL algorithm or give Monte Carlo ES a try (covered in `Sutton &
# Barto <http://incompleteideas.net/book/the-book-2nd.html>`_, section
# 5.3) - this way you can compare your results directly to the book.
#
# Best of fun!
#

View File

@@ -0,0 +1,290 @@
"""
Load custom quadruped robot environments
========================================
In this tutorial, we will create a MuJoCo quadruped walking environment using a model file (ending in `.xml`) without having to create a new class.
Steps:
0. Get your **MJCF** (or **URDF**) model file of your robot.
- Create your own model (see the MuJoCo Guide) or,
- Find a ready-made model (in this tutorial, we will use a model from the MuJoCo Menagerie collection).
1. Load the model with the `xml_file` argument.
2. Tweak the environment parameters to get the desired behavior.
1. Tweak the environment simulation parameters.
2. Tweak the environment termination parameters.
3. Tweak the environment reward parameters.
4. Tweak the environment observation parameters.
3. Train an agent to move your robot.
"""
# The reader is expected to be familiar with the `Gymnasium` API & library, the basics of robotics,
# and the included `Gymnasium/MuJoCo` environments with the robot model they use.
# Familiarity with the **MJCF** file model format and the `MuJoCo` simulator is not required but is recommended.
# %%
# Setup
# -----
# We will need `gymnasium>=1.0.0`.
import numpy as np
import gymnasium as gym
# Make sure Gymnasium is properly installed
# You can run this in your terminal:
# pip install "gymnasium>=1.0.0"
# %%
# Step 0.1 - Download a Robot Model
# ---------------------------------
# In this tutorial we will load the Unitree Go1 robot from the excellent MuJoCo Menagerie robot model collection.
# Go1 is a quadruped robot, controlling it to move is a significant learning problem,
# much harder than the `Gymnasium/MuJoCo/Ant` environment.
#
# Note: The original tutorial includes an image of the Unitree Go1 robot in a flat terrain scene.
# You can view this image at: https://github.com/google-deepmind/mujoco_menagerie/blob/main/unitree_go1/go1.png?raw=true
# You can download the whole MuJoCo Menagerie collection (which includes `Go1`):
# git clone https://github.com/google-deepmind/mujoco_menagerie.git
# You can use any other quadruped robot with this tutorial, just adjust the environment parameter values for your robot.
# %%
# Step 1 - Load the model
# -----------------------
# To load the model, all we have to do is use the `xml_file` argument with the `Ant-v5` framework.
# Basic loading (uncomment to use)
# env = gym.make('Ant-v5', xml_file='./mujoco_menagerie/unitree_go1/scene.xml')
# Although this is enough to load the model, we will need to tweak some environment parameters
# to get the desired behavior for our environment, so we will also explicitly set the simulation,
# termination, reward and observation arguments, which we will tweak in the next step.
# Initial load: every reward weight is zeroed and the termination range is
# wide open; these values are refined one group at a time in step 2.
env = gym.make(
    "Ant-v5",
    xml_file="./mujoco_menagerie/unitree_go1/scene.xml",
    # reward arguments
    forward_reward_weight=0,
    ctrl_cost_weight=0,
    contact_cost_weight=0,
    healthy_reward=0,
    main_body=1,
    # termination arguments
    healthy_z_range=(0, np.inf),
    # observation arguments
    include_cfrc_ext_in_observation=True,
    exclude_current_positions_from_observation=False,
    # simulation arguments
    reset_noise_scale=0,
    frame_skip=1,
    max_episode_steps=1000,
)
# %%
# Step 2 - Tweaking the Environment Parameters
# --------------------------------------------
# Tweaking the environment parameters is essential to get the desired behavior for learning.
# In the following subsections, the reader is encouraged to consult the documentation of
# the arguments for more detailed information.
# %%
# Step 2.1 - Tweaking the Environment Simulation Parameters
# ---------------------------------------------------------
# The arguments of interest are `frame_skip`, `reset_noise_scale` and `max_episode_steps`.
# We want to tweak the `frame_skip` parameter to get `dt` to an acceptable value
# (typical values are `dt` ∈ [0.01, 0.1] seconds),
# Reminder: dt = frame_skip × model.opt.timestep, where `model.opt.timestep` is the integrator
# time step selected in the MJCF model file.
# The `Go1` model we are using has an integrator timestep of `0.002`, so by selecting
# `frame_skip=25` we can set the value of `dt` to `0.05s`.
# To avoid overfitting the policy, `reset_noise_scale` should be set to a value appropriate
# to the size of the robot, we want the value to be as large as possible without the initial
# distribution of states being invalid (`Terminal` regardless of control actions),
# for `Go1` we choose a value of `0.1`.
# And `max_episode_steps` determines the number of steps per episode before `truncation`,
# here we set it to 1000 to be consistent with the base `Gymnasium/MuJoCo` environments,
# but if you need something higher you can set it so.
# Same environment as before, now with the simulation arguments chosen.
env = gym.make(
    "Ant-v5",
    xml_file="./mujoco_menagerie/unitree_go1/scene.xml",
    # reward arguments (still zeroed)
    forward_reward_weight=0,
    ctrl_cost_weight=0,
    contact_cost_weight=0,
    healthy_reward=0,
    main_body=1,
    # termination arguments (still wide open)
    healthy_z_range=(0, np.inf),
    # observation arguments
    include_cfrc_ext_in_observation=True,
    exclude_current_positions_from_observation=False,
    # simulation arguments
    reset_noise_scale=0.1,  # set to avoid policy overfitting
    frame_skip=25,  # set dt=0.05
    max_episode_steps=1000,  # kept at 1000
)
# %%
# Step 2.2 - Tweaking the Environment Termination Parameters
# ----------------------------------------------------------
# Termination is important for robot environments to avoid sampling "useless" time steps.
# The arguments of interest are `terminate_when_unhealthy` and `healthy_z_range`.
# We want to set `healthy_z_range` to terminate the environment when the robot falls over,
# or jumps really high, here we have to choose a value that is logical for the height of the robot,
# for `Go1` we choose `(0.195, 0.75)`.
# Note: `healthy_z_range` checks the absolute value of the height of the robot,
# so if your scene contains different levels of elevation it should be set to `(-np.inf, np.inf)`
# We could also set `terminate_when_unhealthy=False` to disable termination altogether,
# which is not desirable in the case of `Go1`.
# Same environment as before, now with the termination range chosen.
env = gym.make(
    "Ant-v5",
    xml_file="./mujoco_menagerie/unitree_go1/scene.xml",
    # reward arguments (still zeroed)
    forward_reward_weight=0,
    ctrl_cost_weight=0,
    contact_cost_weight=0,
    healthy_reward=0,
    main_body=1,
    # termination arguments:
    # set to avoid sampling steps where the robot has fallen or jumped too high
    healthy_z_range=(0.195, 0.75),
    # observation arguments
    include_cfrc_ext_in_observation=True,
    exclude_current_positions_from_observation=False,
    # simulation arguments
    reset_noise_scale=0.1,
    frame_skip=25,
    max_episode_steps=1000,
)
# Note: If you need a different termination condition, you can write your own `TerminationWrapper`
# (see the documentation).
# %%
# Step 2.3 - Tweaking the Environment Reward Parameters
# -----------------------------------------------------
# The arguments of interest are `forward_reward_weight`, `ctrl_cost_weight`, `contact_cost_weight`,
# `healthy_reward`, and `main_body`.
# For the arguments `forward_reward_weight`, `ctrl_cost_weight`, `contact_cost_weight` and `healthy_reward`
# we have to pick values that make sense for our robot, you can use the default `MuJoCo/Ant`
# parameters for references and tweak them if a change is needed for your environment.
# In the case of `Go1` we only change the `ctrl_cost_weight` since it has a higher actuator force range.
# For the argument `main_body` we have to choose which body part is the main body
# (usually called something like "torso" or "trunk" in the model file) for the calculation
# of the `forward_reward`, in the case of `Go1` it is the `"trunk"`
# (Note: in most cases including this one, it can be left at the default value).
# Same environment as before, now with the reward arguments chosen.
env = gym.make(
    "Ant-v5",
    xml_file="./mujoco_menagerie/unitree_go1/scene.xml",
    # reward arguments
    forward_reward_weight=1,  # kept the same as the 'Ant' environment
    ctrl_cost_weight=0.05,  # changed because of the stronger motors of `Go1`
    contact_cost_weight=5e-4,  # kept the same as the 'Ant' environment
    healthy_reward=1,  # kept the same as the 'Ant' environment
    main_body=1,  # represents the "trunk" of the `Go1` robot
    # termination arguments
    healthy_z_range=(0.195, 0.75),
    # observation arguments
    include_cfrc_ext_in_observation=True,
    exclude_current_positions_from_observation=False,
    # simulation arguments
    reset_noise_scale=0.1,
    frame_skip=25,
    max_episode_steps=1000,
)
# Note: If you need a different reward function, you can write your own `RewardWrapper`
# (see the documentation).
# %%
# Step 2.4 - Tweaking the Environment Observation Parameters
# ----------------------------------------------------------
# The arguments of interest are `include_cfrc_ext_in_observation` and
# `exclude_current_positions_from_observation`.
# Here for `Go1` we have no particular reason to change them.
# Same environment as before; the observation arguments are left unchanged.
env = gym.make(
    "Ant-v5",
    xml_file="./mujoco_menagerie/unitree_go1/scene.xml",
    # reward arguments
    forward_reward_weight=1,
    ctrl_cost_weight=0.05,
    contact_cost_weight=5e-4,
    healthy_reward=1,
    main_body=1,
    # termination arguments
    healthy_z_range=(0.195, 0.75),
    # observation arguments
    include_cfrc_ext_in_observation=True,  # kept the same as the 'Ant' environment
    exclude_current_positions_from_observation=False,  # kept the same as the 'Ant' environment
    # simulation arguments
    reset_noise_scale=0.1,
    frame_skip=25,
    max_episode_steps=1000,
)
# Note: If you need additional observation elements (such as additional sensors),
# you can write your own `ObservationWrapper` (see the documentation).
# %%
# Step 3 - Train your Agent
# -------------------------
# Finally, we are done, we can use a RL algorithm to train an agent to walk/run the `Go1` robot.
# Note: If you have followed this guide with your own robot model, you may discover
# during training that some environment parameters were not as desired,
# feel free to go back to step 2 and change anything as needed.
def main():
    """Run the final Go1 environment setup.

    Builds the environment with the parameters chosen in step 2, drives it
    with 100 randomly sampled actions (resetting whenever an episode ends),
    closes it and prints a confirmation message.
    """
    # Note: The original tutorial includes an image showing the Go1 robot in the environment.
    # The image is available at: https://github.com/Kallinteris-Andreas/Gymnasium-kalli/assets/30759571/bf1797a3-264d-47de-b14c-e3c16072f695
    env = gym.make(
        "Ant-v5",
        xml_file="./mujoco_menagerie/unitree_go1/scene.xml",
        forward_reward_weight=1,
        ctrl_cost_weight=0.05,
        contact_cost_weight=5e-4,
        healthy_reward=1,
        main_body=1,
        healthy_z_range=(0.195, 0.75),
        include_cfrc_ext_in_observation=True,
        exclude_current_positions_from_observation=False,
        reset_noise_scale=0.1,
        frame_skip=25,
        max_episode_steps=1000,
        render_mode="rgb_array",  # Change to "human" to visualize
    )

    # ``finally`` guarantees the environment is closed even if a step raises
    try:
        # Example of running the environment for a few steps
        obs, info = env.reset()
        for _ in range(100):
            action = env.action_space.sample()  # Replace with your agent's action
            obs, reward, terminated, truncated, info = env.step(action)
            if terminated or truncated:
                obs, info = env.reset()
    finally:
        env.close()

    print("Environment tested successfully!")
    # Now you would typically:
    # 1. Set up your RL algorithm
    # 2. Train the agent
    # 3. Evaluate the agent's performance
# %%
# Epilogue
# -------------------------
# You can follow this guide to create most quadruped environments.
# To create humanoid/bipedal robots, you can also follow this guide using the `Gymnasium/MuJoCo/Humanoid-v5` framework.
#
# Note: The original tutorial includes a video demonstration of the trained Go1 robot walking.
# The video shows the robot achieving a speed of up to 4.7 m/s according to the manufacturer.
# In the original tutorial, this video is embedded from:
# https://odysee.com/$/embed/@Kallinteris-Andreas:7/video0-step-0-to-step-1000:1?r=6fn5jA9uZQUZXGKVpwtqjz1eyJcS3hj3
# Author: @kallinteris-andreas (https://github.com/Kallinteris-Andreas)

View File

@@ -0,0 +1,104 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n# Implementing Custom Wrappers\n\nIn this tutorial we will describe how to implement your own custom wrappers.\n\nWrappers are a great way to add functionality to your environments in a modular way.\nThis will save you a lot of boilerplate code.\n\nWe will show how to create a wrapper by\n\n- Inheriting from :class:`gymnasium.ObservationWrapper`\n- Inheriting from :class:`gymnasium.ActionWrapper`\n- Inheriting from :class:`gymnasium.RewardWrapper`\n- Inheriting from :class:`gymnasium.Wrapper`\n\nBefore following this tutorial, make sure to check out the docs of the :mod:`gymnasium.wrappers` module.\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Inheriting from :class:`gymnasium.ObservationWrapper`\nObservation wrappers are useful if you want to apply some function to the observations that are returned\nby an environment. If you implement an observation wrapper, you only need to define this transformation\nby implementing the :meth:`gymnasium.ObservationWrapper.observation` method. Moreover, you should remember to\nupdate the observation space, if the transformation changes the shape of observations (e.g. by transforming\ndictionaries into numpy arrays, as in the following example).\n\nImagine you have a 2D navigation task where the environment returns dictionaries as observations with\nkeys ``\"agent\"`` and ``\"target\"``. A common thing to do might be to throw away some degrees of\nfreedom and only consider the position of the target relative to the agent, i.e.\n``observation[\"target\"] - observation[\"agent\"]``. For this, you could implement an\nobservation wrapper like this:\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import numpy as np\n\nimport gymnasium as gym\nfrom gymnasium import ActionWrapper, ObservationWrapper, RewardWrapper, Wrapper\nfrom gymnasium.spaces import Box, Discrete\n\n\nclass RelativePosition(ObservationWrapper):\n def __init__(self, env):\n super().__init__(env)\n self.observation_space = Box(shape=(2,), low=-np.inf, high=np.inf)\n\n def observation(self, obs):\n return obs[\"target\"] - obs[\"agent\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Inheriting from :class:`gymnasium.ActionWrapper`\nAction wrappers can be used to apply a transformation to actions before applying them to the environment.\nIf you implement an action wrapper, you need to define that transformation by implementing\n:meth:`gymnasium.ActionWrapper.action`. Moreover, you should specify the domain of that transformation\nby updating the action space of the wrapper.\n\nLet\u2019s say you have an environment with action space of type :class:`gymnasium.spaces.Box`, but you would only like\nto use a finite subset of actions. Then, you might want to implement the following wrapper:\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"class DiscreteActions(ActionWrapper):\n def __init__(self, env, disc_to_cont):\n super().__init__(env)\n self.disc_to_cont = disc_to_cont\n self.action_space = Discrete(len(disc_to_cont))\n\n def action(self, act):\n return self.disc_to_cont[act]\n\n\nenv = gym.make(\"LunarLanderContinuous-v3\")\n# print(env.action_space) # Box(-1.0, 1.0, (2,), float32)\nwrapped_env = DiscreteActions(\n env, [np.array([1, 0]), np.array([-1, 0]), np.array([0, 1]), np.array([0, -1])]\n)\n# print(wrapped_env.action_space) # Discrete(4)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Inheriting from :class:`gymnasium.RewardWrapper`\nReward wrappers are used to transform the reward that is returned by an environment.\nAs for the previous wrappers, you need to specify that transformation by implementing the\n:meth:`gymnasium.RewardWrapper.reward` method.\n\nLet us look at an example: Sometimes (especially when we do not have control over the reward\nbecause it is intrinsic), we want to clip the reward to a range to gain some numerical stability.\nTo do that, we could, for instance, implement the following wrapper:\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from typing import SupportsFloat\n\n\nclass ClipReward(RewardWrapper):\n def __init__(self, env, min_reward, max_reward):\n super().__init__(env)\n self.min_reward = min_reward\n self.max_reward = max_reward\n\n def reward(self, r: SupportsFloat) -> SupportsFloat:\n return np.clip(r, self.min_reward, self.max_reward)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Inheriting from :class:`gymnasium.Wrapper`\nSometimes you might need to implement a wrapper that does some more complicated modifications (e.g. modify the\nreward based on data in ``info`` or change the rendering behavior).\nSuch wrappers can be implemented by inheriting from :class:`gymnasium.Wrapper`.\n\n- You can set a new action or observation space by defining ``self.action_space`` or ``self.observation_space`` in ``__init__``, respectively\n- You can set new metadata by defining ``self.metadata`` in ``__init__``\n- You can override :meth:`gymnasium.Wrapper.step`, :meth:`gymnasium.Wrapper.render`, :meth:`gymnasium.Wrapper.close` etc.\n\nIf you do this, you can access the environment that was passed\nto your wrapper (which *still* might be wrapped in some other wrapper) by accessing the attribute :attr:`env`.\n\nLet's also take a look at an example for this case. Most MuJoCo environments return a reward that consists\nof different terms: For instance, there might be a term that rewards the agent for completing the task and one term that\npenalizes large actions (i.e. energy usage). Usually, you can pass weight parameters for those terms during\ninitialization of the environment. However, *Reacher* does not allow you to do this! Nevertheless, all individual terms\nof the reward are returned in `info`, so let us build a wrapper for Reacher that allows us to weight those terms:\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"class ReacherRewardWrapper(Wrapper):\n def __init__(self, env, reward_dist_weight, reward_ctrl_weight):\n super().__init__(env)\n self.reward_dist_weight = reward_dist_weight\n self.reward_ctrl_weight = reward_ctrl_weight\n\n def step(self, action):\n obs, _, terminated, truncated, info = self.env.step(action)\n reward = (\n self.reward_dist_weight * info[\"reward_dist\"]\n + self.reward_ctrl_weight * info[\"reward_ctrl\"]\n )\n return obs, reward, terminated, truncated, info"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.11"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

View File

@@ -0,0 +1,83 @@
"""
Handling Time Limits
====================
This tutorial explains how time limits should be correctly handled with `termination` and `truncation` signals.
The ``done`` signal received (in previous versions of OpenAI Gym < 0.26) from ``env.step`` indicated whether an episode has ended.
However, this signal did not distinguish whether the episode ended due to ``termination`` or ``truncation``.
Termination
-----------
Termination refers to the episode ending after reaching a terminal state that is defined as part of the environment
definition. Examples are - task success, task failure, robot falling down etc. Notably, this also includes episodes
ending in finite-horizon environments due to a time-limit inherent to the environment. Note that to preserve Markov
property, a representation of the remaining time must be present in the agent's observation in finite-horizon environments.
`(Reference) <https://arxiv.org/abs/1712.00378>`_
Truncation
----------
Truncation refers to the episode ending after an externally defined condition (that is outside the scope of the Markov
Decision Process). This could be a time-limit, a robot going out of bounds etc.
An infinite-horizon environment is an obvious example of where this is needed. We cannot wait forever for the episode
to complete, so we set a practical time-limit after which we forcibly halt the episode. The last state in this case is
not a terminal state since it has a non-zero transition probability of moving to another state as per the Markov
Decision Process that defines the RL problem. This is also different from time-limits in finite horizon environments
as the agent in this case has no idea about this time-limit.
"""
# %%
# Importance in learning code
# ---------------------------
# Bootstrapping (using one or more estimated values of a variable to update estimates of the same variable) is a key
# aspect of Reinforcement Learning. A value function will tell you how much discounted reward you will get from a
# particular state if you follow a given policy. When an episode stops at any given point, by looking at the value of
# the final state, the agent is able to estimate how much discounted reward could have been obtained if the episode had
# continued. This is an example of handling truncation.
#
# More formally, a common example of bootstrapping in RL is updating the estimate of the Q-value function,
#
# .. math::
#     Q_{target}(o_t, a_t) = r_t + \gamma \cdot \max_{a} Q(o_{t+1}, a)
#
#
# In classical RL, the new ``Q`` estimate is a weighted average of the previous ``Q`` estimate and ``Q_target`` while in Deep
# Q-Learning, the error between ``Q_target`` and the previous ``Q`` estimate is minimized.
#
# However, at the terminal state, bootstrapping is not done,
#
# .. math::
# Q_{target}(o_t, a_t) = r_t
#
# This is where the distinction between termination and truncation becomes important. When an episode ends due to
# termination we don't bootstrap, when it ends due to truncation, we bootstrap.
#
# While using gymnasium environments, the ``done`` signal (default for < v0.26) is frequently used to determine whether to
# bootstrap or not. However, this is incorrect since it does not differentiate between termination and truncation.
#
# A simple example of value functions is shown below. This is an illustrative example and not part of any specific algorithm.
#
# .. code:: python
#
# # INCORRECT
# vf_target = rew + gamma * (1 - done) * vf_next_state
#
# This is incorrect in the case of episode ending due to a truncation, where bootstrapping needs to happen but it doesn't.
# %%
# Solution
# ----------
#
# From v0.26 onwards, Gymnasium's ``env.step`` API returns both termination and truncation information explicitly.
# In the previous version truncation information was supplied through the info key ``TimeLimit.truncated``.
# The correct way to handle terminations and truncations now is,
#
# .. code:: python
#
# # terminated = done and 'TimeLimit.truncated' not in info
# # This was needed in previous versions.
#
# vf_target = rew + gamma * (1 - terminated) * vf_next_state

View File

@@ -0,0 +1,212 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n# Load custom quadruped robot environments\n\nIn this tutorial, we will create a MuJoCo quadruped walking environment using a model file (ending in `.xml`) without having to create a new class.\n\nSteps:\n\n0. Get your **MJCF** (or **URDF**) model file of your robot.\n - Create your own model (see the MuJoCo Guide) or,\n - Find a ready-made model (in this tutorial, we will use a model from the MuJoCo Menagerie collection).\n1. Load the model with the `xml_file` argument.\n2. Tweak the environment parameters to get the desired behavior.\n 1. Tweak the environment simulation parameters.\n 2. Tweak the environment termination parameters.\n 3. Tweak the environment reward parameters.\n 4. Tweak the environment observation parameters.\n3. Train an agent to move your robot.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# The reader is expected to be familiar with the `Gymnasium` API & library, the basics of robotics,\n# and the included `Gymnasium/MuJoCo` environments with the robot model they use.\n# Familiarity with the **MJCF** file model format and the `MuJoCo` simulator is not required but is recommended."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Setup\nWe will need `gymnasium>=1.0.0`.\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import numpy as np\n\nimport gymnasium as gym\n\n\n# Make sure Gymnasium is properly installed\n# You can run this in your terminal:\n# pip install \"gymnasium>=1.0.0\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 0.1 - Download a Robot Model\nIn this tutorial we will load the Unitree Go1 robot from the excellent MuJoCo Menagerie robot model collection.\nGo1 is a quadruped robot, controlling it to move is a significant learning problem,\nmuch harder than the `Gymnasium/MuJoCo/Ant` environment.\n\nNote: The original tutorial includes an image of the Unitree Go1 robot in a flat terrain scene.\nYou can view this image at: https://github.com/google-deepmind/mujoco_menagerie/blob/main/unitree_go1/go1.png?raw=true\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# You can download the whole MuJoCo Menagerie collection (which includes `Go1`):\n# git clone https://github.com/google-deepmind/mujoco_menagerie.git\n\n# You can use any other quadruped robot with this tutorial, just adjust the environment parameter values for your robot."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 1 - Load the model\nTo load the model, all we have to do is use the `xml_file` argument with the `Ant-v5` framework.\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Basic loading (uncomment to use)\n# env = gym.make('Ant-v5', xml_file='./mujoco_menagerie/unitree_go1/scene.xml')\n\n# Although this is enough to load the model, we will need to tweak some environment parameters\n# to get the desired behavior for our environment, so we will also explicitly set the simulation,\n# termination, reward and observation arguments, which we will tweak in the next step.\n\nenv = gym.make(\n \"Ant-v5\",\n xml_file=\"./mujoco_menagerie/unitree_go1/scene.xml\",\n forward_reward_weight=0,\n ctrl_cost_weight=0,\n contact_cost_weight=0,\n healthy_reward=0,\n main_body=1,\n healthy_z_range=(0, np.inf),\n include_cfrc_ext_in_observation=True,\n exclude_current_positions_from_observation=False,\n reset_noise_scale=0,\n frame_skip=1,\n max_episode_steps=1000,\n)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 2 - Tweaking the Environment Parameters\nTweaking the environment parameters is essential to get the desired behavior for learning.\nIn the following subsections, the reader is encouraged to consult the documentation of\nthe arguments for more detailed information.\n\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 2.1 - Tweaking the Environment Simulation Parameters\nThe arguments of interest are `frame_skip`, `reset_noise_scale` and `max_episode_steps`.\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# We want to tweak the `frame_skip` parameter to get `dt` to an acceptable value\n# (typical values are `dt` \u2208 [0.01, 0.1] seconds),\n\n# Reminder: dt = frame_skip \u00d7 model.opt.timestep, where `model.opt.timestep` is the integrator\n# time step selected in the MJCF model file.\n\n# The `Go1` model we are using has an integrator timestep of `0.002`, so by selecting\n# `frame_skip=25` we can set the value of `dt` to `0.05s`.\n\n# To avoid overfitting the policy, `reset_noise_scale` should be set to a value appropriate\n# to the size of the robot, we want the value to be as large as possible without the initial\n# distribution of states being invalid (`Terminal` regardless of control actions),\n# for `Go1` we choose a value of `0.1`.\n\n# And `max_episode_steps` determines the number of steps per episode before `truncation`,\n# here we set it to 1000 to be consistent with the base `Gymnasium/MuJoCo` environments,\n# but if you need something higher you can set it so.\n\nenv = gym.make(\n \"Ant-v5\",\n xml_file=\"./mujoco_menagerie/unitree_go1/scene.xml\",\n forward_reward_weight=0,\n ctrl_cost_weight=0,\n contact_cost_weight=0,\n healthy_reward=0,\n main_body=1,\n healthy_z_range=(0, np.inf),\n include_cfrc_ext_in_observation=True,\n exclude_current_positions_from_observation=False,\n reset_noise_scale=0.1, # set to avoid policy overfitting\n frame_skip=25, # set dt=0.05\n max_episode_steps=1000, # kept at 1000\n)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 2.2 - Tweaking the Environment Termination Parameters\nTermination is important for robot environments to avoid sampling \"useless\" time steps.\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# The arguments of interest are `terminate_when_unhealthy` and `healthy_z_range`.\n\n# We want to set `healthy_z_range` to terminate the environment when the robot falls over,\n# or jumps really high, here we have to choose a value that is logical for the height of the robot,\n# for `Go1` we choose `(0.195, 0.75)`.\n# Note: `healthy_z_range` checks the absolute value of the height of the robot,\n# so if your scene contains different levels of elevation it should be set to `(-np.inf, np.inf)`\n\n# We could also set `terminate_when_unhealthy=False` to disable termination altogether,\n# which is not desirable in the case of `Go1`.\n\nenv = gym.make(\n \"Ant-v5\",\n xml_file=\"./mujoco_menagerie/unitree_go1/scene.xml\",\n forward_reward_weight=0,\n ctrl_cost_weight=0,\n contact_cost_weight=0,\n healthy_reward=0,\n main_body=1,\n healthy_z_range=(\n 0.195,\n 0.75,\n ), # set to avoid sampling steps where the robot has fallen or jumped too high\n include_cfrc_ext_in_observation=True,\n exclude_current_positions_from_observation=False,\n reset_noise_scale=0.1,\n frame_skip=25,\n max_episode_steps=1000,\n)\n\n# Note: If you need a different termination condition, you can write your own `TerminationWrapper`\n# (see the documentation)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 2.3 - Tweaking the Environment Reward Parameters\nThe arguments of interest are `forward_reward_weight`, `ctrl_cost_weight`, `contact_cost_weight`,\n`healthy_reward`, and `main_body`.\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# For the arguments `forward_reward_weight`, `ctrl_cost_weight`, `contact_cost_weight` and `healthy_reward`\n# we have to pick values that make sense for our robot, you can use the default `MuJoCo/Ant`\n# parameters for references and tweak them if a change is needed for your environment.\n# In the case of `Go1` we only change the `ctrl_cost_weight` since it has a higher actuator force range.\n\n# For the argument `main_body` we have to choose which body part is the main body\n# (usually called something like \"torso\" or \"trunk\" in the model file) for the calculation\n# of the `forward_reward`, in the case of `Go1` it is the `\"trunk\"`\n# (Note: in most cases including this one, it can be left at the default value).\n\nenv = gym.make(\n \"Ant-v5\",\n xml_file=\"./mujoco_menagerie/unitree_go1/scene.xml\",\n forward_reward_weight=1, # kept the same as the 'Ant' environment\n ctrl_cost_weight=0.05, # changed because of the stronger motors of `Go1`\n contact_cost_weight=5e-4, # kept the same as the 'Ant' environment\n healthy_reward=1, # kept the same as the 'Ant' environment\n main_body=1, # represents the \"trunk\" of the `Go1` robot\n healthy_z_range=(0.195, 0.75),\n include_cfrc_ext_in_observation=True,\n exclude_current_positions_from_observation=False,\n reset_noise_scale=0.1,\n frame_skip=25,\n max_episode_steps=1000,\n)\n\n# Note: If you need a different reward function, you can write your own `RewardWrapper`\n# (see the documentation)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 2.4 - Tweaking the Environment Observation Parameters\nThe arguments of interest are `include_cfrc_ext_in_observation` and\n`exclude_current_positions_from_observation`.\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Here for `Go1` we have no particular reason to change them.\n\nenv = gym.make(\n \"Ant-v5\",\n xml_file=\"./mujoco_menagerie/unitree_go1/scene.xml\",\n forward_reward_weight=1,\n ctrl_cost_weight=0.05,\n contact_cost_weight=5e-4,\n healthy_reward=1,\n main_body=1,\n healthy_z_range=(0.195, 0.75),\n include_cfrc_ext_in_observation=True, # kept the same as the 'Ant' environment\n exclude_current_positions_from_observation=False, # kept the same as the 'Ant' environment\n reset_noise_scale=0.1,\n frame_skip=25,\n max_episode_steps=1000,\n)\n\n\n# Note: If you need additional observation elements (such as additional sensors),\n# you can write your own `ObservationWrapper` (see the documentation)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 3 - Train your Agent\nFinally, we are done, we can use a RL algorithm to train an agent to walk/run the `Go1` robot.\nNote: If you have followed this guide with your own robot model, you may discover\nduring training that some environment parameters were not as desired,\nfeel free to go back to step 2 and change anything as needed.\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def main():\n \"\"\"Run the final Go1 environment setup.\"\"\"\n # Note: The original tutorial includes an image showing the Go1 robot in the environment.\n # The image is available at: https://github.com/Kallinteris-Andreas/Gymnasium-kalli/assets/30759571/bf1797a3-264d-47de-b14c-e3c16072f695\n\n env = gym.make(\n \"Ant-v5\",\n xml_file=\"./mujoco_menagerie/unitree_go1/scene.xml\",\n forward_reward_weight=1,\n ctrl_cost_weight=0.05,\n contact_cost_weight=5e-4,\n healthy_reward=1,\n main_body=1,\n healthy_z_range=(0.195, 0.75),\n include_cfrc_ext_in_observation=True,\n exclude_current_positions_from_observation=False,\n reset_noise_scale=0.1,\n frame_skip=25,\n max_episode_steps=1000,\n render_mode=\"rgb_array\", # Change to \"human\" to visualize\n )\n\n # Example of running the environment for a few steps\n obs, info = env.reset()\n\n for _ in range(100):\n action = env.action_space.sample() # Replace with your agent's action\n obs, reward, terminated, truncated, info = env.step(action)\n\n if terminated or truncated:\n obs, info = env.reset()\n\n env.close()\n print(\"Environment tested successfully!\")\n\n # Now you would typically:\n # 1. Set up your RL algorithm\n # 2. Train the agent\n # 3. Evaluate the agent's performance"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Epilogue\nYou can follow this guide to create most quadruped environments.\nTo create humanoid/bipedal robots, you can also follow this guide using the `Gymnasium/MuJoCo/Humanoid-v5` framework.\n\nNote: The original tutorial includes a video demonstration of the trained Go1 robot walking.\nThe video shows the robot achieving a speed of up to 4.7 m/s according to the manufacturer.\nIn the original tutorial, this video is embedded from:\nhttps://odysee.com/$/embed/@Kallinteris-Andreas:7/video0-step-0-to-step-1000:1?r=6fn5jA9uZQUZXGKVpwtqjz1eyJcS3hj3\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Author: @kallinteris-andreas (https://github.com/Kallinteris-Andreas)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.11"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

View File

@@ -0,0 +1,724 @@
"""
Speeding up A2C Training with Vector Envs
=========================================
This tutorial demonstrates training with vector environments to speed it up.
"""
# %%
# Notice
# ------
#
# If you encounter a RuntimeError like the one quoted below, raised from multiprocessing/spawn.py, wrap the code from the ``gym.make_vec`` or ``gym.vector.AsyncVectorEnv`` call to the end of the script in an ``if __name__ == '__main__':`` block.
#
# ``An attempt has been made to start a new process before the current process has finished its bootstrapping phase.``
#
# %%
#
# ------------------------------
#
# %%
# Introduction
# ------------
#
# In this tutorial, you'll learn how to use vectorized environments to train an Advantage Actor-Critic agent.
# We are going to use A2C, which is the synchronous version of the A3C algorithm [1].
#
# Vectorized environments [3] can help to achieve quicker and more robust training by allowing multiple instances
# of the same environment to run in parallel (on multiple CPUs). This can significantly reduce the variance and thus speed up the training.
#
# We will implement an Advantage Actor-Critic from scratch to look at how you can feed batched states into your networks to get a vector of actions
# (one action per environment) and calculate the losses for actor and critic on minibatches of transitions.
# Each minibatch contains the transitions of one sampling phase: `n_steps_per_update` steps are executed in `n_envs` environments in parallel
# (multiply the two to get the number of transitions in a minibatch). After each sampling phase, the losses are calculated and one gradient step is executed.
# To calculate the advantages, we are going to use the Generalized Advantage Estimation (GAE) method [2], which balances the tradeoff
# between variance and bias of the advantage estimates.
#
# The A2C agent class is initialized with the number of features of the input state, the number of actions the agent can take,
# the learning rates and the number of environments that run in parallel to collect experiences. The actor and critic networks are defined
# and their respective optimizers are initialized. The forward pass of the networks takes in a batched vector of states and returns a tensor of state values
# and a tensor of action logits. The select_action method returns a tuple of the chosen actions, the log-probs of those actions, and the state values for each action.
# In addition, it also returns the entropy of the policy distribution, which is subtracted from the loss later (with a weighting factor `ent_coef`) to encourage exploration.
#
# The get_losses function calculates the losses for the actor and critic networks (using GAE), which are then updated using the update_parameters function.
#
# %%
#
# ------------------------------
#
# Author: Till Zemann
# License: MIT License
from __future__ import annotations
import os
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
from torch import optim
from tqdm import tqdm
import gymnasium as gym
# %%
# Advantage Actor-Critic (A2C)
# ----------------------------
#
# The Actor-Critic combines elements of value-based and policy-based methods. In A2C, the agent has two separate neural networks:
# a critic network that estimates the state-value function, and an actor network that outputs logits for a categorical probability distribution over all actions.
# The critic network is trained to minimize the mean squared error between the predicted state values and the actual returns received by the agent
# (this is equivalent to minimizing the squared advantages, because the advantage of an action is defined as the difference between the return and the state-value: A(s,a) = Q(s,a) - V(s)).
# The actor network is trained to maximize the expected return by selecting actions that have high expected values according to the critic network.
#
# The focus of this tutorial will not be on the details of A2C itself. Instead, the tutorial will focus on how to use vectorized environments
# and domain randomization to accelerate the training process for A2C (and other reinforcement learning algorithms).
#
# %%
#
# ------------------------------
#
class A2C(nn.Module):
    """
    (Synchronous) Advantage Actor-Critic agent class

    The agent owns two independent multilayer perceptrons, a critic that estimates V(s)
    and an actor that outputs action logits, each with its own RMSprop optimizer.

    Args:
        n_features: The number of features of the input state.
        n_actions: The number of actions the agent can take.
        device: The device to run the computations on (running on a GPU might be quicker for larger Neural Nets,
                for this code CPU is totally fine).
        critic_lr: The learning rate for the critic network (should usually be larger than the actor_lr).
        actor_lr: The learning rate for the actor network.
        n_envs: The number of environments that run in parallel (on multiple CPUs) to collect experiences.
    """

    def __init__(
        self,
        n_features: int,
        n_actions: int,
        device: torch.device,
        critic_lr: float,
        actor_lr: float,
        n_envs: int,
    ) -> None:
        """Initializes the actor and critic networks and their respective optimizers."""
        super().__init__()
        self.device = device
        self.n_envs = n_envs

        critic_layers = [
            nn.Linear(n_features, 32),
            nn.ReLU(),
            nn.Linear(32, 32),
            nn.ReLU(),
            nn.Linear(32, 1),  # estimate V(s)
        ]

        actor_layers = [
            nn.Linear(n_features, 32),
            nn.ReLU(),
            nn.Linear(32, 32),
            nn.ReLU(),
            nn.Linear(
                32, n_actions
            ),  # estimate action logits (will be fed into a softmax later)
        ]

        # define actor and critic networks
        self.critic = nn.Sequential(*critic_layers).to(self.device)
        self.actor = nn.Sequential(*actor_layers).to(self.device)

        # define optimizers for actor and critic
        self.critic_optim = optim.RMSprop(self.critic.parameters(), lr=critic_lr)
        self.actor_optim = optim.RMSprop(self.actor.parameters(), lr=actor_lr)

    def forward(self, x: np.ndarray) -> tuple[torch.Tensor, torch.Tensor]:
        """
        Forward pass of the networks.

        Args:
            x: A batched vector of states, with shape [n_envs, n_features].

        Returns:
            state_values: A tensor with the state values, with shape [n_envs, 1]
                (the critic's last layer has a single output unit).
            action_logits_vec: A tensor with the action logits, with shape [n_envs, n_actions].
        """
        # `torch.tensor` always copies the data and makes dtype/device explicit; the legacy
        # `torch.Tensor(...)` constructor is discouraged for building tensors from data.
        x = torch.tensor(x, dtype=torch.get_default_dtype(), device=self.device)
        state_values = self.critic(x)  # shape: [n_envs, 1]
        action_logits_vec = self.actor(x)  # shape: [n_envs, n_actions]
        return (state_values, action_logits_vec)

    def select_action(
        self, x: np.ndarray
    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
        """
        Samples one action per environment from the current policy.

        Args:
            x: A batched vector of states, with shape [n_envs, n_features].

        Returns:
            actions: A tensor with the sampled actions, with shape [n_envs].
            action_log_probs: A tensor with the log-probs of the sampled actions, with shape [n_envs].
            state_values: A tensor with the critic's state values, with shape [n_envs, 1].
            entropy: A tensor with the entropy of each environment's action distribution, with shape [n_envs].
        """
        state_values, action_logits = self.forward(x)
        action_pd = torch.distributions.Categorical(
            logits=action_logits
        )  # implicitly uses softmax
        actions = action_pd.sample()
        action_log_probs = action_pd.log_prob(actions)
        entropy = action_pd.entropy()
        return actions, action_log_probs, state_values, entropy

    def get_losses(
        self,
        rewards: torch.Tensor,
        action_log_probs: torch.Tensor,
        value_preds: torch.Tensor,
        entropy: torch.Tensor,
        masks: torch.Tensor,
        gamma: float,
        lam: float,
        ent_coef: float,
        device: torch.device,
    ) -> tuple[torch.Tensor, torch.Tensor]:
        """
        Computes the loss of a minibatch (transitions collected in one sampling phase) for actor and critic
        using Generalized Advantage Estimation (GAE) to compute the advantages (https://arxiv.org/abs/1506.02438).

        Note: the advantage of the last step in the batch is left at zero, because no value
        prediction for its successor state is available inside the batch.

        Args:
            rewards: A tensor with the rewards for each time step in the episode, with shape [n_steps_per_update, n_envs].
            action_log_probs: A tensor with the log-probs of the actions taken at each time step in the episode, with shape [n_steps_per_update, n_envs].
            value_preds: A tensor with the state value predictions for each time step in the episode, with shape [n_steps_per_update, n_envs].
            entropy: A tensor with policy entropies; only its mean over all elements is used.
            masks: A tensor with the masks for each time step in the episode, with shape [n_steps_per_update, n_envs].
            gamma: The discount factor.
            lam: The GAE hyperparameter. (lam=1 corresponds to Monte-Carlo sampling with high variance and no bias,
                                          and lam=0 corresponds to normal TD-Learning that has a low variance but is biased
                                          because the estimates are generated by a Neural Net).
            ent_coef: The weight of the entropy bonus that is subtracted from the actor loss.
            device: The device to run the computations on (e.g. CPU or GPU).

        Returns:
            critic_loss: The critic loss for the minibatch.
            actor_loss: The actor loss for the minibatch.
        """
        T = len(rewards)
        advantages = torch.zeros(T, self.n_envs, device=device)

        # compute the advantages using GAE, walking backwards through time;
        # row T - 1 is never written and therefore stays zero
        gae = 0.0
        for t in reversed(range(T - 1)):
            td_error = (
                rewards[t] + gamma * masks[t] * value_preds[t + 1] - value_preds[t]
            )
            gae = td_error + gamma * lam * masks[t] * gae
            advantages[t] = gae

        # calculate the loss of the minibatch for actor and critic
        critic_loss = advantages.pow(2).mean()

        # give a bonus for higher entropy to encourage exploration;
        # the advantages are detached so the actor loss does not update the critic
        actor_loss = (
            -(advantages.detach() * action_log_probs).mean() - ent_coef * entropy.mean()
        )
        return (critic_loss, actor_loss)

    def update_parameters(
        self, critic_loss: torch.Tensor, actor_loss: torch.Tensor
    ) -> None:
        """
        Updates the parameters of the actor and critic networks.

        Each network takes one gradient step with its own optimizer.

        Args:
            critic_loss: The critic loss.
            actor_loss: The actor loss.
        """
        self.critic_optim.zero_grad()
        critic_loss.backward()
        self.critic_optim.step()

        self.actor_optim.zero_grad()
        actor_loss.backward()
        self.actor_optim.step()
# %%
# Using Vectorized Environments
# -----------------------------
#
# When you calculate the losses for the two Neural Networks over only one epoch, it might have a high variance. With vectorized environments,
# we can play with `n_envs` in parallel and thus get up to a linear speedup (meaning that in theory, we collect samples `n_envs` times quicker)
# that we can use to calculate the loss for the current policy and critic network. When we are using more samples to calculate the loss,
# it will have a lower variance and therefore lead to quicker learning.
#
# A2C is a synchronous method, meaning that the parameter updates to Networks take place deterministically (after each sampling phase),
# but we can still make use of asynchronous vector envs to spawn multiple processes for parallel environment execution.
#
# The simplest way to create vector environments is by calling `gym.make_vec`, which creates multiple instances of the same environment:
#
# three identical LunarLander copies, each episode truncated after 600 steps
envs = gym.make_vec(
    "LunarLander-v3",
    num_envs=3,
    max_episode_steps=600,
)
# %%
# Domain Randomization
# --------------------
#
# If we want to randomize the environment for training to get more robust agents (that can deal with different parameterizations of an environment
# and therefore might have a higher degree of generalization), we can set the desired parameters manually or use a pseudo-random number generator to generate them.
#
# Manually setting up 3 parallel 'LunarLander-v3' envs with different parameters:
# One set of physics parameters per environment; the episode limit is shared by all three.
envs = gym.vector.SyncVectorEnv(
    [
        # bind `params` as a default argument so every factory keeps its own settings
        lambda params=params: gym.make(
            "LunarLander-v3", max_episode_steps=600, **params
        )
        for params in (
            dict(
                gravity=-10.0,
                enable_wind=True,
                wind_power=15.0,
                turbulence_power=1.5,
            ),
            dict(
                gravity=-9.8,
                enable_wind=True,
                wind_power=10.0,
                turbulence_power=1.3,
            ),
            dict(gravity=-7.0, enable_wind=False),
        )
    ]
)
# %%
#
# ------------------------------
#
# Randomly generating the parameters for 3 parallel 'LunarLander-v3' envs, using `np.clip` to stay in the recommended parameter space:
#
def _make_randomized_lander():
    """Create one 'LunarLander-v3' env with freshly sampled, clipped physics parameters."""
    # the parameters are drawn when the factory is called, so every env gets its own values
    gravity = np.clip(
        np.random.normal(loc=-10.0, scale=1.0), a_min=-11.99, a_max=-0.01
    )
    enable_wind = np.random.choice([True, False])
    wind_power = np.clip(
        np.random.normal(loc=15.0, scale=1.0), a_min=0.01, a_max=19.99
    )
    turbulence_power = np.clip(
        np.random.normal(loc=1.5, scale=0.5), a_min=0.01, a_max=1.99
    )
    return gym.make(
        "LunarLander-v3",
        gravity=gravity,
        enable_wind=enable_wind,
        wind_power=wind_power,
        turbulence_power=turbulence_power,
        max_episode_steps=600,
    )


envs = gym.vector.SyncVectorEnv([_make_randomized_lander for _ in range(3)])
# %%
#
# ------------------------------
#
# Here we are using normal distributions with the standard parameterization of the environment as the mean and an arbitrary standard deviation (scale).
# Depending on the problem, you can experiment with higher variance and use different distributions as well.
#
# If you are training on the same `n_envs` environments for the entire training time, and `n_envs` is a relatively low number
# (in proportion to how complex the environment is), you might still get some overfitting to the specific parameterizations that you picked.
# To mitigate this, you can either pick a high number of randomly parameterized environments or remake your environments every couple of sampling phases
# to generate a new set of pseudo-random parameters.
#
# %%
# Setup
# -----
#
# environment hyperparams
n_envs = 10
n_updates = 1000
n_steps_per_update = 128
randomize_domain = False

# agent hyperparams
gamma = 0.999
lam = 0.95  # hyperparameter for GAE
ent_coef = 0.01  # coefficient for the entropy bonus (to encourage exploration)
actor_lr = 0.001
critic_lr = 0.005

# Note: the actor has a slower learning rate so that the value targets become
# more stationary and are therefore easier to estimate for the critic

# environment setup: identical copies by default, or randomly parameterized ones
# running in separate processes when domain randomization is requested
if not randomize_domain:
    envs = gym.make_vec("LunarLander-v3", num_envs=n_envs, max_episode_steps=600)
else:
    envs = gym.vector.AsyncVectorEnv(
        [
            lambda: gym.make(
                "LunarLander-v3",
                gravity=np.clip(
                    np.random.normal(loc=-10.0, scale=1.0), a_min=-11.99, a_max=-0.01
                ),
                enable_wind=np.random.choice([True, False]),
                wind_power=np.clip(
                    np.random.normal(loc=15.0, scale=1.0), a_min=0.01, a_max=19.99
                ),
                turbulence_power=np.clip(
                    np.random.normal(loc=1.5, scale=0.5), a_min=0.01, a_max=1.99
                ),
                max_episode_steps=600,
            )
            for _ in range(n_envs)
        ]
    )

obs_shape = envs.single_observation_space.shape[0]
action_shape = envs.single_action_space.n

# set the device (CUDA is only picked when requested and actually available)
use_cuda = False
device = torch.device("cuda" if use_cuda and torch.cuda.is_available() else "cpu")

# init the agent
agent = A2C(
    n_features=obs_shape,
    n_actions=action_shape,
    device=device,
    critic_lr=critic_lr,
    actor_lr=actor_lr,
    n_envs=n_envs,
)
# %%
# Training the A2C Agent
# ----------------------
#
# For our training loop, we are using the `RecordEpisodeStatistics` wrapper to record the episode lengths and returns and we are also saving
# the losses and entropies to plot them after the agent finished training.
#
# You may notice that we don't reset the vectorized envs at the start of each episode like we would usually do.
# This is because each environment resets automatically once the episode finishes (each environment takes a different number of timesteps to finish
# an episode because of the random seeds). As a result, we are also not collecting data in `episodes`, but rather just play a certain number of steps
# (`n_steps_per_update`) in each environment (as an example, this could mean that we play 20 timesteps to finish an episode and then
# use the rest of the timesteps to begin a new one).
#
# create a wrapper environment to save episode returns and episode lengths
envs_wrapper = gym.wrappers.vector.RecordEpisodeStatistics(
    envs, buffer_length=n_envs * n_updates
)

critic_losses = []
actor_losses = []
entropies = []

# use tqdm to get a progress bar for training
for sample_phase in tqdm(range(n_updates)):
    # we don't have to reset the envs, they just continue playing
    # until the episode is over and then reset automatically

    # reset lists that collect experiences of an episode (sample phase)
    ep_value_preds = torch.zeros(n_steps_per_update, n_envs, device=device)
    ep_rewards = torch.zeros(n_steps_per_update, n_envs, device=device)
    ep_action_log_probs = torch.zeros(n_steps_per_update, n_envs, device=device)
    ep_entropies = torch.zeros(n_steps_per_update, n_envs, device=device)
    masks = torch.zeros(n_steps_per_update, n_envs, device=device)

    # at the start of training reset all envs to get an initial state
    if sample_phase == 0:
        states, info = envs_wrapper.reset(seed=42)

    # play n steps in our parallel environments to collect data
    for step in range(n_steps_per_update):
        # select an action A_{t} using S_{t} as input for the agent
        actions, action_log_probs, state_value_preds, entropy = agent.select_action(
            states
        )

        # perform the action A_{t} in the environment to get S_{t+1} and R_{t+1}
        states, rewards, terminated, truncated, infos = envs_wrapper.step(
            actions.cpu().numpy()
        )

        ep_value_preds[step] = torch.squeeze(state_value_preds)
        ep_rewards[step] = torch.tensor(rewards, device=device)
        ep_action_log_probs[step] = action_log_probs
        # keep the entropy of every step: `entropy` is overwritten on each iteration,
        # so without this buffer only the final step would feed the entropy bonus
        ep_entropies[step] = entropy

        # add a mask (for the return calculation later);
        # for each env the mask is 1 if the episode is ongoing and 0 if it is terminated (not by truncation!)
        masks[step] = torch.tensor([not term for term in terminated])

    # calculate the losses for actor and critic
    # (the entropy bonus is averaged over the whole sampling phase)
    critic_loss, actor_loss = agent.get_losses(
        ep_rewards,
        ep_action_log_probs,
        ep_value_preds,
        ep_entropies,
        masks,
        gamma,
        lam,
        ent_coef,
        device,
    )

    # update the actor and critic networks
    agent.update_parameters(critic_loss, actor_loss)

    # log the losses and the mean entropy of the sampling phase
    critic_losses.append(critic_loss.detach().cpu().numpy())
    actor_losses.append(actor_loss.detach().cpu().numpy())
    entropies.append(ep_entropies.detach().mean().cpu().numpy())
# %%
# Plotting
# --------
#
""" plot the results """
# %matplotlib inline
rolling_length = 20

# Smooth every logged series with a moving average over `rolling_length` entries;
# mode="valid" keeps only the positions where the window fully overlaps the data.
episode_returns_moving_average = (
    np.convolve(
        np.array(envs_wrapper.return_queue).flatten(),
        np.ones(rolling_length),
        mode="valid",
    )
    / rolling_length
)
entropy_moving_average = (
    np.convolve(np.array(entropies), np.ones(rolling_length), mode="valid")
    / rolling_length
)
critic_losses_moving_average = (
    np.convolve(
        np.array(critic_losses).flatten(), np.ones(rolling_length), mode="valid"
    )
    / rolling_length
)
actor_losses_moving_average = (
    np.convolve(np.array(actor_losses).flatten(), np.ones(rolling_length), mode="valid")
    / rolling_length
)

# 2x2 grid: returns (top-left), critic loss (top-right),
# entropy (bottom-left), actor loss (bottom-right)
fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(12, 5))
fig.suptitle(
    f"Training plots for {agent.__class__.__name__} in the LunarLander-v3 environment \n "
    f"(n_envs={n_envs}, n_steps_per_update={n_steps_per_update}, randomize_domain={randomize_domain})"
)

# episode return (x-axis is divided by the number of parallel envs)
axs[0, 0].set_title("Episode Returns")
axs[0, 0].plot(
    np.arange(len(episode_returns_moving_average)) / n_envs,
    episode_returns_moving_average,
)
axs[0, 0].set_xlabel("Number of episodes")

# critic loss
axs[0, 1].set_title("Critic Loss")
axs[0, 1].plot(critic_losses_moving_average)
axs[0, 1].set_xlabel("Number of updates")

# entropy
axs[1, 0].set_title("Entropy")
axs[1, 0].plot(entropy_moving_average)
axs[1, 0].set_xlabel("Number of updates")

# actor loss
axs[1, 1].set_title("Actor Loss")
axs[1, 1].plot(actor_losses_moving_average)
axs[1, 1].set_xlabel("Number of updates")

plt.tight_layout()
plt.show()
# %%
# .. image:: /_static/img/tutorials/vector_env_a2c_training_plots.png
# :alt: training_plots
#
# %%
# Performance Analysis of Synchronous and Asynchronous Vectorized Environments
# ----------------------------------------------------------------------------
#
# %%
#
# ------------------------------
#
# Asynchronous environments can lead to quicker training times and a higher speedup
# for data collection compared to synchronous environments. This is because asynchronous environments
# allow multiple agents to interact with their environments in parallel,
# while synchronous environments run multiple environments serially.
# This results in better efficiency and faster training times for asynchronous environments.
#
# %%
# .. image:: /_static/img/tutorials/vector_env_performance_plots.png
# :alt: performance_plots
#
# %%
#
# ------------------------------
#
# According to the Karp-Flatt metric (a metric used in parallel computing to estimate the limit for the
# speedup when scaling up the number of parallel processes, here the number of environments),
# the estimated max. speedup for asynchronous environments is 57, while the estimated maximum speedup
# for synchronous environments is 21. This suggests that asynchronous environments have significantly
# faster training times compared to synchronous environments (see graphs).
#
# %%
# .. image:: /_static/img/tutorials/vector_env_karp_flatt_plot.png
# :alt: karp_flatt_metric
#
# %%
#
# ------------------------------
#
# However, it is important to note that increasing the number of parallel vector environments
# can lead to slower training times after a certain number of environments (see plot below, where the
# agent was trained until the mean training returns were above -120). The slower training times might occur
# because the gradient estimates are already good enough with a relatively low number of environments
# (especially if the environment is not very complex). In this case, increasing the number of environments
# does not increase the learning speed, and actually increases the runtime, possibly due to the additional time
# needed to calculate the gradients. For LunarLander-v3, the best-performing configuration used an AsyncVectorEnv
# with 10 parallel environments, but environments with a higher complexity may require more
# parallel environments to achieve optimal performance.
#
# %%
# .. image:: /_static/img/tutorials/vector_env_runtime_until_threshold.png
# :alt: runtime_until_threshold_plot
#
# %%
# Saving/ Loading Weights
# -----------------------
#
# Toggle these flags to persist the trained networks or to restore them.
save_weights = False
load_weights = False

actor_weights_path = "weights/actor_weights.h5"
critic_weights_path = "weights/critic_weights.h5"

# Create the output directory if needed. `exist_ok=True` replaces the
# check-then-create pair (os.path.exists + os.mkdir), which can fail if the
# directory appears between the two calls.
os.makedirs("weights", exist_ok=True)

""" save network weights """
if save_weights:
    torch.save(agent.actor.state_dict(), actor_weights_path)
    torch.save(agent.critic.state_dict(), critic_weights_path)

""" load network weights """
if load_weights:
    # NOTE(review): the A2C constructor is defined outside this section;
    # verify that this call passes every argument it requires - TODO confirm.
    agent = A2C(obs_shape, action_shape, device, critic_lr, actor_lr)

    # `map_location=device` remaps the stored tensors onto the device in use,
    # so weights saved on a GPU can still be restored on a CPU-only machine.
    agent.actor.load_state_dict(torch.load(actor_weights_path, map_location=device))
    agent.critic.load_state_dict(
        torch.load(critic_weights_path, map_location=device)
    )

    # switch both networks to evaluation mode
    agent.actor.eval()
    agent.critic.eval()
# %%
# Showcase the Agent
# ------------------
#
""" play a couple of showcase episodes """
n_showcase_episodes = 3

for episode in range(n_showcase_episodes):
    print(f"starting episode {episode}...")

    # create a new sample environment to get new random parameters
    if randomize_domain:
        env = gym.make(
            "LunarLander-v3",
            render_mode="human",
            gravity=np.clip(
                np.random.normal(loc=-10.0, scale=2.0), a_min=-11.99, a_max=-0.01
            ),
            enable_wind=np.random.choice([True, False]),
            wind_power=np.clip(
                np.random.normal(loc=15.0, scale=2.0), a_min=0.01, a_max=19.99
            ),
            turbulence_power=np.clip(
                np.random.normal(loc=1.5, scale=1.0), a_min=0.01, a_max=1.99
            ),
            max_episode_steps=500,
        )
    else:
        env = gym.make("LunarLander-v3", render_mode="human", max_episode_steps=500)

    try:
        # get an initial state
        state, info = env.reset()

        # play one episode
        done = False
        while not done:
            # select an action A_{t} using S_{t} as input for the agent;
            # no gradients are needed because nothing is trained here
            with torch.no_grad():
                action, _, _, _ = agent.select_action(state[None, :])

            # perform the action A_{t} in the environment to get S_{t+1} and R_{t+1}
            state, reward, terminated, truncated, info = env.step(action.item())

            # the episode ends on either termination or truncation
            done = terminated or truncated
    finally:
        # every iteration builds its own environment, so each one is closed
        # here (even if the episode raised) instead of leaking a render window
        env.close()
# %%
# Try playing the environment yourself
# ------------------------------------
#
# from gymnasium.utils.play import play
#
# play(gym.make('LunarLander-v3', render_mode='rgb_array'),
# keys_to_action={'w': 2, 'a': 1, 'd': 3}, noop=0)
# %%
# References
# ----------
#
# [1] V. Mnih, A. P. Badia, M. Mirza, A. Graves, T. P. Lillicrap, T. Harley, D. Silver, K. Kavukcuoglu. "Asynchronous Methods for Deep Reinforcement Learning" ICML (2016).
#
# [2] J. Schulman, P. Moritz, S. Levine, M. Jordan and P. Abbeel. "High-dimensional continuous control using generalized advantage estimation." ICLR (2016).
#
# [3] Gymnasium Documentation: Vector environments. (URL: https://gymnasium.farama.org/api/vector/)

View File

@@ -0,0 +1,261 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n# Solving Blackjack with Tabular Q-Learning\n\nThis tutorial trains an agent for BlackJack using tabular Q-learning.\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<img src=\"file://_static/img/tutorials/blackjack_AE_loop.jpg\" width=\"650\" alt=\"agent-environment-diagram\" class=\"only-light\">\n<img src=\"file://_static/img/tutorials/blackjack_AE_loop_dark.png\" width=\"650\" alt=\"agent-environment-diagram\" class=\"only-dark\">\n\nIn this tutorial, we\u2019ll explore and solve the *Blackjack-v1*\nenvironment.\n\n**Blackjack** is one of the most popular casino card games that is also\ninfamous for being beatable under certain conditions. This version of\nthe game uses an infinite deck (we draw the cards with replacement), so\ncounting cards won\u2019t be a viable strategy in our simulated game.\nFull documentation can be found at https://gymnasium.farama.org/environments/toy_text/blackjack\n\n**Objective**: To win, your card sum should be greater than the\ndealer\u2019s without exceeding 21.\n\n**Actions**: Agents can pick between two actions:\n - stand (0): the player takes no more cards\n - hit (1): the player will be given another card, however the player could get over 21 and bust\n\n**Approach**: To solve this environment by yourself, you can pick your\nfavorite discrete RL algorithm. The presented solution uses *Q-learning*\n(a model-free RL algorithm).\n\n\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Imports and Environment Setup\n\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Author: Till Zemann\n# License: MIT License\n\nfrom __future__ import annotations\n\nfrom collections import defaultdict\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport seaborn as sns\nfrom matplotlib.patches import Patch\nfrom tqdm import tqdm\n\nimport gymnasium as gym\n\n\n# Let's start by creating the blackjack environment.\n# Note: We are going to follow the rules from Sutton & Barto.\n# Other versions of the game can be found below for you to experiment.\n\nenv = gym.make(\"Blackjack-v1\", sab=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
".. code:: py\n\n # Other possible environment configurations are:\n\n env = gym.make('Blackjack-v1', natural=True, sab=False)\n # Whether to give an additional reward for starting with a natural blackjack, i.e. starting with an ace and ten (sum is 21).\n\n env = gym.make('Blackjack-v1', natural=False, sab=False)\n # Whether to follow the exact rules outlined in the book by Sutton and Barto. If `sab` is `True`, the keyword argument `natural` will be ignored.\n\n\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Observing the environment\n\nFirst of all, we call ``env.reset()`` to start an episode. This function\nresets the environment to a starting position and returns an initial\n``observation``. We usually also set ``done = False``. This variable\nwill be useful later to check if a game is terminated (i.e., the player wins or loses).\n\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# reset the environment to get the first observation\ndone = False\nobservation, info = env.reset()\n\n# observation = (16, 9, False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note that our observation is a 3-tuple consisting of 3 values:\n\n- The player\u2019s current sum\n- Value of the dealer\u2019s face-up card\n- Boolean whether the player holds a usable ace (An ace is usable if it\n counts as 11 without busting)\n\n\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Executing an action\n\nAfter receiving our first observation, we are only going to use the\n``env.step(action)`` function to interact with the environment. This\nfunction takes an action as input and executes it in the environment.\nBecause that action changes the state of the environment, it returns\nfive useful variables to us. These are:\n\n- ``next_state``: This is the observation that the agent will receive\n after taking the action.\n- ``reward``: This is the reward that the agent will receive after\n taking the action.\n- ``terminated``: This is a boolean variable that indicates whether or\n not the environment has terminated.\n- ``truncated``: This is a boolean variable that also indicates whether\n the episode ended by early truncation, i.e., a time limit is reached.\n- ``info``: This is a dictionary that might contain additional\n information about the environment.\n\nThe ``next_state``, ``reward``, ``terminated`` and ``truncated`` variables are\nself-explanatory, but the ``info`` variable requires some additional\nexplanation. This variable contains a dictionary that might have some\nextra information about the environment, but in the Blackjack-v1\nenvironment you can ignore it. For example in Atari environments the\ninfo dictionary has an ``ale.lives`` key that tells us how many lives the\nagent has left. If the agent has 0 lives, then the episode is over.\n\nNote that it is not a good idea to call ``env.render()`` in your training\nloop because rendering slows down training by a lot. Rather try to build\nan extra loop to evaluate and showcase the agent after training.\n\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# sample a random action from all valid actions\naction = env.action_space.sample()\n# action=1\n\n# execute the action in our environment and receive infos from the environment\nobservation, reward, terminated, truncated, info = env.step(action)\n\n# observation=(24, 10, False)\n# reward=-1.0\n# terminated=True\n# truncated=False\n# info={}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Once ``terminated = True`` or ``truncated=True``, we should stop the\ncurrent episode and begin a new one with ``env.reset()``. If you\ncontinue executing actions without resetting the environment, it still\nresponds but the output won\u2019t be useful for training (it might even be\nharmful if the agent learns on invalid data).\n\n\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Building an agent\n\nLet\u2019s build a ``Q-learning agent`` to solve *Blackjack-v1*! We\u2019ll need\nsome functions for picking an action and updating the agent\u2019s action\nvalues. To ensure that the agent explores the environment, one possible\nsolution is the ``epsilon-greedy`` strategy, where we pick a random\naction with probability ``epsilon`` and the greedy action (currently\nvalued as the best) with probability ``1 - epsilon``.\n\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"class BlackjackAgent:\n def __init__(\n self,\n env,\n learning_rate: float,\n initial_epsilon: float,\n epsilon_decay: float,\n final_epsilon: float,\n discount_factor: float = 0.95,\n ):\n \"\"\"Initialize a Reinforcement Learning agent with an empty dictionary\n of state-action values (q_values), a learning rate and an epsilon.\n\n Args:\n learning_rate: The learning rate\n initial_epsilon: The initial epsilon value\n epsilon_decay: The decay for epsilon\n final_epsilon: The final epsilon value\n discount_factor: The discount factor for computing the Q-value\n \"\"\"\n self.q_values = defaultdict(lambda: np.zeros(env.action_space.n))\n\n self.lr = learning_rate\n self.discount_factor = discount_factor\n\n self.epsilon = initial_epsilon\n self.epsilon_decay = epsilon_decay\n self.final_epsilon = final_epsilon\n\n self.training_error = []\n\n def get_action(self, env, obs: tuple[int, int, bool]) -> int:\n \"\"\"\n Returns the best action with probability (1 - epsilon)\n otherwise a random action with probability epsilon to ensure exploration.\n \"\"\"\n # with probability epsilon return a random action to explore the environment\n if np.random.random() < self.epsilon:\n return env.action_space.sample()\n\n # with probability (1 - epsilon) act greedily (exploit)\n else:\n return int(np.argmax(self.q_values[obs]))\n\n def update(\n self,\n obs: tuple[int, int, bool],\n action: int,\n reward: float,\n terminated: bool,\n next_obs: tuple[int, int, bool],\n ):\n \"\"\"Updates the Q-value of an action.\"\"\"\n future_q_value = (not terminated) * np.max(self.q_values[next_obs])\n temporal_difference = (\n reward + self.discount_factor * future_q_value - self.q_values[obs][action]\n )\n\n self.q_values[obs][action] = (\n self.q_values[obs][action] + self.lr * temporal_difference\n )\n self.training_error.append(temporal_difference)\n\n def decay_epsilon(self):\n self.epsilon = max(self.final_epsilon, self.epsilon - self.epsilon_decay)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To train the agent, we will let the agent play one episode (one complete\ngame is called an episode) at a time and then update its Q-values after\neach step (one single action in a game is called a step).\n\nThe agent will have to experience a lot of episodes to explore the\nenvironment sufficiently.\n\nNow we should be ready to build the training loop.\n\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# hyperparameters\nlearning_rate = 0.01\nn_episodes = 100_000\nstart_epsilon = 1.0\nepsilon_decay = start_epsilon / (n_episodes / 2) # reduce the exploration over time\nfinal_epsilon = 0.1\n\nagent = BlackjackAgent(\n env=env,\n learning_rate=learning_rate,\n initial_epsilon=start_epsilon,\n epsilon_decay=epsilon_decay,\n final_epsilon=final_epsilon,\n)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Great, let\u2019s train!\n\nInfo: The current hyperparameters are set to quickly train a decent agent.\nIf you want to converge to the optimal policy, try increasing\nthe n_episodes by 10x and lower the learning_rate (e.g. to 0.001).\n\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"env = gym.wrappers.RecordEpisodeStatistics(env, buffer_length=n_episodes)\nfor episode in tqdm(range(n_episodes)):\n obs, info = env.reset()\n done = False\n\n # play one episode\n while not done:\n action = agent.get_action(env, obs)\n next_obs, reward, terminated, truncated, info = env.step(action)\n\n # update the agent\n agent.update(obs, action, reward, terminated, next_obs)\n\n # update if the environment is done and the current obs\n done = terminated or truncated\n obs = next_obs\n\n agent.decay_epsilon()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Visualizing the training\n\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"rolling_length = 500\nfig, axs = plt.subplots(ncols=3, figsize=(12, 5))\naxs[0].set_title(\"Episode rewards\")\n# compute and assign a rolling average of the data to provide a smoother graph\nreward_moving_average = (\n np.convolve(\n np.array(env.return_queue).flatten(), np.ones(rolling_length), mode=\"valid\"\n )\n / rolling_length\n)\naxs[0].plot(range(len(reward_moving_average)), reward_moving_average)\naxs[1].set_title(\"Episode lengths\")\nlength_moving_average = (\n np.convolve(\n np.array(env.length_queue).flatten(), np.ones(rolling_length), mode=\"same\"\n )\n / rolling_length\n)\naxs[1].plot(range(len(length_moving_average)), length_moving_average)\naxs[2].set_title(\"Training Error\")\ntraining_error_moving_average = (\n np.convolve(np.array(agent.training_error), np.ones(rolling_length), mode=\"same\")\n / rolling_length\n)\naxs[2].plot(range(len(training_error_moving_average)), training_error_moving_average)\nplt.tight_layout()\nplt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<img src=\"file://_static/img/tutorials/blackjack_training_plots.png\">\n\n\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Visualising the policy\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def create_grids(agent, usable_ace=False):\n \"\"\"Create value and policy grid given an agent.\"\"\"\n # convert our state-action values to state values\n # and build a policy dictionary that maps observations to actions\n state_value = defaultdict(float)\n policy = defaultdict(int)\n for obs, action_values in agent.q_values.items():\n state_value[obs] = float(np.max(action_values))\n policy[obs] = int(np.argmax(action_values))\n\n player_count, dealer_count = np.meshgrid(\n # players count, dealers face-up card\n np.arange(12, 22),\n np.arange(1, 11),\n )\n\n # create the value grid for plotting\n value = np.apply_along_axis(\n lambda obs: state_value[(obs[0], obs[1], usable_ace)],\n axis=2,\n arr=np.dstack([player_count, dealer_count]),\n )\n value_grid = player_count, dealer_count, value\n\n # create the policy grid for plotting\n policy_grid = np.apply_along_axis(\n lambda obs: policy[(obs[0], obs[1], usable_ace)],\n axis=2,\n arr=np.dstack([player_count, dealer_count]),\n )\n return value_grid, policy_grid\n\n\ndef create_plots(value_grid, policy_grid, title: str):\n \"\"\"Creates a plot using a value and policy grid.\"\"\"\n # create a new figure with 2 subplots (left: state values, right: policy)\n player_count, dealer_count, value = value_grid\n fig = plt.figure(figsize=plt.figaspect(0.4))\n fig.suptitle(title, fontsize=16)\n\n # plot the state values\n ax1 = fig.add_subplot(1, 2, 1, projection=\"3d\")\n ax1.plot_surface(\n player_count,\n dealer_count,\n value,\n rstride=1,\n cstride=1,\n cmap=\"viridis\",\n edgecolor=\"none\",\n )\n plt.xticks(range(12, 22), range(12, 22))\n plt.yticks(range(1, 11), [\"A\"] + list(range(2, 11)))\n ax1.set_title(f\"State values: {title}\")\n ax1.set_xlabel(\"Player sum\")\n ax1.set_ylabel(\"Dealer showing\")\n ax1.zaxis.set_rotate_label(False)\n ax1.set_zlabel(\"Value\", fontsize=14, rotation=90)\n ax1.view_init(20, 220)\n\n # plot the policy\n fig.add_subplot(1, 2, 2)\n ax2 = sns.heatmap(policy_grid, linewidth=0, 
annot=True, cmap=\"Accent_r\", cbar=False)\n ax2.set_title(f\"Policy: {title}\")\n ax2.set_xlabel(\"Player sum\")\n ax2.set_ylabel(\"Dealer showing\")\n ax2.set_xticklabels(range(12, 22))\n ax2.set_yticklabels([\"A\"] + list(range(2, 11)), fontsize=12)\n\n # add a legend\n legend_elements = [\n Patch(facecolor=\"lightgreen\", edgecolor=\"black\", label=\"Hit\"),\n Patch(facecolor=\"grey\", edgecolor=\"black\", label=\"Stick\"),\n ]\n ax2.legend(handles=legend_elements, bbox_to_anchor=(1.3, 1))\n return fig\n\n\n# state values & policy with usable ace (ace counts as 11)\nvalue_grid, policy_grid = create_grids(agent, usable_ace=True)\nfig1 = create_plots(value_grid, policy_grid, title=\"With usable ace\")\nplt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<img src=\"file://_static/img/tutorials/blackjack_with_usable_ace.png\">\n\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# state values & policy without usable ace (ace counts as 1)\nvalue_grid, policy_grid = create_grids(agent, usable_ace=False)\nfig2 = create_plots(value_grid, policy_grid, title=\"Without usable ace\")\nplt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<img src=\"file://_static/img/tutorials/blackjack_without_usable_ace.png\">\n\nIt's good practice to call env.close() at the end of your script,\nso that any resources used by the environment will be released.\n\n\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Think you can do better?\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# You can visualize the environment using the play function\n# and try to win a few games."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Hopefully this tutorial helped you get a grip on how to interact with\nGymnasium environments and set you on a journey to solve many more RL\nchallenges.\n\nIt is recommended that you solve this environment by yourself (project\nbased learning is really effective!). You can apply your favorite\ndiscrete RL algorithm or give Monte Carlo ES a try (covered in [Sutton &\nBarto](http://incompleteideas.net/book/the-book-2nd.html), section\n5.3) - this way you can compare your results directly to the book.\n\nBest of fun!\n\n\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.11"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

View File

@@ -0,0 +1,138 @@
"""
Implementing Custom Wrappers
============================
In this tutorial we will describe how to implement your own custom wrappers.
Wrappers are a great way to add functionality to your environments in a modular way.
This will save you a lot of boilerplate code.
We will show how to create a wrapper by
- Inheriting from :class:`gymnasium.ObservationWrapper`
- Inheriting from :class:`gymnasium.ActionWrapper`
- Inheriting from :class:`gymnasium.RewardWrapper`
- Inheriting from :class:`gymnasium.Wrapper`
Before following this tutorial, make sure to check out the docs of the :mod:`gymnasium.wrappers` module.
"""
# %%
# Inheriting from :class:`gymnasium.ObservationWrapper`
# -----------------------------------------------------
# Observation wrappers are useful if you want to apply some function to the observations that are returned
# by an environment. If you implement an observation wrapper, you only need to define this transformation
# by implementing the :meth:`gymnasium.ObservationWrapper.observation` method. Moreover, you should remember to
# update the observation space, if the transformation changes the shape of observations (e.g. by transforming
# dictionaries into numpy arrays, as in the following example).
#
# Imagine you have a 2D navigation task where the environment returns dictionaries as observations with
# keys ``"agent"`` and ``"target"``. A common thing to do might be to throw away some degrees of
# freedom and only consider the position of the target relative to the agent, i.e.
# ``observation["target"] - observation["agent"]``. For this, you could implement an
# observation wrapper like this:
import numpy as np
import gymnasium as gym
from gymnasium import ActionWrapper, ObservationWrapper, RewardWrapper, Wrapper
from gymnasium.spaces import Box, Discrete
class RelativePosition(ObservationWrapper):
    """Observation wrapper that returns the target entry minus the agent entry.

    Observations of the wrapped environment are indexed with the keys
    ``"target"`` and ``"agent"``; the wrapper returns their difference.
    """

    def __init__(self, env):
        """Wrap ``env`` and declare an unbounded Box observation space of shape ``(2,)``."""
        super().__init__(env)
        unbounded = np.inf
        self.observation_space = Box(low=-unbounded, high=unbounded, shape=(2,))

    def observation(self, obs):
        """Return ``obs["target"] - obs["agent"]``."""
        target, agent = obs["target"], obs["agent"]
        return target - agent
# %%
# Inheriting from :class:`gymnasium.ActionWrapper`
# ------------------------------------------------
# Action wrappers can be used to apply a transformation to actions before applying them to the environment.
# If you implement an action wrapper, you need to define that transformation by implementing
# :meth:`gymnasium.ActionWrapper.action`. Moreover, you should specify the domain of that transformation
# by updating the action space of the wrapper.
#
# Let's say you have an environment with an action space of type :class:`gymnasium.spaces.Box`, but you would only like
# to use a finite subset of actions. Then, you might want to implement the following wrapper:
class DiscreteActions(ActionWrapper):
    """Action wrapper that exposes a finite list of actions as a Discrete space.

    ``disc_to_cont`` is indexed with the discrete action, and the entry found
    there is what gets forwarded as the action.
    """

    def __init__(self, env, disc_to_cont):
        """Store the lookup table and size the Discrete space to its length."""
        super().__init__(env)
        self.disc_to_cont = disc_to_cont
        n_choices = len(disc_to_cont)
        self.action_space = Discrete(n_choices)

    def action(self, act):
        """Translate the discrete index ``act`` into its table entry."""
        chosen = self.disc_to_cont[act]
        return chosen
env = gym.make("LunarLanderContinuous-v3")
# print(env.action_space)  # Box(-1.0, 1.0, (2,), float32)

# The four selectable actions: +/- one unit along each of the two action dimensions.
wrapped_env = DiscreteActions(
    env,
    [np.array(direction) for direction in ([1, 0], [-1, 0], [0, 1], [0, -1])],
)
# print(wrapped_env.action_space)  # Discrete(4)
# %%
# Inheriting from :class:`gymnasium.RewardWrapper`
# ------------------------------------------------
# Reward wrappers are used to transform the reward that is returned by an environment.
# As for the previous wrappers, you need to specify that transformation by implementing the
# :meth:`gymnasium.RewardWrapper.reward` method.
#
# Let us look at an example: Sometimes (especially when we do not have control over the reward
# because it is intrinsic), we want to clip the reward to a range to gain some numerical stability.
# To do that, we could, for instance, implement the following wrapper:
from typing import SupportsFloat
class ClipReward(RewardWrapper):
    """Reward wrapper that clips every reward to ``[min_reward, max_reward]``."""

    def __init__(self, env, min_reward, max_reward):
        """Wrap ``env`` and remember the lower and upper clipping bounds."""
        super().__init__(env)
        self.min_reward, self.max_reward = min_reward, max_reward

    def reward(self, r: SupportsFloat) -> SupportsFloat:
        """Return ``r`` clipped with ``np.clip`` to the configured bounds."""
        lower, upper = self.min_reward, self.max_reward
        return np.clip(r, lower, upper)
# %%
# Inheriting from :class:`gymnasium.Wrapper`
# ------------------------------------------
# Sometimes you might need to implement a wrapper that does some more complicated modifications (e.g. modify the
# reward based on data in ``info`` or change the rendering behavior).
# Such wrappers can be implemented by inheriting from :class:`gymnasium.Wrapper`.
#
# - You can set a new action or observation space by defining ``self.action_space`` or ``self.observation_space`` in ``__init__``, respectively
# - You can set new metadata by defining ``self.metadata`` in ``__init__``
# - You can override :meth:`gymnasium.Wrapper.step`, :meth:`gymnasium.Wrapper.render`, :meth:`gymnasium.Wrapper.close` etc.
#
# If you do this, you can access the environment that was passed
# to your wrapper (which *still* might be wrapped in some other wrapper) by accessing the attribute :attr:`env`.
#
# Let's also take a look at an example for this case. Most MuJoCo environments return a reward that consists
# of different terms: For instance, there might be a term that rewards the agent for completing the task and one term that
# penalizes large actions (i.e. energy usage). Usually, you can pass weight parameters for those terms during
# initialization of the environment. However, *Reacher* does not allow you to do this! Nevertheless, all individual terms
# of the reward are returned in ``info``, so let us build a wrapper for Reacher that allows us to weight those terms:
class ReacherRewardWrapper(Wrapper):
    """Wrapper that rebuilds the reward as a weighted sum of two ``info`` entries.

    The reward returned by the wrapped environment is discarded and replaced
    with ``reward_dist_weight * info["reward_dist"]
    + reward_ctrl_weight * info["reward_ctrl"]``.
    """

    def __init__(self, env, reward_dist_weight, reward_ctrl_weight):
        """Wrap ``env`` and store the weight applied to each reward term."""
        super().__init__(env)
        self.reward_dist_weight = reward_dist_weight
        self.reward_ctrl_weight = reward_ctrl_weight

    def step(self, action):
        """Step the wrapped environment and substitute the re-weighted reward."""
        # the environment's own reward is ignored on purpose
        obs, _, terminated, truncated, info = self.env.step(action)
        dist_term = self.reward_dist_weight * info["reward_dist"]
        ctrl_term = self.reward_ctrl_weight * info["reward_ctrl"]
        return obs, dist_term + ctrl_term, terminated, truncated, info

File diff suppressed because one or more lines are too long

BIN
v1.2.0/_images/AE_loop.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 337 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 360 KiB

BIN
v1.2.0/_images/acrobot.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 465 KiB

BIN
v1.2.0/_images/ant.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.3 MiB

BIN
v1.2.0/_images/ant.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.3 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 92 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 266 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 70 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 102 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 107 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.5 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 222 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 295 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 40 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 246 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 243 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 86 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 73 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 213 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 42 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 43 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 52 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 183 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 137 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.9 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB

BIN
v1.2.0/_images/hopper.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 10 MiB

BIN
v1.2.0/_images/hopper.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

BIN
v1.2.0/_images/humanoid.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 MiB

BIN
v1.2.0/_images/humanoid.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 42 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.6 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 466 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.3 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 814 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 888 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 869 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.5 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 116 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 86 KiB

BIN
v1.2.0/_images/pendulum.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 584 KiB

BIN
v1.2.0/_images/pendulum.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.8 KiB

BIN
v1.2.0/_images/pusher.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.8 MiB

BIN
v1.2.0/_images/pusher.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

BIN
v1.2.0/_images/reacher.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.0 MiB

BIN
v1.2.0/_images/reacher.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

BIN
v1.2.0/_images/swimmer.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.2 MiB

BIN
v1.2.0/_images/swimmer.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

BIN
v1.2.0/_images/taxi.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 13 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 75 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 36 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 68 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB

BIN
v1.2.0/_images/walker2d.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.5 MiB

BIN
v1.2.0/_images/walker2d.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,924 @@
<!doctype html>
<html class="no-js" lang="en" data-content_root="../../../../">
<head><meta charset="utf-8"/>
<meta name="viewport" content="width=device-width,initial-scale=1"/>
<meta name="color-scheme" content="light dark">
<meta name="description" content="A standard API for reinforcement learning and a diverse set of reference environments (formerly Gym)">
<meta property="og:title" content="Gymnasium Documentation" />
<meta property="og:type" content="website" />
<meta property="og:description" content="A standard API for reinforcement learning and a diverse set of reference environments (formerly Gym)" />
<meta property="og:url" content="https://gymnasium.farama.org/_modules/gymnasium/envs/functional_jax_env.html" /><meta property="og:image" content="https://gymnasium.farama.org/_static/img/gymnasium-github.png" /><meta name="twitter:card" content="summary_large_image"><link rel="index" title="Index" href="../../../../genindex/" /><link rel="search" title="Search" href="../../../../search/" />
<link rel="canonical" href="https://gymnasium.farama.org/_modules/gymnasium/envs/functional_jax_env.html" />
<link rel="shortcut icon" href="../../../../_static/favicon.png"/><!-- Generated with Sphinx 7.4.7 and Furo 2023.08.19.dev1 -->
<title>gymnasium.envs.functional_jax_env - Gymnasium Documentation</title>
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/furo.css?v=3e7f4c72" />
<link rel="stylesheet" type="text/css" href="../../../../_static/sg_gallery.css?v=61a4c737" />
<link rel="stylesheet" type="text/css" href="../../../../_static/sg_gallery-binder.css?v=f4aeca0c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/sg_gallery-dataframe.css?v=2082cf3c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/sg_gallery-rendered-html.css?v=1277b6f3" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/furo-extensions.css?v=82c8b628" />
<style>
  /* Custom properties for code-block colours. Light values are the default. */
  body {
    --color-code-background: #f8f8f8;
    --color-code-foreground: black;
  }

  /* Dark values are only ever applied outside of print. */
  @media not print {
    /* Case 1: the body carries data-theme="dark". */
    body[data-theme="dark"] {
      --color-code-background: #202020;
      --color-code-foreground: #d0d0d0;
    }

    /* Case 2: the user agent prefers a dark scheme and the body is not
       explicitly marked data-theme="light". */
    @media (prefers-color-scheme: dark) {
      body:not([data-theme="light"]) {
        --color-code-background: #202020;
        --color-code-foreground: #d0d0d0;
      }
    }
  }
</style>
</head>
<body>
<header class="farama-header" aria-label="Farama header">
  <div class="farama-header__container">
    <!-- Label bound to the #__navigation checkbox; its accessible name comes
         from the visually hidden text, so the icon itself is decorative. -->
    <div class="farama-header__left--mobile">
      <label class="nav-overlay-icon" for="__navigation">
        <div class="visually-hidden">Toggle site navigation sidebar</div>
        <svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" focusable="false">
          <line x1="0.5" y1="4" x2="23.5" y2="4"></line>
          <line x1="0.232" y1="12" x2="23.5" y2="12"></line>
          <line x1="0.232" y1="20" x2="23.5" y2="20"></line>
        </svg>
      </label>
    </div>

    <!-- Home link. The visible title text names the link, so both logo
         variants use an empty alt to avoid a redundant announcement. -->
    <div class="farama-header__left farama-header__center--mobile">
      <a href="../../../../">
        <img class="farama-header__logo only-light" src="../../../../_static/img/gymnasium_black.svg" alt="">
        <img class="farama-header__logo only-dark" src="../../../../_static/img/gymnasium_white.svg" alt="">
        <span class="farama-header__title">Gymnasium Documentation</span>
      </a>
    </div>

    <div class="farama-header__right">
      <div class="farama-header-menu">
        <!-- Menu trigger: named by aria-label, so the logo and chevron are decorative. -->
        <button class="farama-header-menu__btn" type="button" aria-label="Open Farama Menu" aria-expanded="false" aria-haspopup="true" aria-controls="farama-menu">
          <img class="farama-black-logo-invert" src="../../../../_static/img/farama-logo-header.svg" alt="">
          <svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" focusable="false">
            <polyline style="stroke-linecap: round; stroke-linejoin: round; fill: none; stroke-width: 2px;" points="1 7 12 18 23 7"></polyline>
          </svg>
        </button>

        <!-- Menu panel referenced by aria-controls on the trigger above. -->
        <div class="farama-header-menu-container farama-hidden" aria-hidden="true" id="farama-menu">
          <div class="farama-header-menu__header">
            <a href="https://farama.org">
              <img class="farama-header-menu__logo farama-white-logo-invert" src="../../../../_static/img/farama_solid_white.svg" alt="Farama Foundation logo">
              <span>Farama Foundation</span>
            </a>
            <div class="farama-header-menu-header__right">
              <!-- Icon-only button: needs an explicit accessible name. -->
              <button id="farama-close-menu" type="button" aria-label="Close Farama Menu">
                <svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg" fill="none" stroke="currentColor"
                  stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="icon-close" aria-hidden="true" focusable="false">
                  <line x1="3" y1="21" x2="21" y2="3"></line>
                  <line x1="3" y1="3" x2="21" y2="21"></line>
                </svg>
              </button>
            </div>
          </div>
          <div class="farama-header-menu__body">
            <!-- Response from farama.org/api/projects.json -->
          </div>
        </div>
      </div>
    </div>
  </div>
</header>
<script>
  // Set body[data-theme] from the stored "theme" value, defaulting to "auto".
  // Reading localStorage can throw (for example a SecurityError when storage
  // access is denied), so the lookup is guarded and falls back to "auto".
  (function () {
    var theme = "auto";
    try {
      theme = localStorage.getItem("theme") || "auto";
    } catch (err) {
      // Storage unavailable: keep the "auto" default.
    }
    document.body.dataset.theme = theme;
  })();
</script>
<!-- Icon sprite sheet. The wrapper is not rendered (display: none); each
     <symbol> below is an icon that other markup pulls in by id with
     <use href="#svg-...">. Each symbol carries a <title> naming the icon. -->
<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
<!-- Table-of-contents icon. -->
<symbol id="svg-toc" viewBox="0 0 24 24">
<title>Contents</title>
<svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 1024 1024">
<path d="M408 442h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8zm-8 204c0 4.4 3.6 8 8 8h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56zm504-486H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zm0 632H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zM115.4 518.9L271.7 642c5.8 4.6 14.4.5 14.4-6.9V388.9c0-7.4-8.5-11.5-14.4-6.9L115.4 505.1a8.74 8.74 0 0 0 0 13.8z"/>
</svg>
</symbol>
<!-- Three-bar menu icon. -->
<symbol id="svg-menu" viewBox="0 0 24 24">
<title>Menu</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-menu">
<line x1="3" y1="12" x2="21" y2="12"></line>
<line x1="3" y1="6" x2="21" y2="6"></line>
<line x1="3" y1="18" x2="21" y2="18"></line>
</svg>
</symbol>
<!-- Right-pointing chevron used by the expandable sidebar entries. -->
<symbol id="svg-arrow-right" viewBox="0 0 24 24">
<title>Expand</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-chevron-right">
<polyline points="9 18 15 12 9 6"></polyline>
</svg>
</symbol>
<!-- Theme-toggle icons: light, dark and auto. -->
<symbol id="svg-sun" viewBox="0 0 24 24">
<title>Light mode</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="feather-sun">
<circle cx="12" cy="12" r="5"></circle>
<line x1="12" y1="1" x2="12" y2="3"></line>
<line x1="12" y1="21" x2="12" y2="23"></line>
<line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
<line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
<line x1="1" y1="12" x2="3" y2="12"></line>
<line x1="21" y1="12" x2="23" y2="12"></line>
<line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
<line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
</svg>
</symbol>
<symbol id="svg-moon" viewBox="0 0 24 24">
<title>Dark mode</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-moon">
<path stroke="none" d="M0 0h24v24H0z" fill="none" />
<path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z" />
</svg>
</symbol>
<symbol id="svg-sun-half" viewBox="0 0 24 24">
<title>Auto light/dark mode</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-shadow">
<path stroke="none" d="M0 0h24v24H0z" fill="none"/>
<circle cx="12" cy="12" r="9" />
<path d="M13 12h5" />
<path d="M13 15h4" />
<path d="M13 18h1" />
<path d="M13 9h4" />
<path d="M13 6h1" />
</svg>
</symbol>
</svg>
<!-- Two checkboxes hold the open/closed state of the navigation sidebar
     (#__navigation) and the table-of-contents sidebar (#__toc). Any <label>
     whose for attribute names one of these ids toggles that checkbox.
     NOTE(review): the actual show/hide styling is presumably driven by the
     theme stylesheet keyed on the checked state; confirm in furo.css. -->
<input type="checkbox" class="sidebar-toggle" name="__navigation" id="__navigation">
<input type="checkbox" class="sidebar-toggle" name="__toc" id="__toc">
<!-- Overlay labels: activating one toggles the matching checkbox; their
     hidden text describes the action as hiding the corresponding sidebar. -->
<label class="overlay sidebar-overlay" for="__navigation">
<div class="visually-hidden">Hide navigation sidebar</div>
</label>
<label class="overlay toc-overlay" for="__toc">
<div class="visually-hidden">Hide table of contents sidebar</div>
</label>
<div class="page">
<!--<header class="mobile-header">
<div class="header-left">
<label class="nav-overlay-icon" for="__navigation">
<div class="visually-hidden">Toggle site navigation sidebar</div>
<i class="icon"><svg><use href="#svg-menu"></use></svg></i>
</label>
</div>
<div class="header-center">
<a href="../../../../"><div class="brand">Gymnasium Documentation</div></a>
</div>
<div class="header-right">
<div class="theme-toggle-container theme-toggle-header">
<button class="theme-toggle">
<div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
<svg class="theme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
<svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg>
<svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
</button>
</div>
<label class="toc-overlay-icon toc-header-icon no-toc" for="__toc">
<div class="visually-hidden">Toggle table of contents sidebar</div>
<i class="icon"><svg><use href="#svg-toc"></use></svg></i>
</label>
</div>
</header>-->
<aside class="sidebar-drawer">
<div class="sidebar-container">
<div class="sidebar-sticky"><a class="farama-sidebar__title" href="../../../../">
<img class="farama-header__logo only-light" src="../../../../_static/img/gymnasium_black.svg" alt="Light Logo"/>
<img class="farama-header__logo only-dark" src="../../../../_static/img/gymnasium_white.svg" alt="Dark Logo"/>
<span class="farama-header__title">Gymnasium Documentation</span>
</a><form class="sidebar-search-container" method="get" action="../../../../search/" role="search">
<input class="sidebar-search" placeholder="Search" name="q" aria-label="Search">
<input type="hidden" name="check_keywords" value="yes">
<input type="hidden" name="area" value="default">
</form>
<div id="searchbox"></div><div class="sidebar-scroll"><div class="sidebar-tree">
<p class="caption" role="heading"><span class="caption-text">Introduction</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/basic_usage/">Basic Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/train_agent/">Training an Agent</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/create_custom_env/">Create a Custom Environment</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/record_agent/">Recording Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/speed_up_env/">Speeding Up Training</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/gym_compatibility/">Compatibility with Gym</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/migration_guide/">Migration Guide - v0.21 to v1.0.0</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">API</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../api/env/">Env</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../api/registry/">Make and register</a></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../api/spaces/">Spaces</a><input class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" role="switch" type="checkbox"/><label for="toctree-checkbox-1"><div class="visually-hidden">Toggle navigation of Spaces</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/spaces/fundamental/">Fundamental Spaces</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/spaces/composite/">Composite Spaces</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/spaces/utils/">Spaces Utils</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../api/wrappers/">Wrappers</a><input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" role="switch" type="checkbox"/><label for="toctree-checkbox-2"><div class="visually-hidden">Toggle navigation of Wrappers</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/wrappers/table/">List of Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/wrappers/misc_wrappers/">Misc Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/wrappers/action_wrappers/">Action Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/wrappers/observation_wrappers/">Observation Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/wrappers/reward_wrappers/">Reward Wrappers</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../api/vector/">Vectorize</a><input class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" role="switch" type="checkbox"/><label for="toctree-checkbox-3"><div class="visually-hidden">Toggle navigation of Vectorize</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/vector/wrappers/">Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/vector/async_vector_env/">AsyncVectorEnv</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/vector/sync_vector_env/">SyncVectorEnv</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/vector/utils/">Utility functions</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../../../api/utils/">Utility functions</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../api/functional/">Functional Env</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Environments</span></p>
<ul>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../environments/classic_control/">Classic Control</a><input class="toctree-checkbox" id="toctree-checkbox-4" name="toctree-checkbox-4" role="switch" type="checkbox"/><label for="toctree-checkbox-4"><div class="visually-hidden">Toggle navigation of Classic Control</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/classic_control/acrobot/">Acrobot</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/classic_control/cart_pole/">Cart Pole</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/classic_control/mountain_car_continuous/">Mountain Car Continuous</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/classic_control/mountain_car/">Mountain Car</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/classic_control/pendulum/">Pendulum</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../environments/box2d/">Box2D</a><input class="toctree-checkbox" id="toctree-checkbox-5" name="toctree-checkbox-5" role="switch" type="checkbox"/><label for="toctree-checkbox-5"><div class="visually-hidden">Toggle navigation of Box2D</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/box2d/bipedal_walker/">Bipedal Walker</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/box2d/car_racing/">Car Racing</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/box2d/lunar_lander/">Lunar Lander</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../environments/toy_text/">Toy Text</a><input class="toctree-checkbox" id="toctree-checkbox-6" name="toctree-checkbox-6" role="switch" type="checkbox"/><label for="toctree-checkbox-6"><div class="visually-hidden">Toggle navigation of Toy Text</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/toy_text/blackjack/">Blackjack</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/toy_text/taxi/">Taxi</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/toy_text/cliff_walking/">Cliff Walking</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/toy_text/frozen_lake/">Frozen Lake</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../environments/mujoco/">MuJoCo</a><input class="toctree-checkbox" id="toctree-checkbox-7" name="toctree-checkbox-7" role="switch" type="checkbox"/><label for="toctree-checkbox-7"><div class="visually-hidden">Toggle navigation of MuJoCo</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/ant/">Ant</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/half_cheetah/">Half Cheetah</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/hopper/">Hopper</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/humanoid/">Humanoid</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/humanoid_standup/">Humanoid Standup</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/inverted_double_pendulum/">Inverted Double Pendulum</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/inverted_pendulum/">Inverted Pendulum</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/pusher/">Pusher</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/reacher/">Reacher</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/swimmer/">Swimmer</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/walker2d/">Walker2D</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../../../environments/atari/">Atari</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../environments/third_party_environments/">External Environments</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Tutorials</span></p>
<ul>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../tutorials/gymnasium_basics/">Gymnasium Basics</a><input class="toctree-checkbox" id="toctree-checkbox-8" name="toctree-checkbox-8" role="switch" type="checkbox"/><label for="toctree-checkbox-8"><div class="visually-hidden">Toggle navigation of Gymnasium Basics</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/gymnasium_basics/environment_creation/">Make your own custom environment</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/gymnasium_basics/handling_time_limits/">Handling Time Limits</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/gymnasium_basics/implementing_custom_wrappers/">Implementing Custom Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/gymnasium_basics/load_quadruped_model/">Load custom quadruped robot environments</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../tutorials/training_agents/">Training Agents</a><input class="toctree-checkbox" id="toctree-checkbox-9" name="toctree-checkbox-9" role="switch" type="checkbox"/><label for="toctree-checkbox-9"><div class="visually-hidden">Toggle navigation of Training Agents</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/training_agents/blackjack_q_learning/">Solving Blackjack with Tabular Q-Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/training_agents/frozenlake_q_learning/">Solving Frozenlake with Tabular Q-Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/training_agents/mujoco_reinforce/">Training using REINFORCE for Mujoco</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/training_agents/vector_a2c/">Speeding up A2C Training with Vector Envs</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../../../tutorials/third-party-tutorials/">Third-Party Tutorials</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Development</span></p>
<ul>
<li class="toctree-l1"><a class="reference external" href="https://github.com/Farama-Foundation/Gymnasium">Github</a></li>
<li class="toctree-l1"><a class="reference external" href="https://arxiv.org/abs/2407.17032">Paper</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../gymnasium_release_notes/">Gymnasium Release Notes</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../gym_release_notes/">Gym Release Notes</a></li>
<li class="toctree-l1"><a class="reference external" href="https://github.com/Farama-Foundation/Gymnasium/blob/main/docs/README.md">Contribute to the Docs</a></li>
</ul>
</div>
</div>
</div>
</div>
</aside>
<div class="main-container">
<div class="main">
<div class="content">
<div class="article-container">
<a href="#" class="back-to-top muted-link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
<path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12z"></path>
</svg>
<span>Back to top</span>
</a>
<div class="content-icon-container">
  <!-- Theme toggle. The visually hidden text names the button; the three icon
       variants are decorative, so they are hidden from assistive technology
       (the referenced symbols carry their own <title> elements). -->
  <div class="theme-toggle-container theme-toggle-content">
    <button class="theme-toggle" type="button" title="Toggle color theme">
      <div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
      <svg class="theme-icon-when-auto" aria-hidden="true" focusable="false">
        <use href="#svg-sun-half"></use>
      </svg>
      <svg class="theme-icon-when-dark" aria-hidden="true" focusable="false">
        <use href="#svg-moon"></use>
      </svg>
      <svg class="theme-icon-when-light" aria-hidden="true" focusable="false">
        <use href="#svg-sun"></use>
      </svg>
    </button>
  </div>
  <!-- Label bound to the #__toc checkbox; named by its visually hidden text. -->
  <label class="toc-overlay-icon toc-content-icon no-toc" for="__toc">
    <div class="visually-hidden">Toggle table of contents sidebar</div>
    <i class="icon"><svg aria-hidden="true" focusable="false">
      <use href="#svg-toc"></use>
    </svg></i>
  </label>
</div>
<article role="main">
<h1>Source code for gymnasium.envs.functional_jax_env</h1><div class="highlight"><pre>
<span></span><span class="sd">&quot;&quot;&quot;Functional to Environment compatibility.&quot;&quot;&quot;</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">__future__</span><span class="w"> </span><span class="kn">import</span> <span class="n">annotations</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">typing</span><span class="w"> </span><span class="kn">import</span> <span class="n">Any</span><span class="p">,</span> <span class="n">Generic</span><span class="p">,</span> <span class="n">TypeAlias</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">jax</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">jax.numpy</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">jnp</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">jax.random</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">jrng</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">gymnasium</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">gym</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">gymnasium.envs.registration</span><span class="w"> </span><span class="kn">import</span> <span class="n">EnvSpec</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">gymnasium.experimental.functional</span><span class="w"> </span><span class="kn">import</span> <span class="n">ActType</span><span class="p">,</span> <span class="n">FuncEnv</span><span class="p">,</span> <span class="n">ObsType</span><span class="p">,</span> <span class="n">StateType</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">gymnasium.utils</span><span class="w"> </span><span class="kn">import</span> <span class="n">seeding</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">gymnasium.vector</span><span class="w"> </span><span class="kn">import</span> <span class="n">AutoresetMode</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">gymnasium.vector.utils</span><span class="w"> </span><span class="kn">import</span> <span class="n">batch_space</span>
<span class="n">PRNGKeyType</span><span class="p">:</span> <span class="n">TypeAlias</span> <span class="o">=</span> <span class="n">jax</span><span class="o">.</span><span class="n">Array</span>
<div class="viewcode-block" id="FunctionalJaxEnv">
<a class="viewcode-back" href="../../../../api/functional/#gymnasium.envs.functional_jax_env.FunctionalJaxEnv">[docs]</a>
<span class="k">class</span><span class="w"> </span><span class="nc">FunctionalJaxEnv</span><span class="p">(</span><span class="n">gym</span><span class="o">.</span><span class="n">Env</span><span class="p">,</span> <span class="n">Generic</span><span class="p">[</span><span class="n">StateType</span><span class="p">]):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;A conversion layer for jax-based environments.&quot;&quot;&quot;</span>
<span class="n">state</span><span class="p">:</span> <span class="n">StateType</span>
<span class="n">rng</span><span class="p">:</span> <span class="n">PRNGKeyType</span>
<span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">func_env</span><span class="p">:</span> <span class="n">FuncEnv</span><span class="p">,</span>
<span class="n">metadata</span><span class="p">:</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">render_mode</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">spec</span><span class="p">:</span> <span class="n">EnvSpec</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Initialize the environment from a FuncEnv.&quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">metadata</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="c1"># metadata.get(&quot;jax&quot;, False) can be used downstream to know that the environment returns jax arrays</span>
<span class="n">metadata</span> <span class="o">=</span> <span class="p">{</span><span class="s2">&quot;render_mode&quot;</span><span class="p">:</span> <span class="p">[],</span> <span class="s2">&quot;jax&quot;</span><span class="p">:</span> <span class="kc">True</span><span class="p">}</span>
<span class="bp">self</span><span class="o">.</span><span class="n">func_env</span> <span class="o">=</span> <span class="n">func_env</span>
<span class="bp">self</span><span class="o">.</span><span class="n">observation_space</span> <span class="o">=</span> <span class="n">func_env</span><span class="o">.</span><span class="n">observation_space</span>
<span class="bp">self</span><span class="o">.</span><span class="n">action_space</span> <span class="o">=</span> <span class="n">func_env</span><span class="o">.</span><span class="n">action_space</span>
<span class="bp">self</span><span class="o">.</span><span class="n">metadata</span> <span class="o">=</span> <span class="n">metadata</span>
<span class="bp">self</span><span class="o">.</span><span class="n">render_mode</span> <span class="o">=</span> <span class="n">render_mode</span>
<span class="bp">self</span><span class="o">.</span><span class="n">spec</span> <span class="o">=</span> <span class="n">spec</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">render_mode</span> <span class="o">==</span> <span class="s2">&quot;rgb_array&quot;</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">render_state</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">func_env</span><span class="o">.</span><span class="n">render_init</span><span class="p">()</span>
<span class="k">else</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">render_state</span> <span class="o">=</span> <span class="kc">None</span>
<span class="n">np_random</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="n">seeding</span><span class="o">.</span><span class="n">np_random</span><span class="p">()</span>
<span class="n">seed</span> <span class="o">=</span> <span class="n">np_random</span><span class="o">.</span><span class="n">integers</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="o">**</span><span class="mi">32</span> <span class="o">-</span> <span class="mi">1</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="s2">&quot;uint32&quot;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">rng</span> <span class="o">=</span> <span class="n">jrng</span><span class="o">.</span><span class="n">PRNGKey</span><span class="p">(</span><span class="n">seed</span><span class="p">)</span>
<div class="viewcode-block" id="FunctionalJaxEnv.reset">
<a class="viewcode-back" href="../../../../api/functional/#gymnasium.envs.functional_jax_env.FunctionalJaxEnv.reset">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">reset</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">seed</span><span class="p">:</span> <span class="nb">int</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">options</span><span class="p">:</span> <span class="nb">dict</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Resets the environment using the seed.&quot;&quot;&quot;</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
<span class="k">if</span> <span class="n">seed</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">rng</span> <span class="o">=</span> <span class="n">jrng</span><span class="o">.</span><span class="n">PRNGKey</span><span class="p">(</span><span class="n">seed</span><span class="p">)</span>
<span class="n">rng</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">rng</span> <span class="o">=</span> <span class="n">jrng</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">rng</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">state</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">func_env</span><span class="o">.</span><span class="n">initial</span><span class="p">(</span><span class="n">rng</span><span class="o">=</span><span class="n">rng</span><span class="p">)</span>
<span class="n">obs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">func_env</span><span class="o">.</span><span class="n">observation</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">state</span><span class="p">,</span> <span class="n">rng</span><span class="p">)</span>
<span class="n">info</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">func_env</span><span class="o">.</span><span class="n">state_info</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">state</span><span class="p">)</span>
<span class="k">return</span> <span class="n">obs</span><span class="p">,</span> <span class="n">info</span></div>
<div class="viewcode-block" id="FunctionalJaxEnv.step">
<a class="viewcode-back" href="../../../../api/functional/#gymnasium.envs.functional_jax_env.FunctionalJaxEnv.step">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">step</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">action</span><span class="p">:</span> <span class="n">ActType</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Steps through the environment using the action.&quot;&quot;&quot;</span>
<span class="n">rng</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">rng</span> <span class="o">=</span> <span class="n">jrng</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">rng</span><span class="p">)</span>
<span class="n">next_state</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">func_env</span><span class="o">.</span><span class="n">transition</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">state</span><span class="p">,</span> <span class="n">action</span><span class="p">,</span> <span class="n">rng</span><span class="p">)</span>
<span class="n">observation</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">func_env</span><span class="o">.</span><span class="n">observation</span><span class="p">(</span><span class="n">next_state</span><span class="p">,</span> <span class="n">rng</span><span class="p">)</span>
<span class="n">reward</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">func_env</span><span class="o">.</span><span class="n">reward</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">state</span><span class="p">,</span> <span class="n">action</span><span class="p">,</span> <span class="n">next_state</span><span class="p">,</span> <span class="n">rng</span><span class="p">)</span>
<span class="n">terminated</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">func_env</span><span class="o">.</span><span class="n">terminal</span><span class="p">(</span><span class="n">next_state</span><span class="p">,</span> <span class="n">rng</span><span class="p">)</span>
<span class="n">info</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">func_env</span><span class="o">.</span><span class="n">transition_info</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">state</span><span class="p">,</span> <span class="n">action</span><span class="p">,</span> <span class="n">next_state</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">state</span> <span class="o">=</span> <span class="n">next_state</span>
<span class="k">return</span> <span class="n">observation</span><span class="p">,</span> <span class="nb">float</span><span class="p">(</span><span class="n">reward</span><span class="p">),</span> <span class="nb">bool</span><span class="p">(</span><span class="n">terminated</span><span class="p">),</span> <span class="kc">False</span><span class="p">,</span> <span class="n">info</span></div>
<div class="viewcode-block" id="FunctionalJaxEnv.render">
<a class="viewcode-back" href="../../../../api/functional/#gymnasium.envs.functional_jax_env.FunctionalJaxEnv.render">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">render</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Returns the render state if `render_mode` is &quot;rgb_array&quot;.&quot;&quot;&quot;</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">render_mode</span> <span class="o">==</span> <span class="s2">&quot;rgb_array&quot;</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">render_state</span><span class="p">,</span> <span class="n">image</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">func_env</span><span class="o">.</span><span class="n">render_image</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">state</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">render_state</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">image</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span></div>
<span class="k">def</span><span class="w"> </span><span class="nf">close</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Closes the environments and render state if set.&quot;&quot;&quot;</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">render_state</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">func_env</span><span class="o">.</span><span class="n">render_close</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">render_state</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">render_state</span> <span class="o">=</span> <span class="kc">None</span></div>
<span class="k">class</span><span class="w"> </span><span class="nc">FunctionalJaxVectorEnv</span><span class="p">(</span>
<span class="n">gym</span><span class="o">.</span><span class="n">vector</span><span class="o">.</span><span class="n">VectorEnv</span><span class="p">[</span><span class="n">ObsType</span><span class="p">,</span> <span class="n">ActType</span><span class="p">,</span> <span class="n">Any</span><span class="p">],</span> <span class="n">Generic</span><span class="p">[</span><span class="n">ObsType</span><span class="p">,</span> <span class="n">ActType</span><span class="p">,</span> <span class="n">StateType</span><span class="p">]</span>
<span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;A vector env implementation for functional Jax envs.&quot;&quot;&quot;</span>
<span class="n">state</span><span class="p">:</span> <span class="n">StateType</span>
<span class="n">rng</span><span class="p">:</span> <span class="n">PRNGKeyType</span>
<span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">func_env</span><span class="p">:</span> <span class="n">FuncEnv</span><span class="p">[</span><span class="n">StateType</span><span class="p">,</span> <span class="n">ObsType</span><span class="p">,</span> <span class="n">ActType</span><span class="p">,</span> <span class="n">Any</span><span class="p">,</span> <span class="n">Any</span><span class="p">,</span> <span class="n">Any</span><span class="p">,</span> <span class="n">Any</span><span class="p">],</span>
<span class="n">num_envs</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span>
<span class="n">max_episode_steps</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span>
<span class="n">metadata</span><span class="p">:</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">render_mode</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">spec</span><span class="p">:</span> <span class="n">EnvSpec</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Initialize the environment from a FuncEnv.&quot;&quot;&quot;</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
<span class="k">if</span> <span class="n">metadata</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">metadata</span> <span class="o">=</span> <span class="p">{</span><span class="s2">&quot;autoreset_mode&quot;</span><span class="p">:</span> <span class="n">AutoresetMode</span><span class="o">.</span><span class="n">NEXT_STEP</span><span class="p">}</span>
<span class="bp">self</span><span class="o">.</span><span class="n">func_env</span> <span class="o">=</span> <span class="n">func_env</span>
<span class="bp">self</span><span class="o">.</span><span class="n">num_envs</span> <span class="o">=</span> <span class="n">num_envs</span>
<span class="bp">self</span><span class="o">.</span><span class="n">single_observation_space</span> <span class="o">=</span> <span class="n">func_env</span><span class="o">.</span><span class="n">observation_space</span>
<span class="bp">self</span><span class="o">.</span><span class="n">single_action_space</span> <span class="o">=</span> <span class="n">func_env</span><span class="o">.</span><span class="n">action_space</span>
<span class="bp">self</span><span class="o">.</span><span class="n">observation_space</span> <span class="o">=</span> <span class="n">batch_space</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">single_observation_space</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">num_envs</span>
<span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">action_space</span> <span class="o">=</span> <span class="n">batch_space</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">single_action_space</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">num_envs</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">metadata</span> <span class="o">=</span> <span class="n">metadata</span>
<span class="bp">self</span><span class="o">.</span><span class="n">render_mode</span> <span class="o">=</span> <span class="n">render_mode</span>
<span class="bp">self</span><span class="o">.</span><span class="n">spec</span> <span class="o">=</span> <span class="n">spec</span>
<span class="bp">self</span><span class="o">.</span><span class="n">time_limit</span> <span class="o">=</span> <span class="n">max_episode_steps</span>
<span class="bp">self</span><span class="o">.</span><span class="n">steps</span> <span class="o">=</span> <span class="n">jnp</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">num_envs</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">jnp</span><span class="o">.</span><span class="n">int32</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">prev_done</span> <span class="o">=</span> <span class="n">jnp</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">num_envs</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">jnp</span><span class="o">.</span><span class="n">bool_</span><span class="p">)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">render_mode</span> <span class="o">==</span> <span class="s2">&quot;rgb_array&quot;</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">render_state</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">func_env</span><span class="o">.</span><span class="n">render_init</span><span class="p">()</span>
<span class="k">else</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">render_state</span> <span class="o">=</span> <span class="kc">None</span>
<span class="n">np_random</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="n">seeding</span><span class="o">.</span><span class="n">np_random</span><span class="p">()</span>
<span class="n">seed</span> <span class="o">=</span> <span class="n">np_random</span><span class="o">.</span><span class="n">integers</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="o">**</span><span class="mi">32</span> <span class="o">-</span> <span class="mi">1</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="s2">&quot;uint32&quot;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">rng</span> <span class="o">=</span> <span class="n">jrng</span><span class="o">.</span><span class="n">PRNGKey</span><span class="p">(</span><span class="n">seed</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">func_env</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">jax</span><span class="o">.</span><span class="n">vmap</span><span class="p">)</span>
<span class="k">def</span><span class="w"> </span><span class="nf">reset</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">seed</span><span class="p">:</span> <span class="nb">int</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">options</span><span class="p">:</span> <span class="nb">dict</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Resets the environment.&quot;&quot;&quot;</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
<span class="k">if</span> <span class="n">seed</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">rng</span> <span class="o">=</span> <span class="n">jrng</span><span class="o">.</span><span class="n">PRNGKey</span><span class="p">(</span><span class="n">seed</span><span class="p">)</span>
<span class="n">rng</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">rng</span> <span class="o">=</span> <span class="n">jrng</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">rng</span><span class="p">)</span>
<span class="n">rng</span> <span class="o">=</span> <span class="n">jrng</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">rng</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">num_envs</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">state</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">func_env</span><span class="o">.</span><span class="n">initial</span><span class="p">(</span><span class="n">rng</span><span class="o">=</span><span class="n">rng</span><span class="p">)</span>
<span class="n">obs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">func_env</span><span class="o">.</span><span class="n">observation</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">state</span><span class="p">,</span> <span class="n">rng</span><span class="p">)</span>
<span class="n">info</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">func_env</span><span class="o">.</span><span class="n">state_info</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">state</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">steps</span> <span class="o">=</span> <span class="n">jnp</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">num_envs</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">jnp</span><span class="o">.</span><span class="n">int32</span><span class="p">)</span>
<span class="k">return</span> <span class="n">obs</span><span class="p">,</span> <span class="n">info</span>
<span class="k">def</span><span class="w"> </span><span class="nf">step</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">action</span><span class="p">:</span> <span class="n">ActType</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Steps through the environment using the action.&quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">steps</span> <span class="o">+=</span> <span class="mi">1</span>
<span class="n">rng</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">rng</span> <span class="o">=</span> <span class="n">jrng</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">rng</span><span class="p">)</span>
<span class="n">rng</span> <span class="o">=</span> <span class="n">jrng</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">rng</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">num_envs</span><span class="p">)</span>
<span class="n">next_state</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">func_env</span><span class="o">.</span><span class="n">transition</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">state</span><span class="p">,</span> <span class="n">action</span><span class="p">,</span> <span class="n">rng</span><span class="p">)</span>
<span class="n">reward</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">func_env</span><span class="o">.</span><span class="n">reward</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">state</span><span class="p">,</span> <span class="n">action</span><span class="p">,</span> <span class="n">next_state</span><span class="p">,</span> <span class="n">rng</span><span class="p">)</span>
<span class="n">terminated</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">func_env</span><span class="o">.</span><span class="n">terminal</span><span class="p">(</span><span class="n">next_state</span><span class="p">,</span> <span class="n">rng</span><span class="p">)</span>
<span class="n">truncated</span> <span class="o">=</span> <span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">steps</span> <span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">time_limit</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">time_limit</span> <span class="o">&gt;</span> <span class="mi">0</span>
<span class="k">else</span> <span class="n">jnp</span><span class="o">.</span><span class="n">zeros_like</span><span class="p">(</span><span class="n">terminated</span><span class="p">)</span>
<span class="p">)</span>
<span class="n">info</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">func_env</span><span class="o">.</span><span class="n">transition_info</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">state</span><span class="p">,</span> <span class="n">action</span><span class="p">,</span> <span class="n">next_state</span><span class="p">)</span>
<span class="k">if</span> <span class="n">jnp</span><span class="o">.</span><span class="n">any</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">prev_done</span><span class="p">):</span>
<span class="n">to_reset</span> <span class="o">=</span> <span class="n">jnp</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">prev_done</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
<span class="n">reset_count</span> <span class="o">=</span> <span class="n">to_reset</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="n">rng</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">rng</span> <span class="o">=</span> <span class="n">jrng</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">rng</span><span class="p">)</span>
<span class="n">rng</span> <span class="o">=</span> <span class="n">jrng</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">rng</span><span class="p">,</span> <span class="n">reset_count</span><span class="p">)</span>
<span class="n">new_initials</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">func_env</span><span class="o">.</span><span class="n">initial</span><span class="p">(</span><span class="n">rng</span><span class="p">)</span>
<span class="n">next_state</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">state</span><span class="o">.</span><span class="n">at</span><span class="p">[</span><span class="n">to_reset</span><span class="p">]</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="n">new_initials</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">steps</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">steps</span><span class="o">.</span><span class="n">at</span><span class="p">[</span><span class="n">to_reset</span><span class="p">]</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
<span class="n">terminated</span> <span class="o">=</span> <span class="n">terminated</span><span class="o">.</span><span class="n">at</span><span class="p">[</span><span class="n">to_reset</span><span class="p">]</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span>
<span class="n">truncated</span> <span class="o">=</span> <span class="n">truncated</span><span class="o">.</span><span class="n">at</span><span class="p">[</span><span class="n">to_reset</span><span class="p">]</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">prev_done</span> <span class="o">=</span> <span class="n">jnp</span><span class="o">.</span><span class="n">logical_or</span><span class="p">(</span><span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">)</span>
<span class="n">rng</span> <span class="o">=</span> <span class="n">jrng</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">rng</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">num_envs</span><span class="p">)</span>
<span class="n">observation</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">func_env</span><span class="o">.</span><span class="n">observation</span><span class="p">(</span><span class="n">next_state</span><span class="p">,</span> <span class="n">rng</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">state</span> <span class="o">=</span> <span class="n">next_state</span>
<span class="k">return</span> <span class="n">observation</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span>
<span class="k">def</span><span class="w"> </span><span class="nf">render</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Returns the render state if `render_mode` is &quot;rgb_array&quot;.&quot;&quot;&quot;</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">render_mode</span> <span class="o">==</span> <span class="s2">&quot;rgb_array&quot;</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">render_state</span><span class="p">,</span> <span class="n">image</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">func_env</span><span class="o">.</span><span class="n">render_image</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">state</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">render_state</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">image</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span>
<span class="k">def</span><span class="w"> </span><span class="nf">close</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Closes the environments and render state if set.&quot;&quot;&quot;</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">render_state</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">func_env</span><span class="o">.</span><span class="n">render_close</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">render_state</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">render_state</span> <span class="o">=</span> <span class="kc">None</span>
</pre></div>
</article>
</div>
<footer>
<div class="related-pages">
</div>
<div class="bottom-of-page">
<div class="left-details">
<div class="copyright">
Copyright &#169; 2025 Farama Foundation
</div>
<!--
Made with <a href="https://www.sphinx-doc.org/">Sphinx</a> and <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s
<a href="https://github.com/pradyunsg/furo">Furo</a>
-->
</div>
<div class="right-details">
<div class="icons">
<a class="muted-link" href="https://github.com/Farama-Foundation/Gymnasium/"
aria-label="On GitHub">
<svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 16 16">
<path fill-rule="evenodd"
d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0 0 16 8c0-4.42-3.58-8-8-8z">
</path>
</svg>
</a>
</div>
</div>
</div>
</footer>
</div>
<aside class="toc-drawer no-toc">
</aside>
</div>
</div>
</div>
<script>
// Open or close the Farama projects menu.
// The "active" class on .farama-header-menu is the single source of truth; the
// button's aria-expanded and the container's aria-hidden are updated to describe
// the state the menu is about to enter, then the class itself is flipped.
const toggleMenu = () => {
  const menu = document.querySelector(".farama-header-menu");
  const menuBtn = document.querySelector(".farama-header-menu__btn");
  const menuContainer = document.querySelector(".farama-header-menu-container");
  const willOpen = !menu.classList.contains("active");
  menuBtn.setAttribute("aria-expanded", willOpen ? "true" : "false");
  menuContainer.setAttribute("aria-hidden", willOpen ? "false" : "true");
  menu.classList.toggle("active");
};
// Both the header menu button and the close button inside the menu run the same
// toggle handler, so either one flips the menu between open and closed.
document.querySelector(".farama-header-menu__btn").addEventListener("click", toggleMenu);
document.getElementById("farama-close-menu").addEventListener("click", toggleMenu);
</script>
<script async src="https://www.googletagmanager.com/gtag/js?id=G-6H9C8TWXZ8"></script>
<script>
// Start Google Analytics reporting: make sure the shared window.dataLayer queue
// exists, then enqueue the 'js' timestamp command and the 'config' command for
// this site's measurement ID.
const enableGtag = () => {
  if (!window.dataLayer) {
    window.dataLayer = [];
  }
  // Kept as a classic function so that each queued entry is the `arguments`
  // object itself (not an array copy), exactly as before.
  const gtag = function () {
    window.dataLayer.push(arguments);
  };
  gtag('js', new Date());
  gtag('config', 'G-6H9C8TWXZ8');
}; // explicit terminator: the statement that follows begins with "("
// Cookie-consent banner.
// The visitor's choice is kept in localStorage under "acceptedCookieAlert":
//   - nothing stored yet -> show the banner with "Deny" / "Allow" buttons;
//   - stored "true"      -> enable analytics straight away;
//   - anything else      -> do nothing.
(() => {
  const STORAGE_KEY = "acceptedCookieAlert";
  const storedChoice = localStorage.getItem(STORAGE_KEY);

  if (storedChoice) {
    if (storedChoice === "true") {
      enableGtag();
    }
    return;
  }

  const banner = document.createElement("div");
  banner.classList.add("cookie-alert");

  const inner = document.createElement("div");
  inner.classList.add("cookie-alert__container");

  const message = document.createElement("p");
  message.innerHTML = "This page uses <a href=\"https://analytics.google.com/\">\nGoogle Analytics</a> to collect statistics.";
  inner.appendChild(message);

  // Persist the decision (localStorage stores it as the string "true"/"false")
  // and take the banner off the page.
  const remember = (accepted) => {
    localStorage.setItem(STORAGE_KEY, accepted);
    banner.remove();
  };

  // Both buttons share the same classes; only label, id and handler differ.
  const makeButton = (label, id, onClick) => {
    const button = document.createElement("button");
    button.innerText = label;
    button.className = "farama-btn cookie-alert__button";
    button.id = id;
    button.addEventListener("click", onClick);
    return button;
  };

  const declineBtn = makeButton("Deny", "cookie-alert__decline", () => {
    remember(false);
  });
  const acceptBtn = makeButton("Allow", "cookie-alert__accept", () => {
    remember(true);
    enableGtag();
  });

  inner.appendChild(declineBtn);
  inner.appendChild(acceptBtn);
  banner.appendChild(inner);
  document.body.appendChild(banner);
})();
</script>
<script src="../../../../_static/documentation_options.js?v=151cd43d"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../../../_static/scripts/furo.js?v=7660844c"></script>
<script>
// Build a <ul class="farama-header-menu-list"> containing one <li><a> entry per
// project.
//   projects      - iterable of objects providing `name`, `link` and, optionally,
//                   `image` (a path appended to the global `imagesBasepath`).
//   displayImages - when truthy, each link gets a logo <img> before its text;
//                   projects without `image` use "/farama_black.svg".
// Returns the populated <ul> element (not yet attached to the document).
const createProjectsList = (projects, displayImages) => {
  const list = document.createElement('ul');
  list.className = 'farama-header-menu-list';

  [...projects].forEach((project) => {
    const item = document.createElement("li");
    const anchor = document.createElement("a");
    anchor.href = project.link;
    item.appendChild(anchor);

    if (displayImages) {
      const imagePath = project.image ? project.image : "/farama_black.svg";
      const logo = document.createElement("img");
      logo.src = imagesBasepath + imagePath;
      logo.alt = `${project.name} logo`;
      logo.className = "farama-black-logo-invert";
      anchor.appendChild(logo);
    }

    // The visible label always comes after the optional logo.
    anchor.appendChild(document.createTextNode(project.name));
    list.appendChild(item);
  });

  return list;
};
// Create menu with Farama projects by using the API at farama.org/api/projects.json
// Create and open a cross-origin request for `url` using HTTP `method`.
// Returns an opened (not yet sent) XMLHttpRequest with responseType "json" when
// the browser's XHR exposes `withCredentials`; otherwise an opened
// XDomainRequest if that constructor exists; otherwise null.
// Callers must be prepared for the null result.
const createCORSRequest = (method, url) => {
  const request = new XMLHttpRequest();
  request.responseType = 'json';

  if ("withCredentials" in request) {
    // Third argument `true` = asynchronous.
    request.open(method, url, true);
    return request;
  }

  if (typeof XDomainRequest == "undefined") {
    // CORS not supported.
    return null;
  }

  // IE8 & IE9
  const legacyRequest = new XDomainRequest();
  legacyRequest.open(method, url);
  return legacyRequest;
};
// Fetch the Farama project list and render it into the header menu body.
// `imagesBasepath` is also read by createProjectsList when it builds logo URLs.
const url = 'https://farama.org/api/projects.json';
const imagesBasepath = "https://farama.org/assets/images";
const method = 'GET';
let xhr = createCORSRequest(method, url);

if (xhr === null) {
  // createCORSRequest returns null when the browser has no CORS-capable request
  // object; previously this path crashed on `xhr.onload = ...`.
  console.error("Unable to load projects: CORS requests are not supported");
} else {
  xhr.onload = () => {
    const jsonResponse = xhr.response;
    // With responseType "json" the response is null when the body could not be
    // parsed; bail out instead of throwing inside Object.keys().
    if (jsonResponse === null || typeof jsonResponse !== "object") {
      console.error("Unable to load projects: unexpected response", xhr.status);
      return;
    }

    // Menu layout: a section is either a flat list of projects (array) or an
    // object of named sub-sections, each holding a list of projects.
    const sections = {
      "Core Projects": [],
      "Mature Projects": {
        "Documentation": [],
        "Repositories": [],
      },
      "Incubating Projects": {
        "Documentation": [],
        "Repositories": [],
      },
      "Foundation": [
        {
          name: "About",
          link: "https://farama.org/about"
        },
        {
          name: "Standards",
          link: "https://farama.org/project_standards",
        },
        {
          name: "Donate",
          link: "https://farama.org/donations"
        }
      ]
    };

    // Categorize projects
    Object.keys(jsonResponse).forEach((key) => {
      // Declared locally; the original assigned an undeclared (implicit global) name.
      const projectJson = jsonResponse[key];
      // A missing `website` key is treated like an explicit null so the link
      // falls back to the GitHub URL instead of becoming `undefined`.
      const hasWebsite = projectJson.website != null;
      projectJson.link = hasWebsite ? projectJson.website : projectJson.github;
      const subSectionName = hasWebsite ? "Documentation" : "Repositories";

      if (projectJson.type === "core") {
        sections["Core Projects"].push(projectJson);
      } else if (projectJson.type === "mature") {
        sections["Mature Projects"][subSectionName].push(projectJson);
      } else {
        // Every other type is listed under the incubating projects.
        sections["Incubating Projects"][subSectionName].push(projectJson);
      }
    });

    const menuContainer = document.querySelector(".farama-header-menu__body");
    Object.keys(sections).forEach((key) => {
      const sectionElem = Object.assign(
        document.createElement('div'), {
          className: 'farama-header-menu__section',
        }
      );
      sectionElem.appendChild(Object.assign(document.createElement('span'),
        {
          className: 'farama-header-menu__section-title',
          innerText: key
        }
      ));

      if (!Array.isArray(sections[key])) {
        // Section with sub-sections: lay the sub-lists out side by side.
        const subSections = sections[key];
        const subSectionContainerElem = Object.assign(
          document.createElement('div'), {
            className: 'farama-header-menu__subsections-container',
            style: 'display: flex'
          }
        );
        Object.keys(subSections).forEach((subKey) => {
          const subSectionElem = Object.assign(
            document.createElement('div'), {
              className: 'farama-header-menu__subsection',
            }
          );
          subSectionElem.appendChild(Object.assign(document.createElement('span'),
            {
              className: 'farama-header-menu__subsection-title',
              innerText: subKey
            }
          ));
          // Only project groups have sub-sections, so logos are always shown here
          // (the previous `key !== 'Foundation'` check could never be false in
          // this branch because "Foundation" is a flat list).
          const ulElem = createProjectsList(subSections[subKey], true);
          subSectionElem.appendChild(ulElem);
          subSectionContainerElem.appendChild(subSectionElem);
        });
        sectionElem.appendChild(subSectionContainerElem);
      } else {
        const projects = sections[key];
        const ulElem = createProjectsList(projects, true);
        sectionElem.appendChild(ulElem);
      }
      menuContainer.appendChild(sectionElem);
    });
  };
  xhr.onerror = function() {
    console.error("Unable to load projects");
  };
  xhr.send();
}
</script>
<script>
// Repository coordinates for the versioning menu.
// NOTE(review): presumably read by the scripts inside versioning_menu.html; confirm there.
const versioningConfig = {
  githubUser: 'Farama-Foundation',
  githubRepo: 'Gymnasium',
};
// Fetch the shared versioning menu fragment and append it to the page.
// Every failure mode (non-200 status, network error, body read error) ends in a
// console warning rather than an unhandled promise rejection.
fetch('/main/_static/versioning/versioning_menu.html')
  .then(response => {
    if (response.status !== 200) {
      console.warn("Unable to load versioning menu", response);
      return undefined;
    }
    // Return the inner promise so that a failure while reading the body reaches
    // the catch handler below.
    return response.text().then(text => {
      const container = document.createElement("div");
      container.innerHTML = text;
      document.querySelector("body").appendChild(container);
      // innerHTML doesn't evaluate scripts, so each one is replaced by a freshly
      // created <script> element with the same attributes and source text.
      Array.from(container.querySelectorAll("script")).forEach(oldScript => {
        const newScript = document.createElement("script");
        Array.from(oldScript.attributes).forEach(attr => newScript.setAttribute(attr.name, attr.value));
        newScript.appendChild(document.createTextNode(oldScript.innerHTML));
        oldScript.parentNode.replaceChild(newScript, oldScript);
      });
    });
  })
  .catch(error => {
    // fetch() rejects on network failure; previously nothing handled that.
    console.warn("Unable to load versioning menu", error);
  });
</script>
</body>
</html>

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,848 @@
<!doctype html>
<html class="no-js" lang="en" data-content_root="../../../../">
<head><meta charset="utf-8"/>
<meta name="viewport" content="width=device-width,initial-scale=1"/>
<meta name="color-scheme" content="light dark">
<meta name="description" content="A standard API for reinforcement learning and a diverse set of reference environments (formerly Gym)">
<meta property="og:title" content="Gymnasium Documentation" />
<meta property="og:type" content="website" />
<meta property="og:description" content="A standard API for reinforcement learning and a diverse set of reference environments (formerly Gym)" />
<meta property="og:url" content="https://gymnasium.farama.org/_modules/gymnasium/experimental/functional.html" /><meta property="og:image" content="https://gymnasium.farama.org/_static/img/gymnasium-github.png" /><meta name="twitter:card" content="summary_large_image"><link rel="index" title="Index" href="../../../../genindex/" /><link rel="search" title="Search" href="../../../../search/" />
<link rel="canonical" href="https://gymnasium.farama.org/_modules/gymnasium/experimental/functional.html" />
<link rel="shortcut icon" href="../../../../_static/favicon.png"/><!-- Generated with Sphinx 7.4.7 and Furo 2023.08.19.dev1 -->
<title>gymnasium.experimental.functional - Gymnasium Documentation</title>
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/furo.css?v=3e7f4c72" />
<link rel="stylesheet" type="text/css" href="../../../../_static/sg_gallery.css?v=61a4c737" />
<link rel="stylesheet" type="text/css" href="../../../../_static/sg_gallery-binder.css?v=f4aeca0c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/sg_gallery-dataframe.css?v=2082cf3c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/sg_gallery-rendered-html.css?v=1277b6f3" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/furo-extensions.css?v=82c8b628" />
<style>
body {
--color-code-background: #f8f8f8;
--color-code-foreground: black;
}
@media not print {
body[data-theme="dark"] {
--color-code-background: #202020;
--color-code-foreground: #d0d0d0;
}
@media (prefers-color-scheme: dark) {
body:not([data-theme="light"]) {
--color-code-background: #202020;
--color-code-foreground: #d0d0d0;
}
}
}
</style></head>
<body>
<header class="farama-header" aria-label="Farama header">
<div class="farama-header__container">
<div class="farama-header__left--mobile">
<label class="nav-overlay-icon" for="__navigation">
<div class="visually-hidden">Toggle site navigation sidebar</div>
<svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
<defs></defs>
<line x1="0.5" y1="4" x2="23.5" y2="4"></line>
<line x1="0.232" y1="12" x2="23.5" y2="12"></line>
<line x1="0.232" y1="20" x2="23.5" y2="20"></line>
</svg>
</label>
</div>
<div class="farama-header__left farama-header__center--mobile">
<a href="../../../../">
<img class="farama-header__logo only-light" src="../../../../_static/img/gymnasium_black.svg" alt="Light Logo"/>
<img class="farama-header__logo only-dark" src="../../../../_static/img/gymnasium_white.svg" alt="Dark Logo"/>
<span class="farama-header__title">Gymnasium Documentation</span>
</a>
</div>
<div class="farama-header__right">
<div class="farama-header-menu">
<!-- Toggle for the Farama projects menu. The accessible name comes from
     aria-label, so the logo and the chevron are marked as decorative. -->
<button class="farama-header-menu__btn" type="button" aria-label="Open Farama Menu" aria-expanded="false" aria-haspopup="true" aria-controls="farama-menu">
  <img class="farama-black-logo-invert" src="../../../../_static/img/farama-logo-header.svg" alt="">
  <svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg" aria-hidden="true">
    <polyline style="stroke-linecap: round; stroke-linejoin: round; fill: none; stroke-width: 2px;" points="1 7 12 18 23 7"></polyline>
  </svg>
</button>
<div class="farama-header-menu-container farama-hidden" aria-hidden="true" id="farama-menu">
<div class="farama-header-menu__header">
<a href="https://farama.org">
<img class="farama-header-menu__logo farama-white-logo-invert" src="../../../../_static/img/farama_solid_white.svg" alt="Farama Foundation logo">
<span>Farama Foundation</span>
</a>
<div class="farama-header-menu-header__right">
<!-- Icon-only close button: aria-label supplies the accessible name and the
     cross icon is hidden from assistive technology. -->
<button id="farama-close-menu" type="button" aria-label="Close Farama Menu">
  <svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg" fill="none" stroke="currentColor"
       stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="icon-close" aria-hidden="true">
    <line x1="3" y1="21" x2="21" y2="3"></line>
    <line x1="3" y1="3" x2="21" y2="21"></line>
  </svg>
</button>
</div>
</div>
<div class="farama-header-menu__body">
<!-- Response from farama.org/api/projects.json -->
</div>
</div>
</div>
</div>
</div>
</header>
<script>
document.body.dataset.theme = localStorage.getItem("theme") || "auto";
</script>
<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
<symbol id="svg-toc" viewBox="0 0 24 24">
<title>Contents</title>
<svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 1024 1024">
<path d="M408 442h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8zm-8 204c0 4.4 3.6 8 8 8h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56zm504-486H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zm0 632H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zM115.4 518.9L271.7 642c5.8 4.6 14.4.5 14.4-6.9V388.9c0-7.4-8.5-11.5-14.4-6.9L115.4 505.1a8.74 8.74 0 0 0 0 13.8z"/>
</svg>
</symbol>
<symbol id="svg-menu" viewBox="0 0 24 24">
<title>Menu</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-menu">
<line x1="3" y1="12" x2="21" y2="12"></line>
<line x1="3" y1="6" x2="21" y2="6"></line>
<line x1="3" y1="18" x2="21" y2="18"></line>
</svg>
</symbol>
<symbol id="svg-arrow-right" viewBox="0 0 24 24">
<title>Expand</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-chevron-right">
<polyline points="9 18 15 12 9 6"></polyline>
</svg>
</symbol>
<symbol id="svg-sun" viewBox="0 0 24 24">
<title>Light mode</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="feather-sun">
<circle cx="12" cy="12" r="5"></circle>
<line x1="12" y1="1" x2="12" y2="3"></line>
<line x1="12" y1="21" x2="12" y2="23"></line>
<line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
<line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
<line x1="1" y1="12" x2="3" y2="12"></line>
<line x1="21" y1="12" x2="23" y2="12"></line>
<line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
<line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
</svg>
</symbol>
<symbol id="svg-moon" viewBox="0 0 24 24">
<title>Dark mode</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-moon">
<path stroke="none" d="M0 0h24v24H0z" fill="none" />
<path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z" />
</svg>
</symbol>
<symbol id="svg-sun-half" viewBox="0 0 24 24">
<title>Auto light/dark mode</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-shadow">
<path stroke="none" d="M0 0h24v24H0z" fill="none"/>
<circle cx="12" cy="12" r="9" />
<path d="M13 12h5" />
<path d="M13 15h4" />
<path d="M13 18h1" />
<path d="M13 9h4" />
<path d="M13 6h1" />
</svg>
</symbol>
</svg>
<input type="checkbox" class="sidebar-toggle" name="__navigation" id="__navigation">
<input type="checkbox" class="sidebar-toggle" name="__toc" id="__toc">
<label class="overlay sidebar-overlay" for="__navigation">
<div class="visually-hidden">Hide navigation sidebar</div>
</label>
<label class="overlay toc-overlay" for="__toc">
<div class="visually-hidden">Hide table of contents sidebar</div>
</label>
<div class="page">
<!--<header class="mobile-header">
<div class="header-left">
<label class="nav-overlay-icon" for="__navigation">
<div class="visually-hidden">Toggle site navigation sidebar</div>
<i class="icon"><svg><use href="#svg-menu"></use></svg></i>
</label>
</div>
<div class="header-center">
<a href="../../../../"><div class="brand">Gymnasium Documentation</div></a>
</div>
<div class="header-right">
<div class="theme-toggle-container theme-toggle-header">
<button class="theme-toggle">
<div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
<svg class="theme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
<svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg>
<svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
</button>
</div>
<label class="toc-overlay-icon toc-header-icon no-toc" for="__toc">
<div class="visually-hidden">Toggle table of contents sidebar</div>
<i class="icon"><svg><use href="#svg-toc"></use></svg></i>
</label>
</div>
</header>-->
<aside class="sidebar-drawer">
<div class="sidebar-container">
<div class="sidebar-sticky"><a class="farama-sidebar__title" href="../../../../">
<img class="farama-header__logo only-light" src="../../../../_static/img/gymnasium_black.svg" alt="Light Logo"/>
<img class="farama-header__logo only-dark" src="../../../../_static/img/gymnasium_white.svg" alt="Dark Logo"/>
<span class="farama-header__title">Gymnasium Documentation</span>
</a><form class="sidebar-search-container" method="get" action="../../../../search/" role="search">
<input class="sidebar-search" placeholder="Search" name="q" aria-label="Search">
<input type="hidden" name="check_keywords" value="yes">
<input type="hidden" name="area" value="default">
</form>
<div id="searchbox"></div><div class="sidebar-scroll"><div class="sidebar-tree">
<p class="caption" role="heading"><span class="caption-text">Introduction</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/basic_usage/">Basic Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/train_agent/">Training an Agent</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/create_custom_env/">Create a Custom Environment</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/record_agent/">Recording Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/speed_up_env/">Speeding Up Training</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/gym_compatibility/">Compatibility with Gym</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/migration_guide/">Migration Guide - v0.21 to v1.0.0</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">API</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../api/env/">Env</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../api/registry/">Make and register</a></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../api/spaces/">Spaces</a><input class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" role="switch" type="checkbox"/><label for="toctree-checkbox-1"><div class="visually-hidden">Toggle navigation of Spaces</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/spaces/fundamental/">Fundamental Spaces</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/spaces/composite/">Composite Spaces</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/spaces/utils/">Spaces Utils</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../api/wrappers/">Wrappers</a><input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" role="switch" type="checkbox"/><label for="toctree-checkbox-2"><div class="visually-hidden">Toggle navigation of Wrappers</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/wrappers/table/">List of Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/wrappers/misc_wrappers/">Misc Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/wrappers/action_wrappers/">Action Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/wrappers/observation_wrappers/">Observation Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/wrappers/reward_wrappers/">Reward Wrappers</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../api/vector/">Vectorize</a><input class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" role="switch" type="checkbox"/><label for="toctree-checkbox-3"><div class="visually-hidden">Toggle navigation of Vectorize</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/vector/wrappers/">Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/vector/async_vector_env/">AsyncVectorEnv</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/vector/sync_vector_env/">SyncVectorEnv</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/vector/utils/">Utility functions</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../../../api/utils/">Utility functions</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../api/functional/">Functional Env</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Environments</span></p>
<ul>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../environments/classic_control/">Classic Control</a><input class="toctree-checkbox" id="toctree-checkbox-4" name="toctree-checkbox-4" role="switch" type="checkbox"/><label for="toctree-checkbox-4"><div class="visually-hidden">Toggle navigation of Classic Control</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/classic_control/acrobot/">Acrobot</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/classic_control/cart_pole/">Cart Pole</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/classic_control/mountain_car_continuous/">Mountain Car Continuous</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/classic_control/mountain_car/">Mountain Car</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/classic_control/pendulum/">Pendulum</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../environments/box2d/">Box2D</a><input class="toctree-checkbox" id="toctree-checkbox-5" name="toctree-checkbox-5" role="switch" type="checkbox"/><label for="toctree-checkbox-5"><div class="visually-hidden">Toggle navigation of Box2D</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/box2d/bipedal_walker/">Bipedal Walker</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/box2d/car_racing/">Car Racing</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/box2d/lunar_lander/">Lunar Lander</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../environments/toy_text/">Toy Text</a><input class="toctree-checkbox" id="toctree-checkbox-6" name="toctree-checkbox-6" role="switch" type="checkbox"/><label for="toctree-checkbox-6"><div class="visually-hidden">Toggle navigation of Toy Text</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/toy_text/blackjack/">Blackjack</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/toy_text/taxi/">Taxi</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/toy_text/cliff_walking/">Cliff Walking</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/toy_text/frozen_lake/">Frozen Lake</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../environments/mujoco/">MuJoCo</a><input class="toctree-checkbox" id="toctree-checkbox-7" name="toctree-checkbox-7" role="switch" type="checkbox"/><label for="toctree-checkbox-7"><div class="visually-hidden">Toggle navigation of MuJoCo</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/ant/">Ant</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/half_cheetah/">Half Cheetah</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/hopper/">Hopper</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/humanoid/">Humanoid</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/humanoid_standup/">Humanoid Standup</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/inverted_double_pendulum/">Inverted Double Pendulum</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/inverted_pendulum/">Inverted Pendulum</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/pusher/">Pusher</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/reacher/">Reacher</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/swimmer/">Swimmer</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/walker2d/">Walker2D</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../../../environments/atari/">Atari</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../environments/third_party_environments/">External Environments</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Tutorials</span></p>
<ul>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../tutorials/gymnasium_basics/">Gymnasium Basics</a><input class="toctree-checkbox" id="toctree-checkbox-8" name="toctree-checkbox-8" role="switch" type="checkbox"/><label for="toctree-checkbox-8"><div class="visually-hidden">Toggle navigation of Gymnasium Basics</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/gymnasium_basics/environment_creation/">Make your own custom environment</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/gymnasium_basics/handling_time_limits/">Handling Time Limits</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/gymnasium_basics/implementing_custom_wrappers/">Implementing Custom Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/gymnasium_basics/load_quadruped_model/">Load custom quadruped robot environments</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../tutorials/training_agents/">Training Agents</a><input class="toctree-checkbox" id="toctree-checkbox-9" name="toctree-checkbox-9" role="switch" type="checkbox"/><label for="toctree-checkbox-9"><div class="visually-hidden">Toggle navigation of Training Agents</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/training_agents/blackjack_q_learning/">Solving Blackjack with Tabular Q-Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/training_agents/frozenlake_q_learning/">Solving Frozenlake with Tabular Q-Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/training_agents/mujoco_reinforce/">Training using REINFORCE for Mujoco</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/training_agents/vector_a2c/">Speeding up A2C Training with Vector Envs</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../../../tutorials/third-party-tutorials/">Third-Party Tutorials</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Development</span></p>
<ul>
<li class="toctree-l1"><a class="reference external" href="https://github.com/Farama-Foundation/Gymnasium">Github</a></li>
<li class="toctree-l1"><a class="reference external" href="https://arxiv.org/abs/2407.17032">Paper</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../gymnasium_release_notes/">Gymnasium Release Notes</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../gym_release_notes/">Gym Release Notes</a></li>
<li class="toctree-l1"><a class="reference external" href="https://github.com/Farama-Foundation/Gymnasium/blob/main/docs/README.md">Contribute to the Docs</a></li>
</ul>
</div>
</div>
</div>
</div>
</aside>
<div class="main-container">
<div class="main">
<div class="content">
<div class="article-container">
<a href="#" class="back-to-top muted-link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
<path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12z"></path>
</svg>
<span>Back to top</span>
</a>
<div class="content-icon-container"><div class="theme-toggle-container theme-toggle-content">
<button class="theme-toggle" title="Toggle color theme">
<div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
<svg class="theme-icon-when-auto">
<use href="#svg-sun-half"></use>
</svg>
<svg class="theme-icon-when-dark">
<use href="#svg-moon"></use>
</svg>
<svg class="theme-icon-when-light">
<use href="#svg-sun"></use>
</svg>
</button>
</div>
<label class="toc-overlay-icon toc-content-icon no-toc" for="__toc">
<div class="visually-hidden">Toggle table of contents sidebar</div>
<i class="icon"><svg>
<use href="#svg-toc"></use>
</svg></i>
</label>
</div>
<article role="main">
<h1>Source code for gymnasium.experimental.functional</h1><div class="highlight"><pre>
<span></span><span class="sd">&quot;&quot;&quot;Base class and definitions for an alternative, functional backend for gym envs, particularly suitable for hardware accelerated and otherwise transformed environments.&quot;&quot;&quot;</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">__future__</span><span class="w"> </span><span class="kn">import</span> <span class="n">annotations</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">collections.abc</span><span class="w"> </span><span class="kn">import</span> <span class="n">Callable</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">typing</span><span class="w"> </span><span class="kn">import</span> <span class="n">Any</span><span class="p">,</span> <span class="n">Generic</span><span class="p">,</span> <span class="n">TypeVar</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">numpy</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">np</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">gymnasium</span><span class="w"> </span><span class="kn">import</span> <span class="n">Space</span>
<span class="n">StateType</span> <span class="o">=</span> <span class="n">TypeVar</span><span class="p">(</span><span class="s2">&quot;StateType&quot;</span><span class="p">)</span>
<span class="n">ActType</span> <span class="o">=</span> <span class="n">TypeVar</span><span class="p">(</span><span class="s2">&quot;ActType&quot;</span><span class="p">)</span>
<span class="n">ObsType</span> <span class="o">=</span> <span class="n">TypeVar</span><span class="p">(</span><span class="s2">&quot;ObsType&quot;</span><span class="p">)</span>
<span class="n">RewardType</span> <span class="o">=</span> <span class="n">TypeVar</span><span class="p">(</span><span class="s2">&quot;RewardType&quot;</span><span class="p">)</span>
<span class="n">TerminalType</span> <span class="o">=</span> <span class="n">TypeVar</span><span class="p">(</span><span class="s2">&quot;TerminalType&quot;</span><span class="p">)</span>
<span class="n">RenderStateType</span> <span class="o">=</span> <span class="n">TypeVar</span><span class="p">(</span><span class="s2">&quot;RenderStateType&quot;</span><span class="p">)</span>
<span class="n">Params</span> <span class="o">=</span> <span class="n">TypeVar</span><span class="p">(</span><span class="s2">&quot;Params&quot;</span><span class="p">)</span>
<div class="viewcode-block" id="FuncEnv">
<a class="viewcode-back" href="../../../../api/functional/#gymnasium.experimental.functional.FuncEnv">[docs]</a>
<span class="k">class</span><span class="w"> </span><span class="nc">FuncEnv</span><span class="p">(</span>
<span class="n">Generic</span><span class="p">[</span>
<span class="n">StateType</span><span class="p">,</span> <span class="n">ObsType</span><span class="p">,</span> <span class="n">ActType</span><span class="p">,</span> <span class="n">RewardType</span><span class="p">,</span> <span class="n">TerminalType</span><span class="p">,</span> <span class="n">RenderStateType</span><span class="p">,</span> <span class="n">Params</span>
<span class="p">]</span>
<span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Base class (template) for functional envs.</span>
<span class="sd"> This API is meant to be used in a stateless manner, with the environment state being passed around explicitly.</span>
<span class="sd"> That being said, nothing here prevents users from using the environment statefully, it&#39;s just not recommended.</span>
<span class="sd"> A functional env consists of the following functions (in this case, instance methods):</span>
<span class="sd"> * initial: returns the initial state of the POMDP</span>
<span class="sd"> * observation: returns the observation in a given state</span>
<span class="sd"> * transition: returns the next state after taking an action in a given state</span>
<span class="sd"> * reward: returns the reward for a given (state, action, next_state) tuple</span>
<span class="sd"> * terminal: returns whether a given state is terminal</span>
<span class="sd"> * state_info: optional, returns a dict of info about a given state</span>
<span class="sd"> * step_info: optional, returns a dict of info about a given (state, action, next_state) tuple</span>
<span class="sd"> The class-based structure serves the purpose of allowing environment constants to be defined in the class,</span>
<span class="sd"> and then using them by name in the code itself.</span>
<span class="sd"> For the moment, this is predominantly for internal use. This API is likely to change, but in the future</span>
<span class="sd"> we intend to flesh it out and officially expose it to end users.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">observation_space</span><span class="p">:</span> <span class="n">Space</span>
<span class="n">action_space</span><span class="p">:</span> <span class="n">Space</span>
<span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">options</span><span class="p">:</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Initialize the environment constants.&quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="vm">__dict__</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">options</span> <span class="ow">or</span> <span class="p">{})</span>
<span class="bp">self</span><span class="o">.</span><span class="n">default_params</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_default_params</span><span class="p">()</span>
<div class="viewcode-block" id="FuncEnv.initial">
<a class="viewcode-back" href="../../../../api/functional/#gymnasium.experimental.functional.FuncEnv.initial">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">initial</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">rng</span><span class="p">:</span> <span class="n">Any</span><span class="p">,</span> <span class="n">params</span><span class="p">:</span> <span class="n">Params</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">StateType</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Generates the initial state of the environment with a random number generator.&quot;&quot;&quot;</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span></div>
<div class="viewcode-block" id="FuncEnv.transition">
<a class="viewcode-back" href="../../../../api/functional/#gymnasium.experimental.functional.FuncEnv.transition">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">transition</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">state</span><span class="p">:</span> <span class="n">StateType</span><span class="p">,</span> <span class="n">action</span><span class="p">:</span> <span class="n">ActType</span><span class="p">,</span> <span class="n">rng</span><span class="p">:</span> <span class="n">Any</span><span class="p">,</span> <span class="n">params</span><span class="p">:</span> <span class="n">Params</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">StateType</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Updates (transitions) the state with an action and random number generator.&quot;&quot;&quot;</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span></div>
<div class="viewcode-block" id="FuncEnv.observation">
<a class="viewcode-back" href="../../../../api/functional/#gymnasium.experimental.functional.FuncEnv.observation">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">observation</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">state</span><span class="p">:</span> <span class="n">StateType</span><span class="p">,</span> <span class="n">rng</span><span class="p">:</span> <span class="n">Any</span><span class="p">,</span> <span class="n">params</span><span class="p">:</span> <span class="n">Params</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">ObsType</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Generates an observation for a given state of an environment.&quot;&quot;&quot;</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span></div>
<div class="viewcode-block" id="FuncEnv.reward">
<a class="viewcode-back" href="../../../../api/functional/#gymnasium.experimental.functional.FuncEnv.reward">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">reward</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">state</span><span class="p">:</span> <span class="n">StateType</span><span class="p">,</span>
<span class="n">action</span><span class="p">:</span> <span class="n">ActType</span><span class="p">,</span>
<span class="n">next_state</span><span class="p">:</span> <span class="n">StateType</span><span class="p">,</span>
<span class="n">rng</span><span class="p">:</span> <span class="n">Any</span><span class="p">,</span>
<span class="n">params</span><span class="p">:</span> <span class="n">Params</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">RewardType</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Computes the reward for a given transition between `state`, `action` to `next_state`.&quot;&quot;&quot;</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span></div>
<div class="viewcode-block" id="FuncEnv.terminal">
<a class="viewcode-back" href="../../../../api/functional/#gymnasium.experimental.functional.FuncEnv.terminal">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">terminal</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">state</span><span class="p">:</span> <span class="n">StateType</span><span class="p">,</span> <span class="n">rng</span><span class="p">:</span> <span class="n">Any</span><span class="p">,</span> <span class="n">params</span><span class="p">:</span> <span class="n">Params</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">TerminalType</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Returns if the state is a final terminal state.&quot;&quot;&quot;</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span></div>
<div class="viewcode-block" id="FuncEnv.state_info">
<a class="viewcode-back" href="../../../../api/functional/#gymnasium.experimental.functional.FuncEnv.state_info">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">state_info</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">state</span><span class="p">:</span> <span class="n">StateType</span><span class="p">,</span> <span class="n">params</span><span class="p">:</span> <span class="n">Params</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">dict</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Info dict about a single state.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="p">{}</span></div>
<div class="viewcode-block" id="FuncEnv.transition_info">
<a class="viewcode-back" href="../../../../api/functional/#gymnasium.experimental.functional.FuncEnv.transition_info">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">transition_info</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">state</span><span class="p">:</span> <span class="n">StateType</span><span class="p">,</span>
<span class="n">action</span><span class="p">:</span> <span class="n">ActType</span><span class="p">,</span>
<span class="n">next_state</span><span class="p">:</span> <span class="n">StateType</span><span class="p">,</span>
<span class="n">params</span><span class="p">:</span> <span class="n">Params</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">dict</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Info dict about a full transition.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="p">{}</span></div>
<div class="viewcode-block" id="FuncEnv.transform">
<a class="viewcode-back" href="../../../../api/functional/#gymnasium.experimental.functional.FuncEnv.transform">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">transform</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">func</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[[</span><span class="n">Callable</span><span class="p">],</span> <span class="n">Callable</span><span class="p">]):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Functional transformations.&quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">initial</span> <span class="o">=</span> <span class="n">func</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">initial</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">transition</span> <span class="o">=</span> <span class="n">func</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">transition</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">observation</span> <span class="o">=</span> <span class="n">func</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">observation</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">reward</span> <span class="o">=</span> <span class="n">func</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">reward</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">terminal</span> <span class="o">=</span> <span class="n">func</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">terminal</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">state_info</span> <span class="o">=</span> <span class="n">func</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">state_info</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">step_info</span> <span class="o">=</span> <span class="n">func</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">transition_info</span><span class="p">)</span></div>
<div class="viewcode-block" id="FuncEnv.render_image">
<a class="viewcode-back" href="../../../../api/functional/#gymnasium.experimental.functional.FuncEnv.render_image">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">render_image</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">state</span><span class="p">:</span> <span class="n">StateType</span><span class="p">,</span>
<span class="n">render_state</span><span class="p">:</span> <span class="n">RenderStateType</span><span class="p">,</span>
<span class="n">params</span><span class="p">:</span> <span class="n">Params</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">tuple</span><span class="p">[</span><span class="n">RenderStateType</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Show the state.&quot;&quot;&quot;</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span></div>
<div class="viewcode-block" id="FuncEnv.render_init">
<a class="viewcode-back" href="../../../../api/functional/#gymnasium.experimental.functional.FuncEnv.render_init">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">render_init</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">params</span><span class="p">:</span> <span class="n">Params</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">RenderStateType</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Initialize the render state.&quot;&quot;&quot;</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span></div>
<div class="viewcode-block" id="FuncEnv.render_close">
<a class="viewcode-back" href="../../../../api/functional/#gymnasium.experimental.functional.FuncEnv.render_close">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">render_close</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">render_state</span><span class="p">:</span> <span class="n">RenderStateType</span><span class="p">,</span> <span class="n">params</span><span class="p">:</span> <span class="n">Params</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Close the render state.&quot;&quot;&quot;</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span></div>
<span class="k">def</span><span class="w"> </span><span class="nf">get_default_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Params</span> <span class="o">|</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Get the default params.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="kc">None</span></div>
</pre></div>
</article>
</div>
<footer>
<div class="related-pages">
</div>
<div class="bottom-of-page">
<div class="left-details">
<div class="copyright">
Copyright &#169; 2025 Farama Foundation
</div>
<!--
Made with <a href="https://www.sphinx-doc.org/">Sphinx</a> and <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s
<a href="https://github.com/pradyunsg/furo">Furo</a>
-->
</div>
<div class="right-details">
<div class="icons">
<a class="muted-link" href="https://github.com/Farama-Foundation/Gymnasium/"
aria-label="On GitHub">
<svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 16 16">
<path fill-rule="evenodd"
d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0 0 16 8c0-4.42-3.58-8-8-8z">
</path>
</svg>
</a>
</div>
</div>
</div>
</footer>
</div>
<aside class="toc-drawer no-toc">
</aside>
</div>
</div>
</div>
<script>
/**
 * Open or close the Farama projects menu in the page header.
 *
 * The ARIA state of the trigger button and of the menu panel is updated
 * first, based on whether the menu is currently open, and then the
 * "active" class on the menu wrapper is flipped.
 */
const toggleMenu = () => {
  const menu = document.querySelector(".farama-header-menu");
  const trigger = document.querySelector(".farama-header-menu__btn");
  const panel = document.querySelector(".farama-header-menu-container");

  // The menu is about to open exactly when it is not currently active.
  const willOpen = !menu.classList.contains("active");
  trigger.setAttribute("aria-expanded", willOpen ? "true" : "false");
  panel.setAttribute("aria-hidden", willOpen ? "false" : "true");

  menu.classList.toggle("active");
};

// Both the header button and the close button inside the panel toggle the menu.
[
  document.querySelector(".farama-header-menu__btn"),
  document.getElementById("farama-close-menu"),
].forEach((control) => {
  control.addEventListener("click", toggleMenu);
});
</script>
<script async src="https://www.googletagmanager.com/gtag/js?id=G-6H9C8TWXZ8"></script>
<script>
/**
 * Start Google Analytics reporting for this page.
 *
 * Ensures the global `dataLayer` queue exists and pushes the two bootstrap
 * commands ('js' with the current date, then 'config' with the property id).
 */
const enableGtag = () => {
  if (!window.dataLayer) {
    window.dataLayer = [];
  }
  // A regular function is required here: each command is queued as the
  // function's own `arguments` object, which arrow functions do not have.
  const gtag = function () {
    dataLayer.push(arguments);
  };
  gtag('js', new Date());
  gtag('config', 'G-6H9C8TWXZ8');
};
// Cookie-consent banner.
// If a choice is already stored under "acceptedCookieAlert", honour it
// (analytics is enabled only for the stored value "true"). Otherwise build
// the banner, and record the visitor's choice when a button is clicked.
(() => {
  const storedChoice = localStorage.getItem("acceptedCookieAlert");
  if (storedChoice) {
    if (storedChoice === "true") {
      enableGtag();
    }
    return;
  }

  const banner = document.createElement("div");
  banner.classList.add("cookie-alert");

  const bannerBody = document.createElement("div");
  bannerBody.classList.add("cookie-alert__container");

  const message = document.createElement("p");
  message.innerHTML = `This page uses <a href="https://analytics.google.com/">
Google Analytics</a> to collect statistics.`;
  bannerBody.appendChild(message);

  // Builds one banner button; `accepted` is the value written to storage.
  const makeChoiceButton = (label, id, accepted) => {
    const button = Object.assign(document.createElement("button"), {
      innerText: label,
      className: "farama-btn cookie-alert__button",
      id: id,
    });
    button.addEventListener("click", () => {
      localStorage.setItem("acceptedCookieAlert", accepted);
      banner.remove();
      if (accepted) {
        enableGtag();
      }
    });
    return button;
  };

  bannerBody.appendChild(makeChoiceButton("Deny", "cookie-alert__decline", false));
  bannerBody.appendChild(makeChoiceButton("Allow", "cookie-alert__accept", true));

  banner.appendChild(bannerBody);
  document.body.appendChild(banner);
})()
</script>
<script src="../../../../_static/documentation_options.js?v=151cd43d"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../../../_static/scripts/furo.js?v=7660844c"></script>
<script>
/**
 * Build the <ul> of project links shown in one section of the Farama menu.
 *
 * @param {Array<Object>} projects - Entries providing `name` and `link`, and
 *   optionally `image` (a path appended to `imagesBasepath`).
 * @param {boolean} displayImages - When truthy, each link is prefixed with the
 *   project's logo, falling back to the generic Farama logo.
 * @returns {HTMLUListElement} The populated list element.
 */
const createProjectsList = (projects, displayImages) => {
  const listElem = document.createElement('ul');
  listElem.className = 'farama-header-menu-list';

  for (const entry of projects) {
    const anchor = document.createElement("a");
    anchor.href = entry.link;

    if (displayImages) {
      const logo = document.createElement("img");
      logo.src = imagesBasepath + (entry.image ? entry.image : "/farama_black.svg");
      logo.alt = `${entry.name} logo`;
      logo.className = "farama-black-logo-invert";
      anchor.appendChild(logo);
    }
    anchor.appendChild(document.createTextNode(entry.name));

    const item = document.createElement("li");
    item.appendChild(anchor);
    listElem.appendChild(item);
  }

  return listElem;
};
// Create menu with Farama projects by using the API at farama.org/api/projects.json
/**
 * Create and open a cross-origin request object for the given method and URL.
 *
 * @param {string} method - HTTP method passed to `open`.
 * @param {string} url - Target URL passed to `open`.
 * @returns {?XMLHttpRequest} An opened, unsent request: an XMLHttpRequest
 *   with `responseType` "json" when it exposes `withCredentials`, otherwise
 *   an XDomainRequest when that constructor exists, otherwise `null`.
 */
const createCORSRequest = (method, url) => {
  const request = new XMLHttpRequest();
  request.responseType = 'json';

  if ("withCredentials" in request) {
    request.open(method, url, true);
    return request;
  }

  if (typeof XDomainRequest != "undefined") {
    // IE8 & IE9
    const legacyRequest = new XDomainRequest();
    legacyRequest.open(method, url);
    return legacyRequest;
  }

  // CORS not supported.
  return null;
};
const url = 'https://farama.org/api/projects.json';
const imagesBasepath = "https://farama.org/assets/images";
const method = 'GET';

// Fetch the project list and use it to populate the Farama header menu.
let xhr = createCORSRequest(method, url);

if (!xhr) {
  // createCORSRequest returns null when the browser cannot make CORS
  // requests; there is nothing to attach handlers to in that case.
  console.error("Unable to load projects");
} else {
  xhr.onload = () => {
    const jsonResponse = xhr.response;
    // With responseType "json" the response is null when the body could not
    // be parsed as JSON (for example an HTML error page), so bail out instead
    // of throwing inside Object.keys().
    if (jsonResponse === null || typeof jsonResponse !== "object") {
      console.error("Unable to load projects");
      return;
    }

    // Menu layout: a section is either a flat list of entries or an object
    // of named sub-sections, each holding a list of entries.
    const sections = {
      "Core Projects": [],
      "Mature Projects": {
        "Documentation": [],
        "Repositories": [],
      },
      "Incubating Projects": {
        "Documentation": [],
        "Repositories": [],
      },
      "Foundation": [
        {
          name: "About",
          link: "https://farama.org/about"
        },
        {
          name: "Standards",
          link: "https://farama.org/project_standards",
        },
        {
          name: "Donate",
          link: "https://farama.org/donations"
        }
      ]
    };

    // Categorize projects
    Object.keys(jsonResponse).forEach(key => {
      // Declared locally; the previous bare assignment leaked a global.
      const projectJson = jsonResponse[key];
      const hasWebsite = projectJson.website !== null;

      // Link to the documentation site when there is one, else to GitHub.
      projectJson.link = hasWebsite ? projectJson.website : projectJson.github;

      if (projectJson.type === "core") {
        sections["Core Projects"].push(projectJson);
        return;
      }

      // Every type other than "core" and "mature" is listed as incubating.
      const sectionName = projectJson.type === "mature"
        ? "Mature Projects"
        : "Incubating Projects";
      const subSectionName = hasWebsite ? "Documentation" : "Repositories";
      sections[sectionName][subSectionName].push(projectJson);
    });

    const menuContainer = document.querySelector(".farama-header-menu__body");

    Object.keys(sections).forEach(key => {
      const sectionElem = Object.assign(
        document.createElement('div'), {
          className: 'farama-header-menu__section',
        }
      );
      sectionElem.appendChild(Object.assign(document.createElement('span'),
        {
          className: 'farama-header-menu__section-title',
          innerText: key
        }
      ));

      if (!Array.isArray(sections[key])) {
        // Section with named sub-sections, laid out side by side.
        const subSections = sections[key];
        const subSectionContainerElem = Object.assign(
          document.createElement('div'), {
            className: 'farama-header-menu__subsections-container',
            style: 'display: flex'
          }
        );
        Object.keys(subSections).forEach(subKey => {
          const subSectionElem = Object.assign(
            document.createElement('div'), {
              className: 'farama-header-menu__subsection',
            }
          );
          subSectionElem.appendChild(Object.assign(document.createElement('span'),
            {
              className: 'farama-header-menu__subsection-title',
              innerText: subKey
            }
          ));
          const ulElem = createProjectsList(subSections[subKey], key !== 'Foundation');
          subSectionElem.appendChild(ulElem);
          subSectionContainerElem.appendChild(subSectionElem);
        });
        sectionElem.appendChild(subSectionContainerElem);
      } else {
        // Flat section: a single list of entries.
        const projects = sections[key];
        const ulElem = createProjectsList(projects, true);
        sectionElem.appendChild(ulElem);
      }
      menuContainer.appendChild(sectionElem);
    });
  };

  xhr.onerror = function() {
    console.error("Unable to load projects");
  };

  xhr.send();
}
</script>
<script>
const versioningConfig = {
  githubUser: 'Farama-Foundation',
  githubRepo: 'Gymnasium',
};

// Load the shared versioning menu fragment and append it to the page.
fetch('/main/_static/versioning/versioning_menu.html')
  .then(response => {
    if (response.status !== 200) {
      console.warn("Unable to load versioning menu", response);
      return undefined;
    }
    // Return the inner promise so a failure while reading the body reaches
    // the catch handler below instead of becoming an unhandled rejection.
    return response.text().then(text => {
      const container = document.createElement("div");
      container.innerHTML = text;
      document.querySelector("body").appendChild(container);
      // innerHTML doesn't evaluate scripts, so each one is replaced by a
      // freshly created <script> element with the same attributes and code.
      Array.from(container.querySelectorAll("script")).forEach(oldScript => {
        const newScript = document.createElement("script");
        Array.from(oldScript.attributes).forEach(attr => newScript.setAttribute(attr.name, attr.value));
        newScript.appendChild(document.createTextNode(oldScript.innerHTML));
        oldScript.parentNode.replaceChild(newScript, oldScript);
      });
    });
  })
  .catch(error => {
    // fetch() rejects on network failure; report it like a non-200 response.
    console.warn("Unable to load versioning menu", error);
  });
</script>
</body>
</html>

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,949 @@
<!doctype html>
<html class="no-js" lang="en" data-content_root="../../../../">
<head><meta charset="utf-8"/>
<meta name="viewport" content="width=device-width,initial-scale=1"/>
<meta name="color-scheme" content="light dark">
<meta name="description" content="A standard API for reinforcement learning and a diverse set of reference environments (formerly Gym)">
<meta property="og:title" content="Gymnasium Documentation" />
<meta property="og:type" content="website" />
<meta property="og:description" content="A standard API for reinforcement learning and a diverse set of reference environments (formerly Gym)" />
<meta property="og:url" content="https://gymnasium.farama.org/_modules/gymnasium/spaces/dict.html" /><meta property="og:image" content="https://gymnasium.farama.org/_static/img/gymnasium-github.png" /><meta name="twitter:card" content="summary_large_image"><link rel="index" title="Index" href="../../../../genindex/" /><link rel="search" title="Search" href="../../../../search/" />
<link rel="canonical" href="https://gymnasium.farama.org/_modules/gymnasium/spaces/dict.html" />
<link rel="shortcut icon" href="../../../../_static/favicon.png"/><!-- Generated with Sphinx 7.4.7 and Furo 2023.08.19.dev1 -->
<title>gymnasium.spaces.dict - Gymnasium Documentation</title>
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/furo.css?v=3e7f4c72" />
<link rel="stylesheet" type="text/css" href="../../../../_static/sg_gallery.css?v=61a4c737" />
<link rel="stylesheet" type="text/css" href="../../../../_static/sg_gallery-binder.css?v=f4aeca0c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/sg_gallery-dataframe.css?v=2082cf3c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/sg_gallery-rendered-html.css?v=1277b6f3" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/furo-extensions.css?v=82c8b628" />
<style>
/* Default values for the custom properties named for code background / foreground colours. */
body {
--color-code-background: #f8f8f8;
--color-code-foreground: black;
}
/* The dark overrides below apply to every medium except print. */
@media not print {
/* Case 1: the body's data-theme attribute is explicitly "dark". */
body[data-theme="dark"] {
--color-code-background: #202020;
--color-code-foreground: #d0d0d0;
}
/* Case 2: the user agent prefers a dark colour scheme and data-theme is anything other than "light". */
@media (prefers-color-scheme: dark) {
body:not([data-theme="light"]) {
--color-code-background: #202020;
--color-code-foreground: #d0d0d0;
}
}
}
</style></head>
<body>
<header class="farama-header" aria-label="Farama header">
  <div class="farama-header__container">
    <div class="farama-header__left--mobile">
      <!-- Label for the #__navigation checkbox; the visually hidden text is its accessible name, so the icon is decorative. -->
      <label class="nav-overlay-icon" for="__navigation">
        <div class="visually-hidden">Toggle site navigation sidebar</div>
        <svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg" aria-hidden="true">
          <defs></defs>
          <line x1="0.5" y1="4" x2="23.5" y2="4"></line>
          <line x1="0.232" y1="12" x2="23.5" y2="12"></line>
          <line x1="0.232" y1="20" x2="23.5" y2="20"></line>
        </svg>
      </label>
    </div>
    <div class="farama-header__left farama-header__center--mobile">
      <!-- Brand link back to the documentation root. -->
      <a href="../../../../">
        <img class="farama-header__logo only-light" src="../../../../_static/img/gymnasium_black.svg" alt="Light Logo"/>
        <img class="farama-header__logo only-dark" src="../../../../_static/img/gymnasium_white.svg" alt="Dark Logo"/>
        <span class="farama-header__title">Gymnasium Documentation</span>
      </a>
    </div>
    <div class="farama-header__right">
      <div class="farama-header-menu">
        <!-- Menu trigger: aria-label names the button, so the logo and chevron inside it are decorative. -->
        <button class="farama-header-menu__btn" type="button" aria-label="Open Farama Menu" aria-expanded="false" aria-haspopup="true" aria-controls="farama-menu">
          <img class="farama-black-logo-invert" src="../../../../_static/img/farama-logo-header.svg" alt="">
          <svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg" aria-hidden="true">
            <polyline style="stroke-linecap: round; stroke-linejoin: round; fill: none; stroke-width: 2px;" points="1 7 12 18 23 7"></polyline>
          </svg>
        </button>
        <div class="farama-header-menu-container farama-hidden" aria-hidden="true" id="farama-menu">
          <div class="farama-header-menu__header">
            <a href="https://farama.org">
              <img class="farama-header-menu__logo farama-white-logo-invert" src="../../../../_static/img/farama_solid_white.svg" alt="Farama Foundation logo">
              <span>Farama Foundation</span>
            </a>
            <div class="farama-header-menu-header__right">
              <!-- Icon-only button: the aria-label is its only accessible name. -->
              <button id="farama-close-menu" type="button" aria-label="Close Farama Menu">
                <svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg" fill="none" stroke="currentColor"
                     stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="icon-close" aria-hidden="true">
                  <line x1="3" y1="21" x2="21" y2="3"></line>
                  <line x1="3" y1="3" x2="21" y2="21"></line>
                </svg>
              </button>
            </div>
          </div>
          <div class="farama-header-menu__body">
            <!-- Response from farama.org/api/projects.json -->
          </div>
        </div>
      </div>
    </div>
  </div>
</header>
<script>
  // Inline script placed ahead of the remaining body markup: copies the "theme"
  // value stored in localStorage onto <body data-theme>, or "auto" when it is unset/empty.
  document.body.setAttribute("data-theme", localStorage.getItem("theme") || "auto");
</script>
<!-- Non-rendered SVG sprite (display: none). Each symbol below is an icon that other markup on the page pulls in with a "use" element whose href is "#" plus the symbol id. -->
<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
<!-- svg-toc: "Contents" icon, referenced by the table-of-contents toggle label. -->
<symbol id="svg-toc" viewBox="0 0 24 24">
<title>Contents</title>
<svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 1024 1024">
<path d="M408 442h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8zm-8 204c0 4.4 3.6 8 8 8h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56zm504-486H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zm0 632H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zM115.4 518.9L271.7 642c5.8 4.6 14.4.5 14.4-6.9V388.9c0-7.4-8.5-11.5-14.4-6.9L115.4 505.1a8.74 8.74 0 0 0 0 13.8z"/>
</svg>
</symbol>
<!-- svg-menu: "Menu" icon drawn as three horizontal lines. -->
<symbol id="svg-menu" viewBox="0 0 24 24">
<title>Menu</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-menu">
<line x1="3" y1="12" x2="21" y2="12"></line>
<line x1="3" y1="6" x2="21" y2="6"></line>
<line x1="3" y1="18" x2="21" y2="18"></line>
</svg>
</symbol>
<!-- svg-arrow-right: "Expand" chevron, referenced by the collapsible entries of the sidebar navigation tree. -->
<symbol id="svg-arrow-right" viewBox="0 0 24 24">
<title>Expand</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-chevron-right">
<polyline points="9 18 15 12 9 6"></polyline>
</svg>
</symbol>
<!-- svg-sun: "Light mode" icon, referenced by the theme toggle button. -->
<symbol id="svg-sun" viewBox="0 0 24 24">
<title>Light mode</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="feather-sun">
<circle cx="12" cy="12" r="5"></circle>
<line x1="12" y1="1" x2="12" y2="3"></line>
<line x1="12" y1="21" x2="12" y2="23"></line>
<line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
<line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
<line x1="1" y1="12" x2="3" y2="12"></line>
<line x1="21" y1="12" x2="23" y2="12"></line>
<line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
<line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
</svg>
</symbol>
<!-- svg-moon: "Dark mode" icon, referenced by the theme toggle button. -->
<symbol id="svg-moon" viewBox="0 0 24 24">
<title>Dark mode</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-moon">
<path stroke="none" d="M0 0h24v24H0z" fill="none" />
<path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z" />
</svg>
</symbol>
<!-- svg-sun-half: "Auto light/dark mode" icon, referenced by the theme toggle button. -->
<symbol id="svg-sun-half" viewBox="0 0 24 24">
<title>Auto light/dark mode</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-shadow">
<path stroke="none" d="M0 0h24v24H0z" fill="none"/>
<circle cx="12" cy="12" r="9" />
<path d="M13 12h5" />
<path d="M13 15h4" />
<path d="M13 18h1" />
<path d="M13 9h4" />
<path d="M13 6h1" />
</svg>
</symbol>
</svg>
<!-- State checkboxes for the navigation sidebar (__navigation) and the table-of-contents sidebar (__toc). They are toggled through label elements whose "for" attribute names these ids. NOTE(review): presumably the theme CSS keys sidebar visibility off their :checked state; confirm in furo.css. -->
<input type="checkbox" class="sidebar-toggle" name="__navigation" id="__navigation">
<input type="checkbox" class="sidebar-toggle" name="__toc" id="__toc">
<!-- Overlay labels: activating one toggles the matching checkbox above; the visually hidden text describes the action. -->
<label class="overlay sidebar-overlay" for="__navigation">
<div class="visually-hidden">Hide navigation sidebar</div>
</label>
<label class="overlay toc-overlay" for="__toc">
<div class="visually-hidden">Hide table of contents sidebar</div>
</label>
<div class="page">
<!--<header class="mobile-header">
<div class="header-left">
<label class="nav-overlay-icon" for="__navigation">
<div class="visually-hidden">Toggle site navigation sidebar</div>
<i class="icon"><svg><use href="#svg-menu"></use></svg></i>
</label>
</div>
<div class="header-center">
<a href="../../../../"><div class="brand">Gymnasium Documentation</div></a>
</div>
<div class="header-right">
<div class="theme-toggle-container theme-toggle-header">
<button class="theme-toggle">
<div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
<svg class="theme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
<svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg>
<svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
</button>
</div>
<label class="toc-overlay-icon toc-header-icon no-toc" for="__toc">
<div class="visually-hidden">Toggle table of contents sidebar</div>
<i class="icon"><svg><use href="#svg-toc"></use></svg></i>
</label>
</div>
</header>-->
<!-- Site navigation sidebar: brand link, search form, and the documentation tree. Each caption paragraph uses role="heading", so it carries an explicit aria-level (required for that role). -->
<aside class="sidebar-drawer">
<div class="sidebar-container">
<div class="sidebar-sticky"><a class="farama-sidebar__title" href="../../../../">
<img class="farama-header__logo only-light" src="../../../../_static/img/gymnasium_black.svg" alt="Light Logo"/>
<img class="farama-header__logo only-dark" src="../../../../_static/img/gymnasium_white.svg" alt="Dark Logo"/>
<span class="farama-header__title">Gymnasium Documentation</span>
</a><form class="sidebar-search-container" method="get" action="../../../../search/" role="search">
<input class="sidebar-search" placeholder="Search" name="q" aria-label="Search">
<input type="hidden" name="check_keywords" value="yes">
<input type="hidden" name="area" value="default">
</form>
<div id="searchbox"></div><div class="sidebar-scroll"><div class="sidebar-tree">
<p class="caption" role="heading" aria-level="2"><span class="caption-text">Introduction</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/basic_usage/">Basic Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/train_agent/">Training an Agent</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/create_custom_env/">Create a Custom Environment</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/record_agent/">Recording Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/speed_up_env/">Speeding Up Training</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/gym_compatibility/">Compatibility with Gym</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/migration_guide/">Migration Guide - v0.21 to v1.0.0</a></li>
</ul>
<p class="caption" role="heading" aria-level="2"><span class="caption-text">API</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../api/env/">Env</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../api/registry/">Make and register</a></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../api/spaces/">Spaces</a><input class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" role="switch" type="checkbox"/><label for="toctree-checkbox-1"><div class="visually-hidden">Toggle navigation of Spaces</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/spaces/fundamental/">Fundamental Spaces</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/spaces/composite/">Composite Spaces</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/spaces/utils/">Spaces Utils</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../api/wrappers/">Wrappers</a><input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" role="switch" type="checkbox"/><label for="toctree-checkbox-2"><div class="visually-hidden">Toggle navigation of Wrappers</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/wrappers/table/">List of Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/wrappers/misc_wrappers/">Misc Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/wrappers/action_wrappers/">Action Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/wrappers/observation_wrappers/">Observation Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/wrappers/reward_wrappers/">Reward Wrappers</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../api/vector/">Vectorize</a><input class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" role="switch" type="checkbox"/><label for="toctree-checkbox-3"><div class="visually-hidden">Toggle navigation of Vectorize</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/vector/wrappers/">Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/vector/async_vector_env/">AsyncVectorEnv</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/vector/sync_vector_env/">SyncVectorEnv</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/vector/utils/">Utility functions</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../../../api/utils/">Utility functions</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../api/functional/">Functional Env</a></li>
</ul>
<p class="caption" role="heading" aria-level="2"><span class="caption-text">Environments</span></p>
<ul>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../environments/classic_control/">Classic Control</a><input class="toctree-checkbox" id="toctree-checkbox-4" name="toctree-checkbox-4" role="switch" type="checkbox"/><label for="toctree-checkbox-4"><div class="visually-hidden">Toggle navigation of Classic Control</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/classic_control/acrobot/">Acrobot</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/classic_control/cart_pole/">Cart Pole</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/classic_control/mountain_car_continuous/">Mountain Car Continuous</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/classic_control/mountain_car/">Mountain Car</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/classic_control/pendulum/">Pendulum</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../environments/box2d/">Box2D</a><input class="toctree-checkbox" id="toctree-checkbox-5" name="toctree-checkbox-5" role="switch" type="checkbox"/><label for="toctree-checkbox-5"><div class="visually-hidden">Toggle navigation of Box2D</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/box2d/bipedal_walker/">Bipedal Walker</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/box2d/car_racing/">Car Racing</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/box2d/lunar_lander/">Lunar Lander</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../environments/toy_text/">Toy Text</a><input class="toctree-checkbox" id="toctree-checkbox-6" name="toctree-checkbox-6" role="switch" type="checkbox"/><label for="toctree-checkbox-6"><div class="visually-hidden">Toggle navigation of Toy Text</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/toy_text/blackjack/">Blackjack</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/toy_text/taxi/">Taxi</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/toy_text/cliff_walking/">Cliff Walking</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/toy_text/frozen_lake/">Frozen Lake</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../environments/mujoco/">MuJoCo</a><input class="toctree-checkbox" id="toctree-checkbox-7" name="toctree-checkbox-7" role="switch" type="checkbox"/><label for="toctree-checkbox-7"><div class="visually-hidden">Toggle navigation of MuJoCo</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/ant/">Ant</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/half_cheetah/">Half Cheetah</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/hopper/">Hopper</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/humanoid/">Humanoid</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/humanoid_standup/">Humanoid Standup</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/inverted_double_pendulum/">Inverted Double Pendulum</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/inverted_pendulum/">Inverted Pendulum</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/pusher/">Pusher</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/reacher/">Reacher</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/swimmer/">Swimmer</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/walker2d/">Walker2D</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../../../environments/atari/">Atari</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../environments/third_party_environments/">External Environments</a></li>
</ul>
<p class="caption" role="heading" aria-level="2"><span class="caption-text">Tutorials</span></p>
<ul>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../tutorials/gymnasium_basics/">Gymnasium Basics</a><input class="toctree-checkbox" id="toctree-checkbox-8" name="toctree-checkbox-8" role="switch" type="checkbox"/><label for="toctree-checkbox-8"><div class="visually-hidden">Toggle navigation of Gymnasium Basics</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/gymnasium_basics/environment_creation/">Make your own custom environment</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/gymnasium_basics/handling_time_limits/">Handling Time Limits</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/gymnasium_basics/implementing_custom_wrappers/">Implementing Custom Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/gymnasium_basics/load_quadruped_model/">Load custom quadruped robot environments</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../tutorials/training_agents/">Training Agents</a><input class="toctree-checkbox" id="toctree-checkbox-9" name="toctree-checkbox-9" role="switch" type="checkbox"/><label for="toctree-checkbox-9"><div class="visually-hidden">Toggle navigation of Training Agents</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/training_agents/blackjack_q_learning/">Solving Blackjack with Tabular Q-Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/training_agents/frozenlake_q_learning/">Solving Frozenlake with Tabular Q-Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/training_agents/mujoco_reinforce/">Training using REINFORCE for Mujoco</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/training_agents/vector_a2c/">Speeding up A2C Training with Vector Envs</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../../../tutorials/third-party-tutorials/">Third-Party Tutorials</a></li>
</ul>
<p class="caption" role="heading" aria-level="2"><span class="caption-text">Development</span></p>
<ul>
<li class="toctree-l1"><a class="reference external" href="https://github.com/Farama-Foundation/Gymnasium">Github</a></li>
<li class="toctree-l1"><a class="reference external" href="https://arxiv.org/abs/2407.17032">Paper</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../gymnasium_release_notes/">Gymnasium Release Notes</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../gym_release_notes/">Gym Release Notes</a></li>
<li class="toctree-l1"><a class="reference external" href="https://github.com/Farama-Foundation/Gymnasium/blob/main/docs/README.md">Contribute to the Docs</a></li>
</ul>
</div>
</div>
</div>
</div>
</aside>
<div class="main-container">
<div class="main">
<div class="content">
<div class="article-container">
<!-- "Back to top" link: the empty fragment in href="#" targets the top of the document; the span supplies the visible link text. -->
<a href="#" class="back-to-top muted-link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
<path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12z"></path>
</svg>
<span>Back to top</span>
</a>
<div class="content-icon-container"><div class="theme-toggle-container theme-toggle-content">
    <!-- Theme toggle: the visually hidden text names the button, so the three state icons are decorative. -->
    <button class="theme-toggle" type="button" title="Toggle color theme">
      <div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
      <svg class="theme-icon-when-auto" aria-hidden="true">
        <use href="#svg-sun-half"></use>
      </svg>
      <svg class="theme-icon-when-dark" aria-hidden="true">
        <use href="#svg-moon"></use>
      </svg>
      <svg class="theme-icon-when-light" aria-hidden="true">
        <use href="#svg-sun"></use>
      </svg>
    </button>
  </div>
  <!-- Label for the #__toc checkbox; the visually hidden text is its accessible name. -->
  <label class="toc-overlay-icon toc-content-icon no-toc" for="__toc">
    <div class="visually-hidden">Toggle table of contents sidebar</div>
    <i class="icon"><svg aria-hidden="true">
      <use href="#svg-toc"></use>
    </svg></i>
  </label>
</div>
<article role="main">
<h1>Source code for gymnasium.spaces.dict</h1><div class="highlight"><pre>
<span></span><span class="sd">&quot;&quot;&quot;Implementation of a space that represents the cartesian product of other spaces as a dictionary.&quot;&quot;&quot;</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">__future__</span><span class="w"> </span><span class="kn">import</span> <span class="n">annotations</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">collections.abc</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">typing</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">collections</span><span class="w"> </span><span class="kn">import</span> <span class="n">OrderedDict</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">collections.abc</span><span class="w"> </span><span class="kn">import</span> <span class="n">KeysView</span><span class="p">,</span> <span class="n">Sequence</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">typing</span><span class="w"> </span><span class="kn">import</span> <span class="n">Any</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">numpy</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">np</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">gymnasium.spaces.space</span><span class="w"> </span><span class="kn">import</span> <span class="n">Space</span>
<div class="viewcode-block" id="Dict">
<a class="viewcode-back" href="../../../../api/spaces/composite/#gymnasium.spaces.Dict">[docs]</a>
<span class="k">class</span><span class="w"> </span><span class="nc">Dict</span><span class="p">(</span><span class="n">Space</span><span class="p">[</span><span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]],</span> <span class="n">typing</span><span class="o">.</span><span class="n">Mapping</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Space</span><span class="p">[</span><span class="n">Any</span><span class="p">]]):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;A dictionary of :class:`Space` instances.</span>
<span class="sd"> Elements of this space are (ordered) dictionaries of elements from the constituent spaces.</span>
<span class="sd"> Example:</span>
<span class="sd"> &gt;&gt;&gt; from gymnasium.spaces import Dict, Box, Discrete</span>
<span class="sd"> &gt;&gt;&gt; observation_space = Dict({&quot;position&quot;: Box(-1, 1, shape=(2,)), &quot;color&quot;: Discrete(3)}, seed=42)</span>
<span class="sd"> &gt;&gt;&gt; observation_space.sample()</span>
<span class="sd"> {&#39;color&#39;: np.int64(0), &#39;position&#39;: array([-0.3991573 , 0.21649833], dtype=float32)}</span>
<span class="sd"> With a nested dict:</span>
<span class="sd"> &gt;&gt;&gt; from gymnasium.spaces import Box, Dict, Discrete, MultiBinary, MultiDiscrete</span>
<span class="sd"> &gt;&gt;&gt; Dict( # doctest: +SKIP</span>
<span class="sd"> ... {</span>
<span class="sd"> ... &quot;ext_controller&quot;: MultiDiscrete([5, 2, 2]),</span>
<span class="sd"> ... &quot;inner_state&quot;: Dict(</span>
<span class="sd"> ... {</span>
<span class="sd"> ... &quot;charge&quot;: Discrete(100),</span>
<span class="sd"> ... &quot;system_checks&quot;: MultiBinary(10),</span>
<span class="sd"> ... &quot;job_status&quot;: Dict(</span>
<span class="sd"> ... {</span>
<span class="sd"> ... &quot;task&quot;: Discrete(5),</span>
<span class="sd"> ... &quot;progress&quot;: Box(low=0, high=100, shape=()),</span>
<span class="sd"> ... }</span>
<span class="sd"> ... ),</span>
<span class="sd"> ... }</span>
<span class="sd"> ... ),</span>
<span class="sd"> ... }</span>
<span class="sd"> ... )</span>
<span class="sd"> It can be convenient to use :class:`Dict` spaces if you want to make complex observations or actions more human-readable.</span>
<span class="sd"> Usually, it will not be possible to use elements of this space directly in learning code. However, you can easily</span>
<span class="sd"> convert :class:`Dict` observations to flat arrays by using a :class:`gymnasium.wrappers.FlattenObservation` wrapper.</span>
<span class="sd"> Similar wrappers can be implemented to deal with :class:`Dict` actions.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">spaces</span><span class="p">:</span> <span class="kc">None</span> <span class="o">|</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Space</span><span class="p">]</span> <span class="o">|</span> <span class="n">Sequence</span><span class="p">[</span><span class="nb">tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Space</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">seed</span><span class="p">:</span> <span class="nb">dict</span> <span class="o">|</span> <span class="nb">int</span> <span class="o">|</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">Generator</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="o">**</span><span class="n">spaces_kwargs</span><span class="p">:</span> <span class="n">Space</span><span class="p">,</span>
<span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Constructor of :class:`Dict` space.</span>
<span class="sd"> This space can be instantiated in one of two ways: Either you pass a dictionary</span>
<span class="sd"> of spaces to :meth:`__init__` via the ``spaces`` argument, or you pass the spaces as separate</span>
<span class="sd"> keyword arguments (where you will need to avoid the keys ``spaces`` and ``seed``)</span>
<span class="sd"> Args:</span>
<span class="sd"> spaces: A dictionary of spaces. This specifies the structure of the :class:`Dict` space</span>
<span class="sd"> seed: Optionally, you can use this argument to seed the RNGs of the spaces that make up the :class:`Dict` space.</span>
<span class="sd"> **spaces_kwargs: If ``spaces`` is ``None``, you need to pass the constituent spaces as keyword arguments, as described above.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">spaces</span><span class="p">,</span> <span class="n">OrderedDict</span><span class="p">):</span>
<span class="n">spaces</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">(</span><span class="n">spaces</span><span class="o">.</span><span class="n">items</span><span class="p">())</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">spaces</span><span class="p">,</span> <span class="n">collections</span><span class="o">.</span><span class="n">abc</span><span class="o">.</span><span class="n">Mapping</span><span class="p">):</span>
<span class="c1"># for legacy reasons, we need to preserve the sorted dictionary items.</span>
<span class="c1"># as this could matter for projects flatten the dictionary.</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">spaces</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">(</span><span class="nb">sorted</span><span class="p">(</span><span class="n">spaces</span><span class="o">.</span><span class="n">items</span><span class="p">()))</span>
<span class="k">except</span> <span class="ne">TypeError</span><span class="p">:</span>
<span class="c1"># Incomparable types (e.g. `int` vs. `str`, or user-defined types) found.</span>
<span class="c1"># The keys remain in the insertion order.</span>
<span class="n">spaces</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">(</span><span class="n">spaces</span><span class="o">.</span><span class="n">items</span><span class="p">())</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">spaces</span><span class="p">,</span> <span class="n">Sequence</span><span class="p">):</span>
<span class="n">spaces</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">(</span><span class="n">spaces</span><span class="p">)</span>
<span class="k">elif</span> <span class="n">spaces</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">spaces</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">()</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;Unexpected Dict space input, expecting dict, OrderedDict or Sequence, actual type: </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">spaces</span><span class="p">)</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="p">)</span>
<span class="c1"># Add kwargs to spaces to allow both dictionary and keywords to be used</span>
<span class="k">for</span> <span class="n">key</span><span class="p">,</span> <span class="n">space</span> <span class="ow">in</span> <span class="n">spaces_kwargs</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
<span class="k">if</span> <span class="n">key</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">spaces</span><span class="p">:</span>
<span class="n">spaces</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">space</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;Dict space keyword &#39;</span><span class="si">{</span><span class="n">key</span><span class="si">}</span><span class="s2">&#39; already exists in the spaces dictionary.&quot;</span>
<span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="p">:</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Space</span><span class="p">[</span><span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="n">spaces</span>
<span class="k">for</span> <span class="n">key</span><span class="p">,</span> <span class="n">space</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span>
<span class="n">space</span><span class="p">,</span> <span class="n">Space</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;Dict space element is not an instance of Space: key=&#39;</span><span class="si">{</span><span class="n">key</span><span class="si">}</span><span class="s2">&#39;, space=</span><span class="si">{</span><span class="n">space</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="c1"># None for shape and dtype, since it&#39;ll require special handling</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="kc">None</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="n">seed</span><span class="p">)</span> <span class="c1"># type: ignore</span>
<span class="nd">@property</span>
<span class="k">def</span><span class="w"> </span><span class="nf">is_np_flattenable</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Checks whether this space can be flattened to a :class:`spaces.Box`.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">all</span><span class="p">(</span><span class="n">space</span><span class="o">.</span><span class="n">is_np_flattenable</span> <span class="k">for</span> <span class="n">space</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="o">.</span><span class="n">values</span><span class="p">())</span>
<div class="viewcode-block" id="Dict.seed">
<a class="viewcode-back" href="../../../../api/spaces/composite/#gymnasium.spaces.Dict.seed">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">seed</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">seed</span><span class="p">:</span> <span class="nb">int</span> <span class="o">|</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">int</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Seed the PRNG of this space and all subspaces.</span>
<span class="sd"> Depending on the type of seed, the subspaces will be seeded differently</span>
<span class="sd"> * ``None`` - All the subspaces will use a random initial seed</span>
<span class="sd"> * ``Int`` - The integer is used to seed the :class:`Dict` space that is used to generate seed values for each of the subspaces. Warning: this does not guarantee unique seeds for all subspaces, though a collision is very unlikely.</span>
<span class="sd"> * ``Dict`` - A dictionary of seeds for each subspace, requires a seed key for every subspace. This supports seeding of multiple composite subspaces (``Dict[&quot;space&quot;: Dict[...], ...]`` with ``{&quot;space&quot;: {...}, ...}``).</span>
<span class="sd"> Args:</span>
<span class="sd"> seed: An optional int or dictionary of subspace keys to int to seed each PRNG. See above for more details.</span>
<span class="sd"> Returns:</span>
<span class="sd"> A dictionary for the seed values of the subspaces</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">seed</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">return</span> <span class="p">{</span><span class="n">key</span><span class="p">:</span> <span class="n">subspace</span><span class="o">.</span><span class="n">seed</span><span class="p">(</span><span class="kc">None</span><span class="p">)</span> <span class="k">for</span> <span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="n">subspace</span><span class="p">)</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="o">.</span><span class="n">items</span><span class="p">()}</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">seed</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">seed</span><span class="p">(</span><span class="n">seed</span><span class="p">)</span>
<span class="c1"># Drawing the seeds from the full non-negative `np.int32` range makes it extremely unlikely that two subspaces receive the same seed, even when there are many subspaces</span>
<span class="n">subseeds</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">np_random</span><span class="o">.</span><span class="n">integers</span><span class="p">(</span>
<span class="n">np</span><span class="o">.</span><span class="n">iinfo</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">int32</span><span class="p">)</span><span class="o">.</span><span class="n">max</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="p">)</span>
<span class="p">)</span>
<span class="k">return</span> <span class="p">{</span>
<span class="n">key</span><span class="p">:</span> <span class="n">subspace</span><span class="o">.</span><span class="n">seed</span><span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="n">subseed</span><span class="p">))</span>
<span class="k">for</span> <span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="n">subspace</span><span class="p">),</span> <span class="n">subseed</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="o">.</span><span class="n">items</span><span class="p">(),</span> <span class="n">subseeds</span><span class="p">)</span>
<span class="p">}</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">seed</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
<span class="k">if</span> <span class="n">seed</span><span class="o">.</span><span class="n">keys</span><span class="p">()</span> <span class="o">!=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="o">.</span><span class="n">keys</span><span class="p">():</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;The seed keys: </span><span class="si">{</span><span class="n">seed</span><span class="o">.</span><span class="n">keys</span><span class="p">()</span><span class="si">}</span><span class="s2"> are not identical to space keys: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="o">.</span><span class="n">keys</span><span class="p">()</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="p">)</span>
<span class="k">return</span> <span class="p">{</span><span class="n">key</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="p">[</span><span class="n">key</span><span class="p">]</span><span class="o">.</span><span class="n">seed</span><span class="p">(</span><span class="n">seed</span><span class="p">[</span><span class="n">key</span><span class="p">])</span> <span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">seed</span><span class="o">.</span><span class="n">keys</span><span class="p">()}</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;Expected seed type: dict, int or None, actual type: </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">seed</span><span class="p">)</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="p">)</span></div>
<div class="viewcode-block" id="Dict.sample">
<a class="viewcode-back" href="../../../../api/spaces/composite/#gymnasium.spaces.Dict.sample">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">sample</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">mask</span><span class="p">:</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">probability</span><span class="p">:</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Generates a single random sample from this space.</span>
<span class="sd"> The sample is an ordered dictionary of independent samples from the constituent spaces.</span>
<span class="sd"> Args:</span>
<span class="sd"> mask: An optional mask for each of the subspaces, expects the same keys as the space</span>
<span class="sd"> probability: An optional probability mask for each of the subspaces, expects the same keys as the space</span>
<span class="sd"> Returns:</span>
<span class="sd"> A dictionary with the same keys as :attr:`self.spaces` and a sampled value for each key</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">mask</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">probability</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;Only one of `mask` or `probability` can be provided, actual values: mask=</span><span class="si">{</span><span class="n">mask</span><span class="si">}</span><span class="s2">, probability=</span><span class="si">{</span><span class="n">probability</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="p">)</span>
<span class="k">elif</span> <span class="n">mask</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span>
<span class="n">mask</span><span class="p">,</span> <span class="nb">dict</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;Expected sample mask to be a dict, actual type: </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">mask</span><span class="p">)</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="k">assert</span> <span class="p">(</span>
<span class="n">mask</span><span class="o">.</span><span class="n">keys</span><span class="p">()</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="o">.</span><span class="n">keys</span><span class="p">()</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;Expected sample mask keys to be same as space keys, mask keys: </span><span class="si">{</span><span class="n">mask</span><span class="o">.</span><span class="n">keys</span><span class="p">()</span><span class="si">}</span><span class="s2">, space keys: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="o">.</span><span class="n">keys</span><span class="p">()</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="k">return</span> <span class="p">{</span><span class="n">k</span><span class="p">:</span> <span class="n">space</span><span class="o">.</span><span class="n">sample</span><span class="p">(</span><span class="n">mask</span><span class="o">=</span><span class="n">mask</span><span class="p">[</span><span class="n">k</span><span class="p">])</span> <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">space</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="o">.</span><span class="n">items</span><span class="p">()}</span>
<span class="k">elif</span> <span class="n">probability</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span>
<span class="n">probability</span><span class="p">,</span> <span class="nb">dict</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;Expected sample probability mask to be a dict, actual type: </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">probability</span><span class="p">)</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="k">assert</span> <span class="p">(</span>
<span class="n">probability</span><span class="o">.</span><span class="n">keys</span><span class="p">()</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="o">.</span><span class="n">keys</span><span class="p">()</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;Expected sample probability mask keys to be same as space keys, mask keys: </span><span class="si">{</span><span class="n">probability</span><span class="o">.</span><span class="n">keys</span><span class="p">()</span><span class="si">}</span><span class="s2">, space keys: </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="o">.</span><span class="n">keys</span><span class="p">()</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="k">return</span> <span class="p">{</span>
<span class="n">k</span><span class="p">:</span> <span class="n">space</span><span class="o">.</span><span class="n">sample</span><span class="p">(</span><span class="n">probability</span><span class="o">=</span><span class="n">probability</span><span class="p">[</span><span class="n">k</span><span class="p">])</span>
<span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">space</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="o">.</span><span class="n">items</span><span class="p">()</span>
<span class="p">}</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="p">{</span><span class="n">k</span><span class="p">:</span> <span class="n">space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span> <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">space</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="o">.</span><span class="n">items</span><span class="p">()}</span></div>
<span class="k">def</span><span class="w"> </span><span class="nf">contains</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Return boolean specifying if x is a valid member of this space.&quot;&quot;&quot;</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="nb">dict</span><span class="p">)</span> <span class="ow">and</span> <span class="n">x</span><span class="o">.</span><span class="n">keys</span><span class="p">()</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="o">.</span><span class="n">keys</span><span class="p">():</span>
<span class="k">return</span> <span class="nb">all</span><span class="p">(</span><span class="n">x</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span>
<span class="k">return</span> <span class="kc">False</span>
<span class="k">def</span><span class="w"> </span><span class="fm">__getitem__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Space</span><span class="p">[</span><span class="n">Any</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Get the space that is associated to `key`.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="p">[</span><span class="n">key</span><span class="p">]</span>
<span class="k">def</span><span class="w"> </span><span class="nf">keys</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">KeysView</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Returns the keys of the Dict.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">KeysView</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="p">)</span>
<span class="k">def</span><span class="w"> </span><span class="fm">__setitem__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">Space</span><span class="p">[</span><span class="n">Any</span><span class="p">]):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Set the space that is associated to `key`.&quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span>
<span class="n">value</span><span class="p">,</span> <span class="n">Space</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;Trying to set </span><span class="si">{</span><span class="n">key</span><span class="si">}</span><span class="s2"> to Dict space with value that is not a gymnasium space, actual type: </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">value</span><span class="p">)</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">value</span>
<span class="k">def</span><span class="w"> </span><span class="fm">__iter__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Iterator through the keys of the subspaces.&quot;&quot;&quot;</span>
<span class="k">yield from</span> <span class="bp">self</span><span class="o">.</span><span class="n">spaces</span>
<span class="k">def</span><span class="w"> </span><span class="fm">__len__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">int</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Gives the number of simpler spaces that make up the `Dict` space.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="p">)</span>
<span class="k">def</span><span class="w"> </span><span class="fm">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Gives a string representation of this space.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="p">(</span>
<span class="s2">&quot;Dict(&quot;</span> <span class="o">+</span> <span class="s2">&quot;, &quot;</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="n">k</span><span class="si">!r}</span><span class="s2">: </span><span class="si">{</span><span class="n">s</span><span class="si">}</span><span class="s2">&quot;</span> <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">s</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="o">.</span><span class="n">items</span><span class="p">()])</span> <span class="o">+</span> <span class="s2">&quot;)&quot;</span>
<span class="p">)</span>
<span class="k">def</span><span class="w"> </span><span class="fm">__eq__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Check whether `other` is equivalent to this instance.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="p">(</span>
<span class="nb">isinstance</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">Dict</span><span class="p">)</span>
<span class="c1"># Comparison of `OrderedDict`s is order-sensitive</span>
<span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">spaces</span> <span class="o">==</span> <span class="n">other</span><span class="o">.</span><span class="n">spaces</span> <span class="c1"># OrderedDict.__eq__</span>
<span class="p">)</span>
<span class="k">def</span><span class="w"> </span><span class="nf">to_jsonable</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sample_n</span><span class="p">:</span> <span class="n">Sequence</span><span class="p">[</span><span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]])</span> <span class="o">-&gt;</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">list</span><span class="p">[</span><span class="n">Any</span><span class="p">]]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Convert a batch of samples from this space to a JSONable data type.&quot;&quot;&quot;</span>
<span class="c1"># serialize as dict-repr of vectors</span>
<span class="k">return</span> <span class="p">{</span>
<span class="n">key</span><span class="p">:</span> <span class="n">space</span><span class="o">.</span><span class="n">to_jsonable</span><span class="p">([</span><span class="n">sample</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="k">for</span> <span class="n">sample</span> <span class="ow">in</span> <span class="n">sample_n</span><span class="p">])</span>
<span class="k">for</span> <span class="n">key</span><span class="p">,</span> <span class="n">space</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="o">.</span><span class="n">items</span><span class="p">()</span>
<span class="p">}</span>
<span class="k">def</span><span class="w"> </span><span class="nf">from_jsonable</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sample_n</span><span class="p">:</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">list</span><span class="p">[</span><span class="n">Any</span><span class="p">]])</span> <span class="o">-&gt;</span> <span class="nb">list</span><span class="p">[</span><span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Convert a JSONable data type to a batch of samples from this space.&quot;&quot;&quot;</span>
<span class="n">dict_of_list</span><span class="p">:</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">list</span><span class="p">[</span><span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="p">{</span>
<span class="n">key</span><span class="p">:</span> <span class="n">space</span><span class="o">.</span><span class="n">from_jsonable</span><span class="p">(</span><span class="n">sample_n</span><span class="p">[</span><span class="n">key</span><span class="p">])</span>
<span class="k">for</span> <span class="n">key</span><span class="p">,</span> <span class="n">space</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="o">.</span><span class="n">items</span><span class="p">()</span>
<span class="p">}</span>
<span class="n">n_elements</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="nb">next</span><span class="p">(</span><span class="nb">iter</span><span class="p">(</span><span class="n">dict_of_list</span><span class="o">.</span><span class="n">values</span><span class="p">())))</span>
<span class="n">result</span> <span class="o">=</span> <span class="p">[</span>
<span class="p">{</span><span class="n">key</span><span class="p">:</span> <span class="n">value</span><span class="p">[</span><span class="n">n</span><span class="p">]</span> <span class="k">for</span> <span class="n">key</span><span class="p">,</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">dict_of_list</span><span class="o">.</span><span class="n">items</span><span class="p">()}</span>
<span class="k">for</span> <span class="n">n</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">n_elements</span><span class="p">)</span>
<span class="p">]</span>
<span class="k">return</span> <span class="n">result</span></div>
</pre></div>
</article>
</div>
<footer>
<div class="related-pages">
</div>
<div class="bottom-of-page">
<div class="left-details">
<div class="copyright">
Copyright &#169; 2025 Farama Foundation
</div>
<!--
Made with <a href="https://www.sphinx-doc.org/">Sphinx</a> and <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s
<a href="https://github.com/pradyunsg/furo">Furo</a>
-->
</div>
<div class="right-details">
<div class="icons">
<a class="muted-link" href="https://github.com/Farama-Foundation/Gymnasium/"
aria-label="On GitHub">
<svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 16 16">
<path fill-rule="evenodd"
d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0 0 16 8c0-4.42-3.58-8-8-8z">
</path>
</svg>
</a>
</div>
</div>
</div>
</footer>
</div>
<aside class="toc-drawer no-toc">
</aside>
</div>
</div>
</div>
<script>
/**
 * Open or close the Farama header menu.
 *
 * Flips the "active" class on `.farama-header-menu` and, before doing so,
 * sets `aria-expanded` on the menu button and `aria-hidden` on the menu
 * container to describe the state the menu is about to enter.
 */
const toggleMenu = () => {
  const button = document.querySelector(".farama-header-menu__btn");
  const container = document.querySelector(".farama-header-menu-container");
  const menu = document.querySelector(".farama-header-menu");

  // State *before* the toggle: an open menu is about to be closed.
  const wasOpen = menu.classList.contains("active");

  // Attribute values are the strings "true"/"false", never booleans.
  button.setAttribute("aria-expanded", String(!wasOpen));
  container.setAttribute("aria-hidden", String(wasOpen));

  menu.classList.toggle("active");
};
// Both the header menu button and the in-menu close control toggle the menu.
for (const trigger of [
  document.querySelector(".farama-header-menu__btn"),
  document.getElementById("farama-close-menu"),
]) {
  trigger.addEventListener("click", toggleMenu);
}
</script>
<script async src="https://www.googletagmanager.com/gtag/js?id=G-6H9C8TWXZ8"></script>
<script>
/**
 * Start Google Analytics reporting for this page.
 *
 * Ensures `window.dataLayer` exists, then queues the two standard gtag
 * commands ("js" with the current time, "config" with the measurement id).
 */
const enableGtag = () => {
  if (!window.dataLayer) {
    window.dataLayer = [];
  }

  // The raw `arguments` object is pushed on purpose (not a copied array),
  // so this has to stay a regular function rather than an arrow function.
  const gtag = function () {
    dataLayer.push(arguments);
  };

  gtag('js', new Date());
  gtag('config', 'G-6H9C8TWXZ8');
};
// Cookie-consent gate for analytics. The visitor's choice is stored in
// localStorage under "acceptedCookieAlert":
//   * nothing stored -> show a banner with "Deny" / "Allow" buttons
//   * "true"         -> enable analytics immediately
//   * anything else  -> do nothing
(() => {
  const CONSENT_KEY = "acceptedCookieAlert";
  const storedChoice = localStorage.getItem(CONSENT_KEY);

  if (storedChoice) {
    // A choice was made on an earlier visit; only "true" turns analytics on.
    if (storedChoice === "true") {
      enableGtag();
    }
    return;
  }

  const banner = document.createElement("div");
  banner.classList.add("cookie-alert");

  const content = document.createElement("div");
  content.classList.add("cookie-alert__container");

  const notice = document.createElement("p");
  // The template literal's line break is part of the emitted markup, so the
  // continuation line is intentionally left unindented.
  notice.innerHTML = `This page uses <a href="https://analytics.google.com/">
Google Analytics</a> to collect statistics.`;
  content.appendChild(notice);

  // Builds one banner button. Clicking it records the choice, removes the
  // banner and, when the choice is "accepted", enables analytics.
  const buildChoiceButton = (label, id, accepted) => {
    const button = Object.assign(document.createElement("button"), {
      innerText: label,
      className: "farama-btn cookie-alert__button",
      id: id,
    });
    button.addEventListener("click", () => {
      // localStorage stringifies the boolean to "true" / "false".
      localStorage.setItem(CONSENT_KEY, accepted);
      banner.remove();
      if (accepted) {
        enableGtag();
      }
    });
    return button;
  };

  const denyButton = buildChoiceButton("Deny", "cookie-alert__decline", false);
  const allowButton = buildChoiceButton("Allow", "cookie-alert__accept", true);

  content.appendChild(denyButton);
  content.appendChild(allowButton);
  banner.appendChild(content);
  document.body.appendChild(banner);
})();
</script>
<script src="../../../../_static/documentation_options.js?v=151cd43d"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../../../_static/scripts/furo.js?v=7660844c"></script>
<script>
/**
 * Build the <ul> of project links shown in the Farama header menu.
 *
 * @param projects       Iterable of objects with `name` and `link`, and
 *                       optionally `image` (a path appended to `imagesBasepath`).
 * @param displayImages  When truthy, every link is prefixed with a logo <img>;
 *                       entries without `image` get "/farama_black.svg".
 * @returns The populated <ul class="farama-header-menu-list"> element.
 */
const createProjectsList = (projects, displayImages) => {
  const list = document.createElement('ul');
  list.className = 'farama-header-menu-list';

  for (const entry of projects) {
    const anchor = document.createElement("a");
    anchor.href = entry.link;

    if (displayImages) {
      const logo = document.createElement("img");
      // Fall back to the generic Farama logo when the project has no image.
      logo.src = imagesBasepath + (entry.image || "/farama_black.svg");
      logo.alt = `${entry.name} logo`;
      logo.className = "farama-black-logo-invert";
      anchor.appendChild(logo);
    }
    anchor.appendChild(document.createTextNode(entry.name));

    const item = document.createElement("li");
    item.appendChild(anchor);
    list.appendChild(item);
  }

  return list;
};
// Create menu with Farama projects by using the API at farama.org/api/projects.json
/**
 * Create and open a cross-origin request object for `url`.
 *
 * @param method  HTTP method passed to `open()`.
 * @param url     Target URL passed to `open()`.
 * @returns An opened XMLHttpRequest with `responseType` set to 'json' when the
 *          browser's XMLHttpRequest has a `withCredentials` property; otherwise
 *          an opened XDomainRequest when that constructor exists; otherwise null.
 */
const createCORSRequest = (method, url) => {
  const request = new XMLHttpRequest();
  request.responseType = 'json';

  if ("withCredentials" in request) {
    request.open(method, url, true);
    return request;
  }

  if (typeof XDomainRequest === "undefined") {
    // CORS not supported.
    return null;
  }

  // IE8 & IE9
  const legacyRequest = new XDomainRequest();
  legacyRequest.open(method, url);
  return legacyRequest;
};
// Load the Farama project catalogue and render it into the header menu
// (`.farama-header-menu__body`), grouped into sections and sub-sections.
const url = 'https://farama.org/api/projects.json';
const imagesBasepath = "https://farama.org/assets/images";
const method = 'GET';
let xhr = createCORSRequest(method, url);

if (xhr === null) {
  // createCORSRequest returns null when the browser offers no CORS transport.
  console.error("Unable to load projects");
} else {
  xhr.onload = () => {
    const jsonResponse = xhr.response;
    // With responseType 'json' the response is null when the request failed
    // or the body is not valid JSON; bail out instead of throwing below.
    if (jsonResponse === null || typeof jsonResponse !== "object") {
      console.error("Unable to load projects");
      return;
    }

    // Arrays are rendered as a flat list; plain objects map a sub-section
    // title to its list of projects.
    const sections = {
      "Core Projects": [],
      "Mature Projects": {
        "Documentation": [],
        "Repositories": [],
      },
      "Incubating Projects": {
        "Documentation": [],
        "Repositories": [],
      },
      "Foundation": [
        {
          name: "About",
          link: "https://farama.org/about"
        },
        {
          name: "Standards",
          link: "https://farama.org/project_standards",
        },
        {
          name: "Donate",
          link: "https://farama.org/donations"
        }
      ]
    };

    // Categorize projects
    Object.keys(jsonResponse).forEach(key => {
      // Declared locally: an undeclared assignment here would create a global
      // (and throw in strict mode).
      const projectJson = jsonResponse[key];
      const hasWebsite = projectJson.website !== null;
      // Link to the documentation site when there is one, else to the repository.
      projectJson.link = hasWebsite ? projectJson.website : projectJson.github;

      if (projectJson.type === "core") {
        sections["Core Projects"].push(projectJson);
        return;
      }
      // Every non-core type other than "mature" is listed as incubating.
      const sectionName = projectJson.type === "mature" ? "Mature Projects" : "Incubating Projects";
      const subSectionName = hasWebsite ? "Documentation" : "Repositories";
      sections[sectionName][subSectionName].push(projectJson);
    });

    const menuContainer = document.querySelector(".farama-header-menu__body");
    Object.keys(sections).forEach(key => {
      const sectionElem = Object.assign(
        document.createElement('div'), {
          className: 'farama-header-menu__section',
        }
      );
      sectionElem.appendChild(Object.assign(document.createElement('span'),
        {
          className: 'farama-header-menu__section-title',
          innerText: key
        }
      ));

      if (!Array.isArray(sections[key])) {
        // Section made of titled sub-sections laid out side by side.
        const subSections = sections[key];
        const subSectionContainerElem = Object.assign(
          document.createElement('div'), {
            className: 'farama-header-menu__subsections-container',
          }
        );
        subSectionContainerElem.style.display = 'flex';

        Object.keys(subSections).forEach(subKey => {
          const subSectionElem = Object.assign(
            document.createElement('div'), {
              className: 'farama-header-menu__subsection',
            }
          );
          subSectionElem.appendChild(Object.assign(document.createElement('span'),
            {
              className: 'farama-header-menu__subsection-title',
              innerText: subKey
            }
          ));
          // NOTE(review): this comparison is always true here, because
          // "Foundation" is an array and is handled by the else branch below
          // (where logos are always shown) — confirm whether Foundation links
          // were meant to be rendered without logos.
          const ulElem = createProjectsList(subSections[subKey], key !== 'Foundation');
          subSectionElem.appendChild(ulElem);
          subSectionContainerElem.appendChild(subSectionElem);
        });
        sectionElem.appendChild(subSectionContainerElem);
      } else {
        const projects = sections[key];
        const ulElem = createProjectsList(projects, true);
        sectionElem.appendChild(ulElem);
      }
      menuContainer.appendChild(sectionElem);
    });
  };

  xhr.onerror = function() {
    console.error("Unable to load projects");
  };

  xhr.send();
}
</script>
<script>
// Repository coordinates for the versioning menu.
// NOTE(review): not read anywhere in the visible part of this page; presumably
// consumed by the scripts inside the injected versioning_menu.html — confirm.
const versioningConfig = {
  githubUser: 'Farama-Foundation',
  githubRepo: 'Gymnasium',
};

// Fetch the shared versioning menu, append it to <body> and activate its scripts.
fetch('/main/_static/versioning/versioning_menu.html')
  .then(response => {
    if (response.status !== 200) {
      console.warn("Unable to load versioning menu", response);
      return null;
    }
    // Returned so that a failure while reading the body reaches the catch below.
    return response.text();
  })
  .then(text => {
    if (text === null) {
      return;
    }
    const container = document.createElement("div");
    container.innerHTML = text;
    document.querySelector("body").appendChild(container);
    // innerHTML doesn't evaluate scripts, so each one is re-created dynamically
    Array.from(container.querySelectorAll("script")).forEach(oldScript => {
      const newScript = document.createElement("script");
      Array.from(oldScript.attributes).forEach(attr => newScript.setAttribute(attr.name, attr.value));
      newScript.appendChild(document.createTextNode(oldScript.innerHTML));
      oldScript.parentNode.replaceChild(newScript, oldScript);
    });
  })
  .catch(error => {
    // fetch() rejects on network failure; report it instead of leaving an
    // unhandled promise rejection.
    console.warn("Unable to load versioning menu", error);
  });
</script>
</body>
</html>

View File

@@ -0,0 +1,872 @@
<!doctype html>
<html class="no-js" lang="en" data-content_root="../../../../">
<head><meta charset="utf-8"/>
<meta name="viewport" content="width=device-width,initial-scale=1"/>
<meta name="color-scheme" content="light dark">
<meta name="description" content="A standard API for reinforcement learning and a diverse set of reference environments (formerly Gym)">
<meta property="og:title" content="Gymnasium Documentation" />
<meta property="og:type" content="website" />
<meta property="og:description" content="A standard API for reinforcement learning and a diverse set of reference environments (formerly Gym)" />
<meta property="og:url" content="https://gymnasium.farama.org/_modules/gymnasium/spaces/discrete.html" /><meta property="og:image" content="https://gymnasium.farama.org/_static/img/gymnasium-github.png" /><meta name="twitter:card" content="summary_large_image"><link rel="index" title="Index" href="../../../../genindex/" /><link rel="search" title="Search" href="../../../../search/" />
<link rel="canonical" href="https://gymnasium.farama.org/_modules/gymnasium/spaces/discrete.html" />
<link rel="shortcut icon" href="../../../../_static/favicon.png"/><!-- Generated with Sphinx 7.4.7 and Furo 2023.08.19.dev1 -->
<title>gymnasium.spaces.discrete - Gymnasium Documentation</title>
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/furo.css?v=3e7f4c72" />
<link rel="stylesheet" type="text/css" href="../../../../_static/sg_gallery.css?v=61a4c737" />
<link rel="stylesheet" type="text/css" href="../../../../_static/sg_gallery-binder.css?v=f4aeca0c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/sg_gallery-dataframe.css?v=2082cf3c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/sg_gallery-rendered-html.css?v=1277b6f3" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/furo-extensions.css?v=82c8b628" />
<style>
body {
--color-code-background: #f8f8f8;
--color-code-foreground: black;
}
@media not print {
body[data-theme="dark"] {
--color-code-background: #202020;
--color-code-foreground: #d0d0d0;
}
@media (prefers-color-scheme: dark) {
body:not([data-theme="light"]) {
--color-code-background: #202020;
--color-code-foreground: #d0d0d0;
}
}
}
</style></head>
<body>
<!-- Farama site header: mobile sidebar toggle, project logo/title, and the
     Farama projects menu. The menu body is filled in by script from
     farama.org/api/projects.json. -->
<header class="farama-header" aria-label="Farama header">
  <div class="farama-header__container">
    <div class="farama-header__left--mobile">
      <label class="nav-overlay-icon" for="__navigation">
        <div class="visually-hidden">Toggle site navigation sidebar</div>
        <svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
          <defs></defs>
          <line x1="0.5" y1="4" x2="23.5" y2="4"></line>
          <line x1="0.232" y1="12" x2="23.5" y2="12"></line>
          <line x1="0.232" y1="20" x2="23.5" y2="20"></line>
        </svg>
      </label>
    </div>
    <div class="farama-header__left farama-header__center--mobile">
      <a href="../../../../">
        <img class="farama-header__logo only-light" src="../../../../_static/img/gymnasium_black.svg" alt="Light Logo"/>
        <img class="farama-header__logo only-dark" src="../../../../_static/img/gymnasium_white.svg" alt="Dark Logo"/>
        <span class="farama-header__title">Gymnasium Documentation</span>
      </a>
    </div>
    <div class="farama-header__right">
      <div class="farama-header-menu">
        <button class="farama-header-menu__btn" type="button" aria-label="Open Farama Menu" aria-expanded="false" aria-haspopup="true" aria-controls="farama-menu">
          <!-- Decorative: the button is already named by its aria-label. -->
          <img class="farama-black-logo-invert" src="../../../../_static/img/farama-logo-header.svg" alt="">
          <svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
            <polyline style="stroke-linecap: round; stroke-linejoin: round; fill: none; stroke-width: 2px;" points="1 7 12 18 23 7"></polyline>
          </svg>
        </button>
        <div class="farama-header-menu-container farama-hidden" aria-hidden="true" id="farama-menu">
          <div class="farama-header-menu__header">
            <a href="https://farama.org">
              <img class="farama-header-menu__logo farama-white-logo-invert" src="../../../../_static/img/farama_solid_white.svg" alt="Farama Foundation logo">
              <span>Farama Foundation</span>
            </a>
            <div class="farama-header-menu-header__right">
              <!-- Icon-only button: the aria-label provides its accessible name. -->
              <button id="farama-close-menu" type="button" aria-label="Close Farama Menu">
                <svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg" fill="none" stroke="currentColor"
                  stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="icon-close">
                  <line x1="3" y1="21" x2="21" y2="3"></line>
                  <line x1="3" y1="3" x2="21" y2="21"></line>
                </svg>
              </button>
            </div>
          </div>
          <div class="farama-header-menu__body">
            <!-- Response from farama.org/api/projects.json -->
          </div>
        </div>
      </div>
    </div>
  </div>
</header>
<script>
// Apply the colour theme stored under the "theme" key in localStorage to
// <body data-theme>, falling back to "auto" when nothing is stored.
document.body.dataset.theme = localStorage.getItem("theme") || "auto";
</script>
<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
<symbol id="svg-toc" viewBox="0 0 24 24">
<title>Contents</title>
<svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 1024 1024">
<path d="M408 442h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8zm-8 204c0 4.4 3.6 8 8 8h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56zm504-486H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zm0 632H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zM115.4 518.9L271.7 642c5.8 4.6 14.4.5 14.4-6.9V388.9c0-7.4-8.5-11.5-14.4-6.9L115.4 505.1a8.74 8.74 0 0 0 0 13.8z"/>
</svg>
</symbol>
<symbol id="svg-menu" viewBox="0 0 24 24">
<title>Menu</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-menu">
<line x1="3" y1="12" x2="21" y2="12"></line>
<line x1="3" y1="6" x2="21" y2="6"></line>
<line x1="3" y1="18" x2="21" y2="18"></line>
</svg>
</symbol>
<symbol id="svg-arrow-right" viewBox="0 0 24 24">
<title>Expand</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-chevron-right">
<polyline points="9 18 15 12 9 6"></polyline>
</svg>
</symbol>
<symbol id="svg-sun" viewBox="0 0 24 24">
<title>Light mode</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="feather-sun">
<circle cx="12" cy="12" r="5"></circle>
<line x1="12" y1="1" x2="12" y2="3"></line>
<line x1="12" y1="21" x2="12" y2="23"></line>
<line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
<line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
<line x1="1" y1="12" x2="3" y2="12"></line>
<line x1="21" y1="12" x2="23" y2="12"></line>
<line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
<line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
</svg>
</symbol>
<symbol id="svg-moon" viewBox="0 0 24 24">
<title>Dark mode</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-moon">
<path stroke="none" d="M0 0h24v24H0z" fill="none" />
<path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z" />
</svg>
</symbol>
<symbol id="svg-sun-half" viewBox="0 0 24 24">
<title>Auto light/dark mode</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-shadow">
<path stroke="none" d="M0 0h24v24H0z" fill="none"/>
<circle cx="12" cy="12" r="9" />
<path d="M13 12h5" />
<path d="M13 15h4" />
<path d="M13 18h1" />
<path d="M13 9h4" />
<path d="M13 6h1" />
</svg>
</symbol>
</svg>
<input type="checkbox" class="sidebar-toggle" name="__navigation" id="__navigation">
<input type="checkbox" class="sidebar-toggle" name="__toc" id="__toc">
<label class="overlay sidebar-overlay" for="__navigation">
<div class="visually-hidden">Hide navigation sidebar</div>
</label>
<label class="overlay toc-overlay" for="__toc">
<div class="visually-hidden">Hide table of contents sidebar</div>
</label>
<div class="page">
<!--<header class="mobile-header">
<div class="header-left">
<label class="nav-overlay-icon" for="__navigation">
<div class="visually-hidden">Toggle site navigation sidebar</div>
<i class="icon"><svg><use href="#svg-menu"></use></svg></i>
</label>
</div>
<div class="header-center">
<a href="../../../../"><div class="brand">Gymnasium Documentation</div></a>
</div>
<div class="header-right">
<div class="theme-toggle-container theme-toggle-header">
<button class="theme-toggle">
<div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
<svg class="theme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
<svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg>
<svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
</button>
</div>
<label class="toc-overlay-icon toc-header-icon no-toc" for="__toc">
<div class="visually-hidden">Toggle table of contents sidebar</div>
<i class="icon"><svg><use href="#svg-toc"></use></svg></i>
</label>
</div>
</header>-->
<aside class="sidebar-drawer">
<div class="sidebar-container">
<div class="sidebar-sticky"><a class="farama-sidebar__title" href="../../../../">
<img class="farama-header__logo only-light" src="../../../../_static/img/gymnasium_black.svg" alt="Light Logo"/>
<img class="farama-header__logo only-dark" src="../../../../_static/img/gymnasium_white.svg" alt="Dark Logo"/>
<span class="farama-header__title">Gymnasium Documentation</span>
</a><form class="sidebar-search-container" method="get" action="../../../../search/" role="search">
<input class="sidebar-search" placeholder="Search" name="q" aria-label="Search">
<input type="hidden" name="check_keywords" value="yes">
<input type="hidden" name="area" value="default">
</form>
<div id="searchbox"></div><div class="sidebar-scroll"><div class="sidebar-tree">
<p class="caption" role="heading"><span class="caption-text">Introduction</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/basic_usage/">Basic Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/train_agent/">Training an Agent</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/create_custom_env/">Create a Custom Environment</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/record_agent/">Recording Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/speed_up_env/">Speeding Up Training</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/gym_compatibility/">Compatibility with Gym</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/migration_guide/">Migration Guide - v0.21 to v1.0.0</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">API</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../api/env/">Env</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../api/registry/">Make and register</a></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../api/spaces/">Spaces</a><input class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" role="switch" type="checkbox"/><label for="toctree-checkbox-1"><div class="visually-hidden">Toggle navigation of Spaces</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/spaces/fundamental/">Fundamental Spaces</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/spaces/composite/">Composite Spaces</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/spaces/utils/">Spaces Utils</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../api/wrappers/">Wrappers</a><input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" role="switch" type="checkbox"/><label for="toctree-checkbox-2"><div class="visually-hidden">Toggle navigation of Wrappers</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/wrappers/table/">List of Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/wrappers/misc_wrappers/">Misc Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/wrappers/action_wrappers/">Action Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/wrappers/observation_wrappers/">Observation Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/wrappers/reward_wrappers/">Reward Wrappers</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../api/vector/">Vectorize</a><input class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" role="switch" type="checkbox"/><label for="toctree-checkbox-3"><div class="visually-hidden">Toggle navigation of Vectorize</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/vector/wrappers/">Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/vector/async_vector_env/">AsyncVectorEnv</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/vector/sync_vector_env/">SyncVectorEnv</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/vector/utils/">Utility functions</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../../../api/utils/">Utility functions</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../api/functional/">Functional Env</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Environments</span></p>
<ul>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../environments/classic_control/">Classic Control</a><input class="toctree-checkbox" id="toctree-checkbox-4" name="toctree-checkbox-4" role="switch" type="checkbox"/><label for="toctree-checkbox-4"><div class="visually-hidden">Toggle navigation of Classic Control</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/classic_control/acrobot/">Acrobot</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/classic_control/cart_pole/">Cart Pole</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/classic_control/mountain_car_continuous/">Mountain Car Continuous</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/classic_control/mountain_car/">Mountain Car</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/classic_control/pendulum/">Pendulum</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../environments/box2d/">Box2D</a><input class="toctree-checkbox" id="toctree-checkbox-5" name="toctree-checkbox-5" role="switch" type="checkbox"/><label for="toctree-checkbox-5"><div class="visually-hidden">Toggle navigation of Box2D</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/box2d/bipedal_walker/">Bipedal Walker</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/box2d/car_racing/">Car Racing</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/box2d/lunar_lander/">Lunar Lander</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../environments/toy_text/">Toy Text</a><input class="toctree-checkbox" id="toctree-checkbox-6" name="toctree-checkbox-6" role="switch" type="checkbox"/><label for="toctree-checkbox-6"><div class="visually-hidden">Toggle navigation of Toy Text</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/toy_text/blackjack/">Blackjack</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/toy_text/taxi/">Taxi</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/toy_text/cliff_walking/">Cliff Walking</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/toy_text/frozen_lake/">Frozen Lake</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../environments/mujoco/">MuJoCo</a><input class="toctree-checkbox" id="toctree-checkbox-7" name="toctree-checkbox-7" role="switch" type="checkbox"/><label for="toctree-checkbox-7"><div class="visually-hidden">Toggle navigation of MuJoCo</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/ant/">Ant</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/half_cheetah/">Half Cheetah</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/hopper/">Hopper</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/humanoid/">Humanoid</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/humanoid_standup/">Humanoid Standup</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/inverted_double_pendulum/">Inverted Double Pendulum</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/inverted_pendulum/">Inverted Pendulum</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/pusher/">Pusher</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/reacher/">Reacher</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/swimmer/">Swimmer</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/walker2d/">Walker2D</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../../../environments/atari/">Atari</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../environments/third_party_environments/">External Environments</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Tutorials</span></p>
<ul>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../tutorials/gymnasium_basics/">Gymnasium Basics</a><input class="toctree-checkbox" id="toctree-checkbox-8" name="toctree-checkbox-8" role="switch" type="checkbox"/><label for="toctree-checkbox-8"><div class="visually-hidden">Toggle navigation of Gymnasium Basics</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/gymnasium_basics/environment_creation/">Make your own custom environment</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/gymnasium_basics/handling_time_limits/">Handling Time Limits</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/gymnasium_basics/implementing_custom_wrappers/">Implementing Custom Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/gymnasium_basics/load_quadruped_model/">Load custom quadruped robot environments</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../tutorials/training_agents/">Training Agents</a><input class="toctree-checkbox" id="toctree-checkbox-9" name="toctree-checkbox-9" role="switch" type="checkbox"/><label for="toctree-checkbox-9"><div class="visually-hidden">Toggle navigation of Training Agents</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/training_agents/blackjack_q_learning/">Solving Blackjack with Tabular Q-Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/training_agents/frozenlake_q_learning/">Solving Frozenlake with Tabular Q-Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/training_agents/mujoco_reinforce/">Training using REINFORCE for Mujoco</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/training_agents/vector_a2c/">Speeding up A2C Training with Vector Envs</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../../../tutorials/third-party-tutorials/">Third-Party Tutorials</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Development</span></p>
<ul>
<li class="toctree-l1"><a class="reference external" href="https://github.com/Farama-Foundation/Gymnasium">Github</a></li>
<li class="toctree-l1"><a class="reference external" href="https://arxiv.org/abs/2407.17032">Paper</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../gymnasium_release_notes/">Gymnasium Release Notes</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../gym_release_notes/">Gym Release Notes</a></li>
<li class="toctree-l1"><a class="reference external" href="https://github.com/Farama-Foundation/Gymnasium/blob/main/docs/README.md">Contribute to the Docs</a></li>
</ul>
</div>
</div>
</div>
</div>
</aside>
<div class="main-container">
<div class="main">
<div class="content">
<div class="article-container">
<a href="#" class="back-to-top muted-link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
<path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12z"></path>
</svg>
<span>Back to top</span>
</a>
<div class="content-icon-container"><div class="theme-toggle-container theme-toggle-content">
<button class="theme-toggle" title="Toggle color theme">
<div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
<svg class="theme-icon-when-auto">
<use href="#svg-sun-half"></use>
</svg>
<svg class="theme-icon-when-dark">
<use href="#svg-moon"></use>
</svg>
<svg class="theme-icon-when-light">
<use href="#svg-sun"></use>
</svg>
</button>
</div>
<label class="toc-overlay-icon toc-content-icon no-toc" for="__toc">
<div class="visually-hidden">Toggle table of contents sidebar</div>
<i class="icon"><svg>
<use href="#svg-toc"></use>
</svg></i>
</label>
</div>
<article role="main">
<h1>Source code for gymnasium.spaces.discrete</h1><div class="highlight"><pre>
<span></span><span class="sd">&quot;&quot;&quot;Implementation of a space consisting of finitely many elements.&quot;&quot;&quot;</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">__future__</span><span class="w"> </span><span class="kn">import</span> <span class="n">annotations</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">collections.abc</span><span class="w"> </span><span class="kn">import</span> <span class="n">Iterable</span><span class="p">,</span> <span class="n">Mapping</span><span class="p">,</span> <span class="n">Sequence</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">typing</span><span class="w"> </span><span class="kn">import</span> <span class="n">Any</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">numpy</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">np</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">gymnasium.spaces.space</span><span class="w"> </span><span class="kn">import</span> <span class="n">MaskNDArray</span><span class="p">,</span> <span class="n">Space</span>
<div class="viewcode-block" id="Discrete">
<a class="viewcode-back" href="../../../../api/spaces/fundamental/#gymnasium.spaces.Discrete">[docs]</a>
<span class="k">class</span><span class="w"> </span><span class="nc">Discrete</span><span class="p">(</span><span class="n">Space</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">int64</span><span class="p">]):</span>
<span class="w"> </span><span class="sa">r</span><span class="sd">&quot;&quot;&quot;A space consisting of finitely many elements.</span>
<span class="sd"> This class represents a finite subset of integers, more specifically a set of the form :math:`\{ a, a+1, \dots, a+n-1 \}`.</span>
<span class="sd"> Example:</span>
<span class="sd"> &gt;&gt;&gt; from gymnasium.spaces import Discrete</span>
<span class="sd"> &gt;&gt;&gt; observation_space = Discrete(2, seed=42) # {0, 1}</span>
<span class="sd"> &gt;&gt;&gt; observation_space.sample()</span>
<span class="sd"> np.int64(0)</span>
<span class="sd"> &gt;&gt;&gt; observation_space = Discrete(3, start=-1, seed=42) # {-1, 0, 1}</span>
<span class="sd"> &gt;&gt;&gt; observation_space.sample()</span>
<span class="sd"> np.int64(-1)</span>
<span class="sd"> &gt;&gt;&gt; observation_space.sample(mask=np.array([0,0,1], dtype=np.int8))</span>
<span class="sd"> np.int64(1)</span>
<span class="sd"> &gt;&gt;&gt; observation_space.sample(probability=np.array([0,0,1], dtype=np.float64))</span>
<span class="sd"> np.int64(1)</span>
<span class="sd"> &gt;&gt;&gt; observation_space.sample(probability=np.array([0,0.3,0.7], dtype=np.float64))</span>
<span class="sd"> np.int64(1)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">n</span><span class="p">:</span> <span class="nb">int</span> <span class="o">|</span> <span class="n">np</span><span class="o">.</span><span class="n">integer</span><span class="p">[</span><span class="n">Any</span><span class="p">],</span>
<span class="n">seed</span><span class="p">:</span> <span class="nb">int</span> <span class="o">|</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">Generator</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">start</span><span class="p">:</span> <span class="nb">int</span> <span class="o">|</span> <span class="n">np</span><span class="o">.</span><span class="n">integer</span><span class="p">[</span><span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span>
<span class="p">):</span>
<span class="w"> </span><span class="sa">r</span><span class="sd">&quot;&quot;&quot;Constructor of :class:`Discrete` space.</span>
<span class="sd"> This will construct the space :math:`\{\text{start}, ..., \text{start} + n - 1\}`.</span>
<span class="sd"> Args:</span>
<span class="sd"> n (int): The number of elements of this space.</span>
<span class="sd"> seed: Optionally, you can use this argument to seed the RNG that is used to sample from the ``Discrete`` space.</span>
<span class="sd"> start (int): The smallest element of this space.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="n">np</span><span class="o">.</span><span class="n">issubdtype</span><span class="p">(</span>
<span class="nb">type</span><span class="p">(</span><span class="n">n</span><span class="p">),</span> <span class="n">np</span><span class="o">.</span><span class="n">integer</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;Expects `n` to be an integer, actual dtype: </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">n</span><span class="p">)</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="k">assert</span> <span class="n">n</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;n (counts) have to be positive&quot;</span>
<span class="k">assert</span> <span class="n">np</span><span class="o">.</span><span class="n">issubdtype</span><span class="p">(</span>
<span class="nb">type</span><span class="p">(</span><span class="n">start</span><span class="p">),</span> <span class="n">np</span><span class="o">.</span><span class="n">integer</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;Expects `start` to be an integer, actual type: </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">start</span><span class="p">)</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">int64</span><span class="p">(</span><span class="n">n</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">start</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">int64</span><span class="p">(</span><span class="n">start</span><span class="p">)</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">((),</span> <span class="n">np</span><span class="o">.</span><span class="n">int64</span><span class="p">,</span> <span class="n">seed</span><span class="p">)</span>
<span class="nd">@property</span>
<span class="k">def</span><span class="w"> </span><span class="nf">is_np_flattenable</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Checks whether this space can be flattened to a :class:`spaces.Box`.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="kc">True</span>
<div class="viewcode-block" id="Discrete.sample">
<a class="viewcode-back" href="../../../../api/spaces/fundamental/#gymnasium.spaces.Discrete.sample">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">sample</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">mask</span><span class="p">:</span> <span class="n">MaskNDArray</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">probability</span><span class="p">:</span> <span class="n">MaskNDArray</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">np</span><span class="o">.</span><span class="n">int64</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Generates a single random sample from this space.</span>
<span class="sd"> A sample will be chosen uniformly at random with the mask if provided, or it will be chosen according to a specified probability distribution if the probability mask is provided.</span>
<span class="sd"> Args:</span>
<span class="sd"> mask: An optional mask for if an action can be selected.</span>
<span class="sd"> Expected `np.ndarray` of shape ``(n,)`` and dtype ``np.int8`` where ``1`` represents valid actions and ``0`` invalid / infeasible actions.</span>
<span class="sd"> If there are no possible actions (i.e. ``np.all(mask == 0)``) then ``space.start`` will be returned.</span>
<span class="sd"> probability: An optional probability mask describing the probability of each action being selected.</span>
<span class="sd"> Expected `np.ndarray` of shape ``(n,)`` and dtype ``np.float64`` where each value is in the range ``[0, 1]`` and the sum of all values is 1.</span>
<span class="sd"> If the values do not sum to 1, an exception will be thrown.</span>
<span class="sd"> Returns:</span>
<span class="sd"> A sampled integer from the space</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">mask</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">probability</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;Only one of `mask` or `probability` can be provided, actual values: mask=</span><span class="si">{</span><span class="n">mask</span><span class="si">}</span><span class="s2">, probability=</span><span class="si">{</span><span class="n">probability</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="p">)</span>
<span class="c1"># binary mask sampling</span>
<span class="k">elif</span> <span class="n">mask</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span>
<span class="n">mask</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;The expected type of the sample mask is np.ndarray, actual type: </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">mask</span><span class="p">)</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="k">assert</span> <span class="p">(</span>
<span class="n">mask</span><span class="o">.</span><span class="n">dtype</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">int8</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;The expected dtype of the sample mask is np.int8, actual dtype: </span><span class="si">{</span><span class="n">mask</span><span class="o">.</span><span class="n">dtype</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="k">assert</span> <span class="n">mask</span><span class="o">.</span><span class="n">shape</span> <span class="o">==</span> <span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n</span><span class="p">,</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;The expected shape of the sample mask is </span><span class="si">{</span><span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">n</span><span class="p">),)</span><span class="si">}</span><span class="s2">, actual shape: </span><span class="si">{</span><span class="n">mask</span><span class="o">.</span><span class="n">shape</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="n">valid_action_mask</span> <span class="o">=</span> <span class="n">mask</span> <span class="o">==</span> <span class="mi">1</span>
<span class="k">assert</span> <span class="n">np</span><span class="o">.</span><span class="n">all</span><span class="p">(</span>
<span class="n">np</span><span class="o">.</span><span class="n">logical_or</span><span class="p">(</span><span class="n">mask</span> <span class="o">==</span> <span class="mi">0</span><span class="p">,</span> <span class="n">valid_action_mask</span><span class="p">)</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;All values of the sample mask should be 0 or 1, actual values: </span><span class="si">{</span><span class="n">mask</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="k">if</span> <span class="n">np</span><span class="o">.</span><span class="n">any</span><span class="p">(</span><span class="n">valid_action_mask</span><span class="p">):</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">start</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">np_random</span><span class="o">.</span><span class="n">choice</span><span class="p">(</span>
<span class="n">np</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">valid_action_mask</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">start</span>
<span class="c1"># probability mask sampling</span>
<span class="k">elif</span> <span class="n">probability</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span>
<span class="n">probability</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;The expected type of the sample probability is np.ndarray, actual type: </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">probability</span><span class="p">)</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="k">assert</span> <span class="p">(</span>
<span class="n">probability</span><span class="o">.</span><span class="n">dtype</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">float64</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;The expected dtype of the sample probability is np.float64, actual dtype: </span><span class="si">{</span><span class="n">probability</span><span class="o">.</span><span class="n">dtype</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="k">assert</span> <span class="n">probability</span><span class="o">.</span><span class="n">shape</span> <span class="o">==</span> <span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n</span><span class="p">,</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;The expected shape of the sample probability is </span><span class="si">{</span><span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">n</span><span class="p">),)</span><span class="si">}</span><span class="s2">, actual shape: </span><span class="si">{</span><span class="n">probability</span><span class="o">.</span><span class="n">shape</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="k">assert</span> <span class="n">np</span><span class="o">.</span><span class="n">all</span><span class="p">(</span>
<span class="n">np</span><span class="o">.</span><span class="n">logical_and</span><span class="p">(</span><span class="n">probability</span> <span class="o">&gt;=</span> <span class="mi">0</span><span class="p">,</span> <span class="n">probability</span> <span class="o">&lt;=</span> <span class="mi">1</span><span class="p">)</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;All values of the sample probability should be between 0 and 1, actual values: </span><span class="si">{</span><span class="n">probability</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="k">assert</span> <span class="n">np</span><span class="o">.</span><span class="n">isclose</span><span class="p">(</span>
<span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">probability</span><span class="p">),</span> <span class="mi">1</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;The sum of the sample probability should be equal to 1, actual sum: </span><span class="si">{</span><span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">probability</span><span class="p">)</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">start</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">np_random</span><span class="o">.</span><span class="n">choice</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">n</span><span class="p">),</span> <span class="n">p</span><span class="o">=</span><span class="n">probability</span><span class="p">)</span>
<span class="c1"># uniform sampling</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">start</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">np_random</span><span class="o">.</span><span class="n">integers</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">n</span><span class="p">)</span></div>
<span class="k">def</span><span class="w"> </span><span class="nf">contains</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Return boolean specifying if x is a valid member of this space.&quot;&quot;&quot;</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
<span class="n">as_int64</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">int64</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">generic</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">))</span> <span class="ow">and</span> <span class="p">(</span>
<span class="n">np</span><span class="o">.</span><span class="n">issubdtype</span><span class="p">(</span><span class="n">x</span><span class="o">.</span><span class="n">dtype</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">integer</span><span class="p">)</span> <span class="ow">and</span> <span class="n">x</span><span class="o">.</span><span class="n">shape</span> <span class="o">==</span> <span class="p">()</span>
<span class="p">):</span>
<span class="n">as_int64</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">int64</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="kc">False</span>
<span class="k">return</span> <span class="nb">bool</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">start</span> <span class="o">&lt;=</span> <span class="n">as_int64</span> <span class="o">&lt;</span> <span class="bp">self</span><span class="o">.</span><span class="n">start</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">n</span><span class="p">)</span>
<span class="k">def</span><span class="w"> </span><span class="fm">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Gives a string representation of this space.&quot;&quot;&quot;</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">start</span> <span class="o">!=</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">return</span> <span class="sa">f</span><span class="s2">&quot;Discrete(</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">n</span><span class="si">}</span><span class="s2">, start=</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">start</span><span class="si">}</span><span class="s2">)&quot;</span>
<span class="k">return</span> <span class="sa">f</span><span class="s2">&quot;Discrete(</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">n</span><span class="si">}</span><span class="s2">)&quot;</span>
<span class="k">def</span><span class="w"> </span><span class="fm">__eq__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Check whether ``other`` is equivalent to this instance.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="p">(</span>
<span class="nb">isinstance</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">Discrete</span><span class="p">)</span>
<span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">n</span> <span class="o">==</span> <span class="n">other</span><span class="o">.</span><span class="n">n</span>
<span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">start</span> <span class="o">==</span> <span class="n">other</span><span class="o">.</span><span class="n">start</span>
<span class="p">)</span>
<span class="k">def</span><span class="w"> </span><span class="nf">__setstate__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">state</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="nb">tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]</span> <span class="o">|</span> <span class="n">Mapping</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Used when loading a pickled space.</span>
<span class="sd"> This method has to be implemented explicitly to allow for loading of legacy states.</span>
<span class="sd"> Args:</span>
<span class="sd"> state: The new state</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="c1"># Don&#39;t mutate the original state</span>
<span class="n">state</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">(</span><span class="n">state</span><span class="p">)</span>
<span class="c1"># Allow for loading of legacy states.</span>
<span class="c1"># See https://github.com/openai/gym/pull/2470</span>
<span class="k">if</span> <span class="s2">&quot;start&quot;</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">state</span><span class="p">:</span>
<span class="n">state</span><span class="p">[</span><span class="s2">&quot;start&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">int64</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">__setstate__</span><span class="p">(</span><span class="n">state</span><span class="p">)</span>
<span class="k">def</span><span class="w"> </span><span class="nf">to_jsonable</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sample_n</span><span class="p">:</span> <span class="n">Sequence</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">int64</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="nb">list</span><span class="p">[</span><span class="nb">int</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Converts a list of samples to a list of ints.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="p">[</span><span class="nb">int</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">sample_n</span><span class="p">]</span>
<span class="k">def</span><span class="w"> </span><span class="nf">from_jsonable</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sample_n</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="nb">int</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="nb">list</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">int64</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Converts a list of json samples to a list of np.int64.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">int64</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">sample_n</span><span class="p">]</span></div>
</pre></div>
</article>
</div>
<footer>
<div class="related-pages">
</div>
<div class="bottom-of-page">
<div class="left-details">
<div class="copyright">
Copyright &#169; 2025 Farama Foundation
</div>
<!--
Made with <a href="https://www.sphinx-doc.org/">Sphinx</a> and <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s
<a href="https://github.com/pradyunsg/furo">Furo</a>
-->
</div>
<div class="right-details">
<div class="icons">
<a class="muted-link" href="https://github.com/Farama-Foundation/Gymnasium/"
aria-label="On GitHub">
<svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 16 16">
<path fill-rule="evenodd"
d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0 0 16 8c0-4.42-3.58-8-8-8z">
</path>
</svg>
</a>
</div>
</div>
</div>
</footer>
</div>
<aside class="toc-drawer no-toc">
</aside>
</div>
</div>
</div>
<script>
/**
 * Opens or closes the Farama header menu.
 *
 * The current state is read from the "active" class on `.farama-header-menu`.
 * The ARIA attributes on the menu button and the menu container are set to
 * describe the state being switched to, and then the class is flipped.
 */
const toggleMenu = () => {
    const headerMenu = document.querySelector(".farama-header-menu");
    const toggleButton = document.querySelector(".farama-header-menu__btn");
    const container = document.querySelector(".farama-header-menu-container");

    const wasOpen = headerMenu.classList.contains("active");
    toggleButton.setAttribute("aria-expanded", wasOpen ? "false" : "true");
    container.setAttribute("aria-hidden", wasOpen ? "true" : "false");
    headerMenu.classList.toggle("active");
};
// Clicking either the header menu button or the element with id
// "farama-close-menu" (presumably the menu's close control) toggles the menu.
document.querySelector(".farama-header-menu__btn").addEventListener("click", toggleMenu);
document.getElementById("farama-close-menu").addEventListener("click", toggleMenu);
</script>
<script async src="https://www.googletagmanager.com/gtag/js?id=G-6H9C8TWXZ8"></script>
<script>
/**
 * Initializes Google Analytics reporting.
 *
 * Makes sure the global `dataLayer` queue exists, then queues the `js`
 * timestamp command followed by the `config` command for this property.
 */
const enableGtag = () => {
    if (!window.dataLayer) {
        window.dataLayer = [];
    }

    // The raw `arguments` object is what gets pushed, so this has to stay a
    // regular function: arrow functions have no `arguments` binding.
    function gtag() {
        window.dataLayer.push(arguments);
    }

    const measurementId = 'G-6H9C8TWXZ8';
    gtag('js', new Date());
    gtag('config', measurementId);
};
// Cookie-consent banner. It is shown while nothing is stored in localStorage
// under "acceptedCookieAlert"; analytics is enabled only when the stored value
// is the string "true".
(() => {
    const CONSENT_KEY = "acceptedCookieAlert";
    const storedChoice = localStorage.getItem(CONSENT_KEY);

    if (storedChoice) {
        // A choice was recorded earlier: honour it and skip the banner.
        if (storedChoice === "true") {
            enableGtag();
        }
        return;
    }

    // Creates one of the banner buttons and wires up its click handler.
    const makeButton = (label, id, onClick) => {
        const button = document.createElement("button");
        button.innerText = label;
        button.className = "farama-btn cookie-alert__button";
        button.id = id;
        button.addEventListener("click", onClick);
        return button;
    };

    const banner = document.createElement("div");
    banner.classList.add("cookie-alert");
    const inner = document.createElement("div");
    inner.classList.add("cookie-alert__container");

    const message = document.createElement("p");
    message.innerHTML = `This page uses <a href="https://analytics.google.com/">
Google Analytics</a> to collect statistics.`;
    inner.appendChild(message);

    // localStorage stores strings, so the booleans below are persisted as
    // "false" / "true".
    const denyButton = makeButton("Deny", "cookie-alert__decline", () => {
        localStorage.setItem(CONSENT_KEY, false);
        banner.remove();
    });
    const allowButton = makeButton("Allow", "cookie-alert__accept", () => {
        localStorage.setItem(CONSENT_KEY, true);
        banner.remove();
        enableGtag();
    });

    inner.appendChild(denyButton);
    inner.appendChild(allowButton);
    banner.appendChild(inner);
    document.body.appendChild(banner);
})();
</script>
<script src="../../../../_static/documentation_options.js?v=151cd43d"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../../../_static/scripts/furo.js?v=7660844c"></script>
<script>
/**
 * Builds a `<ul class="farama-header-menu-list">` holding one linked entry per
 * project.
 *
 * @param {Array<{name: string, link: string, image?: string}>} projects
 *     Entries to render; `link` becomes the anchor href and `name` its text.
 * @param {boolean} displayImages
 *     When truthy, each link starts with the project's logo. The logo URL is
 *     `imagesBasepath` plus the project's `image`, or "/farama_black.svg" when
 *     the project has no `image`.
 * @returns {HTMLUListElement} The populated list element.
 */
const createProjectsList = (projects, displayImages) => {
    const list = document.createElement('ul');
    list.className = 'farama-header-menu-list';

    for (const project of projects) {
        const item = document.createElement("li");
        const anchor = document.createElement("a");
        anchor.href = project.link;
        item.appendChild(anchor);

        if (displayImages) {
            const logo = document.createElement("img");
            logo.src = imagesBasepath + (project.image || "/farama_black.svg");
            logo.alt = `${project.name} logo`;
            logo.className = "farama-black-logo-invert";
            anchor.appendChild(logo);
        }

        anchor.appendChild(document.createTextNode(project.name));
        list.appendChild(item);
    }

    return list;
};
// Create menu with Farama projects by using the API at farama.org/api/projects.json
/**
 * Creates and opens (but does not send) a request object for a cross-origin
 * call.
 *
 * @param {string} method HTTP method handed to `open`.
 * @param {string} url Target URL handed to `open`.
 * @returns {?XMLHttpRequest} The opened request, or `null` when neither an
 *     `XMLHttpRequest` exposing `withCredentials` nor `XDomainRequest` exists.
 */
const createCORSRequest = (method, url) => {
    const request = new XMLHttpRequest();
    request.responseType = 'json';

    if ("withCredentials" in request) {
        request.open(method, url, true);
        return request;
    }

    if (typeof XDomainRequest != "undefined") {
        // IE8 & IE9. The responseType set above is not carried over to this
        // replacement object.
        const legacyRequest = new XDomainRequest();
        legacyRequest.open(method, url);
        return legacyRequest;
    }

    // CORS not supported.
    return null;
};
// Endpoint that lists the Farama projects, and the base URL that project logo
// paths are appended to in createProjectsList.
const url = 'https://farama.org/api/projects.json';
const imagesBasepath = "https://farama.org/assets/images"
const method = 'GET';
// NOTE(review): createCORSRequest returns null when CORS is unsupported; the
// handler assignments that follow do not guard against that case.
let xhr = createCORSRequest(method, url);
// Builds the Farama header menu once the projects JSON has loaded: every
// project is sorted into a section, then one block is rendered per section.
xhr.onload = () => {
    const jsonResponse = xhr.response;
    // With responseType "json" the response is null when the body could not be
    // parsed; bail out instead of throwing on Object.keys(null).
    if (jsonResponse === null || typeof jsonResponse !== "object") {
        console.error("Unable to load projects");
        return;
    }

    // Sections are either a flat list of entries or a map of named
    // sub-sections, each holding a list of entries.
    const sections = {
        "Core Projects": [],
        "Mature Projects": {
            "Documentation": [],
            "Repositories": [],
        },
        "Incubating Projects": {
            "Documentation": [],
            "Repositories": [],
        },
        "Foundation": [
            {
                name: "About",
                link: "https://farama.org/about"
            },
            {
                name: "Standards",
                link: "https://farama.org/project_standards",
            },
            {
                name: "Donate",
                link: "https://farama.org/donations"
            }
        ]
    };

    // Categorize projects
    Object.keys(jsonResponse).forEach(key => {
        // Block-scoped on purpose so that no `projectJson` global is created.
        const projectJson = jsonResponse[key];

        // Projects with a website link to it and count as "Documentation";
        // the others link to their GitHub page and count as "Repositories".
        const hasWebsite = projectJson.website !== null;
        projectJson.link = hasWebsite ? projectJson.website : projectJson.github;
        const subSection = hasWebsite ? "Documentation" : "Repositories";

        if (projectJson.type === "core") {
            sections["Core Projects"].push(projectJson);
        } else if (projectJson.type === "mature") {
            sections["Mature Projects"][subSection].push(projectJson);
        } else {
            // Every other type is treated as incubating.
            sections["Incubating Projects"][subSection].push(projectJson);
        }
    });

    const menuContainer = document.querySelector(".farama-header-menu__body");
    Object.keys(sections).forEach(key => {
        const sectionElem = Object.assign(
            document.createElement('div'), {
                className: 'farama-header-menu__section',
            }
        );
        sectionElem.appendChild(Object.assign(document.createElement('span'),
            {
                className: 'farama-header-menu__section-title',
                innerText: key
            }
        ));

        if (Array.isArray(sections[key])) {
            // Flat section: a single list of entries.
            const projects = sections[key];
            const ulElem = createProjectsList(projects, true);
            sectionElem.appendChild(ulElem);
        } else {
            // Section made of named sub-sections laid out side by side.
            const subSections = sections[key];
            const subSectionContainerElem = Object.assign(
                document.createElement('div'), {
                    className: 'farama-header-menu__subsections-container',
                    style: 'display: flex'
                }
            );
            Object.keys(subSections).forEach(subKey => {
                const subSectionElem = Object.assign(
                    document.createElement('div'), {
                        className: 'farama-header-menu__subsection',
                    }
                );
                subSectionElem.appendChild(Object.assign(document.createElement('span'),
                    {
                        className: 'farama-header-menu__subsection-title',
                        innerText: subKey
                    }
                ));
                // NOTE(review): "Foundation" is a flat list, so this condition
                // is always true in this branch — confirm whether images were
                // meant to be hidden for the Foundation entries instead.
                const ulElem = createProjectsList(subSections[subKey], key !== 'Foundation');
                subSectionElem.appendChild(ulElem);
                subSectionContainerElem.appendChild(subSectionElem);
            });
            sectionElem.appendChild(subSectionContainerElem);
        }
        menuContainer.appendChild(sectionElem);
    });
};
xhr.onerror = function() {
console.error("Unable to load projects");
};
xhr.send();
</script>
<script>
// Repository coordinates for the versioning menu.
// NOTE(review): not referenced in this block; presumably read by the scripts
// injected from versioning_menu.html below. Confirm before renaming.
const versioningConfig = {
  githubUser: 'Farama-Foundation',
  githubRepo: 'Gymnasium',
};

// Fetch the shared versioning menu fragment and append it to <body>.
fetch('/main/_static/versioning/versioning_menu.html')
  .then(response => {
    if (response.status !== 200) {
      console.warn("Unable to load versioning menu", response);
      return null;
    }
    // Returned so that body-read errors reach the .catch() below.
    return response.text();
  })
  .then(text => {
    if (text === null) {
      return;
    }
    const container = document.createElement("div");
    container.innerHTML = text;
    document.querySelector("body").appendChild(container);
    // innerHTML doesn't evaluate scripts, so each <script> is replaced with a
    // freshly created copy (same attributes and source) that the browser will run.
    Array.from(container.querySelectorAll("script")).forEach(oldScript => {
      const newScript = document.createElement("script");
      Array.from(oldScript.attributes).forEach(attr => newScript.setAttribute(attr.name, attr.value));
      newScript.appendChild(document.createTextNode(oldScript.innerHTML));
      oldScript.parentNode.replaceChild(newScript, oldScript);
    });
  })
  .catch(error => {
    // Network failures previously surfaced as an unhandled promise rejection.
    console.warn("Unable to load versioning menu", error);
  });
</script>
</body>
</html>

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,840 @@
<!doctype html>
<html class="no-js" lang="en" data-content_root="../../../../">
<head>
  <!-- Encoding, viewport and colour-scheme hints. -->
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width,initial-scale=1">
  <meta name="color-scheme" content="light dark">
  <meta name="description" content="A standard API for reinforcement learning and a diverse set of reference environments (formerly Gym)">

  <!-- Open Graph / Twitter card metadata. -->
  <meta property="og:title" content="Gymnasium Documentation">
  <meta property="og:type" content="website">
  <meta property="og:description" content="A standard API for reinforcement learning and a diverse set of reference environments (formerly Gym)">
  <meta property="og:url" content="https://gymnasium.farama.org/_modules/gymnasium/spaces/multi_binary.html">
  <meta property="og:image" content="https://gymnasium.farama.org/_static/img/gymnasium-github.png">
  <meta name="twitter:card" content="summary_large_image">

  <!-- Index/search relations, canonical URL and favicon. -->
  <link rel="index" title="Index" href="../../../../genindex/">
  <link rel="search" title="Search" href="../../../../search/">
  <link rel="canonical" href="https://gymnasium.farama.org/_modules/gymnasium/spaces/multi_binary.html">
  <link rel="shortcut icon" href="../../../../_static/favicon.png">
  <!-- Generated with Sphinx 7.4.7 and Furo 2023.08.19.dev1 -->
  <title>gymnasium.spaces.multi_binary - Gymnasium Documentation</title>

  <!-- Stylesheets: syntax highlighting, Furo theme, gallery styles, Furo extensions. -->
  <link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02">
  <link rel="stylesheet" type="text/css" href="../../../../_static/styles/furo.css?v=3e7f4c72">
  <link rel="stylesheet" type="text/css" href="../../../../_static/sg_gallery.css?v=61a4c737">
  <link rel="stylesheet" type="text/css" href="../../../../_static/sg_gallery-binder.css?v=f4aeca0c">
  <link rel="stylesheet" type="text/css" href="../../../../_static/sg_gallery-dataframe.css?v=2082cf3c">
  <link rel="stylesheet" type="text/css" href="../../../../_static/sg_gallery-rendered-html.css?v=1277b6f3">
  <link rel="stylesheet" type="text/css" href="../../../../_static/styles/furo-extensions.css?v=82c8b628">

  <style>
    /* Code-block colours: light values by default. */
    body {
      --color-code-background: #f8f8f8;
      --color-code-foreground: black;
    }

    /* Outside print: dark values when the theme is explicitly "dark", or when
       the OS prefers dark and the theme is not explicitly "light". */
    @media not print {
      body[data-theme="dark"] {
        --color-code-background: #202020;
        --color-code-foreground: #d0d0d0;
      }
      @media (prefers-color-scheme: dark) {
        body:not([data-theme="light"]) {
          --color-code-background: #202020;
          --color-code-foreground: #d0d0d0;
        }
      }
    }
  </style>
</head>
<body>
<!-- Site-wide Farama header: mobile sidebar toggle, project logo/title, and the Farama projects menu. -->
<header class="farama-header" aria-label="Farama header">
  <div class="farama-header__container">
    <div class="farama-header__left--mobile">
      <!-- Label for the `__navigation` checkbox; the visually hidden text names the control. -->
      <label class="nav-overlay-icon" for="__navigation">
        <div class="visually-hidden">Toggle site navigation sidebar</div>
        <svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg" aria-hidden="true">
          <defs></defs>
          <line x1="0.5" y1="4" x2="23.5" y2="4"></line>
          <line x1="0.232" y1="12" x2="23.5" y2="12"></line>
          <line x1="0.232" y1="20" x2="23.5" y2="20"></line>
        </svg>
      </label>
    </div>
    <div class="farama-header__left farama-header__center--mobile">
      <a href="../../../../">
        <img class="farama-header__logo only-light" src="../../../../_static/img/gymnasium_black.svg" alt="Light Logo"/>
        <img class="farama-header__logo only-dark" src="../../../../_static/img/gymnasium_white.svg" alt="Dark Logo"/>
        <span class="farama-header__title">Gymnasium Documentation</span>
      </a>
    </div>
    <div class="farama-header__right">
      <div class="farama-header-menu">
        <!-- The button is named by aria-label, so the logo and chevron inside it are decorative. -->
        <button class="farama-header-menu__btn" type="button" aria-label="Open Farama Menu" aria-expanded="false" aria-haspopup="true" aria-controls="farama-menu">
          <img class="farama-black-logo-invert" src="../../../../_static/img/farama-logo-header.svg" alt="">
          <!-- The original declared viewBox twice, which is a duplicate-attribute parse error. -->
          <svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg" aria-hidden="true">
            <polyline style="stroke-linecap: round; stroke-linejoin: round; fill: none; stroke-width: 2px;" points="1 7 12 18 23 7"></polyline>
          </svg>
        </button>
        <div class="farama-header-menu-container farama-hidden" aria-hidden="true" id="farama-menu">
          <div class="farama-header-menu__header">
            <a href="https://farama.org">
              <img class="farama-header-menu__logo farama-white-logo-invert" src="../../../../_static/img/farama_solid_white.svg" alt="Farama Foundation logo">
              <span>Farama Foundation</span>
            </a>
            <div class="farama-header-menu-header__right">
              <!-- Icon-only button: aria-label supplies the accessible name. -->
              <button id="farama-close-menu" type="button" aria-label="Close Farama Menu">
                <svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg" fill="none" stroke="currentColor"
                  stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="icon-close" aria-hidden="true">
                  <line x1="3" y1="21" x2="21" y2="3"></line>
                  <line x1="3" y1="3" x2="21" y2="21"></line>
                </svg>
              </button>
            </div>
          </div>
          <div class="farama-header-menu__body">
            <!-- Response from farama.org/api/projects.json -->
          </div>
        </div>
      </div>
    </div>
  </div>
</header>
<script>
  // Runs inline, ahead of the page content that follows: copy the theme saved
  // under the "theme" localStorage key onto <body data-theme>, defaulting to "auto".
  document.body.setAttribute("data-theme", localStorage.getItem("theme") || "auto");
</script>
<!-- Hidden SVG sprite (display: none). Each <symbol> defines one icon with an id that <use href="#…"> elements in the page refer to. -->
<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
<!-- Table-of-contents icon. -->
<symbol id="svg-toc" viewBox="0 0 24 24">
<title>Contents</title>
<svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 1024 1024">
<path d="M408 442h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8zm-8 204c0 4.4 3.6 8 8 8h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56zm504-486H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zm0 632H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zM115.4 518.9L271.7 642c5.8 4.6 14.4.5 14.4-6.9V388.9c0-7.4-8.5-11.5-14.4-6.9L115.4 505.1a8.74 8.74 0 0 0 0 13.8z"/>
</svg>
</symbol>
<!-- Menu icon: three horizontal lines. -->
<symbol id="svg-menu" viewBox="0 0 24 24">
<title>Menu</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-menu">
<line x1="3" y1="12" x2="21" y2="12"></line>
<line x1="3" y1="6" x2="21" y2="6"></line>
<line x1="3" y1="18" x2="21" y2="18"></line>
</svg>
</symbol>
<!-- Right-pointing chevron, titled "Expand". -->
<symbol id="svg-arrow-right" viewBox="0 0 24 24">
<title>Expand</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-chevron-right">
<polyline points="9 18 15 12 9 6"></polyline>
</svg>
</symbol>
<!-- Light-mode icon: circle with eight short rays. -->
<symbol id="svg-sun" viewBox="0 0 24 24">
<title>Light mode</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="feather-sun">
<circle cx="12" cy="12" r="5"></circle>
<line x1="12" y1="1" x2="12" y2="3"></line>
<line x1="12" y1="21" x2="12" y2="23"></line>
<line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
<line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
<line x1="1" y1="12" x2="3" y2="12"></line>
<line x1="21" y1="12" x2="23" y2="12"></line>
<line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
<line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
</svg>
</symbol>
<!-- Dark-mode icon. -->
<symbol id="svg-moon" viewBox="0 0 24 24">
<title>Dark mode</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-moon">
<path stroke="none" d="M0 0h24v24H0z" fill="none" />
<path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z" />
</svg>
</symbol>
<!-- Auto light/dark icon: circle with horizontal strokes on its right half. -->
<symbol id="svg-sun-half" viewBox="0 0 24 24">
<title>Auto light/dark mode</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-shadow">
<path stroke="none" d="M0 0h24v24H0z" fill="none"/>
<circle cx="12" cy="12" r="9" />
<path d="M13 12h5" />
<path d="M13 15h4" />
<path d="M13 18h1" />
<path d="M13 9h4" />
<path d="M13 6h1" />
</svg>
</symbol>
</svg>
<!-- State checkboxes for the two sidebars. Any <label for="__navigation"> or <label for="__toc"> in the page toggles the matching checkbox when clicked. -->
<!-- NOTE(review): presumably the theme CSS shows/hides the sidebars based on the :checked state of these inputs; confirm in furo.css. -->
<input type="checkbox" class="sidebar-toggle" name="__navigation" id="__navigation">
<input type="checkbox" class="sidebar-toggle" name="__toc" id="__toc">
<!-- Overlay labels: clicking one toggles the navigation / table-of-contents checkbox respectively. -->
<label class="overlay sidebar-overlay" for="__navigation">
<div class="visually-hidden">Hide navigation sidebar</div>
</label>
<label class="overlay toc-overlay" for="__toc">
<div class="visually-hidden">Hide table of contents sidebar</div>
</label>
<div class="page">
<!--<header class="mobile-header">
<div class="header-left">
<label class="nav-overlay-icon" for="__navigation">
<div class="visually-hidden">Toggle site navigation sidebar</div>
<i class="icon"><svg><use href="#svg-menu"></use></svg></i>
</label>
</div>
<div class="header-center">
<a href="../../../../"><div class="brand">Gymnasium Documentation</div></a>
</div>
<div class="header-right">
<div class="theme-toggle-container theme-toggle-header">
<button class="theme-toggle">
<div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
<svg class="theme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
<svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg>
<svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
</button>
</div>
<label class="toc-overlay-icon toc-header-icon no-toc" for="__toc">
<div class="visually-hidden">Toggle table of contents sidebar</div>
<i class="icon"><svg><use href="#svg-toc"></use></svg></i>
</label>
</div>
</header>-->
<aside class="sidebar-drawer">
<div class="sidebar-container">
<div class="sidebar-sticky"><a class="farama-sidebar__title" href="../../../../">
<img class="farama-header__logo only-light" src="../../../../_static/img/gymnasium_black.svg" alt="Light Logo"/>
<img class="farama-header__logo only-dark" src="../../../../_static/img/gymnasium_white.svg" alt="Dark Logo"/>
<span class="farama-header__title">Gymnasium Documentation</span>
</a><form class="sidebar-search-container" method="get" action="../../../../search/" role="search">
<input class="sidebar-search" placeholder="Search" name="q" aria-label="Search">
<input type="hidden" name="check_keywords" value="yes">
<input type="hidden" name="area" value="default">
</form>
<div id="searchbox"></div><div class="sidebar-scroll"><div class="sidebar-tree">
<p class="caption" role="heading"><span class="caption-text">Introduction</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/basic_usage/">Basic Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/train_agent/">Training an Agent</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/create_custom_env/">Create a Custom Environment</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/record_agent/">Recording Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/speed_up_env/">Speeding Up Training</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/gym_compatibility/">Compatibility with Gym</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/migration_guide/">Migration Guide - v0.21 to v1.0.0</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">API</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../api/env/">Env</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../api/registry/">Make and register</a></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../api/spaces/">Spaces</a><input class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" role="switch" type="checkbox"/><label for="toctree-checkbox-1"><div class="visually-hidden">Toggle navigation of Spaces</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/spaces/fundamental/">Fundamental Spaces</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/spaces/composite/">Composite Spaces</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/spaces/utils/">Spaces Utils</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../api/wrappers/">Wrappers</a><input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" role="switch" type="checkbox"/><label for="toctree-checkbox-2"><div class="visually-hidden">Toggle navigation of Wrappers</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/wrappers/table/">List of Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/wrappers/misc_wrappers/">Misc Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/wrappers/action_wrappers/">Action Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/wrappers/observation_wrappers/">Observation Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/wrappers/reward_wrappers/">Reward Wrappers</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../api/vector/">Vectorize</a><input class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" role="switch" type="checkbox"/><label for="toctree-checkbox-3"><div class="visually-hidden">Toggle navigation of Vectorize</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/vector/wrappers/">Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/vector/async_vector_env/">AsyncVectorEnv</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/vector/sync_vector_env/">SyncVectorEnv</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/vector/utils/">Utility functions</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../../../api/utils/">Utility functions</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../api/functional/">Functional Env</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Environments</span></p>
<ul>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../environments/classic_control/">Classic Control</a><input class="toctree-checkbox" id="toctree-checkbox-4" name="toctree-checkbox-4" role="switch" type="checkbox"/><label for="toctree-checkbox-4"><div class="visually-hidden">Toggle navigation of Classic Control</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/classic_control/acrobot/">Acrobot</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/classic_control/cart_pole/">Cart Pole</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/classic_control/mountain_car_continuous/">Mountain Car Continuous</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/classic_control/mountain_car/">Mountain Car</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/classic_control/pendulum/">Pendulum</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../environments/box2d/">Box2D</a><input class="toctree-checkbox" id="toctree-checkbox-5" name="toctree-checkbox-5" role="switch" type="checkbox"/><label for="toctree-checkbox-5"><div class="visually-hidden">Toggle navigation of Box2D</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/box2d/bipedal_walker/">Bipedal Walker</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/box2d/car_racing/">Car Racing</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/box2d/lunar_lander/">Lunar Lander</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../environments/toy_text/">Toy Text</a><input class="toctree-checkbox" id="toctree-checkbox-6" name="toctree-checkbox-6" role="switch" type="checkbox"/><label for="toctree-checkbox-6"><div class="visually-hidden">Toggle navigation of Toy Text</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/toy_text/blackjack/">Blackjack</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/toy_text/taxi/">Taxi</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/toy_text/cliff_walking/">Cliff Walking</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/toy_text/frozen_lake/">Frozen Lake</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../environments/mujoco/">MuJoCo</a><input class="toctree-checkbox" id="toctree-checkbox-7" name="toctree-checkbox-7" role="switch" type="checkbox"/><label for="toctree-checkbox-7"><div class="visually-hidden">Toggle navigation of MuJoCo</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/ant/">Ant</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/half_cheetah/">Half Cheetah</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/hopper/">Hopper</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/humanoid/">Humanoid</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/humanoid_standup/">Humanoid Standup</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/inverted_double_pendulum/">Inverted Double Pendulum</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/inverted_pendulum/">Inverted Pendulum</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/pusher/">Pusher</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/reacher/">Reacher</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/swimmer/">Swimmer</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/walker2d/">Walker2D</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../../../environments/atari/">Atari</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../environments/third_party_environments/">External Environments</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Tutorials</span></p>
<ul>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../tutorials/gymnasium_basics/">Gymnasium Basics</a><input class="toctree-checkbox" id="toctree-checkbox-8" name="toctree-checkbox-8" role="switch" type="checkbox"/><label for="toctree-checkbox-8"><div class="visually-hidden">Toggle navigation of Gymnasium Basics</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/gymnasium_basics/environment_creation/">Make your own custom environment</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/gymnasium_basics/handling_time_limits/">Handling Time Limits</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/gymnasium_basics/implementing_custom_wrappers/">Implementing Custom Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/gymnasium_basics/load_quadruped_model/">Load custom quadruped robot environments</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../tutorials/training_agents/">Training Agents</a><input class="toctree-checkbox" id="toctree-checkbox-9" name="toctree-checkbox-9" role="switch" type="checkbox"/><label for="toctree-checkbox-9"><div class="visually-hidden">Toggle navigation of Training Agents</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/training_agents/blackjack_q_learning/">Solving Blackjack with Tabular Q-Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/training_agents/frozenlake_q_learning/">Solving Frozenlake with Tabular Q-Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/training_agents/mujoco_reinforce/">Training using REINFORCE for Mujoco</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/training_agents/vector_a2c/">Speeding up A2C Training with Vector Envs</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../../../tutorials/third-party-tutorials/">Third-Party Tutorials</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Development</span></p>
<ul>
<li class="toctree-l1"><a class="reference external" href="https://github.com/Farama-Foundation/Gymnasium">Github</a></li>
<li class="toctree-l1"><a class="reference external" href="https://arxiv.org/abs/2407.17032">Paper</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../gymnasium_release_notes/">Gymnasium Release Notes</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../gym_release_notes/">Gym Release Notes</a></li>
<li class="toctree-l1"><a class="reference external" href="https://github.com/Farama-Foundation/Gymnasium/blob/main/docs/README.md">Contribute to the Docs</a></li>
</ul>
</div>
</div>
</div>
</div>
</aside>
<div class="main-container">
<div class="main">
<div class="content">
<div class="article-container">
<a href="#" class="back-to-top muted-link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
<path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12z"></path>
</svg>
<span>Back to top</span>
</a>
<div class="content-icon-container"><div class="theme-toggle-container theme-toggle-content">
<button class="theme-toggle" title="Toggle color theme">
<div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
<svg class="theme-icon-when-auto">
<use href="#svg-sun-half"></use>
</svg>
<svg class="theme-icon-when-dark">
<use href="#svg-moon"></use>
</svg>
<svg class="theme-icon-when-light">
<use href="#svg-sun"></use>
</svg>
</button>
</div>
<label class="toc-overlay-icon toc-content-icon no-toc" for="__toc">
<div class="visually-hidden">Toggle table of contents sidebar</div>
<i class="icon"><svg>
<use href="#svg-toc"></use>
</svg></i>
</label>
</div>
<article role="main">
<h1>Source code for gymnasium.spaces.multi_binary</h1><div class="highlight"><pre>
<span></span><span class="sd">&quot;&quot;&quot;Implementation of a space that consists of binary np.ndarrays of a fixed shape.&quot;&quot;&quot;</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">__future__</span><span class="w"> </span><span class="kn">import</span> <span class="n">annotations</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">collections.abc</span><span class="w"> </span><span class="kn">import</span> <span class="n">Sequence</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">typing</span><span class="w"> </span><span class="kn">import</span> <span class="n">Any</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">numpy</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">np</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">numpy.typing</span><span class="w"> </span><span class="kn">import</span> <span class="n">NDArray</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">gymnasium.spaces.space</span><span class="w"> </span><span class="kn">import</span> <span class="n">MaskNDArray</span><span class="p">,</span> <span class="n">Space</span>
<div class="viewcode-block" id="MultiBinary">
<a class="viewcode-back" href="../../../../api/spaces/fundamental/#gymnasium.spaces.MultiBinary">[docs]</a>
<span class="k">class</span><span class="w"> </span><span class="nc">MultiBinary</span><span class="p">(</span><span class="n">Space</span><span class="p">[</span><span class="n">NDArray</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">int8</span><span class="p">]]):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;An n-shape binary space.</span>
<span class="sd"> Elements of this space are binary arrays of a shape that is fixed during construction.</span>
<span class="sd"> Example:</span>
<span class="sd"> &gt;&gt;&gt; from gymnasium.spaces import MultiBinary</span>
<span class="sd"> &gt;&gt;&gt; observation_space = MultiBinary(5, seed=42)</span>
<span class="sd"> &gt;&gt;&gt; observation_space.sample()</span>
<span class="sd"> array([1, 0, 1, 0, 1], dtype=int8)</span>
<span class="sd"> &gt;&gt;&gt; observation_space = MultiBinary([3, 2], seed=42)</span>
<span class="sd"> &gt;&gt;&gt; observation_space.sample()</span>
<span class="sd"> array([[1, 0],</span>
<span class="sd"> [1, 0],</span>
<span class="sd"> [1, 1]], dtype=int8)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">n</span><span class="p">:</span> <span class="n">NDArray</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">integer</span><span class="p">[</span><span class="n">Any</span><span class="p">]]</span> <span class="o">|</span> <span class="n">Sequence</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">|</span> <span class="nb">int</span><span class="p">,</span>
<span class="n">seed</span><span class="p">:</span> <span class="nb">int</span> <span class="o">|</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">Generator</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Constructor of :class:`MultiBinary` space.</span>
<span class="sd"> Args:</span>
<span class="sd"> n: This will fix the shape of elements of the space. It can either be an integer (if the space is flat)</span>
<span class="sd"> or some sort of sequence (tuple, list or np.ndarray) if there are multiple axes.</span>
<span class="sd"> seed: Optionally, you can use this argument to seed the RNG that is used to sample from the space.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">n</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n</span> <span class="o">=</span> <span class="n">n</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">n</span><span class="p">)</span>
<span class="n">input_n</span> <span class="o">=</span> <span class="p">(</span><span class="n">n</span><span class="p">,)</span>
<span class="k">assert</span> <span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">input_n</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">all</span><span class="p">()</span> <span class="c1"># n (counts) have to be positive</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">n</span><span class="p">,</span> <span class="p">(</span><span class="n">Sequence</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">)):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">n</span> <span class="o">=</span> <span class="n">input_n</span> <span class="o">=</span> <span class="nb">tuple</span><span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="n">i</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="n">n</span><span class="p">)</span>
<span class="k">assert</span> <span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">input_n</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">all</span><span class="p">()</span> <span class="c1"># n (counts) have to be positive</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;Expected n to be an int or a sequence of ints, actual type: </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">n</span><span class="p">)</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="p">)</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">input_n</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">int8</span><span class="p">,</span> <span class="n">seed</span><span class="p">)</span>
<span class="nd">@property</span>
<span class="k">def</span><span class="w"> </span><span class="nf">shape</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="o">...</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Has stricter type than gym.Space - never None.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_shape</span> <span class="c1"># type: ignore</span>
<span class="nd">@property</span>
<span class="k">def</span><span class="w"> </span><span class="nf">is_np_flattenable</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Checks whether this space can be flattened to a :class:`spaces.Box`.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="kc">True</span>
<div class="viewcode-block" id="MultiBinary.sample">
<a class="viewcode-back" href="../../../../api/spaces/fundamental/#gymnasium.spaces.MultiBinary.sample">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">sample</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">mask</span><span class="p">:</span> <span class="n">MaskNDArray</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">probability</span><span class="p">:</span> <span class="n">MaskNDArray</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">NDArray</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">int8</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Generates a single random sample from this space.</span>
<span class="sd"> A sample is drawn by independent, fair coin tosses (one toss per binary variable of the space).</span>
<span class="sd"> Args:</span>
<span class="sd"> mask: An optional ``np.ndarray`` to mask samples with expected shape of ``space.shape``.</span>
<span class="sd"> For ``mask == 0`` then the samples will be ``0``, for a ``mask == 1`` then the samples will be ``1``.</span>
<span class="sd"> For random samples, using a mask value of ``2``.</span>
<span class="sd"> The expected mask shape is the space shape and mask dtype is ``np.int8``.</span>
<span class="sd"> probability: An optional ``np.ndarray`` to mask samples with expected shape of space.shape where each element</span>
<span class="sd"> represents the probability of the corresponding sample element being a 1.</span>
<span class="sd"> The expected mask shape is the space shape and mask dtype is ``np.float64``.</span>
<span class="sd"> Returns:</span>
<span class="sd"> Sampled values from space</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">mask</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">probability</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;Only one of `mask` or `probability` can be provided, actual values: mask=</span><span class="si">{</span><span class="n">mask</span><span class="si">}</span><span class="s2">, probability=</span><span class="si">{</span><span class="n">probability</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="p">)</span>
<span class="k">if</span> <span class="n">mask</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span>
<span class="n">mask</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;The expected type of the mask is np.ndarray, actual type: </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">mask</span><span class="p">)</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="k">assert</span> <span class="p">(</span>
<span class="n">mask</span><span class="o">.</span><span class="n">dtype</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">int8</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;The expected dtype of the mask is np.int8, actual dtype: </span><span class="si">{</span><span class="n">mask</span><span class="o">.</span><span class="n">dtype</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="k">assert</span> <span class="p">(</span>
<span class="n">mask</span><span class="o">.</span><span class="n">shape</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">shape</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;The expected shape of the mask is </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">shape</span><span class="si">}</span><span class="s2">, actual shape: </span><span class="si">{</span><span class="n">mask</span><span class="o">.</span><span class="n">shape</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="k">assert</span> <span class="n">np</span><span class="o">.</span><span class="n">all</span><span class="p">(</span>
<span class="p">(</span><span class="n">mask</span> <span class="o">==</span> <span class="mi">0</span><span class="p">)</span> <span class="o">|</span> <span class="p">(</span><span class="n">mask</span> <span class="o">==</span> <span class="mi">1</span><span class="p">)</span> <span class="o">|</span> <span class="p">(</span><span class="n">mask</span> <span class="o">==</span> <span class="mi">2</span><span class="p">)</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;All values of a mask should be 0, 1 or 2, actual values: </span><span class="si">{</span><span class="n">mask</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">where</span><span class="p">(</span>
<span class="n">mask</span> <span class="o">==</span> <span class="mi">2</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">np_random</span><span class="o">.</span><span class="n">integers</span><span class="p">(</span><span class="n">low</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">high</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="p">),</span>
<span class="n">mask</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="p">),</span>
<span class="p">)</span>
<span class="k">elif</span> <span class="n">probability</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span>
<span class="n">probability</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;The expected type of the probability is np.ndarray, actual type: </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">probability</span><span class="p">)</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="k">assert</span> <span class="p">(</span>
<span class="n">probability</span><span class="o">.</span><span class="n">dtype</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">float64</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;The expected dtype of the probability is np.float64, actual dtype: </span><span class="si">{</span><span class="n">probability</span><span class="o">.</span><span class="n">dtype</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="k">assert</span> <span class="p">(</span>
<span class="n">probability</span><span class="o">.</span><span class="n">shape</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">shape</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;The expected shape of the probability is </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">shape</span><span class="si">}</span><span class="s2">, actual shape: </span><span class="si">{</span><span class="n">probability</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="k">assert</span> <span class="n">np</span><span class="o">.</span><span class="n">all</span><span class="p">(</span>
<span class="n">np</span><span class="o">.</span><span class="n">logical_and</span><span class="p">(</span><span class="n">probability</span> <span class="o">&gt;=</span> <span class="mi">0</span><span class="p">,</span> <span class="n">probability</span> <span class="o">&lt;=</span> <span class="mi">1</span><span class="p">)</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;All values of the sample probability should be between 0 and 1, actual values: </span><span class="si">{</span><span class="n">probability</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="k">return</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">np_random</span><span class="o">.</span><span class="n">random</span><span class="p">(</span><span class="n">size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">shape</span><span class="p">)</span> <span class="o">&lt;=</span> <span class="n">probability</span><span class="p">)</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">dtype</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">np_random</span><span class="o">.</span><span class="n">integers</span><span class="p">(</span><span class="n">low</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">high</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span></div>
<span class="k">def</span><span class="w"> </span><span class="nf">contains</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Return boolean specifying if x is a valid member of this space.&quot;&quot;&quot;</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">Sequence</span><span class="p">):</span>
<span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="c1"># Promote list to array for contains check</span>
<span class="k">return</span> <span class="nb">bool</span><span class="p">(</span>
<span class="nb">isinstance</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">)</span>
<span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">shape</span> <span class="o">==</span> <span class="n">x</span><span class="o">.</span><span class="n">shape</span>
<span class="ow">and</span> <span class="n">np</span><span class="o">.</span><span class="n">all</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">logical_or</span><span class="p">(</span><span class="n">x</span> <span class="o">==</span> <span class="mi">0</span><span class="p">,</span> <span class="n">x</span> <span class="o">==</span> <span class="mi">1</span><span class="p">))</span>
<span class="p">)</span>
<span class="k">def</span><span class="w"> </span><span class="nf">to_jsonable</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sample_n</span><span class="p">:</span> <span class="n">Sequence</span><span class="p">[</span><span class="n">NDArray</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">int8</span><span class="p">]])</span> <span class="o">-&gt;</span> <span class="nb">list</span><span class="p">[</span><span class="n">Sequence</span><span class="p">[</span><span class="nb">int</span><span class="p">]]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Convert a batch of samples from this space to a JSONable data type.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">sample_n</span><span class="p">)</span><span class="o">.</span><span class="n">tolist</span><span class="p">()</span>
<span class="k">def</span><span class="w"> </span><span class="nf">from_jsonable</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sample_n</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="n">Sequence</span><span class="p">[</span><span class="nb">int</span><span class="p">]])</span> <span class="o">-&gt;</span> <span class="nb">list</span><span class="p">[</span><span class="n">NDArray</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">int8</span><span class="p">]]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Convert a JSONable data type to a batch of samples from this space.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">sample</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span> <span class="k">for</span> <span class="n">sample</span> <span class="ow">in</span> <span class="n">sample_n</span><span class="p">]</span>
<span class="k">def</span><span class="w"> </span><span class="fm">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Gives a string representation of this space.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="sa">f</span><span class="s2">&quot;MultiBinary(</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">n</span><span class="si">}</span><span class="s2">)&quot;</span>
<span class="k">def</span><span class="w"> </span><span class="fm">__eq__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Check whether `other` is equivalent to this instance.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">MultiBinary</span><span class="p">)</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">n</span> <span class="o">==</span> <span class="n">other</span><span class="o">.</span><span class="n">n</span></div>
</pre></div>
</article>
</div>
<footer>
  <div class="related-pages">
  </div>
  <div class="bottom-of-page">
    <div class="left-details">
      <div class="copyright">
        Copyright &#169; 2025 Farama Foundation
      </div>
      <!--
      Made with <a href="https://www.sphinx-doc.org/">Sphinx</a> and <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s
      <a href="https://github.com/pradyunsg/furo">Furo</a>
      -->
    </div>
    <div class="right-details">
      <div class="icons">
        <!-- The link is named by its aria-label, so the icon is purely decorative and is hidden from assistive technology. -->
        <a class="muted-link" href="https://github.com/Farama-Foundation/Gymnasium/"
           aria-label="On GitHub">
          <svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 16 16" aria-hidden="true">
            <path fill-rule="evenodd"
                  d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0 0 16 8c0-4.42-3.58-8-8-8z">
            </path>
          </svg>
        </a>
      </div>
    </div>
  </div>
</footer>
</div>
<aside class="toc-drawer no-toc">
</aside>
</div>
</div>
</div>
<script>
/**
 * Opens the Farama menu when it is closed and closes it when it is open.
 *
 * The "active" class on `.farama-header-menu` is the source of truth; the
 * ARIA attributes on the button and on the panel are updated to describe the
 * state the menu is about to enter, and then the class is flipped.
 */
const toggleMenu = () => {
  const triggerBtn = document.querySelector(".farama-header-menu__btn");
  const panel = document.querySelector(".farama-header-menu-container");
  const menuRoot = document.querySelector(".farama-header-menu");

  // State *before* the toggle: the attributes below describe the opposite state.
  const wasOpen = menuRoot.classList.contains("active");

  triggerBtn.setAttribute("aria-expanded", wasOpen ? "false" : "true");
  panel.setAttribute("aria-hidden", wasOpen ? "true" : "false");
  menuRoot.classList.toggle("active");
};
// Toggle the menu from either control: the header menu button or the #farama-close-menu element.
document.querySelector(".farama-header-menu__btn").addEventListener("click", toggleMenu);
document.getElementById("farama-close-menu").addEventListener("click", toggleMenu);
</script>
<script async src="https://www.googletagmanager.com/gtag/js?id=G-6H9C8TWXZ8"></script>
<script>
/**
 * Queues the Google Analytics bootstrap commands on `window.dataLayer`.
 *
 * Called only after the visitor has allowed analytics (either just now or on
 * a previous visit).
 */
const enableGtag = () => {
  // Reuse a queue that is already on the page; otherwise start an empty one.
  if (!window.dataLayer) {
    window.dataLayer = [];
  }

  // Must stay a classic function: it pushes its own `arguments` object onto the queue.
  const gtag = function () {
    dataLayer.push(arguments);
  };

  gtag('js', new Date());
  gtag('config', 'G-6H9C8TWXZ8');
};
// Cookie-consent banner: shown until the visitor picks "Deny" or "Allow".
// The choice is persisted in localStorage under "acceptedCookieAlert".
(() => {
  const storedChoice = localStorage.getItem("acceptedCookieAlert");

  // A choice was recorded earlier: no banner; analytics only if it was "true".
  if (storedChoice) {
    if (storedChoice === "true") {
      enableGtag();
    }
    return;
  }

  const boxElem = document.createElement("div");
  boxElem.classList.add("cookie-alert");

  const containerElem = document.createElement("div");
  containerElem.classList.add("cookie-alert__container");

  const textElem = document.createElement("p");
  textElem.innerHTML = `This page uses <a href="https://analytics.google.com/">
Google Analytics</a> to collect statistics.`;
  containerElem.appendChild(textElem);

  // Builds one banner button and wires its click handler.
  const makeButton = (label, buttonId, onClick) => {
    const buttonElem = Object.assign(document.createElement("button"), {
      innerText: label,
      className: "farama-btn cookie-alert__button",
      id: buttonId,
    });
    buttonElem.addEventListener("click", onClick);
    return buttonElem;
  };

  const declineBtn = makeButton("Deny", "cookie-alert__decline", () => {
    localStorage.setItem("acceptedCookieAlert", false);
    boxElem.remove();
  });

  const acceptBtn = makeButton("Allow", "cookie-alert__accept", () => {
    localStorage.setItem("acceptedCookieAlert", true);
    boxElem.remove();
    enableGtag();
  });

  containerElem.appendChild(declineBtn);
  containerElem.appendChild(acceptBtn);
  boxElem.appendChild(containerElem);
  document.body.appendChild(boxElem);
})()
</script>
<script src="../../../../_static/documentation_options.js?v=151cd43d"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../../../_static/scripts/furo.js?v=7660844c"></script>
<script>
/**
 * Builds a `<ul class="farama-header-menu-list">` with one linked entry per project.
 *
 * @param projects Iterable of objects providing `name`, `link` and optionally `image`.
 * @param displayImages When truthy, each link gets a logo `<img>` before its text.
 * @returns The populated `<ul>` element (not yet attached to the document).
 */
const createProjectsList = (projects, displayImages) => {
  const listElem = document.createElement('ul');
  listElem.className = 'farama-header-menu-list';

  for (const entry of projects) {
    const itemElem = document.createElement("li");
    const linkElem = document.createElement("a");
    linkElem.href = entry.link;
    itemElem.appendChild(linkElem);

    if (displayImages) {
      // Entries without their own image fall back to the generic Farama logo.
      const imagePath = entry.image ? entry.image : "/farama_black.svg";
      const logoElem = document.createElement("img");
      logoElem.src = imagesBasepath + imagePath;
      logoElem.alt = `${entry.name} logo`;
      logoElem.className = "farama-black-logo-invert";
      linkElem.appendChild(logoElem);
    }

    linkElem.appendChild(document.createTextNode(entry.name));
    listElem.appendChild(itemElem);
  }

  return listElem;
};
// Create menu with Farama projects by using the API at farama.org/api/projects.json
/**
 * Creates and opens a cross-origin request object for the given method and URL.
 *
 * @returns An opened XMLHttpRequest (JSON response type, asynchronous) when
 *          the browser's XMLHttpRequest exposes `withCredentials`; otherwise
 *          an opened XDomainRequest when that constructor exists; otherwise
 *          `null`.
 */
const createCORSRequest = (method, url) => {
  const request = new XMLHttpRequest();
  request.responseType = 'json';

  if ("withCredentials" in request) {
    request.open(method, url, true);
    return request;
  }

  if (typeof XDomainRequest != "undefined") {
    // IE8 & IE9
    const legacyRequest = new XDomainRequest();
    legacyRequest.open(method, url);
    return legacyRequest;
  }

  // CORS not supported.
  return null;
};
// Endpoint that serves the Farama project list as JSON.
const url = 'https://farama.org/api/projects.json';
// Prefix that createProjectsList prepends to a project's image path to form the logo URL.
const imagesBasepath = "https://farama.org/assets/images"
const method = 'GET';
// Request object for the project list; its handlers are attached below before it is sent.
// NOTE(review): createCORSRequest can return null, in which case the handler assignments that follow would throw.
let xhr = createCORSRequest(method, url);
// Build the Farama projects menu once the project-list request completes.
// The response is expected to be an object whose values are project records
// with `type`, `website`, `github`, `name` and (optionally) `image` fields.
xhr.onload = () => {
  const jsonResponse = xhr.response;

  // With responseType "json" the response is null when the body is missing or
  // is not valid JSON (onload also fires for HTTP error statuses). Bail out
  // with the same message as the onerror handler instead of throwing below.
  if (jsonResponse === null || typeof jsonResponse !== "object") {
    console.error("Unable to load projects");
    return;
  }

  // Menu layout. Array values render as a flat list; object values render as
  // side-by-side sub-sections keyed by their titles.
  const sections = {
    "Core Projects": [],
    "Mature Projects": {
      "Documentation": [],
      "Repositories": [],
    },
    "Incubating Projects": {
      "Documentation": [],
      "Repositories": [],
    },
    "Foundation": [
      {
        name: "About",
        link: "https://farama.org/about"
      },
      {
        name: "Standards",
        link: "https://farama.org/project_standards",
      },
      {
        name: "Donate",
        link: "https://farama.org/donations"
      }
    ]
  };

  // Categorize projects
  Object.keys(jsonResponse).forEach(key => {
    // Keep this binding local to the callback (no implicit global).
    const projectJson = jsonResponse[key];

    // Projects with a website link to it and are listed under "Documentation";
    // the rest link to their GitHub repository and are listed under "Repositories".
    const hasWebsite = projectJson.website !== null;
    projectJson.link = hasWebsite ? projectJson.website : projectJson.github;
    const subSectionName = hasWebsite ? "Documentation" : "Repositories";

    if (projectJson.type === "core") {
      sections["Core Projects"].push(projectJson);
    } else if (projectJson.type === "mature") {
      sections["Mature Projects"][subSectionName].push(projectJson);
    } else {
      // Every other type is treated as incubating.
      sections["Incubating Projects"][subSectionName].push(projectJson);
    }
  });

  const menuContainer = document.querySelector(".farama-header-menu__body");

  Object.keys(sections).forEach(key => {
    const sectionElem = Object.assign(document.createElement('div'), {
      className: 'farama-header-menu__section',
    });
    sectionElem.appendChild(Object.assign(document.createElement('span'), {
      className: 'farama-header-menu__section-title',
      innerText: key,
    }));

    if (!Array.isArray(sections[key])) {
      // Section with sub-sections: lay them out next to each other.
      const subSections = sections[key];
      const subSectionContainerElem = Object.assign(document.createElement('div'), {
        className: 'farama-header-menu__subsections-container',
        style: 'display: flex',
      });

      Object.keys(subSections).forEach(subKey => {
        const subSectionElem = Object.assign(document.createElement('div'), {
          className: 'farama-header-menu__subsection',
        });
        subSectionElem.appendChild(Object.assign(document.createElement('span'), {
          className: 'farama-header-menu__subsection-title',
          innerText: subKey,
        }));
        const ulElem = createProjectsList(subSections[subKey], key !== 'Foundation');
        subSectionElem.appendChild(ulElem);
        subSectionContainerElem.appendChild(subSectionElem);
      });

      sectionElem.appendChild(subSectionContainerElem);
    } else {
      // Flat section: a single list of projects, always rendered with logos.
      const projects = sections[key];
      const ulElem = createProjectsList(projects, true);
      sectionElem.appendChild(ulElem);
    }

    menuContainer.appendChild(sectionElem);
  });
};
// A failed request is only logged; the menu body is left as it was.
xhr.onerror = () => {
  console.error("Unable to load projects");
};
// Handlers are in place, so the request can be dispatched.
xhr.send();
</script>
<script>
// GitHub owner and repository name for this documentation site.
// NOTE(review): nothing in this script reads versioningConfig; presumably the scripts
// injected from versioning_menu.html (fetched below) use it — confirm.
const versioningConfig = {
githubUser: 'Farama-Foundation',
githubRepo: 'Gymnasium',
};
// Fetch the versioning menu fragment and append it to the end of <body>.
// Any failure (non-200 status, network error, unreadable body) is reported
// with a console warning and otherwise leaves the page untouched.
fetch('/main/_static/versioning/versioning_menu.html').then(response => {
  if (response.status !== 200) {
    console.warn("Unable to load versioning menu", response);
    return undefined;
  }

  // Return the inner promise so a failure while reading the body reaches the catch below.
  return response.text().then(text => {
    const container = document.createElement("div");
    container.innerHTML = text;
    document.querySelector("body").appendChild(container);

    // innerHTML doesn't evaluate scripts, so each one is re-created as a fresh
    // <script> element (same attributes, same source text) to make it run.
    Array.from(container.querySelectorAll("script")).forEach(oldScript => {
      const newScript = document.createElement("script");
      Array.from(oldScript.attributes).forEach(attr => newScript.setAttribute(attr.name, attr.value));
      newScript.appendChild(document.createTextNode(oldScript.innerHTML));
      oldScript.parentNode.replaceChild(newScript, oldScript);
    });
  });
}).catch(error => {
  // fetch rejects on network failure; report it the same way as a non-200 response.
  console.warn("Unable to load versioning menu", error);
});
</script>
</body>
</html>

View File

@@ -0,0 +1,966 @@
<!doctype html>
<html class="no-js" lang="en" data-content_root="../../../../">
<head><meta charset="utf-8"/>
<meta name="viewport" content="width=device-width,initial-scale=1"/>
<meta name="color-scheme" content="light dark">
<meta name="description" content="A standard API for reinforcement learning and a diverse set of reference environments (formerly Gym)">
<meta property="og:title" content="Gymnasium Documentation" />
<meta property="og:type" content="website" />
<meta property="og:description" content="A standard API for reinforcement learning and a diverse set of reference environments (formerly Gym)" />
<meta property="og:url" content="https://gymnasium.farama.org/_modules/gymnasium/spaces/multi_discrete.html" /><meta property="og:image" content="https://gymnasium.farama.org/_static/img/gymnasium-github.png" /><meta name="twitter:card" content="summary_large_image"><link rel="index" title="Index" href="../../../../genindex/" /><link rel="search" title="Search" href="../../../../search/" />
<link rel="canonical" href="https://gymnasium.farama.org/_modules/gymnasium/spaces/multi_discrete.html" />
<link rel="shortcut icon" href="../../../../_static/favicon.png"/><!-- Generated with Sphinx 7.4.7 and Furo 2023.08.19.dev1 -->
<title>gymnasium.spaces.multi_discrete - Gymnasium Documentation</title>
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/furo.css?v=3e7f4c72" />
<link rel="stylesheet" type="text/css" href="../../../../_static/sg_gallery.css?v=61a4c737" />
<link rel="stylesheet" type="text/css" href="../../../../_static/sg_gallery-binder.css?v=f4aeca0c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/sg_gallery-dataframe.css?v=2082cf3c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/sg_gallery-rendered-html.css?v=1277b6f3" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/furo-extensions.css?v=82c8b628" />
<style>
/* Custom properties for the background and foreground colours of code blocks. */
/* Default (light) values. */
body {
--color-code-background: #f8f8f8;
--color-code-foreground: black;
}
/* The dark values below are never applied when printing. */
@media not print {
/* Dark theme selected explicitly through data-theme="dark" on <body>. */
body[data-theme="dark"] {
--color-code-background: #202020;
--color-code-foreground: #d0d0d0;
}
/* System prefers dark and <body> is not pinned to data-theme="light". */
@media (prefers-color-scheme: dark) {
body:not([data-theme="light"]) {
--color-code-background: #202020;
--color-code-foreground: #d0d0d0;
}
}
}
</style></head>
<body>
<!--
  Farama site header: mobile sidebar toggle, project logo/home link, and the
  Farama projects menu (opened by .farama-header-menu__btn, closed by
  #farama-close-menu). Icons inside named controls are decorative and hidden
  from assistive technology; each control carries its own accessible name.
-->
<header class="farama-header" aria-label="Farama header">
<div class="farama-header__container">
<div class="farama-header__left--mobile">
<label class="nav-overlay-icon" for="__navigation">
<!-- <label> only allows phrasing content, so the hidden text is a <span>. -->
<span class="visually-hidden">Toggle site navigation sidebar</span>
<svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg" aria-hidden="true">
<defs></defs>
<line x1="0.5" y1="4" x2="23.5" y2="4"></line>
<line x1="0.232" y1="12" x2="23.5" y2="12"></line>
<line x1="0.232" y1="20" x2="23.5" y2="20"></line>
</svg>
</label>
</div>
<div class="farama-header__left farama-header__center--mobile">
<a href="../../../../">
<img class="farama-header__logo only-light" src="../../../../_static/img/gymnasium_black.svg" alt="Light Logo"/>
<img class="farama-header__logo only-dark" src="../../../../_static/img/gymnasium_white.svg" alt="Dark Logo"/>
<span class="farama-header__title">Gymnasium Documentation</span>
</a>
</div>
<div class="farama-header__right">
<div class="farama-header-menu">
<button class="farama-header-menu__btn" type="button" aria-label="Open Farama Menu" aria-expanded="false" aria-haspopup="true" aria-controls="farama-menu">
<!-- The button is named by aria-label, so the logo is decorative (empty alt). -->
<img class="farama-black-logo-invert" src="../../../../_static/img/farama-logo-header.svg" alt="">
<!-- viewBox was declared twice on this element; a duplicate attribute is a parse error. -->
<svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg" aria-hidden="true">
<polyline style="stroke-linecap: round; stroke-linejoin: round; fill: none; stroke-width: 2px;" points="1 7 12 18 23 7"></polyline>
</svg>
</button>
<div class="farama-header-menu-container farama-hidden" aria-hidden="true" id="farama-menu">
<div class="farama-header-menu__header">
<a href="https://farama.org">
<img class="farama-header-menu__logo farama-white-logo-invert" src="../../../../_static/img/farama_solid_white.svg" alt="Farama Foundation logo">
<span>Farama Foundation</span>
</a>
<div class="farama-header-menu-header__right">
<!-- Icon-only button: needs an explicit accessible name. -->
<button id="farama-close-menu" type="button" aria-label="Close Farama Menu">
<svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg" fill="none" stroke="currentColor"
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="icon-close" aria-hidden="true">
<line x1="3" y1="21" x2="21" y2="3"></line>
<line x1="3" y1="3" x2="21" y2="21"></line>
</svg>
</button>
</div>
</div>
<div class="farama-header-menu__body">
<!-- Response from farama.org/api/projects.json -->
</div>
</div>
</div>
</div>
</div>
</header>
<script>
// Set data-theme on <body> from the "theme" key in localStorage; use "auto" when nothing is stored.
document.body.setAttribute("data-theme", localStorage.getItem("theme") || "auto");
</script>
<!--
  Hidden inline SVG sprite. Each <symbol> below is an icon that the page pulls
  in by id with <use href="#...">: svg-toc, svg-menu, svg-arrow-right, svg-sun,
  svg-moon and svg-sun-half.
-->
<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
<symbol id="svg-toc" viewBox="0 0 24 24">
<title>Contents</title>
<svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 1024 1024">
<path d="M408 442h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8zm-8 204c0 4.4 3.6 8 8 8h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56zm504-486H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zm0 632H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zM115.4 518.9L271.7 642c5.8 4.6 14.4.5 14.4-6.9V388.9c0-7.4-8.5-11.5-14.4-6.9L115.4 505.1a8.74 8.74 0 0 0 0 13.8z"/>
</svg>
</symbol>
<symbol id="svg-menu" viewBox="0 0 24 24">
<title>Menu</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-menu">
<line x1="3" y1="12" x2="21" y2="12"></line>
<line x1="3" y1="6" x2="21" y2="6"></line>
<line x1="3" y1="18" x2="21" y2="18"></line>
</svg>
</symbol>
<symbol id="svg-arrow-right" viewBox="0 0 24 24">
<title>Expand</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-chevron-right">
<polyline points="9 18 15 12 9 6"></polyline>
</svg>
</symbol>
<symbol id="svg-sun" viewBox="0 0 24 24">
<title>Light mode</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="feather-sun">
<circle cx="12" cy="12" r="5"></circle>
<line x1="12" y1="1" x2="12" y2="3"></line>
<line x1="12" y1="21" x2="12" y2="23"></line>
<line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
<line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
<line x1="1" y1="12" x2="3" y2="12"></line>
<line x1="21" y1="12" x2="23" y2="12"></line>
<line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
<line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
</svg>
</symbol>
<symbol id="svg-moon" viewBox="0 0 24 24">
<title>Dark mode</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-moon">
<path stroke="none" d="M0 0h24v24H0z" fill="none" />
<path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z" />
</svg>
</symbol>
<symbol id="svg-sun-half" viewBox="0 0 24 24">
<title>Auto light/dark mode</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-shadow">
<path stroke="none" d="M0 0h24v24H0z" fill="none"/>
<circle cx="12" cy="12" r="9" />
<path d="M13 12h5" />
<path d="M13 15h4" />
<path d="M13 18h1" />
<path d="M13 9h4" />
<path d="M13 6h1" />
</svg>
</symbol>
</svg>
<!--
  State checkboxes for the navigation and table-of-contents drawers, followed
  by the overlay labels that toggle them. The hidden label text is a <span>
  because <label> only allows phrasing content.
-->
<input type="checkbox" class="sidebar-toggle" name="__navigation" id="__navigation">
<input type="checkbox" class="sidebar-toggle" name="__toc" id="__toc">
<label class="overlay sidebar-overlay" for="__navigation">
<span class="visually-hidden">Hide navigation sidebar</span>
</label>
<label class="overlay toc-overlay" for="__toc">
<span class="visually-hidden">Hide table of contents sidebar</span>
</label>
<div class="page">
<!--<header class="mobile-header">
<div class="header-left">
<label class="nav-overlay-icon" for="__navigation">
<div class="visually-hidden">Toggle site navigation sidebar</div>
<i class="icon"><svg><use href="#svg-menu"></use></svg></i>
</label>
</div>
<div class="header-center">
<a href="../../../../"><div class="brand">Gymnasium Documentation</div></a>
</div>
<div class="header-right">
<div class="theme-toggle-container theme-toggle-header">
<button class="theme-toggle">
<div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
<svg class="theme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
<svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg>
<svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
</button>
</div>
<label class="toc-overlay-icon toc-header-icon no-toc" for="__toc">
<div class="visually-hidden">Toggle table of contents sidebar</div>
<i class="icon"><svg><use href="#svg-toc"></use></svg></i>
</label>
</div>
</header>-->
<!--
  Navigation drawer: project logo/home link, search form, and the site-wide
  table of contents grouped under caption headings. Captions declare an explicit
  aria-level because role="heading" requires one; the hidden text of each
  expand/collapse label is a <span> because <label> only allows phrasing content.
-->
<aside class="sidebar-drawer">
<div class="sidebar-container">
<div class="sidebar-sticky"><a class="farama-sidebar__title" href="../../../../">
<img class="farama-header__logo only-light" src="../../../../_static/img/gymnasium_black.svg" alt="Light Logo"/>
<img class="farama-header__logo only-dark" src="../../../../_static/img/gymnasium_white.svg" alt="Dark Logo"/>
<span class="farama-header__title">Gymnasium Documentation</span>
</a><form class="sidebar-search-container" method="get" action="../../../../search/" role="search">
<input class="sidebar-search" placeholder="Search" name="q" aria-label="Search">
<input type="hidden" name="check_keywords" value="yes">
<input type="hidden" name="area" value="default">
</form>
<div id="searchbox"></div><div class="sidebar-scroll"><div class="sidebar-tree">
<p class="caption" role="heading" aria-level="2"><span class="caption-text">Introduction</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/basic_usage/">Basic Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/train_agent/">Training an Agent</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/create_custom_env/">Create a Custom Environment</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/record_agent/">Recording Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/speed_up_env/">Speeding Up Training</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/gym_compatibility/">Compatibility with Gym</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/migration_guide/">Migration Guide - v0.21 to v1.0.0</a></li>
</ul>
<p class="caption" role="heading" aria-level="2"><span class="caption-text">API</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../api/env/">Env</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../api/registry/">Make and register</a></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../api/spaces/">Spaces</a><input class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" role="switch" type="checkbox"/><label for="toctree-checkbox-1"><span class="visually-hidden">Toggle navigation of Spaces</span><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/spaces/fundamental/">Fundamental Spaces</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/spaces/composite/">Composite Spaces</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/spaces/utils/">Spaces Utils</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../api/wrappers/">Wrappers</a><input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" role="switch" type="checkbox"/><label for="toctree-checkbox-2"><span class="visually-hidden">Toggle navigation of Wrappers</span><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/wrappers/table/">List of Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/wrappers/misc_wrappers/">Misc Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/wrappers/action_wrappers/">Action Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/wrappers/observation_wrappers/">Observation Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/wrappers/reward_wrappers/">Reward Wrappers</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../api/vector/">Vectorize</a><input class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" role="switch" type="checkbox"/><label for="toctree-checkbox-3"><span class="visually-hidden">Toggle navigation of Vectorize</span><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/vector/wrappers/">Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/vector/async_vector_env/">AsyncVectorEnv</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/vector/sync_vector_env/">SyncVectorEnv</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/vector/utils/">Utility functions</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../../../api/utils/">Utility functions</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../api/functional/">Functional Env</a></li>
</ul>
<p class="caption" role="heading" aria-level="2"><span class="caption-text">Environments</span></p>
<ul>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../environments/classic_control/">Classic Control</a><input class="toctree-checkbox" id="toctree-checkbox-4" name="toctree-checkbox-4" role="switch" type="checkbox"/><label for="toctree-checkbox-4"><span class="visually-hidden">Toggle navigation of Classic Control</span><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/classic_control/acrobot/">Acrobot</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/classic_control/cart_pole/">Cart Pole</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/classic_control/mountain_car_continuous/">Mountain Car Continuous</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/classic_control/mountain_car/">Mountain Car</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/classic_control/pendulum/">Pendulum</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../environments/box2d/">Box2D</a><input class="toctree-checkbox" id="toctree-checkbox-5" name="toctree-checkbox-5" role="switch" type="checkbox"/><label for="toctree-checkbox-5"><span class="visually-hidden">Toggle navigation of Box2D</span><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/box2d/bipedal_walker/">Bipedal Walker</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/box2d/car_racing/">Car Racing</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/box2d/lunar_lander/">Lunar Lander</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../environments/toy_text/">Toy Text</a><input class="toctree-checkbox" id="toctree-checkbox-6" name="toctree-checkbox-6" role="switch" type="checkbox"/><label for="toctree-checkbox-6"><span class="visually-hidden">Toggle navigation of Toy Text</span><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/toy_text/blackjack/">Blackjack</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/toy_text/taxi/">Taxi</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/toy_text/cliff_walking/">Cliff Walking</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/toy_text/frozen_lake/">Frozen Lake</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../environments/mujoco/">MuJoCo</a><input class="toctree-checkbox" id="toctree-checkbox-7" name="toctree-checkbox-7" role="switch" type="checkbox"/><label for="toctree-checkbox-7"><span class="visually-hidden">Toggle navigation of MuJoCo</span><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/ant/">Ant</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/half_cheetah/">Half Cheetah</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/hopper/">Hopper</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/humanoid/">Humanoid</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/humanoid_standup/">Humanoid Standup</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/inverted_double_pendulum/">Inverted Double Pendulum</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/inverted_pendulum/">Inverted Pendulum</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/pusher/">Pusher</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/reacher/">Reacher</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/swimmer/">Swimmer</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/walker2d/">Walker2D</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../../../environments/atari/">Atari</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../environments/third_party_environments/">External Environments</a></li>
</ul>
<p class="caption" role="heading" aria-level="2"><span class="caption-text">Tutorials</span></p>
<ul>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../tutorials/gymnasium_basics/">Gymnasium Basics</a><input class="toctree-checkbox" id="toctree-checkbox-8" name="toctree-checkbox-8" role="switch" type="checkbox"/><label for="toctree-checkbox-8"><span class="visually-hidden">Toggle navigation of Gymnasium Basics</span><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/gymnasium_basics/environment_creation/">Make your own custom environment</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/gymnasium_basics/handling_time_limits/">Handling Time Limits</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/gymnasium_basics/implementing_custom_wrappers/">Implementing Custom Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/gymnasium_basics/load_quadruped_model/">Load custom quadruped robot environments</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../tutorials/training_agents/">Training Agents</a><input class="toctree-checkbox" id="toctree-checkbox-9" name="toctree-checkbox-9" role="switch" type="checkbox"/><label for="toctree-checkbox-9"><span class="visually-hidden">Toggle navigation of Training Agents</span><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/training_agents/blackjack_q_learning/">Solving Blackjack with Tabular Q-Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/training_agents/frozenlake_q_learning/">Solving Frozenlake with Tabular Q-Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/training_agents/mujoco_reinforce/">Training using REINFORCE for Mujoco</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/training_agents/vector_a2c/">Speeding up A2C Training with Vector Envs</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../../../tutorials/third-party-tutorials/">Third-Party Tutorials</a></li>
</ul>
<p class="caption" role="heading" aria-level="2"><span class="caption-text">Development</span></p>
<ul>
<li class="toctree-l1"><a class="reference external" href="https://github.com/Farama-Foundation/Gymnasium">Github</a></li>
<li class="toctree-l1"><a class="reference external" href="https://arxiv.org/abs/2407.17032">Paper</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../gymnasium_release_notes/">Gymnasium Release Notes</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../gym_release_notes/">Gym Release Notes</a></li>
<li class="toctree-l1"><a class="reference external" href="https://github.com/Farama-Foundation/Gymnasium/blob/main/docs/README.md">Contribute to the Docs</a></li>
</ul>
</div>
</div>
</div>
</div>
</aside>
<div class="main-container">
<div class="main">
<div class="content">
<div class="article-container">
<!-- "Back to top" link: its href is the empty fragment "#", i.e. the top of the document. -->
<a href="#" class="back-to-top muted-link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
<path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12z"></path>
</svg>
<span>Back to top</span>
</a>
<!--
  In-content controls: the light/dark/auto theme toggle button and the label
  that toggles the table-of-contents drawer (#__toc). The icons are decorative;
  the accessible names come from the visually hidden text, which is a <span>
  because <button> and <label> only allow phrasing content.
-->
<div class="content-icon-container"><div class="theme-toggle-container theme-toggle-content">
<button class="theme-toggle" type="button" title="Toggle color theme">
<span class="visually-hidden">Toggle Light / Dark / Auto color theme</span>
<svg class="theme-icon-when-auto" aria-hidden="true">
<use href="#svg-sun-half"></use>
</svg>
<svg class="theme-icon-when-dark" aria-hidden="true">
<use href="#svg-moon"></use>
</svg>
<svg class="theme-icon-when-light" aria-hidden="true">
<use href="#svg-sun"></use>
</svg>
</button>
</div>
<label class="toc-overlay-icon toc-content-icon no-toc" for="__toc">
<span class="visually-hidden">Toggle table of contents sidebar</span>
<i class="icon"><svg aria-hidden="true">
<use href="#svg-toc"></use>
</svg></i>
</label>
</div>
<article role="main">
<h1>Source code for gymnasium.spaces.multi_discrete</h1><div class="highlight"><pre>
<span></span><span class="sd">&quot;&quot;&quot;Implementation of a space that represents the cartesian product of `Discrete` spaces.&quot;&quot;&quot;</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">__future__</span><span class="w"> </span><span class="kn">import</span> <span class="n">annotations</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">collections.abc</span><span class="w"> </span><span class="kn">import</span> <span class="n">Iterable</span><span class="p">,</span> <span class="n">Mapping</span><span class="p">,</span> <span class="n">Sequence</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">typing</span><span class="w"> </span><span class="kn">import</span> <span class="n">Any</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">numpy</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">np</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">numpy.typing</span><span class="w"> </span><span class="kn">import</span> <span class="n">NDArray</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">gymnasium</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">gym</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">gymnasium.spaces.discrete</span><span class="w"> </span><span class="kn">import</span> <span class="n">Discrete</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">gymnasium.spaces.space</span><span class="w"> </span><span class="kn">import</span> <span class="n">MaskNDArray</span><span class="p">,</span> <span class="n">Space</span>
<div class="viewcode-block" id="MultiDiscrete">
<a class="viewcode-back" href="../../../../api/spaces/fundamental/#gymnasium.spaces.MultiDiscrete">[docs]</a>
<span class="k">class</span><span class="w"> </span><span class="nc">MultiDiscrete</span><span class="p">(</span><span class="n">Space</span><span class="p">[</span><span class="n">NDArray</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">integer</span><span class="p">]]):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;This represents the cartesian product of arbitrary :class:`Discrete` spaces.</span>
<span class="sd"> It is useful to represent game controllers or keyboards where each key can be represented as a discrete action space.</span>
<span class="sd"> Note:</span>
<span class="sd"> Some environment wrappers assume a value of 0 always represents the NOOP action.</span>
<span class="sd"> e.g. Nintendo Game Controller - Can be conceptualized as 3 discrete action spaces:</span>
<span class="sd"> 1. Arrow Keys: Discrete 5 - NOOP[0], UP[1], RIGHT[2], DOWN[3], LEFT[4] - params: min: 0, max: 4</span>
<span class="sd"> 2. Button A: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1</span>
<span class="sd"> 3. Button B: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1</span>
<span class="sd"> It can be initialized as ``MultiDiscrete([ 5, 2, 2 ])`` such that a sample might be ``array([3, 1, 0])``.</span>
<span class="sd"> Although this feature is rarely used, :class:`MultiDiscrete` spaces may also have several axes</span>
<span class="sd"> if ``nvec`` has several axes:</span>
<span class="sd"> Example:</span>
<span class="sd"> &gt;&gt;&gt; from gymnasium.spaces import MultiDiscrete</span>
<span class="sd"> &gt;&gt;&gt; import numpy as np</span>
<span class="sd"> &gt;&gt;&gt; observation_space = MultiDiscrete(np.array([[1, 2], [3, 4]]), seed=42)</span>
<span class="sd"> &gt;&gt;&gt; observation_space.sample()</span>
<span class="sd"> array([[0, 0],</span>
<span class="sd"> [2, 2]])</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">nvec</span><span class="p">:</span> <span class="n">NDArray</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">integer</span><span class="p">[</span><span class="n">Any</span><span class="p">]]</span> <span class="o">|</span> <span class="nb">list</span><span class="p">[</span><span class="nb">int</span><span class="p">],</span>
<span class="n">dtype</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="nb">type</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">integer</span><span class="p">[</span><span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">int64</span><span class="p">,</span>
<span class="n">seed</span><span class="p">:</span> <span class="nb">int</span> <span class="o">|</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">Generator</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">start</span><span class="p">:</span> <span class="n">NDArray</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">integer</span><span class="p">[</span><span class="n">Any</span><span class="p">]]</span> <span class="o">|</span> <span class="nb">list</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Constructor of :class:`MultiDiscrete` space.</span>
<span class="sd"> The argument ``nvec`` will determine the number of values each categorical variable can take. If</span>
<span class="sd"> ``start`` is provided, it will define the minimal values corresponding to each categorical variable.</span>
<span class="sd"> Args:</span>
<span class="sd"> nvec: vector of counts of each categorical variable. This will usually be a list of integers. However,</span>
<span class="sd"> you may also pass a more complicated numpy array if you&#39;d like the space to have several axes.</span>
<span class="sd"> dtype: This should be some kind of integer type.</span>
<span class="sd"> seed: Optionally, you can use this argument to seed the RNG that is used to sample from the space.</span>
<span class="sd"> start: Optionally, the starting value the element of each class will take (defaults to 0).</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="c1"># determine dtype</span>
<span class="k">if</span> <span class="n">dtype</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="s2">&quot;MultiDiscrete dtype must be explicitly provided, cannot be None.&quot;</span>
<span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">dtype</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">dtype</span><span class="p">(</span><span class="n">dtype</span><span class="p">)</span>
<span class="c1"># * check that dtype is an accepted dtype</span>
<span class="k">if</span> <span class="ow">not</span> <span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">issubdtype</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">integer</span><span class="p">)):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;Invalid MultiDiscrete dtype (</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="si">}</span><span class="s2">), must be an integer dtype&quot;</span>
<span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">nvec</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">nvec</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">,</span> <span class="n">copy</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="k">if</span> <span class="n">start</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">start</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">start</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">,</span> <span class="n">copy</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">start</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">nvec</span><span class="o">.</span><span class="n">shape</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">)</span>
<span class="k">assert</span> <span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">start</span><span class="o">.</span><span class="n">shape</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">nvec</span><span class="o">.</span><span class="n">shape</span>
<span class="p">),</span> <span class="s2">&quot;start and nvec (counts) should have the same shape&quot;</span>
<span class="k">assert</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">nvec</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">all</span><span class="p">(),</span> <span class="s2">&quot;nvec (counts) have to be positive&quot;</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">nvec</span><span class="o">.</span><span class="n">shape</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="p">,</span> <span class="n">seed</span><span class="p">)</span>
<span class="nd">@property</span>
<span class="k">def</span><span class="w"> </span><span class="nf">shape</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="o">...</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Has stricter type than :class:`gym.Space` - never None.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_shape</span> <span class="c1"># type: ignore</span>
<span class="nd">@property</span>
<span class="k">def</span><span class="w"> </span><span class="nf">is_np_flattenable</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Checks whether this space can be flattened to a :class:`spaces.Box`.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="kc">True</span>
<div class="viewcode-block" id="MultiDiscrete.sample">
<a class="viewcode-back" href="../../../../api/spaces/fundamental/#gymnasium.spaces.MultiDiscrete.sample">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">sample</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">mask</span><span class="p">:</span> <span class="nb">tuple</span><span class="p">[</span><span class="n">MaskNDArray</span><span class="p">,</span> <span class="o">...</span><span class="p">]</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">probability</span><span class="p">:</span> <span class="nb">tuple</span><span class="p">[</span><span class="n">MaskNDArray</span><span class="p">,</span> <span class="o">...</span><span class="p">]</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">NDArray</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">integer</span><span class="p">[</span><span class="n">Any</span><span class="p">]]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Generates a single random sample from this space.</span>
<span class="sd"> Args:</span>
<span class="sd"> mask: An optional mask for multi-discrete, expects tuples with a ``np.ndarray`` mask in the position of each</span>
<span class="sd"> action with shape ``(n,)`` where ``n`` is the number of actions and ``dtype=np.int8``.</span>
<span class="sd"> Only ``mask values == 1`` are possible to sample unless all mask values for an action are ``0`` then the default action ``self.start`` (the smallest element) is sampled.</span>
<span class="sd"> probability: An optional probability mask for multi-discrete, expects tuples with a ``np.ndarray`` probability mask in the position of each</span>
<span class="sd"> action with shape ``(n,)`` where ``n`` is the number of actions and ``dtype=np.float64``.</span>
<span class="sd"> Only probability mask values within ``[0,1]`` are possible to sample as long as the sum of all values is ``1``.</span>
<span class="sd"> Returns:</span>
<span class="sd"> An ``np.ndarray`` of :meth:`Space.shape`</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">mask</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">probability</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;Only one of `mask` or `probability` can be provided, actual values: mask=</span><span class="si">{</span><span class="n">mask</span><span class="si">}</span><span class="s2">, probability=</span><span class="si">{</span><span class="n">probability</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="p">)</span>
<span class="k">elif</span> <span class="n">mask</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_apply_mask</span><span class="p">(</span><span class="n">mask</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">nvec</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">start</span><span class="p">,</span> <span class="s2">&quot;mask&quot;</span><span class="p">),</span>
<span class="n">dtype</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="p">,</span>
<span class="p">)</span>
<span class="k">elif</span> <span class="n">probability</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_apply_mask</span><span class="p">(</span><span class="n">probability</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">nvec</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">start</span><span class="p">,</span> <span class="s2">&quot;probability&quot;</span><span class="p">),</span>
<span class="n">dtype</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="p">,</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">np_random</span><span class="o">.</span><span class="n">random</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">nvec</span><span class="o">.</span><span class="n">shape</span><span class="p">)</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">nvec</span><span class="p">)</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">dtype</span>
<span class="p">)</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">start</span></div>
<span class="k">def</span><span class="w"> </span><span class="nf">_apply_mask</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">sub_mask</span><span class="p">:</span> <span class="n">MaskNDArray</span> <span class="o">|</span> <span class="nb">tuple</span><span class="p">[</span><span class="n">MaskNDArray</span><span class="p">,</span> <span class="o">...</span><span class="p">],</span>
<span class="n">sub_nvec</span><span class="p">:</span> <span class="n">MaskNDArray</span> <span class="o">|</span> <span class="n">np</span><span class="o">.</span><span class="n">integer</span><span class="p">[</span><span class="n">Any</span><span class="p">],</span>
<span class="n">sub_start</span><span class="p">:</span> <span class="n">MaskNDArray</span> <span class="o">|</span> <span class="n">np</span><span class="o">.</span><span class="n">integer</span><span class="p">[</span><span class="n">Any</span><span class="p">],</span>
<span class="n">mask_type</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">int</span> <span class="o">|</span> <span class="nb">list</span><span class="p">[</span><span class="n">Any</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Returns a sample using the provided mask or probability mask.&quot;&quot;&quot;</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">sub_nvec</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">):</span>
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span>
<span class="n">sub_mask</span><span class="p">,</span> <span class="nb">tuple</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;Expects the mask to be a tuple for sub_nvec (</span><span class="si">{</span><span class="n">sub_nvec</span><span class="si">}</span><span class="s2">), actual type: </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">sub_mask</span><span class="p">)</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">sub_mask</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span>
<span class="n">sub_nvec</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;Expects the mask length to be equal to the number of actions, mask length: </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">sub_mask</span><span class="p">)</span><span class="si">}</span><span class="s2">, nvec length: </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">sub_nvec</span><span class="p">)</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="k">return</span> <span class="p">[</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_apply_mask</span><span class="p">(</span><span class="n">new_mask</span><span class="p">,</span> <span class="n">new_nvec</span><span class="p">,</span> <span class="n">new_start</span><span class="p">,</span> <span class="n">mask_type</span><span class="p">)</span>
<span class="k">for</span> <span class="n">new_mask</span><span class="p">,</span> <span class="n">new_nvec</span><span class="p">,</span> <span class="n">new_start</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">sub_mask</span><span class="p">,</span> <span class="n">sub_nvec</span><span class="p">,</span> <span class="n">sub_start</span><span class="p">)</span>
<span class="p">]</span>
<span class="k">assert</span> <span class="n">np</span><span class="o">.</span><span class="n">issubdtype</span><span class="p">(</span>
<span class="nb">type</span><span class="p">(</span><span class="n">sub_nvec</span><span class="p">),</span> <span class="n">np</span><span class="o">.</span><span class="n">integer</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;Expects the sub_nvec to be an action, actually: </span><span class="si">{</span><span class="n">sub_nvec</span><span class="si">}</span><span class="s2">, </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">sub_nvec</span><span class="p">)</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span>
<span class="n">sub_mask</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;Expects the sub mask to be np.ndarray, actual type: </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">sub_mask</span><span class="p">)</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="k">assert</span> <span class="p">(</span>
<span class="nb">len</span><span class="p">(</span><span class="n">sub_mask</span><span class="p">)</span> <span class="o">==</span> <span class="n">sub_nvec</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;Expects the mask length to be equal to the number of actions, mask length: </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">sub_mask</span><span class="p">)</span><span class="si">}</span><span class="s2">, action: </span><span class="si">{</span><span class="n">sub_nvec</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="k">if</span> <span class="n">mask_type</span> <span class="o">==</span> <span class="s2">&quot;mask&quot;</span><span class="p">:</span>
<span class="k">assert</span> <span class="p">(</span>
<span class="n">sub_mask</span><span class="o">.</span><span class="n">dtype</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">int8</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;Expects the mask dtype to be np.int8, actual dtype: </span><span class="si">{</span><span class="n">sub_mask</span><span class="o">.</span><span class="n">dtype</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="n">valid_action_mask</span> <span class="o">=</span> <span class="n">sub_mask</span> <span class="o">==</span> <span class="mi">1</span>
<span class="k">assert</span> <span class="n">np</span><span class="o">.</span><span class="n">all</span><span class="p">(</span>
<span class="n">np</span><span class="o">.</span><span class="n">logical_or</span><span class="p">(</span><span class="n">sub_mask</span> <span class="o">==</span> <span class="mi">0</span><span class="p">,</span> <span class="n">valid_action_mask</span><span class="p">)</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;Expects all masks values to 0 or 1, actual values: </span><span class="si">{</span><span class="n">sub_mask</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="k">if</span> <span class="n">np</span><span class="o">.</span><span class="n">any</span><span class="p">(</span><span class="n">valid_action_mask</span><span class="p">):</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">np_random</span><span class="o">.</span><span class="n">choice</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">valid_action_mask</span><span class="p">)[</span><span class="mi">0</span><span class="p">])</span> <span class="o">+</span> <span class="n">sub_start</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">sub_start</span>
<span class="k">elif</span> <span class="n">mask_type</span> <span class="o">==</span> <span class="s2">&quot;probability&quot;</span><span class="p">:</span>
<span class="k">assert</span> <span class="p">(</span>
<span class="n">sub_mask</span><span class="o">.</span><span class="n">dtype</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">float64</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;Expects the mask dtype to be np.float64, actual dtype: </span><span class="si">{</span><span class="n">sub_mask</span><span class="o">.</span><span class="n">dtype</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="n">valid_action_mask</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">logical_and</span><span class="p">(</span><span class="n">sub_mask</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="n">sub_mask</span> <span class="o">&lt;=</span> <span class="mi">1</span><span class="p">)</span>
<span class="k">assert</span> <span class="n">np</span><span class="o">.</span><span class="n">all</span><span class="p">(</span>
<span class="n">np</span><span class="o">.</span><span class="n">logical_or</span><span class="p">(</span><span class="n">sub_mask</span> <span class="o">==</span> <span class="mi">0</span><span class="p">,</span> <span class="n">valid_action_mask</span><span class="p">)</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;Expects all masks values to be between 0 and 1, actual values: </span><span class="si">{</span><span class="n">sub_mask</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="k">assert</span> <span class="n">np</span><span class="o">.</span><span class="n">isclose</span><span class="p">(</span>
<span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">sub_mask</span><span class="p">),</span> <span class="mi">1</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;Expects the sum of all mask values to be 1, actual sum: </span><span class="si">{</span><span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">sub_mask</span><span class="p">)</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="n">normalized_sub_mask</span> <span class="o">=</span> <span class="n">sub_mask</span> <span class="o">/</span> <span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">sub_mask</span><span class="p">)</span>
<span class="k">return</span> <span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">np_random</span><span class="o">.</span><span class="n">choice</span><span class="p">(</span>
<span class="n">np</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">valid_action_mask</span><span class="p">)[</span><span class="mi">0</span><span class="p">],</span>
<span class="n">p</span><span class="o">=</span><span class="n">normalized_sub_mask</span><span class="p">[</span><span class="n">valid_action_mask</span><span class="p">],</span>
<span class="p">)</span>
<span class="o">+</span> <span class="n">sub_start</span>
<span class="p">)</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Unsupported mask type: </span><span class="si">{</span><span class="n">mask_type</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<span class="k">def</span><span class="w"> </span><span class="nf">contains</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Return boolean specifying if x is a valid member of this space.&quot;&quot;&quot;</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">Sequence</span><span class="p">):</span>
<span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="c1"># Promote list to array for contains check</span>
<span class="c1"># if nvec is uint32 and space dtype is uint32, then 0 &lt;= x &lt; self.nvec guarantees that x</span>
<span class="c1"># is within correct bounds for space dtype (even though x does not have to be unsigned)</span>
<span class="k">return</span> <span class="nb">bool</span><span class="p">(</span>
<span class="nb">isinstance</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">)</span>
<span class="ow">and</span> <span class="n">x</span><span class="o">.</span><span class="n">shape</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">shape</span>
<span class="ow">and</span> <span class="n">x</span><span class="o">.</span><span class="n">dtype</span> <span class="o">!=</span> <span class="nb">object</span>
<span class="ow">and</span> <span class="n">np</span><span class="o">.</span><span class="n">all</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">start</span> <span class="o">&lt;=</span> <span class="n">x</span><span class="p">)</span>
<span class="ow">and</span> <span class="n">np</span><span class="o">.</span><span class="n">all</span><span class="p">(</span><span class="n">x</span> <span class="o">-</span> <span class="bp">self</span><span class="o">.</span><span class="n">start</span> <span class="o">&lt;</span> <span class="bp">self</span><span class="o">.</span><span class="n">nvec</span><span class="p">)</span>
<span class="p">)</span>
<span class="k">def</span><span class="w"> </span><span class="nf">to_jsonable</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">sample_n</span><span class="p">:</span> <span class="n">Sequence</span><span class="p">[</span><span class="n">NDArray</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">integer</span><span class="p">[</span><span class="n">Any</span><span class="p">]]]</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">list</span><span class="p">[</span><span class="n">Sequence</span><span class="p">[</span><span class="nb">int</span><span class="p">]]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Convert a batch of samples from this space to a JSONable data type.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="p">[</span><span class="n">sample</span><span class="o">.</span><span class="n">tolist</span><span class="p">()</span> <span class="k">for</span> <span class="n">sample</span> <span class="ow">in</span> <span class="n">sample_n</span><span class="p">]</span>
<span class="k">def</span><span class="w"> </span><span class="nf">from_jsonable</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">sample_n</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="n">Sequence</span><span class="p">[</span><span class="nb">int</span><span class="p">]]</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">list</span><span class="p">[</span><span class="n">NDArray</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">integer</span><span class="p">[</span><span class="n">Any</span><span class="p">]]]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Convert a JSONable data type to a batch of samples from this space.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">sample</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">np</span><span class="o">.</span><span class="n">int64</span><span class="p">)</span> <span class="k">for</span> <span class="n">sample</span> <span class="ow">in</span> <span class="n">sample_n</span><span class="p">]</span>
<span class="k">def</span><span class="w"> </span><span class="fm">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Gives a string representation of this space.&quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">np</span><span class="o">.</span><span class="n">any</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">start</span> <span class="o">!=</span> <span class="mi">0</span><span class="p">):</span>
<span class="k">return</span> <span class="sa">f</span><span class="s2">&quot;MultiDiscrete(</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">nvec</span><span class="si">}</span><span class="s2">, start=</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">start</span><span class="si">}</span><span class="s2">)&quot;</span>
<span class="k">return</span> <span class="sa">f</span><span class="s2">&quot;MultiDiscrete(</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">nvec</span><span class="si">}</span><span class="s2">)&quot;</span>
<span class="k">def</span><span class="w"> </span><span class="fm">__getitem__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">index</span><span class="p">:</span> <span class="nb">int</span> <span class="o">|</span> <span class="nb">tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="o">...</span><span class="p">]):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Extract a subspace from this ``MultiDiscrete`` space.&quot;&quot;&quot;</span>
<span class="n">nvec</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">nvec</span><span class="p">[</span><span class="n">index</span><span class="p">]</span>
<span class="n">start</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">start</span><span class="p">[</span><span class="n">index</span><span class="p">]</span>
<span class="k">if</span> <span class="n">nvec</span><span class="o">.</span><span class="n">ndim</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="n">subspace</span> <span class="o">=</span> <span class="n">Discrete</span><span class="p">(</span><span class="n">nvec</span><span class="p">,</span> <span class="n">start</span><span class="o">=</span><span class="n">start</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">subspace</span> <span class="o">=</span> <span class="n">MultiDiscrete</span><span class="p">(</span><span class="n">nvec</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="p">,</span> <span class="n">start</span><span class="o">=</span><span class="n">start</span><span class="p">)</span>
<span class="c1"># you don&#39;t need to deepcopy as np random generator call replaces the state not the data</span>
<span class="n">subspace</span><span class="o">.</span><span class="n">np_random</span><span class="o">.</span><span class="n">bit_generator</span><span class="o">.</span><span class="n">state</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">np_random</span><span class="o">.</span><span class="n">bit_generator</span><span class="o">.</span><span class="n">state</span>
<span class="k">return</span> <span class="n">subspace</span>
<span class="k">def</span><span class="w"> </span><span class="fm">__len__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Gives the ``len`` of samples from this space.&quot;&quot;&quot;</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">nvec</span><span class="o">.</span><span class="n">ndim</span> <span class="o">&gt;=</span> <span class="mi">2</span><span class="p">:</span>
<span class="n">gym</span><span class="o">.</span><span class="n">logger</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span>
<span class="s2">&quot;Getting the length of a multi-dimensional MultiDiscrete space.&quot;</span>
<span class="p">)</span>
<span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">nvec</span><span class="p">)</span>
<span class="k">def</span><span class="w"> </span><span class="fm">__eq__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Check whether ``other`` is equivalent to this instance.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">bool</span><span class="p">(</span>
<span class="nb">isinstance</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">MultiDiscrete</span><span class="p">)</span>
<span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">dtype</span> <span class="o">==</span> <span class="n">other</span><span class="o">.</span><span class="n">dtype</span>
<span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">shape</span> <span class="o">==</span> <span class="n">other</span><span class="o">.</span><span class="n">shape</span>
<span class="ow">and</span> <span class="n">np</span><span class="o">.</span><span class="n">all</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">nvec</span> <span class="o">==</span> <span class="n">other</span><span class="o">.</span><span class="n">nvec</span><span class="p">)</span>
<span class="ow">and</span> <span class="n">np</span><span class="o">.</span><span class="n">all</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">start</span> <span class="o">==</span> <span class="n">other</span><span class="o">.</span><span class="n">start</span><span class="p">)</span>
<span class="p">)</span>
<span class="k">def</span><span class="w"> </span><span class="nf">__setstate__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">state</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="nb">tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]</span> <span class="o">|</span> <span class="n">Mapping</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Used when loading a pickled space.</span>
<span class="sd"> This method has to be implemented explicitly to allow for loading of legacy states.</span>
<span class="sd"> Args:</span>
<span class="sd"> state: The new state</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">state</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">(</span><span class="n">state</span><span class="p">)</span>
<span class="k">if</span> <span class="s2">&quot;start&quot;</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">state</span><span class="p">:</span>
<span class="n">state</span><span class="p">[</span><span class="s2">&quot;start&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">state</span><span class="p">[</span><span class="s2">&quot;_shape&quot;</span><span class="p">],</span> <span class="n">dtype</span><span class="o">=</span><span class="n">state</span><span class="p">[</span><span class="s2">&quot;dtype&quot;</span><span class="p">])</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">__setstate__</span><span class="p">(</span><span class="n">state</span><span class="p">)</span></div>
</pre></div>
</article>
</div>
<footer>
<div class="related-pages">
</div>
<div class="bottom-of-page">
<div class="left-details">
<div class="copyright">
Copyright &#169; 2025 Farama Foundation
</div>
<!--
Made with <a href="https://www.sphinx-doc.org/">Sphinx</a> and <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s
<a href="https://github.com/pradyunsg/furo">Furo</a>
-->
</div>
<div class="right-details">
<div class="icons">
<a class="muted-link" href="https://github.com/Farama-Foundation/Gymnasium/"
aria-label="On GitHub">
<!-- Decorative icon: the link's accessible name comes from aria-label, so the SVG is hidden from assistive technology. -->
<svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 16 16" aria-hidden="true">
<path fill-rule="evenodd"
d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0 0 16 8c0-4.42-3.58-8-8-8z">
</path>
</svg>
</a>
</div>
</div>
</div>
</footer>
</div>
<aside class="toc-drawer no-toc">
</aside>
</div>
</div>
</div>
<script>
// Open or close the Farama header menu and mirror the new state onto the
// ARIA attributes of the toggle button and the menu panel.
const toggleMenu = () => {
  const toggleButton = document.querySelector(".farama-header-menu__btn");
  const menuPanel = document.querySelector(".farama-header-menu-container");
  const menuRoot = document.querySelector(".farama-header-menu");
  // Sample the state before flipping it: the "active" class marks the menu
  // as currently expanded, so the ARIA values written below describe the
  // state the menu is about to enter.
  const wasActive = menuRoot.classList.contains("active");
  toggleButton.setAttribute("aria-expanded", wasActive ? "false" : "true");
  menuPanel.setAttribute("aria-hidden", wasActive ? "true" : "false");
  menuRoot.classList.toggle("active");
};
// The header button and the close button inside the menu share one handler.
[
  document.querySelector(".farama-header-menu__btn"),
  document.getElementById("farama-close-menu"),
].forEach((trigger) => trigger.addEventListener("click", toggleMenu));
</script>
<script async src="https://www.googletagmanager.com/gtag/js?id=G-6H9C8TWXZ8"></script>
<script>
// Set up the Google Analytics data layer (reusing an existing one if present)
// and queue the two start-up commands for measurement ID G-6H9C8TWXZ8.
const enableGtag = () => {
  if (!window.dataLayer) {
    window.dataLayer = [];
  }
  // Queue the raw `arguments` object rather than a copied array.
  // NOTE(review): presumably this is the form gtag.js expects - confirm before changing.
  const gtag = function () {
    dataLayer.push(arguments);
  };
  gtag('js', new Date());
  gtag('config', 'G-6H9C8TWXZ8');
};
// Cookie-consent gate for analytics.
// - No stored choice: show a banner with "Deny" / "Allow" buttons.
// - Stored choice "true": enable analytics straight away.
// - Any other stored choice: do nothing.
(() => {
  const CONSENT_KEY = "acceptedCookieAlert";
  const storedChoice = localStorage.getItem(CONSENT_KEY);
  if (storedChoice) {
    // A choice was recorded earlier; only an explicit "true" enables analytics.
    if (storedChoice === "true") {
      enableGtag();
    }
    return;
  }

  // Both buttons share the same classes and differ only in label and id.
  const makeButton = (label, id) => {
    const button = document.createElement("button");
    button.innerText = label;
    button.className = "farama-btn cookie-alert__button";
    button.id = id;
    return button;
  };

  const boxElem = document.createElement("div");
  boxElem.classList.add("cookie-alert");
  const containerElem = document.createElement("div");
  containerElem.classList.add("cookie-alert__container");

  const textElem = document.createElement("p");
  textElem.innerHTML = `This page uses <a href="https://analytics.google.com/">
Google Analytics</a> to collect statistics.`;
  containerElem.appendChild(textElem);

  // Persist the answer (localStorage stores it as the string "true"/"false")
  // and take the banner off the page.
  const recordChoice = (accepted) => {
    localStorage.setItem(CONSENT_KEY, accepted);
    boxElem.remove();
  };

  const declineBtn = makeButton("Deny", "cookie-alert__decline");
  declineBtn.addEventListener("click", () => {
    recordChoice(false);
  });

  const acceptBtn = makeButton("Allow", "cookie-alert__accept");
  acceptBtn.addEventListener("click", () => {
    recordChoice(true);
    enableGtag();
  });

  containerElem.appendChild(declineBtn);
  containerElem.appendChild(acceptBtn);
  boxElem.appendChild(containerElem);
  document.body.appendChild(boxElem);
})()
</script>
<script src="../../../../_static/documentation_options.js?v=151cd43d"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../../../_static/scripts/furo.js?v=7660844c"></script>
<script>
// Build a <ul class="farama-header-menu-list"> holding one linked <li> per project.
//
// projects      - iterable of objects; `link` and `name` are read from each,
//                 plus the optional `image` path when logos are shown.
// displayImages - when truthy, each link starts with a logo <img>; projects
//                 without an `image` fall back to "/farama_black.svg".
// Returns the populated <ul> element (not yet attached to the document).
const createProjectsList = (projects, displayImages) => {
  const listElem = document.createElement('ul');
  listElem.className = 'farama-header-menu-list';

  for (const project of projects) {
    const linkElem = document.createElement("a");
    linkElem.href = project.link;

    if (displayImages) {
      // Logo paths are relative to the shared images base path.
      const logoPath = project.image ? project.image : "/farama_black.svg";
      const logoElem = document.createElement("img");
      logoElem.src = imagesBasepath + logoPath;
      logoElem.alt = `${project.name} logo`;
      logoElem.className = "farama-black-logo-invert";
      linkElem.appendChild(logoElem);
    }
    linkElem.appendChild(document.createTextNode(project.name));

    const itemElem = document.createElement("li");
    itemElem.appendChild(linkElem);
    listElem.appendChild(itemElem);
  }

  return listElem;
}
// Create menu with Farama projects by using the API at farama.org/api/projects.json
// Open (but do not send) a cross-origin request for `url` using `method`.
//
// Returns an opened XMLHttpRequest with responseType "json" when the browser's
// XHR exposes `withCredentials`, an opened XDomainRequest where only that
// legacy object exists, or null when neither is available.
const createCORSRequest = (method, url) => {
  const request = new XMLHttpRequest();
  request.responseType = 'json';

  if ("withCredentials" in request) {
    // Third argument `true` opens the request asynchronously.
    request.open(method, url, true);
    return request;
  }

  if (typeof XDomainRequest == "undefined") {
    // CORS not supported.
    return null;
  }

  // IE8 & IE9
  const legacyRequest = new XDomainRequest();
  legacyRequest.open(method, url);
  return legacyRequest;
};
// Populate the Farama header menu from the public projects API.
//
// The response is treated as an object whose values are project records. The
// fields read here and in createProjectsList are `type`, `website`, `github`,
// `name` and the optional `image`. Projects are grouped as:
//   - "Core Projects"        (type === "core")
//   - "Mature Projects"      (type === "mature")
//   - "Incubating Projects"  (any other type)
// Mature and incubating projects are further split into "Documentation"
// (website is not null) and "Repositories" (website is null, linked to GitHub).
// A static "Foundation" section is always appended.
const url = 'https://farama.org/api/projects.json';
const imagesBasepath = "https://farama.org/assets/images";
const method = 'GET';
let xhr = createCORSRequest(method, url);
if (!xhr) {
  // createCORSRequest returns null when the browser offers no CORS-capable
  // request object; report it instead of failing on `xhr.onload`.
  console.error("Unable to load projects");
} else {
  xhr.onload = () => {
    const jsonResponse = xhr.response;
    // `response` is not an object when the body could not be parsed as JSON
    // (or when the legacy request object does not expose it at all).
    if (jsonResponse === null || typeof jsonResponse !== "object") {
      console.error("Unable to load projects");
      return;
    }

    const sections = {
      "Core Projects": [],
      "Mature Projects": {
        "Documentation": [],
        "Repositories": [],
      },
      "Incubating Projects": {
        "Documentation": [],
        "Repositories": [],
      },
      "Foundation": [
        {
          name: "About",
          link: "https://farama.org/about"
        },
        {
          name: "Standards",
          link: "https://farama.org/project_standards",
        },
        {
          name: "Donate",
          link: "https://farama.org/donations"
        }
      ]
    };

    // Categorize projects
    Object.keys(jsonResponse).forEach(key => {
      // Declared locally so it no longer leaks as an implicit global.
      const projectJson = jsonResponse[key];
      const hasWebsite = projectJson.website !== null;
      // Prefer the documentation website; fall back to the GitHub repository.
      projectJson.link = hasWebsite ? projectJson.website : projectJson.github;

      if (projectJson.type === "core") {
        sections["Core Projects"].push(projectJson);
        return;
      }
      const sectionName = projectJson.type === "mature" ? "Mature Projects" : "Incubating Projects";
      const subSectionName = hasWebsite ? "Documentation" : "Repositories";
      sections[sectionName][subSectionName].push(projectJson);
    });

    const menuContainer = document.querySelector(".farama-header-menu__body");
    Object.keys(sections).forEach(key => {
      const sectionElem = Object.assign(
        document.createElement('div'), {
          className: 'farama-header-menu__section',
        }
      );
      sectionElem.appendChild(Object.assign(document.createElement('span'),
        {
          className: 'farama-header-menu__section-title',
          innerText: key
        }
      ));

      if (Array.isArray(sections[key])) {
        // Flat section: a single list of projects.
        sectionElem.appendChild(createProjectsList(sections[key], true));
      } else {
        // Nested section: one titled list per subsection, laid out side by side.
        const subSections = sections[key];
        const subSectionContainerElem = Object.assign(
          document.createElement('div'), {
            className: 'farama-header-menu__subsections-container',
          }
        );
        subSectionContainerElem.style.display = 'flex';

        Object.keys(subSections).forEach(subKey => {
          const subSectionElem = Object.assign(
            document.createElement('div'), {
              className: 'farama-header-menu__subsection',
            }
          );
          subSectionElem.appendChild(Object.assign(document.createElement('span'),
            {
              className: 'farama-header-menu__subsection-title',
              innerText: subKey
            }
          ));
          // Only the non-array sections reach this branch, so logos are
          // always displayed here.
          subSectionElem.appendChild(createProjectsList(subSections[subKey], true));
          subSectionContainerElem.appendChild(subSectionElem);
        });
        sectionElem.appendChild(subSectionContainerElem);
      }
      menuContainer.appendChild(sectionElem);
    });
  };
  xhr.onerror = function() {
    console.error("Unable to load projects");
  };
  xhr.send();
}
</script>
<script>
// Repository coordinates for the versioning menu.
// NOTE(review): presumably read by the scripts inside versioning_menu.html - confirm.
const versioningConfig = {
  githubUser: 'Farama-Foundation',
  githubRepo: 'Gymnasium',
};
// Fetch the shared versioning menu fragment and append it to <body>.
// Any failure (network error, non-200 status, unreadable body) is reported
// with a console warning instead of surfacing as an unhandled rejection.
fetch('/main/_static/versioning/versioning_menu.html')
  .then(response => {
    if (response.status !== 200) {
      console.warn("Unable to load versioning menu", response);
      return null;
    }
    return response.text();
  })
  .then(text => {
    if (text === null) {
      return;
    }
    const container = document.createElement("div");
    container.innerHTML = text;
    document.querySelector("body").appendChild(container);
    // innerHTML doesn't evaluate scripts, so each one is replaced by a freshly
    // created <script> element carrying the same attributes and code.
    Array.from(container.querySelectorAll("script")).forEach(oldScript => {
      const newScript = document.createElement("script");
      Array.from(oldScript.attributes).forEach(attr => newScript.setAttribute(attr.name, attr.value));
      newScript.appendChild(document.createTextNode(oldScript.innerHTML));
      oldScript.parentNode.replaceChild(newScript, oldScript);
    });
  })
  .catch(error => {
    console.warn("Unable to load versioning menu", error);
  });
</script>
</body>
</html>

View File

@@ -0,0 +1,886 @@
<!doctype html>
<html class="no-js" lang="en" data-content_root="../../../../">
<head><meta charset="utf-8"/>
<meta name="viewport" content="width=device-width,initial-scale=1"/>
<meta name="color-scheme" content="light dark">
<meta name="description" content="A standard API for reinforcement learning and a diverse set of reference environments (formerly Gym)">
<meta property="og:title" content="Gymnasium Documentation" />
<meta property="og:type" content="website" />
<meta property="og:description" content="A standard API for reinforcement learning and a diverse set of reference environments (formerly Gym)" />
<meta property="og:url" content="https://gymnasium.farama.org/_modules/gymnasium/spaces/oneof.html" /><meta property="og:image" content="https://gymnasium.farama.org/_static/img/gymnasium-github.png" /><meta name="twitter:card" content="summary_large_image"><link rel="index" title="Index" href="../../../../genindex/" /><link rel="search" title="Search" href="../../../../search/" />
<link rel="canonical" href="https://gymnasium.farama.org/_modules/gymnasium/spaces/oneof.html" />
<link rel="shortcut icon" href="../../../../_static/favicon.png"/><!-- Generated with Sphinx 7.4.7 and Furo 2023.08.19.dev1 -->
<title>gymnasium.spaces.oneof - Gymnasium Documentation</title>
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/furo.css?v=3e7f4c72" />
<link rel="stylesheet" type="text/css" href="../../../../_static/sg_gallery.css?v=61a4c737" />
<link rel="stylesheet" type="text/css" href="../../../../_static/sg_gallery-binder.css?v=f4aeca0c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/sg_gallery-dataframe.css?v=2082cf3c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/sg_gallery-rendered-html.css?v=1277b6f3" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/furo-extensions.css?v=82c8b628" />
<style>
/* Default values for the code colour custom properties. */
body {
--color-code-background: #f8f8f8;
--color-code-foreground: black;
}
/* Dark overrides; none of these apply when printing. */
@media not print {
/* Applied when <body> carries data-theme="dark". */
body[data-theme="dark"] {
--color-code-background: #202020;
--color-code-foreground: #d0d0d0;
}
/* Applied when the user agent prefers a dark colour scheme, unless <body> carries data-theme="light". */
@media (prefers-color-scheme: dark) {
body:not([data-theme="light"]) {
--color-code-background: #202020;
--color-code-foreground: #d0d0d0;
}
}
}
</style></head>
<body>
<header class="farama-header" aria-label="Farama header">
<div class="farama-header__container">
<div class="farama-header__left--mobile">
<label class="nav-overlay-icon" for="__navigation">
<div class="visually-hidden">Toggle site navigation sidebar</div>
<svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
<defs></defs>
<line x1="0.5" y1="4" x2="23.5" y2="4"></line>
<line x1="0.232" y1="12" x2="23.5" y2="12"></line>
<line x1="0.232" y1="20" x2="23.5" y2="20"></line>
</svg>
</label>
</div>
<div class="farama-header__left farama-header__center--mobile">
<a href="../../../../">
<img class="farama-header__logo only-light" src="../../../../_static/img/gymnasium_black.svg" alt="Light Logo"/>
<img class="farama-header__logo only-dark" src="../../../../_static/img/gymnasium_white.svg" alt="Dark Logo"/>
<span class="farama-header__title">Gymnasium Documentation</span>
</a>
</div>
<div class="farama-header__right">
<div class="farama-header-menu">
<button class="farama-header-menu__btn" aria-label="Open Farama Menu" aria-expanded="false" aria-haspopup="true" aria-controls="farama-menu">
<img class="farama-black-logo-invert" src="../../../../_static/img/farama-logo-header.svg" alt="">
<svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
<polyline style="stroke-linecap: round; stroke-linejoin: round; fill: none; stroke-width: 2px;" points="1 7 12 18 23 7"></polyline>
</svg>
</button>
<div class="farama-header-menu-container farama-hidden" aria-hidden="true" id="farama-menu">
<div class="farama-header-menu__header">
<a href="https://farama.org">
<img class="farama-header-menu__logo farama-white-logo-invert" src="../../../../_static/img/farama_solid_white.svg" alt="Farama Foundation logo">
<span>Farama Foundation</span>
</a>
<div class="farama-header-menu-header__right">
<button id="farama-close-menu">
<svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg" fill="none" stroke="currentColor"
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="icon-close">
<line x1="3" y1="21" x2="21" y2="3"></line>
<line x1="3" y1="3" x2="21" y2="21"></line>
</svg>
</button>
</div>
</div>
<div class="farama-header-menu__body">
<!-- Response from farama.org/api/projects.json -->
</div>
</div>
</div>
</div>
</div>
</header>
<script>
// Copy the value stored under the "theme" key in localStorage onto <body data-theme>, falling back to "auto" when nothing is stored.
document.body.dataset.theme = localStorage.getItem("theme") || "auto";
</script>
<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
<symbol id="svg-toc" viewBox="0 0 24 24">
<title>Contents</title>
<svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 1024 1024">
<path d="M408 442h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8zm-8 204c0 4.4 3.6 8 8 8h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56zm504-486H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zm0 632H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zM115.4 518.9L271.7 642c5.8 4.6 14.4.5 14.4-6.9V388.9c0-7.4-8.5-11.5-14.4-6.9L115.4 505.1a8.74 8.74 0 0 0 0 13.8z"/>
</svg>
</symbol>
<symbol id="svg-menu" viewBox="0 0 24 24">
<title>Menu</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-menu">
<line x1="3" y1="12" x2="21" y2="12"></line>
<line x1="3" y1="6" x2="21" y2="6"></line>
<line x1="3" y1="18" x2="21" y2="18"></line>
</svg>
</symbol>
<symbol id="svg-arrow-right" viewBox="0 0 24 24">
<title>Expand</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-chevron-right">
<polyline points="9 18 15 12 9 6"></polyline>
</svg>
</symbol>
<symbol id="svg-sun" viewBox="0 0 24 24">
<title>Light mode</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="feather-sun">
<circle cx="12" cy="12" r="5"></circle>
<line x1="12" y1="1" x2="12" y2="3"></line>
<line x1="12" y1="21" x2="12" y2="23"></line>
<line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
<line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
<line x1="1" y1="12" x2="3" y2="12"></line>
<line x1="21" y1="12" x2="23" y2="12"></line>
<line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
<line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
</svg>
</symbol>
<symbol id="svg-moon" viewBox="0 0 24 24">
<title>Dark mode</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-moon">
<path stroke="none" d="M0 0h24v24H0z" fill="none" />
<path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z" />
</svg>
</symbol>
<symbol id="svg-sun-half" viewBox="0 0 24 24">
<title>Auto light/dark mode</title>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-shadow">
<path stroke="none" d="M0 0h24v24H0z" fill="none"/>
<circle cx="12" cy="12" r="9" />
<path d="M13 12h5" />
<path d="M13 15h4" />
<path d="M13 18h1" />
<path d="M13 9h4" />
<path d="M13 6h1" />
</svg>
</symbol>
</svg>
<input type="checkbox" class="sidebar-toggle" name="__navigation" id="__navigation">
<input type="checkbox" class="sidebar-toggle" name="__toc" id="__toc">
<label class="overlay sidebar-overlay" for="__navigation">
<div class="visually-hidden">Hide navigation sidebar</div>
</label>
<label class="overlay toc-overlay" for="__toc">
<div class="visually-hidden">Hide table of contents sidebar</div>
</label>
<div class="page">
<!--<header class="mobile-header">
<div class="header-left">
<label class="nav-overlay-icon" for="__navigation">
<div class="visually-hidden">Toggle site navigation sidebar</div>
<i class="icon"><svg><use href="#svg-menu"></use></svg></i>
</label>
</div>
<div class="header-center">
<a href="../../../../"><div class="brand">Gymnasium Documentation</div></a>
</div>
<div class="header-right">
<div class="theme-toggle-container theme-toggle-header">
<button class="theme-toggle">
<div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
<svg class="theme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
<svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg>
<svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
</button>
</div>
<label class="toc-overlay-icon toc-header-icon no-toc" for="__toc">
<div class="visually-hidden">Toggle table of contents sidebar</div>
<i class="icon"><svg><use href="#svg-toc"></use></svg></i>
</label>
</div>
</header>-->
<aside class="sidebar-drawer">
<div class="sidebar-container">
<div class="sidebar-sticky"><a class="farama-sidebar__title" href="../../../../">
<img class="farama-header__logo only-light" src="../../../../_static/img/gymnasium_black.svg" alt="Light Logo"/>
<img class="farama-header__logo only-dark" src="../../../../_static/img/gymnasium_white.svg" alt="Dark Logo"/>
<span class="farama-header__title">Gymnasium Documentation</span>
</a><form class="sidebar-search-container" method="get" action="../../../../search/" role="search">
<input class="sidebar-search" placeholder="Search" name="q" aria-label="Search">
<input type="hidden" name="check_keywords" value="yes">
<input type="hidden" name="area" value="default">
</form>
<div id="searchbox"></div><div class="sidebar-scroll"><div class="sidebar-tree">
<p class="caption" role="heading"><span class="caption-text">Introduction</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/basic_usage/">Basic Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/train_agent/">Training an Agent</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/create_custom_env/">Create a Custom Environment</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/record_agent/">Recording Agents</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/speed_up_env/">Speeding Up Training</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/gym_compatibility/">Compatibility with Gym</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../introduction/migration_guide/">Migration Guide - v0.21 to v1.0.0</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">API</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../api/env/">Env</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../api/registry/">Make and register</a></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../api/spaces/">Spaces</a><input class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" role="switch" type="checkbox"/><label for="toctree-checkbox-1"><div class="visually-hidden">Toggle navigation of Spaces</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/spaces/fundamental/">Fundamental Spaces</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/spaces/composite/">Composite Spaces</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/spaces/utils/">Spaces Utils</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../api/wrappers/">Wrappers</a><input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" role="switch" type="checkbox"/><label for="toctree-checkbox-2"><div class="visually-hidden">Toggle navigation of Wrappers</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/wrappers/table/">List of Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/wrappers/misc_wrappers/">Misc Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/wrappers/action_wrappers/">Action Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/wrappers/observation_wrappers/">Observation Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/wrappers/reward_wrappers/">Reward Wrappers</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../api/vector/">Vectorize</a><input class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" role="switch" type="checkbox"/><label for="toctree-checkbox-3"><div class="visually-hidden">Toggle navigation of Vectorize</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/vector/wrappers/">Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/vector/async_vector_env/">AsyncVectorEnv</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/vector/sync_vector_env/">SyncVectorEnv</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../api/vector/utils/">Utility functions</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../../../api/utils/">Utility functions</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../api/functional/">Functional Env</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Environments</span></p>
<ul>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../environments/classic_control/">Classic Control</a><input class="toctree-checkbox" id="toctree-checkbox-4" name="toctree-checkbox-4" role="switch" type="checkbox"/><label for="toctree-checkbox-4"><div class="visually-hidden">Toggle navigation of Classic Control</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/classic_control/acrobot/">Acrobot</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/classic_control/cart_pole/">Cart Pole</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/classic_control/mountain_car_continuous/">Mountain Car Continuous</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/classic_control/mountain_car/">Mountain Car</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/classic_control/pendulum/">Pendulum</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../environments/box2d/">Box2D</a><input class="toctree-checkbox" id="toctree-checkbox-5" name="toctree-checkbox-5" role="switch" type="checkbox"/><label for="toctree-checkbox-5"><div class="visually-hidden">Toggle navigation of Box2D</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/box2d/bipedal_walker/">Bipedal Walker</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/box2d/car_racing/">Car Racing</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/box2d/lunar_lander/">Lunar Lander</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../environments/toy_text/">Toy Text</a><input class="toctree-checkbox" id="toctree-checkbox-6" name="toctree-checkbox-6" role="switch" type="checkbox"/><label for="toctree-checkbox-6"><div class="visually-hidden">Toggle navigation of Toy Text</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/toy_text/blackjack/">Blackjack</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/toy_text/taxi/">Taxi</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/toy_text/cliff_walking/">Cliff Walking</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/toy_text/frozen_lake/">Frozen Lake</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../environments/mujoco/">MuJoCo</a><input class="toctree-checkbox" id="toctree-checkbox-7" name="toctree-checkbox-7" role="switch" type="checkbox"/><label for="toctree-checkbox-7"><div class="visually-hidden">Toggle navigation of MuJoCo</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/ant/">Ant</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/half_cheetah/">Half Cheetah</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/hopper/">Hopper</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/humanoid/">Humanoid</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/humanoid_standup/">Humanoid Standup</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/inverted_double_pendulum/">Inverted Double Pendulum</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/inverted_pendulum/">Inverted Pendulum</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/pusher/">Pusher</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/reacher/">Reacher</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/swimmer/">Swimmer</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../environments/mujoco/walker2d/">Walker2D</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../../../environments/atari/">Atari</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../environments/third_party_environments/">External Environments</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Tutorials</span></p>
<ul>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../tutorials/gymnasium_basics/">Gymnasium Basics</a><input class="toctree-checkbox" id="toctree-checkbox-8" name="toctree-checkbox-8" role="switch" type="checkbox"/><label for="toctree-checkbox-8"><div class="visually-hidden">Toggle navigation of Gymnasium Basics</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/gymnasium_basics/environment_creation/">Make your own custom environment</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/gymnasium_basics/handling_time_limits/">Handling Time Limits</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/gymnasium_basics/implementing_custom_wrappers/">Implementing Custom Wrappers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/gymnasium_basics/load_quadruped_model/">Load custom quadruped robot environments</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../../tutorials/training_agents/">Training Agents</a><input class="toctree-checkbox" id="toctree-checkbox-9" name="toctree-checkbox-9" role="switch" type="checkbox"/><label for="toctree-checkbox-9"><div class="visually-hidden">Toggle navigation of Training Agents</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/training_agents/blackjack_q_learning/">Solving Blackjack with Tabular Q-Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/training_agents/frozenlake_q_learning/">Solving Frozenlake with Tabular Q-Learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/training_agents/mujoco_reinforce/">Training using REINFORCE for Mujoco</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../../tutorials/training_agents/vector_a2c/">Speeding up A2C Training with Vector Envs</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../../../tutorials/third-party-tutorials/">Third-Party Tutorials</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Development</span></p>
<ul>
<li class="toctree-l1"><a class="reference external" href="https://github.com/Farama-Foundation/Gymnasium">Github</a></li>
<li class="toctree-l1"><a class="reference external" href="https://arxiv.org/abs/2407.17032">Paper</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../gymnasium_release_notes/">Gymnasium Release Notes</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../gym_release_notes/">Gym Release Notes</a></li>
<li class="toctree-l1"><a class="reference external" href="https://github.com/Farama-Foundation/Gymnasium/blob/main/docs/README.md">Contribute to the Docs</a></li>
</ul>
</div>
</div>
</div>
</div>
</aside>
<div class="main-container">
<div class="main">
<div class="content">
<div class="article-container">
<a href="#" class="back-to-top muted-link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
<path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12z"></path>
</svg>
<span>Back to top</span>
</a>
<div class="content-icon-container"><div class="theme-toggle-container theme-toggle-content">
<button class="theme-toggle" title="Toggle color theme">
<div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
<svg class="theme-icon-when-auto">
<use href="#svg-sun-half"></use>
</svg>
<svg class="theme-icon-when-dark">
<use href="#svg-moon"></use>
</svg>
<svg class="theme-icon-when-light">
<use href="#svg-sun"></use>
</svg>
</button>
</div>
<label class="toc-overlay-icon toc-content-icon no-toc" for="__toc">
<div class="visually-hidden">Toggle table of contents sidebar</div>
<i class="icon"><svg>
<use href="#svg-toc"></use>
</svg></i>
</label>
</div>
<article role="main">
<h1>Source code for gymnasium.spaces.oneof</h1><div class="highlight"><pre>
<span></span><span class="sd">&quot;&quot;&quot;Implementation of a space that represents the cartesian product of other spaces.&quot;&quot;&quot;</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">__future__</span><span class="w"> </span><span class="kn">import</span> <span class="n">annotations</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">typing</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">collections.abc</span><span class="w"> </span><span class="kn">import</span> <span class="n">Iterable</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">typing</span><span class="w"> </span><span class="kn">import</span> <span class="n">Any</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">numpy</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">np</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">gymnasium.spaces.space</span><span class="w"> </span><span class="kn">import</span> <span class="n">Space</span>
<div class="viewcode-block" id="OneOf">
<a class="viewcode-back" href="../../../../api/spaces/composite/#gymnasium.spaces.OneOf">[docs]</a>
<span class="k">class</span><span class="w"> </span><span class="nc">OneOf</span><span class="p">(</span><span class="n">Space</span><span class="p">[</span><span class="n">Any</span><span class="p">]):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;An exclusive tuple (more precisely: the direct sum) of :class:`Space` instances.</span>
<span class="sd"> Elements of this space are elements of one of the constituent spaces.</span>
<span class="sd"> Example:</span>
<span class="sd"> &gt;&gt;&gt; from gymnasium.spaces import OneOf, Box, Discrete</span>
<span class="sd"> &gt;&gt;&gt; observation_space = OneOf((Discrete(2), Box(-1, 1, shape=(2,))), seed=123)</span>
<span class="sd"> &gt;&gt;&gt; observation_space.sample() # the first element is the space index (Discrete in this case) and the second element is the sample from Discrete</span>
<span class="sd"> (np.int64(0), np.int64(0))</span>
<span class="sd"> &gt;&gt;&gt; observation_space.sample() # this time the Box space was sampled as index=1</span>
<span class="sd"> (np.int64(1), array([-0.00711833, -0.7257502 ], dtype=float32))</span>
<span class="sd"> &gt;&gt;&gt; observation_space[0]</span>
<span class="sd"> Discrete(2)</span>
<span class="sd"> &gt;&gt;&gt; observation_space[1]</span>
<span class="sd"> Box(-1.0, 1.0, (2,), float32)</span>
<span class="sd"> &gt;&gt;&gt; len(observation_space)</span>
<span class="sd"> 2</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">spaces</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">Space</span><span class="p">[</span><span class="n">Any</span><span class="p">]],</span>
<span class="n">seed</span><span class="p">:</span> <span class="nb">int</span> <span class="o">|</span> <span class="n">typing</span><span class="o">.</span><span class="n">Sequence</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">|</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">Generator</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">):</span>
<span class="w"> </span><span class="sa">r</span><span class="sd">&quot;&quot;&quot;Constructor of :class:`OneOf` space.</span>
<span class="sd"> The generated instance will represent the cartesian product :math:`\text{spaces}[0] \times ... \times \text{spaces}[-1]`.</span>
<span class="sd"> Args:</span>
<span class="sd"> spaces (Iterable[Space]): The spaces that are involved in the cartesian product.</span>
<span class="sd"> seed: Optionally, you can use this argument to seed the RNGs of the ``spaces`` to ensure reproducible sampling.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">spaces</span><span class="p">,</span> <span class="n">Iterable</span><span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="n">spaces</span><span class="si">}</span><span class="s2"> is not an iterable&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">spaces</span> <span class="o">=</span> <span class="nb">tuple</span><span class="p">(</span><span class="n">spaces</span><span class="p">)</span>
<span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Empty `OneOf` spaces are not supported.&quot;</span>
<span class="k">for</span> <span class="n">space</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="p">:</span>
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span>
<span class="n">space</span><span class="p">,</span> <span class="n">Space</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="n">space</span><span class="si">}</span><span class="s2"> does not inherit from `gymnasium.Space`. Actual Type: </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">space</span><span class="p">)</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="kc">None</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="n">seed</span><span class="p">)</span>
<span class="nd">@property</span>
<span class="k">def</span><span class="w"> </span><span class="nf">is_np_flattenable</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Checks whether this space can be flattened to a :class:`spaces.Box`.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">all</span><span class="p">(</span><span class="n">space</span><span class="o">.</span><span class="n">is_np_flattenable</span> <span class="k">for</span> <span class="n">space</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="p">)</span>
<div class="viewcode-block" id="OneOf.seed">
<a class="viewcode-back" href="../../../../api/spaces/composite/#gymnasium.spaces.OneOf.seed">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">seed</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">seed</span><span class="p">:</span> <span class="nb">int</span> <span class="o">|</span> <span class="nb">tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="o">...</span><span class="p">]</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="o">...</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Seed the PRNG of this space and all subspaces.</span>
<span class="sd"> Depending on the type of seed, the subspaces will be seeded differently</span>
<span class="sd"> * ``None`` - All the subspaces will use a random initial seed</span>
<span class="sd"> * ``Int`` - The integer is used to seed the :class:`Tuple` space that is used to generate seed values for each of the subspaces. Warning, this does not guarantee unique seeds for all the subspaces.</span>
<span class="sd"> * ``Tuple[int, ...]`` - Values used to seed the subspaces, first value seeds the OneOf and subsequent seed the subspaces. This allows the seeding of multiple composite subspaces ``[42, 54, ...]``.</span>
<span class="sd"> Args:</span>
<span class="sd"> seed: An optional int or tuple of ints to seed the OneOf space and subspaces. See above for more details.</span>
<span class="sd"> Returns:</span>
<span class="sd"> A tuple of ints used to seed the OneOf space and subspaces</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">seed</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">super_seed</span> <span class="o">=</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">seed</span><span class="p">(</span><span class="kc">None</span><span class="p">)</span>
<span class="k">return</span> <span class="p">(</span><span class="n">super_seed</span><span class="p">,)</span> <span class="o">+</span> <span class="nb">tuple</span><span class="p">(</span><span class="n">space</span><span class="o">.</span><span class="n">seed</span><span class="p">(</span><span class="kc">None</span><span class="p">)</span> <span class="k">for</span> <span class="n">space</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="p">)</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">seed</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
<span class="n">super_seed</span> <span class="o">=</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">seed</span><span class="p">(</span><span class="n">seed</span><span class="p">)</span>
<span class="n">subseeds</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">np_random</span><span class="o">.</span><span class="n">integers</span><span class="p">(</span>
<span class="n">np</span><span class="o">.</span><span class="n">iinfo</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">int32</span><span class="p">)</span><span class="o">.</span><span class="n">max</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="p">)</span>
<span class="p">)</span>
<span class="c1"># this is necessary such that after int or list/tuple seeding, the OneOf PRNG are equivalent</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">seed</span><span class="p">(</span><span class="n">seed</span><span class="p">)</span>
<span class="k">return</span> <span class="p">(</span><span class="n">super_seed</span><span class="p">,)</span> <span class="o">+</span> <span class="nb">tuple</span><span class="p">(</span>
<span class="n">space</span><span class="o">.</span><span class="n">seed</span><span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="n">subseed</span><span class="p">))</span>
<span class="k">for</span> <span class="n">space</span><span class="p">,</span> <span class="n">subseed</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="p">,</span> <span class="n">subseeds</span><span class="p">)</span>
<span class="p">)</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">seed</span><span class="p">,</span> <span class="p">(</span><span class="nb">tuple</span><span class="p">,</span> <span class="nb">list</span><span class="p">)):</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">seed</span><span class="p">)</span> <span class="o">!=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="p">)</span> <span class="o">+</span> <span class="mi">1</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;Expects that the subspaces of seeds equals the number of subspaces + 1. Actual length of seeds: </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">seed</span><span class="p">)</span><span class="si">}</span><span class="s2">, length of subspaces: </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="p">)</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="p">)</span>
<span class="k">return</span> <span class="p">(</span><span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">seed</span><span class="p">(</span><span class="n">seed</span><span class="p">[</span><span class="mi">0</span><span class="p">]),)</span> <span class="o">+</span> <span class="nb">tuple</span><span class="p">(</span>
<span class="n">space</span><span class="o">.</span><span class="n">seed</span><span class="p">(</span><span class="n">subseed</span><span class="p">)</span> <span class="k">for</span> <span class="n">space</span><span class="p">,</span> <span class="n">subseed</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="p">,</span> <span class="n">seed</span><span class="p">[</span><span class="mi">1</span><span class="p">:])</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;Expected None, int, or tuple of ints, actual type: </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">seed</span><span class="p">)</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="p">)</span></div>
<div class="viewcode-block" id="OneOf.sample">
<a class="viewcode-back" href="../../../../api/spaces/composite/#gymnasium.spaces.OneOf.sample">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">sample</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">mask</span><span class="p">:</span> <span class="nb">tuple</span><span class="p">[</span><span class="n">Any</span> <span class="o">|</span> <span class="kc">None</span><span class="p">,</span> <span class="o">...</span><span class="p">]</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">probability</span><span class="p">:</span> <span class="nb">tuple</span><span class="p">[</span><span class="n">Any</span> <span class="o">|</span> <span class="kc">None</span><span class="p">,</span> <span class="o">...</span><span class="p">]</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">Any</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Generates a single random sample inside this space.</span>
<span class="sd"> This method draws independent samples from the subspaces.</span>
<span class="sd"> Args:</span>
<span class="sd"> mask: An optional tuple of optional masks for each of the subspace&#39;s samples,</span>
<span class="sd"> expects the same number of masks as spaces</span>
<span class="sd"> probability: An optional tuple of optional probability masks for each of the subspace&#39;s samples,</span>
<span class="sd"> expects the same number of probability masks as spaces</span>
<span class="sd"> Returns:</span>
<span class="sd"> Tuple of the subspace&#39;s samples</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">subspace_idx</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">np_random</span><span class="o">.</span><span class="n">integers</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="p">),</span> <span class="n">dtype</span><span class="o">=</span><span class="n">np</span><span class="o">.</span><span class="n">int64</span><span class="p">)</span>
<span class="n">subspace</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="p">[</span><span class="n">subspace_idx</span><span class="p">]</span>
<span class="k">if</span> <span class="n">mask</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">probability</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;Only one of `mask` or `probability` can be provided, actual values: mask=</span><span class="si">{</span><span class="n">mask</span><span class="si">}</span><span class="s2">, probability=</span><span class="si">{</span><span class="n">probability</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="p">)</span>
<span class="k">elif</span> <span class="n">mask</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span>
<span class="n">mask</span><span class="p">,</span> <span class="nb">tuple</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;Expected type of `mask` is tuple, actual type: </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">mask</span><span class="p">)</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">mask</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">spaces</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;Expected length of `mask` is </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="p">)</span><span class="si">}</span><span class="s2">, actual length: </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">mask</span><span class="p">)</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="n">subspace_sample</span> <span class="o">=</span> <span class="n">subspace</span><span class="o">.</span><span class="n">sample</span><span class="p">(</span><span class="n">mask</span><span class="o">=</span><span class="n">mask</span><span class="p">[</span><span class="n">subspace_idx</span><span class="p">])</span>
<span class="k">elif</span> <span class="n">probability</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span>
<span class="n">probability</span><span class="p">,</span> <span class="nb">tuple</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;Expected type of `probability` is tuple, actual type: </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">probability</span><span class="p">)</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">probability</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">spaces</span>
<span class="p">),</span> <span class="sa">f</span><span class="s2">&quot;Expected length of `probability` is </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="p">)</span><span class="si">}</span><span class="s2">, actual length: </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">probability</span><span class="p">)</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="n">subspace_sample</span> <span class="o">=</span> <span class="n">subspace</span><span class="o">.</span><span class="n">sample</span><span class="p">(</span><span class="n">probability</span><span class="o">=</span><span class="n">probability</span><span class="p">[</span><span class="n">subspace_idx</span><span class="p">])</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">subspace_sample</span> <span class="o">=</span> <span class="n">subspace</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
<span class="k">return</span> <span class="n">subspace_idx</span><span class="p">,</span> <span class="n">subspace_sample</span></div>
<span class="k">def</span><span class="w"> </span><span class="nf">contains</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="nb">tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">Any</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Return boolean specifying if x is a valid member of this space.&quot;&quot;&quot;</span>
<span class="c1"># subspace_idx, subspace_value = x</span>
<span class="k">return</span> <span class="p">(</span>
<span class="nb">isinstance</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="nb">tuple</span><span class="p">)</span>
<span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="o">==</span> <span class="mi">2</span>
<span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">x</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">int64</span><span class="p">,</span> <span class="nb">int</span><span class="p">))</span>
<span class="ow">and</span> <span class="mi">0</span> <span class="o">&lt;=</span> <span class="n">x</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">&lt;</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="p">)</span>
<span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="p">[</span><span class="n">x</span><span class="p">[</span><span class="mi">0</span><span class="p">]]</span><span class="o">.</span><span class="n">contains</span><span class="p">(</span><span class="n">x</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span>
<span class="p">)</span>
<span class="k">def</span><span class="w"> </span><span class="fm">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Gives a string representation of this space.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="s2">&quot;OneOf(&quot;</span> <span class="o">+</span> <span class="s2">&quot;, &quot;</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="nb">str</span><span class="p">(</span><span class="n">s</span><span class="p">)</span> <span class="k">for</span> <span class="n">s</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="p">])</span> <span class="o">+</span> <span class="s2">&quot;)&quot;</span>
<span class="k">def</span><span class="w"> </span><span class="nf">to_jsonable</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">sample_n</span><span class="p">:</span> <span class="n">typing</span><span class="o">.</span><span class="n">Sequence</span><span class="p">[</span><span class="nb">tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">list</span><span class="p">[</span><span class="nb">list</span><span class="p">[</span><span class="n">Any</span><span class="p">]]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Convert a batch of samples from this space to a JSONable data type.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="p">[</span>
<span class="p">[</span><span class="nb">int</span><span class="p">(</span><span class="n">i</span><span class="p">),</span> <span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">to_jsonable</span><span class="p">([</span><span class="n">subsample</span><span class="p">])[</span><span class="mi">0</span><span class="p">]]</span>
<span class="k">for</span> <span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">subsample</span><span class="p">)</span> <span class="ow">in</span> <span class="n">sample_n</span>
<span class="p">]</span>
<span class="k">def</span><span class="w"> </span><span class="nf">from_jsonable</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sample_n</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="nb">list</span><span class="p">[</span><span class="n">Any</span><span class="p">]])</span> <span class="o">-&gt;</span> <span class="nb">list</span><span class="p">[</span><span class="nb">tuple</span><span class="p">[</span><span class="n">Any</span><span class="p">,</span> <span class="o">...</span><span class="p">]]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Convert a JSONable data type to a batch of samples from this space.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="p">[</span>
<span class="p">(</span>
<span class="n">np</span><span class="o">.</span><span class="n">int64</span><span class="p">(</span><span class="n">space_idx</span><span class="p">),</span>
<span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="p">[</span><span class="n">space_idx</span><span class="p">]</span><span class="o">.</span><span class="n">from_jsonable</span><span class="p">([</span><span class="n">jsonable_sample</span><span class="p">])[</span><span class="mi">0</span><span class="p">],</span>
<span class="p">)</span>
<span class="k">for</span> <span class="n">space_idx</span><span class="p">,</span> <span class="n">jsonable_sample</span> <span class="ow">in</span> <span class="n">sample_n</span>
<span class="p">]</span>
<span class="k">def</span><span class="w"> </span><span class="fm">__getitem__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">index</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Space</span><span class="p">[</span><span class="n">Any</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Get the subspace at specific `index`.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="p">[</span><span class="n">index</span><span class="p">]</span>
<span class="k">def</span><span class="w"> </span><span class="fm">__len__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">int</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Get the number of subspaces that are involved in the cartesian product.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spaces</span><span class="p">)</span>
<span class="k">def</span><span class="w"> </span><span class="fm">__eq__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Check whether ``other`` is equivalent to this instance.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">OneOf</span><span class="p">)</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">spaces</span> <span class="o">==</span> <span class="n">other</span><span class="o">.</span><span class="n">spaces</span></div>
</pre></div>
</article>
</div>
<footer>
<div class="related-pages">
</div>
<div class="bottom-of-page">
<div class="left-details">
<div class="copyright">
Copyright &#169; 2025 Farama Foundation
</div>
<!--
Made with <a href="https://www.sphinx-doc.org/">Sphinx</a> and <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s
<a href="https://github.com/pradyunsg/furo">Furo</a>
-->
</div>
<div class="right-details">
<div class="icons">
<a class="muted-link" href="https://github.com/Farama-Foundation/Gymnasium/"
aria-label="On GitHub">
<svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 16 16">
<path fill-rule="evenodd"
d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0 0 16 8c0-4.42-3.58-8-8-8z">
</path>
</svg>
</a>
</div>
</div>
</div>
</footer>
</div>
<aside class="toc-drawer no-toc">
</aside>
</div>
</div>
</div>
<script>
/**
 * Opens or closes the Farama header menu.
 *
 * Reads whether `.farama-header-menu` currently carries the `active` class,
 * writes the matching ARIA state onto the menu button and the menu container,
 * and finally flips the `active` class on the menu itself.
 */
const toggleMenu = () => {
  const button = document.querySelector(".farama-header-menu__btn");
  const container = document.querySelector(".farama-header-menu-container");
  // State *before* the toggle: if the menu is open now, it is about to close.
  const wasOpen = document.querySelector(".farama-header-menu").classList.contains("active");

  // ARIA attributes describe the state the menu is moving into.
  button.setAttribute("aria-expanded", wasOpen ? "false" : "true");
  container.setAttribute("aria-hidden", wasOpen ? "true" : "false");

  document.querySelector(".farama-header-menu").classList.toggle("active");
}
// Wire toggleMenu to clicks on the header menu button and on the element with
// id "farama-close-menu", so either one flips the menu's open/closed state.
document.querySelector(".farama-header-menu__btn").addEventListener("click", toggleMenu);
document.getElementById("farama-close-menu").addEventListener("click", toggleMenu);
</script>
<script async src="https://www.googletagmanager.com/gtag/js?id=G-6H9C8TWXZ8"></script>
<script>
/**
 * Start Google Analytics reporting for this page.
 *
 * Makes sure `window.dataLayer` exists (keeping any entries already queued)
 * and queues the two gtag commands: "js" with the current time and "config"
 * with the site's measurement ID.
 */
const enableGtag = () => {
  const measurementId = 'G-6H9C8TWXZ8';
  window.dataLayer = window.dataLayer || [];
  // Kept as a regular function that pushes the `arguments` object itself, as
  // in Google's published snippet; do not convert to rest parameters.
  const gtag = function () {
    dataLayer.push(arguments);
  };
  gtag('js', new Date());
  gtag('config', measurementId);
}
// Cookie-consent gate for Google Analytics.
// - No stored choice: build the "cookie-alert" banner with Deny / Allow buttons.
// - Stored choice "true": enable analytics immediately via enableGtag().
// - Any other stored value: do nothing.
(() => {
  const STORAGE_KEY = "acceptedCookieAlert";
  const storedChoice = localStorage.getItem(STORAGE_KEY);

  if (storedChoice) {
    if (storedChoice === "true") {
      enableGtag();
    }
    return;
  }

  const banner = document.createElement("div");
  banner.classList.add("cookie-alert");

  const content = document.createElement("div");
  content.classList.add("cookie-alert__container");

  const message = document.createElement("p");
  // The continuation line is deliberately unindented so the markup string
  // stays exactly as it was.
  message.innerHTML = `This page uses <a href="https://analytics.google.com/">
Google Analytics</a> to collect statistics.`;
  content.appendChild(message);

  // Builds one banner button. Clicking it stores the choice, removes the
  // banner and, when the choice is "accepted", turns analytics on.
  const buildChoiceButton = (label, id, accepted) => {
    const button = document.createElement("button");
    button.innerText = label;
    button.className = "farama-btn cookie-alert__button";
    button.id = id;
    button.addEventListener("click", () => {
      localStorage.setItem(STORAGE_KEY, accepted);
      banner.remove();
      if (accepted) {
        enableGtag();
      }
    });
    return button;
  };

  content.appendChild(buildChoiceButton("Deny", "cookie-alert__decline", false));
  content.appendChild(buildChoiceButton("Allow", "cookie-alert__accept", true));
  banner.appendChild(content);
  document.body.appendChild(banner);
})()
</script>
<script src="../../../../_static/documentation_options.js?v=151cd43d"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../../../_static/scripts/furo.js?v=7660844c"></script>
<script>
/**
 * Build the `<ul>` of project links shown in the Farama header menu.
 *
 * @param projects Iterable of objects providing `name`, `link` and, optionally,
 *   `image` (a path appended to `imagesBasepath`).
 * @param displayImages When truthy, each link starts with a logo image;
 *   projects without an `image` get the default "/farama_black.svg".
 * @returns The populated `ul.farama-header-menu-list` element.
 */
const createProjectsList = (projects, displayImages) => {
  const listElem = document.createElement('ul');
  listElem.className = 'farama-header-menu-list';

  for (const project of projects) {
    const itemElem = document.createElement("li");
    const linkElem = document.createElement("a");
    linkElem.href = project.link;
    itemElem.appendChild(linkElem);

    if (displayImages) {
      const logoPath = project.image ? project.image : "/farama_black.svg";
      const logoElem = document.createElement("img");
      logoElem.src = imagesBasepath + logoPath;
      logoElem.alt = `${project.name} logo`;
      logoElem.className = "farama-black-logo-invert";
      linkElem.appendChild(logoElem);
    }

    // The visible label always comes after the optional logo.
    linkElem.appendChild(document.createTextNode(project.name));
    listElem.appendChild(itemElem);
  }

  return listElem;
}
// Create menu with Farama projects by using the API at farama.org/api/projects.json
/**
 * Create and open a cross-origin request object.
 *
 * @param method HTTP method passed to `open()`.
 * @param url Target URL passed to `open()`.
 * @returns An opened `XMLHttpRequest` (with `responseType` set to 'json') when
 *   the object exposes `withCredentials`; otherwise an opened `XDomainRequest`
 *   when that constructor exists; otherwise `null`.
 */
const createCORSRequest = (method, url) => {
  const request = new XMLHttpRequest();
  request.responseType = 'json';

  if ("withCredentials" in request) {
    request.open(method, url, true);
    return request;
  }

  if (typeof XDomainRequest != "undefined") {
    // IE8 & IE9
    const legacyRequest = new XDomainRequest();
    legacyRequest.open(method, url);
    return legacyRequest;
  }

  // CORS not supported.
  return null;
};
// Endpoint listing the Farama projects, and the base URL their logo paths
// are appended to (read by createProjectsList).
const url = 'https://farama.org/api/projects.json';
const imagesBasepath = "https://farama.org/assets/images";
const method = 'GET';
let xhr = createCORSRequest(method, url);

if (xhr === null) {
  // createCORSRequest returns null when no CORS-capable request object is
  // available; report it instead of failing on `xhr.onload = ...`.
  console.error("Unable to load projects");
} else {
  // Build the header menu once the project list has arrived.
  xhr.onload = () => {
    const jsonResponse = xhr.response;
    // `response` is not a usable object when the body could not be parsed as
    // JSON (or on the XDomainRequest fallback, which never sets it).
    if (!jsonResponse || typeof jsonResponse !== "object") {
      console.error("Unable to load projects");
      return;
    }

    // Menu layout: a flat array renders as a single list, a nested object
    // renders as titled sub-sections.
    const sections = {
      "Core Projects": [],
      "Mature Projects": {
        "Documentation": [],
        "Repositories": [],
      },
      "Incubating Projects": {
        "Documentation": [],
        "Repositories": [],
      },
      "Foundation": [
        {
          name: "About",
          link: "https://farama.org/about"
        },
        {
          name: "Standards",
          link: "https://farama.org/project_standards",
        },
        {
          name: "Donate",
          link: "https://farama.org/donations"
        }
      ]
    };

    // Categorize projects
    Object.keys(jsonResponse).forEach(key => {
      // Declared locally; the previous bare assignment leaked a global.
      const projectJson = jsonResponse[key];
      // A missing `website` key is treated like an explicit null, so the
      // project falls back to its GitHub link instead of an undefined href.
      const hasWebsite = projectJson.website != null;
      projectJson.link = hasWebsite ? projectJson.website : projectJson.github;

      if (projectJson.type === "core") {
        sections["Core Projects"].push(projectJson);
        return;
      }
      // Every type other than "core" and "mature" is listed as incubating.
      const maturity = projectJson.type === "mature" ? "Mature Projects" : "Incubating Projects";
      const group = hasWebsite ? "Documentation" : "Repositories";
      sections[maturity][group].push(projectJson);
    });

    const menuContainer = document.querySelector(".farama-header-menu__body");
    Object.keys(sections).forEach(key => {
      const sectionElem = Object.assign(
        document.createElement('div'), {
          className: 'farama-header-menu__section',
        }
      );
      sectionElem.appendChild(Object.assign(document.createElement('span'),
        {
          className: 'farama-header-menu__section-title',
          innerText: key
        }
      ));

      if (Array.isArray(sections[key])) {
        sectionElem.appendChild(createProjectsList(sections[key], true));
      } else {
        const subSections = sections[key];
        const subSectionContainerElem = Object.assign(
          document.createElement('div'), {
            className: 'farama-header-menu__subsections-container',
            style: 'display: flex'
          }
        );
        Object.keys(subSections).forEach(subKey => {
          const subSectionElem = Object.assign(
            document.createElement('div'), {
              className: 'farama-header-menu__subsection',
            }
          );
          subSectionElem.appendChild(Object.assign(document.createElement('span'),
            {
              className: 'farama-header-menu__subsection-title',
              innerText: subKey
            }
          ));
          // NOTE(review): "Foundation" is a flat list and never reaches this
          // branch, so this flag is always true here. If logos were meant to
          // be hidden for Foundation links, the check belongs in the
          // flat-list branch above; confirm before changing.
          const ulElem = createProjectsList(subSections[subKey], key !== 'Foundation');
          subSectionElem.appendChild(ulElem);
          subSectionContainerElem.appendChild(subSectionElem);
        });
        sectionElem.appendChild(subSectionContainerElem);
      }

      menuContainer.appendChild(sectionElem);
    });
  };

  xhr.onerror = function() {
    console.error("Unable to load projects");
  };

  xhr.send();
}
</script>
<script>
// Repository coordinates for the versioning menu.
// NOTE(review): not read anywhere in this script; presumably consumed by the
// scripts inside the injected versioning menu. Verify before renaming.
const versioningConfig = {
  githubUser: 'Farama-Foundation',
  githubRepo: 'Gymnasium',
};

// Fetch the shared versioning menu markup and append it to the page.
fetch('/main/_static/versioning/versioning_menu.html')
  .then(response => {
    if (response.status !== 200) {
      console.warn("Unable to load versioning menu", response);
      return null;
    }
    return response.text();
  })
  .then(text => {
    if (text === null) {
      // Already reported above; nothing to insert.
      return;
    }
    const container = document.createElement("div");
    container.innerHTML = text;
    document.querySelector("body").appendChild(container);
    // innerHTML doesn't evaluate scripts, so each one is replaced by a freshly
    // created <script> carrying the same attributes and source text.
    Array.from(container.querySelectorAll("script")).forEach(oldScript => {
      const newScript = document.createElement("script");
      Array.from(oldScript.attributes).forEach(attr => newScript.setAttribute(attr.name, attr.value));
      newScript.appendChild(document.createTextNode(oldScript.innerHTML));
      oldScript.parentNode.replaceChild(newScript, oldScript);
    });
  })
  .catch(error => {
    // A rejected fetch (e.g. network failure) used to surface as an
    // unhandled promise rejection.
    console.warn("Unable to load versioning menu", error);
  });
</script>
</body>
</html>

Some files were not shown because too many files have changed in this diff Show More