mirror of
https://github.com/Farama-Foundation/Gymnasium.git
synced 2025-08-19 21:42:02 +00:00
1298 lines
100 KiB
HTML
1298 lines
100 KiB
HTML
![]() |
<!doctype html>
|
|||
|
<html class="no-js" lang="en" data-content_root="../../../">
|
|||
|
<head><meta charset="utf-8"/>
|
|||
|
<meta name="viewport" content="width=device-width,initial-scale=1"/>
|
|||
|
<meta name="color-scheme" content="light dark">
|
|||
|
<meta name="description" content="A standard API for reinforcement learning and a diverse set of reference environments (formerly Gym)">
|
|||
|
<meta property="og:title" content="Gymnasium Documentation" />
|
|||
|
<meta property="og:type" content="website" />
|
|||
|
<meta property="og:description" content="A standard API for reinforcement learning and a diverse set of reference environments (formerly Gym)" />
|
|||
|
<meta property="og:url" content="https://gymnasium.farama.org/tutorials/gymnasium_basics/environment_creation.html" /><meta property="og:image" content="https://gymnasium.farama.org/_static/img/gymnasium-github.png" /><meta name="twitter:card" content="summary_large_image"><meta name="viewport" content="width=device-width, initial-scale=1" />
|
|||
|
<link rel="index" title="Index" href="../../../genindex/" /><link rel="search" title="Search" href="../../../search/" /><link rel="next" title="Training A2C with Vector Envs and Domain Randomization" href="../vector_envs_tutorial/" /><link rel="prev" title="Implementing Custom Wrappers" href="../implementing_custom_wrappers/" />
|
|||
|
<link rel="canonical" href="https://gymnasium.farama.org/tutorials/gymnasium_basics/environment_creation.html" />
|
|||
|
|
|||
|
<link rel="shortcut icon" href="../../../_static/favicon.png"/><!-- Generated with Sphinx 7.4.7 and Furo 2023.08.19.dev1 -->
|
|||
|
<title>Make your own custom environment - Gymnasium Documentation</title>
|
|||
|
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=8f2a1f02" />
|
|||
|
<link rel="stylesheet" type="text/css" href="../../../_static/styles/furo.css?v=3e7f4c72" />
|
|||
|
<link rel="stylesheet" type="text/css" href="../../../_static/sg_gallery.css?v=61a4c737" />
|
|||
|
<link rel="stylesheet" type="text/css" href="../../../_static/sg_gallery-binder.css?v=f4aeca0c" />
|
|||
|
<link rel="stylesheet" type="text/css" href="../../../_static/sg_gallery-dataframe.css?v=2082cf3c" />
|
|||
|
<link rel="stylesheet" type="text/css" href="../../../_static/sg_gallery-rendered-html.css?v=1277b6f3" />
|
|||
|
<link rel="stylesheet" type="text/css" href="../../../_static/styles/furo-extensions.css?v=82c8b628" />
|
|||
|
|
|||
|
|
|||
|
|
|||
|
|
|||
|
<style>
|
|||
|
body {
|
|||
|
--color-code-background: #f8f8f8;
|
|||
|
--color-code-foreground: black;
|
|||
|
|
|||
|
}
|
|||
|
@media not print {
|
|||
|
body[data-theme="dark"] {
|
|||
|
--color-code-background: #202020;
|
|||
|
--color-code-foreground: #d0d0d0;
|
|||
|
|
|||
|
}
|
|||
|
@media (prefers-color-scheme: dark) {
|
|||
|
body:not([data-theme="light"]) {
|
|||
|
--color-code-background: #202020;
|
|||
|
--color-code-foreground: #d0d0d0;
|
|||
|
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
</style></head>
|
|||
|
<body>
|
|||
|
<header class="farama-header" aria-label="Farama header">
|
|||
|
<div class="farama-header__container">
|
|||
|
<div class="farama-header__left--mobile">
|
|||
|
<label class="nav-overlay-icon" for="__navigation">
|
|||
|
<div class="visually-hidden">Toggle site navigation sidebar</div>
|
|||
|
<svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
|
|||
|
<defs></defs>
|
|||
|
<line x1="0.5" y1="4" x2="23.5" y2="4"></line>
|
|||
|
<line x1="0.232" y1="12" x2="23.5" y2="12"></line>
|
|||
|
<line x1="0.232" y1="20" x2="23.5" y2="20"></line>
|
|||
|
</svg>
|
|||
|
</label>
|
|||
|
</div>
|
|||
|
<div class="farama-header__left farama-header__center--mobile">
|
|||
|
<a href="../../../">
|
|||
|
<img class="farama-header__logo only-light" src="../../../_static/img/gymnasium_black.svg" alt="Light Logo"/>
|
|||
|
<img class="farama-header__logo only-dark" src="../../../_static/img/gymnasium_white.svg" alt="Dark Logo"/>
|
|||
|
<span class="farama-header__title">Gymnasium Documentation</span>
|
|||
|
</a>
|
|||
|
</div>
|
|||
|
<div class="farama-header__right">
|
|||
|
<div class="farama-header-menu">
|
|||
|
<button class="farama-header-menu__btn" aria-label="Open Farama Menu" aria-expanded="false" aria-haspopup="true" aria-controls="farama-menu">
|
|||
|
<img class="farama-black-logo-invert" src="../../../_static/img/farama-logo-header.svg" alt="">
|
|||
|
<svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
|
|||
|
<polyline style="stroke-linecap: round; stroke-linejoin: round; fill: none; stroke-width: 2px;" points="1 7 12 18 23 7"></polyline>
|
|||
|
</svg>
|
|||
|
</button>
|
|||
|
<div class="farama-header-menu-container farama-hidden" aria-hidden="true" id="farama-menu">
|
|||
|
<div class="farama-header-menu__header">
|
|||
|
<a href="https://farama.org">
|
|||
|
<img class="farama-header-menu__logo farama-white-logo-invert" src="../../../_static/img/farama_solid_white.svg" alt="Farama Foundation logo">
|
|||
|
<span>Farama Foundation</span>
|
|||
|
</a>
|
|||
|
<div class="farama-header-menu-header__right">
|
|||
|
<button id="farama-close-menu">
|
|||
|
<svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg" fill="none" stroke="currentColor"
|
|||
|
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="icon-close">
|
|||
|
<line x1="3" y1="21" x2="21" y2="3"></line>
|
|||
|
<line x1="3" y1="3" x2="21" y2="21"></line>
|
|||
|
</svg>
|
|||
|
</button>
|
|||
|
</div>
|
|||
|
</div>
|
|||
|
<div class="farama-header-menu__body">
|
|||
|
<!-- Response from farama.org/api/projects.json -->
|
|||
|
</div>
|
|||
|
</div>
|
|||
|
</div>
|
|||
|
</div>
|
|||
|
</div>
|
|||
|
</header>
|
|||
|
|
|||
|
|
|||
|
<script>
|
|||
|
document.body.dataset.theme = localStorage.getItem("theme") || "auto";
|
|||
|
</script>
|
|||
|
|
|||
|
|
|||
|
<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
|
|||
|
<symbol id="svg-toc" viewBox="0 0 24 24">
|
|||
|
<title>Contents</title>
|
|||
|
<svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 1024 1024">
|
|||
|
<path d="M408 442h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8zm-8 204c0 4.4 3.6 8 8 8h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56zm504-486H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zm0 632H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zM115.4 518.9L271.7 642c5.8 4.6 14.4.5 14.4-6.9V388.9c0-7.4-8.5-11.5-14.4-6.9L115.4 505.1a8.74 8.74 0 0 0 0 13.8z"/>
|
|||
|
</svg>
|
|||
|
</symbol>
|
|||
|
<symbol id="svg-menu" viewBox="0 0 24 24">
|
|||
|
<title>Menu</title>
|
|||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
|
|||
|
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-menu">
|
|||
|
<line x1="3" y1="12" x2="21" y2="12"></line>
|
|||
|
<line x1="3" y1="6" x2="21" y2="6"></line>
|
|||
|
<line x1="3" y1="18" x2="21" y2="18"></line>
|
|||
|
</svg>
|
|||
|
</symbol>
|
|||
|
<symbol id="svg-arrow-right" viewBox="0 0 24 24">
|
|||
|
<title>Expand</title>
|
|||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
|
|||
|
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-chevron-right">
|
|||
|
<polyline points="9 18 15 12 9 6"></polyline>
|
|||
|
</svg>
|
|||
|
</symbol>
|
|||
|
<symbol id="svg-sun" viewBox="0 0 24 24">
|
|||
|
<title>Light mode</title>
|
|||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
|
|||
|
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="feather-sun">
|
|||
|
<circle cx="12" cy="12" r="5"></circle>
|
|||
|
<line x1="12" y1="1" x2="12" y2="3"></line>
|
|||
|
<line x1="12" y1="21" x2="12" y2="23"></line>
|
|||
|
<line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
|
|||
|
<line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
|
|||
|
<line x1="1" y1="12" x2="3" y2="12"></line>
|
|||
|
<line x1="21" y1="12" x2="23" y2="12"></line>
|
|||
|
<line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
|
|||
|
<line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
|
|||
|
</svg>
|
|||
|
</symbol>
|
|||
|
<symbol id="svg-moon" viewBox="0 0 24 24">
|
|||
|
<title>Dark mode</title>
|
|||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
|
|||
|
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-moon">
|
|||
|
<path stroke="none" d="M0 0h24v24H0z" fill="none" />
|
|||
|
<path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z" />
|
|||
|
</svg>
|
|||
|
</symbol>
|
|||
|
<symbol id="svg-sun-half" viewBox="0 0 24 24">
|
|||
|
<title>Auto light/dark mode</title>
|
|||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
|
|||
|
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-shadow">
|
|||
|
<path stroke="none" d="M0 0h24v24H0z" fill="none"/>
|
|||
|
<circle cx="12" cy="12" r="9" />
|
|||
|
<path d="M13 12h5" />
|
|||
|
<path d="M13 15h4" />
|
|||
|
<path d="M13 18h1" />
|
|||
|
<path d="M13 9h4" />
|
|||
|
<path d="M13 6h1" />
|
|||
|
</svg>
|
|||
|
</symbol>
|
|||
|
</svg>
|
|||
|
|
|||
|
<input type="checkbox" class="sidebar-toggle" name="__navigation" id="__navigation">
|
|||
|
<input type="checkbox" class="sidebar-toggle" name="__toc" id="__toc">
|
|||
|
<label class="overlay sidebar-overlay" for="__navigation">
|
|||
|
<div class="visually-hidden">Hide navigation sidebar</div>
|
|||
|
</label>
|
|||
|
<label class="overlay toc-overlay" for="__toc">
|
|||
|
<div class="visually-hidden">Hide table of contents sidebar</div>
|
|||
|
</label>
|
|||
|
|
|||
|
<div class="page">
|
|||
|
<!--<header class="mobile-header">
|
|||
|
<div class="header-left">
|
|||
|
<label class="nav-overlay-icon" for="__navigation">
|
|||
|
<div class="visually-hidden">Toggle site navigation sidebar</div>
|
|||
|
<i class="icon"><svg><use href="#svg-menu"></use></svg></i>
|
|||
|
</label>
|
|||
|
</div>
|
|||
|
<div class="header-center">
|
|||
|
<a href="../../../"><div class="brand">Gymnasium Documentation</div></a>
|
|||
|
</div>
|
|||
|
<div class="header-right">
|
|||
|
<div class="theme-toggle-container theme-toggle-header">
|
|||
|
<button class="theme-toggle">
|
|||
|
<div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
|
|||
|
<svg class="theme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
|
|||
|
<svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg>
|
|||
|
<svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
|
|||
|
</button>
|
|||
|
</div>
|
|||
|
<label class="toc-overlay-icon toc-header-icon" for="__toc">
|
|||
|
<div class="visually-hidden">Toggle table of contents sidebar</div>
|
|||
|
<i class="icon"><svg><use href="#svg-toc"></use></svg></i>
|
|||
|
</label>
|
|||
|
</div>
|
|||
|
</header>-->
|
|||
|
<aside class="sidebar-drawer">
|
|||
|
<div class="sidebar-container">
|
|||
|
|
|||
|
<div class="sidebar-sticky"><a class="farama-sidebar__title" href="../../../">
|
|||
|
<img class="farama-header__logo only-light" src="../../../_static/img/gymnasium_black.svg" alt="Light Logo"/>
|
|||
|
<img class="farama-header__logo only-dark" src="../../../_static/img/gymnasium_white.svg" alt="Dark Logo"/>
|
|||
|
<span class="farama-header__title">Gymnasium Documentation</span>
|
|||
|
</a><form class="sidebar-search-container" method="get" action="../../../search/" role="search">
|
|||
|
<input class="sidebar-search" placeholder="Search" name="q" aria-label="Search">
|
|||
|
<input type="hidden" name="check_keywords" value="yes">
|
|||
|
<input type="hidden" name="area" value="default">
|
|||
|
</form>
|
|||
|
<div id="searchbox"></div><div class="sidebar-scroll"><div class="sidebar-tree">
|
|||
|
<p class="caption" role="heading"><span class="caption-text">Introduction</span></p>
|
|||
|
<ul>
|
|||
|
<li class="toctree-l1"><a class="reference internal" href="../../../introduction/basic_usage/">Basic Usage</a></li>
|
|||
|
<li class="toctree-l1"><a class="reference internal" href="../../../introduction/train_agent/">Training an Agent</a></li>
|
|||
|
<li class="toctree-l1"><a class="reference internal" href="../../../introduction/create_custom_env/">Create a Custom Environment</a></li>
|
|||
|
<li class="toctree-l1"><a class="reference internal" href="../../../introduction/record_agent/">Recording Agents</a></li>
|
|||
|
<li class="toctree-l1"><a class="reference internal" href="../../../introduction/speed_up_env/">Speeding Up Training</a></li>
|
|||
|
<li class="toctree-l1"><a class="reference internal" href="../../../introduction/gym_compatibility/">Compatibility with Gym</a></li>
|
|||
|
<li class="toctree-l1"><a class="reference internal" href="../../../introduction/migration_guide/">Migration Guide - v0.21 to v1.0.0</a></li>
|
|||
|
</ul>
|
|||
|
<p class="caption" role="heading"><span class="caption-text">API</span></p>
|
|||
|
<ul>
|
|||
|
<li class="toctree-l1"><a class="reference internal" href="../../../api/env/">Env</a></li>
|
|||
|
<li class="toctree-l1"><a class="reference internal" href="../../../api/registry/">Make and register</a></li>
|
|||
|
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../api/spaces/">Spaces</a><input class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" role="switch" type="checkbox"/><label for="toctree-checkbox-1"><div class="visually-hidden">Toggle navigation of Spaces</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../../../api/spaces/fundamental/">Fundamental Spaces</a></li>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../../../api/spaces/composite/">Composite Spaces</a></li>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../../../api/spaces/utils/">Spaces Utils</a></li>
|
|||
|
</ul>
|
|||
|
</li>
|
|||
|
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../api/wrappers/">Wrappers</a><input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" role="switch" type="checkbox"/><label for="toctree-checkbox-2"><div class="visually-hidden">Toggle navigation of Wrappers</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../../../api/wrappers/table/">List of Wrappers</a></li>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../../../api/wrappers/misc_wrappers/">Misc Wrappers</a></li>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../../../api/wrappers/action_wrappers/">Action Wrappers</a></li>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../../../api/wrappers/observation_wrappers/">Observation Wrappers</a></li>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../../../api/wrappers/reward_wrappers/">Reward Wrappers</a></li>
|
|||
|
</ul>
|
|||
|
</li>
|
|||
|
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../api/vector/">Vectorize</a><input class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" role="switch" type="checkbox"/><label for="toctree-checkbox-3"><div class="visually-hidden">Toggle navigation of Vectorize</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../../../api/vector/wrappers/">Wrappers</a></li>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../../../api/vector/async_vector_env/">AsyncVectorEnv</a></li>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../../../api/vector/sync_vector_env/">SyncVectorEnv</a></li>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../../../api/vector/utils/">Utility functions</a></li>
|
|||
|
</ul>
|
|||
|
</li>
|
|||
|
<li class="toctree-l1"><a class="reference internal" href="../../../api/utils/">Utility functions</a></li>
|
|||
|
<li class="toctree-l1"><a class="reference internal" href="../../../api/functional/">Functional Env</a></li>
|
|||
|
</ul>
|
|||
|
<p class="caption" role="heading"><span class="caption-text">Environments</span></p>
|
|||
|
<ul>
|
|||
|
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../environments/classic_control/">Classic Control</a><input class="toctree-checkbox" id="toctree-checkbox-4" name="toctree-checkbox-4" role="switch" type="checkbox"/><label for="toctree-checkbox-4"><div class="visually-hidden">Toggle navigation of Classic Control</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../../../environments/classic_control/acrobot/">Acrobot</a></li>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../../../environments/classic_control/cart_pole/">Cart Pole</a></li>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../../../environments/classic_control/mountain_car_continuous/">Mountain Car Continuous</a></li>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../../../environments/classic_control/mountain_car/">Mountain Car</a></li>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../../../environments/classic_control/pendulum/">Pendulum</a></li>
|
|||
|
</ul>
|
|||
|
</li>
|
|||
|
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../environments/box2d/">Box2D</a><input class="toctree-checkbox" id="toctree-checkbox-5" name="toctree-checkbox-5" role="switch" type="checkbox"/><label for="toctree-checkbox-5"><div class="visually-hidden">Toggle navigation of Box2D</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../../../environments/box2d/bipedal_walker/">Bipedal Walker</a></li>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../../../environments/box2d/car_racing/">Car Racing</a></li>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../../../environments/box2d/lunar_lander/">Lunar Lander</a></li>
|
|||
|
</ul>
|
|||
|
</li>
|
|||
|
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../environments/toy_text/">Toy Text</a><input class="toctree-checkbox" id="toctree-checkbox-6" name="toctree-checkbox-6" role="switch" type="checkbox"/><label for="toctree-checkbox-6"><div class="visually-hidden">Toggle navigation of Toy Text</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../../../environments/toy_text/blackjack/">Blackjack</a></li>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../../../environments/toy_text/taxi/">Taxi</a></li>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../../../environments/toy_text/cliff_walking/">Cliff Walking</a></li>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../../../environments/toy_text/frozen_lake/">Frozen Lake</a></li>
|
|||
|
</ul>
|
|||
|
</li>
|
|||
|
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../environments/mujoco/">MuJoCo</a><input class="toctree-checkbox" id="toctree-checkbox-7" name="toctree-checkbox-7" role="switch" type="checkbox"/><label for="toctree-checkbox-7"><div class="visually-hidden">Toggle navigation of MuJoCo</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../../../environments/mujoco/ant/">Ant</a></li>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../../../environments/mujoco/half_cheetah/">Half Cheetah</a></li>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../../../environments/mujoco/hopper/">Hopper</a></li>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../../../environments/mujoco/humanoid/">Humanoid</a></li>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../../../environments/mujoco/humanoid_standup/">Humanoid Standup</a></li>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../../../environments/mujoco/inverted_double_pendulum/">Inverted Double Pendulum</a></li>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../../../environments/mujoco/inverted_pendulum/">Inverted Pendulum</a></li>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../../../environments/mujoco/pusher/">Pusher</a></li>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../../../environments/mujoco/reacher/">Reacher</a></li>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../../../environments/mujoco/swimmer/">Swimmer</a></li>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../../../environments/mujoco/walker2d/">Walker2D</a></li>
|
|||
|
</ul>
|
|||
|
</li>
|
|||
|
<li class="toctree-l1"><a class="reference internal" href="../../../environments/atari/">Atari</a></li>
|
|||
|
<li class="toctree-l1"><a class="reference internal" href="../../../environments/third_party_environments/">External Environments</a></li>
|
|||
|
</ul>
|
|||
|
<p class="caption" role="heading"><span class="caption-text">Tutorials</span></p>
|
|||
|
<ul class="current">
|
|||
|
<li class="toctree-l1 current has-children"><a class="reference internal" href="../">Gymnasium Basics Documentation Links</a><input checked="" class="toctree-checkbox" id="toctree-checkbox-8" name="toctree-checkbox-8" role="switch" type="checkbox"/><label for="toctree-checkbox-8"><div class="visually-hidden">Toggle navigation of Gymnasium Basics Documentation Links</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul class="current">
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../load_quadruped_model/">Load custom quadruped robot environments</a></li>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../handling_time_limits/">Handling Time Limits</a></li>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../implementing_custom_wrappers/">Implementing Custom Wrappers</a></li>
|
|||
|
<li class="toctree-l2 current current-page"><a class="current reference internal" href="#">Make your own custom environment</a></li>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../vector_envs_tutorial/">Training A2C with Vector Envs and Domain Randomization</a></li>
|
|||
|
</ul>
|
|||
|
</li>
|
|||
|
<li class="toctree-l1 has-children"><a class="reference internal" href="../../training_agents/">Training Agents links in the Gymnasium Documentation</a><input class="toctree-checkbox" id="toctree-checkbox-9" name="toctree-checkbox-9" role="switch" type="checkbox"/><label for="toctree-checkbox-9"><div class="visually-hidden">Toggle navigation of Training Agents links in the Gymnasium Documentation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../../training_agents/reinforce_invpend_gym_v26/">Training using REINFORCE for Mujoco</a></li>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../../training_agents/blackjack_tutorial/">Solving Blackjack with Q-Learning</a></li>
|
|||
|
<li class="toctree-l2"><a class="reference internal" href="../../training_agents/FrozenLake_tuto/">Frozenlake benchmark</a></li>
|
|||
|
</ul>
|
|||
|
</li>
|
|||
|
<li class="toctree-l1"><a class="reference internal" href="../../third-party-tutorials/">Third-Party Tutorials</a></li>
|
|||
|
</ul>
|
|||
|
<p class="caption" role="heading"><span class="caption-text">Development</span></p>
|
|||
|
<ul>
|
|||
|
<li class="toctree-l1"><a class="reference external" href="https://github.com/Farama-Foundation/Gymnasium">GitHub</a></li>
|
|||
|
<li class="toctree-l1"><a class="reference external" href="https://arxiv.org/abs/2407.17032">Paper</a></li>
|
|||
|
<li class="toctree-l1"><a class="reference internal" href="../../../gymnasium_release_notes/">Gymnasium Release Notes</a></li>
|
|||
|
<li class="toctree-l1"><a class="reference internal" href="../../../gym_release_notes/">Gym Release Notes</a></li>
|
|||
|
<li class="toctree-l1"><a class="reference external" href="https://github.com/Farama-Foundation/Gymnasium/blob/main/docs/README.md">Contribute to the Docs</a></li>
|
|||
|
</ul>
|
|||
|
|
|||
|
</div>
|
|||
|
</div>
|
|||
|
|
|||
|
</div>
|
|||
|
|
|||
|
</div>
|
|||
|
</aside>
|
|||
|
<div class="main-container">
|
|||
|
|
|||
|
|
|||
|
|
|||
|
|
|||
|
|
|||
|
<div class="main">
|
|||
|
<div class="content">
|
|||
|
<div class="article-container">
|
|||
|
<a href="#" class="back-to-top muted-link">
|
|||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
|
|||
|
<path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12z"></path>
|
|||
|
</svg>
|
|||
|
<span>Back to top</span>
|
|||
|
</a>
|
|||
|
<div class="content-icon-container">
|
|||
|
<div class="edit-this-page">
|
|||
|
<a class="muted-link" href="https://github.com/Farama-Foundation/Gymnasium/edit/main/docs/tutorials/gymnasium_basics/environment_creation.py" title="Edit this page">
|
|||
|
<svg aria-hidden="true" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" fill="none" stroke-linecap="round" stroke-linejoin="round">
|
|||
|
<path stroke="none" d="M0 0h24v24H0z" fill="none"/>
|
|||
|
<path d="M4 20h4l10.5 -10.5a1.5 1.5 0 0 0 -4 -4l-10.5 10.5v4" />
|
|||
|
<line x1="13.5" y1="6.5" x2="17.5" y2="10.5" />
|
|||
|
</svg>
|
|||
|
<span class="visually-hidden">Edit this page</span>
|
|||
|
</a>
|
|||
|
</div><div class="theme-toggle-container theme-toggle-content">
|
|||
|
<button class="theme-toggle" title="Toggle color theme">
|
|||
|
<div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
|
|||
|
<svg class="theme-icon-when-auto">
|
|||
|
<use href="#svg-sun-half"></use>
|
|||
|
</svg>
|
|||
|
<svg class="theme-icon-when-dark">
|
|||
|
<use href="#svg-moon"></use>
|
|||
|
</svg>
|
|||
|
<svg class="theme-icon-when-light">
|
|||
|
<use href="#svg-sun"></use>
|
|||
|
</svg>
|
|||
|
</button>
|
|||
|
</div>
|
|||
|
<label class="toc-overlay-icon toc-content-icon" for="__toc">
|
|||
|
<div class="visually-hidden">Toggle table of contents sidebar</div>
|
|||
|
<i class="icon"><svg>
|
|||
|
<use href="#svg-toc"></use>
|
|||
|
</svg></i>
|
|||
|
</label>
|
|||
|
</div>
|
|||
|
<article role="main">
|
|||
|
|
|||
|
<section class="sphx-glr-example-title" id="make-your-own-custom-environment">
|
|||
|
<span id="sphx-glr-tutorials-gymnasium-basics-environment-creation-py"></span><h1>Make your own custom environment<a class="headerlink" href="#make-your-own-custom-environment" title="Link to this heading">¶</a></h1>
|
|||
|
<p>This tutorial gives an overview of creating new environments, along with
|
|||
|
the relevant wrappers, utilities, and tests that Gymnasium provides
|
|||
|
to support environment creation.</p>
|
|||
|
<section id="setup">
|
|||
|
<h2>Setup<a class="headerlink" href="#setup" title="Link to this heading">¶</a></h2>
|
|||
|
<section id="recommended-solution">
|
|||
|
<h3>Recommended solution<a class="headerlink" href="#recommended-solution" title="Link to this heading">¶</a></h3>
|
|||
|
<ol class="arabic simple">
|
|||
|
<li><p>Install <code class="docutils literal notranslate"><span class="pre">pipx</span></code> following the <a class="reference external" href="https://pypa.github.io/pipx/installation/">pipx documentation</a>.</p></li>
|
|||
|
<li><p>Then install Copier:</p></li>
|
|||
|
</ol>
|
|||
|
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="go">pipx install copier</span>
|
|||
|
</pre></div>
|
|||
|
</div>
|
|||
|
</section>
|
|||
|
<section id="alternative-solutions">
|
|||
|
<h3>Alternative solutions<a class="headerlink" href="#alternative-solutions" title="Link to this heading">¶</a></h3>
|
|||
|
<p>Install Copier with Pip or Conda:</p>
|
|||
|
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="go">pip install copier</span>
|
|||
|
</pre></div>
|
|||
|
</div>
|
|||
|
<p>or</p>
|
|||
|
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="go">conda install -c conda-forge copier</span>
|
|||
|
</pre></div>
|
|||
|
</div>
|
|||
|
</section>
|
|||
|
</section>
|
|||
|
<section id="generate-your-environment">
|
|||
|
<h2>Generate your environment<a class="headerlink" href="#generate-your-environment" title="Link to this heading">¶</a></h2>
|
|||
|
<p>You can check that <code class="docutils literal notranslate"><span class="pre">Copier</span></code> has been correctly installed by running the following command, which should output a version number:</p>
|
|||
|
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="go">copier --version</span>
|
|||
|
</pre></div>
|
|||
|
</div>
|
|||
|
<p>Then you can just run the following command and replace the string <code class="docutils literal notranslate"><span class="pre">path/to/directory</span></code> with the path to the directory where you want to create your new project.</p>
|
|||
|
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="go">copier copy https://github.com/Farama-Foundation/gymnasium-env-template.git "path/to/directory"</span>
|
|||
|
</pre></div>
|
|||
|
</div>
|
|||
|
<p>Answer the questions, and when it’s finished you should get a project structure like the following:</p>
|
|||
|
<div class="highlight-sh notranslate"><div class="highlight"><pre><span></span>.
|
|||
|
├──<span class="w"> </span>gymnasium_env
|
|||
|
│<span class="w">   </span>├──<span class="w"> </span>envs
|
|||
|
│<span class="w">   </span>│<span class="w">   </span>├──<span class="w"> </span>grid_world.py
|
|||
|
│<span class="w">   </span>│<span class="w">   </span>└──<span class="w"> </span>__init__.py
|
|||
|
│<span class="w">   </span>├──<span class="w"> </span>__init__.py
|
|||
|
│<span class="w">   </span>└──<span class="w"> </span>wrappers
|
|||
|
│<span class="w">       </span>├──<span class="w"> </span>clip_reward.py
|
|||
|
│<span class="w">       </span>├──<span class="w"> </span>discrete_actions.py
|
|||
|
│<span class="w">       </span>├──<span class="w"> </span>__init__.py
|
|||
|
│<span class="w">       </span>├──<span class="w"> </span>reacher_weighted_reward.py
|
|||
|
│<span class="w">       </span>└──<span class="w"> </span>relative_position.py
|
|||
|
├──<span class="w"> </span>LICENSE
|
|||
|
├──<span class="w"> </span>pyproject.toml
|
|||
|
└──<span class="w"> </span>README.md
|
|||
|
</pre></div>
|
|||
|
</div>
|
|||
|
</section>
|
|||
|
<section id="subclassing-gymnasium-env">
|
|||
|
<h2>Subclassing gymnasium.Env<a class="headerlink" href="#subclassing-gymnasium-env" title="Link to this heading">¶</a></h2>
|
|||
|
<p>Before learning how to create your own environment you should check out
|
|||
|
<a class="reference external" href="/api/env">the documentation of Gymnasium’s API</a>.</p>
|
|||
|
<p>To illustrate the process of subclassing <code class="docutils literal notranslate"><span class="pre">gymnasium.Env</span></code>, we will
|
|||
|
implement a very simplistic game, called <code class="docutils literal notranslate"><span class="pre">GridWorldEnv</span></code>. We will write
|
|||
|
the code for our custom environment in
|
|||
|
<code class="docutils literal notranslate"><span class="pre">gymnasium_env/envs/grid_world.py</span></code>. The environment
|
|||
|
consists of a 2-dimensional square grid of fixed size (specified via the
|
|||
|
<code class="docutils literal notranslate"><span class="pre">size</span></code> parameter during construction). The agent can move vertically
|
|||
|
or horizontally between grid cells in each timestep. The goal of the
|
|||
|
agent is to navigate to a target on the grid that has been placed
|
|||
|
randomly at the beginning of the episode.</p>
|
|||
|
<ul class="simple">
|
|||
|
<li><p>Observations provide the location of the target and agent.</p></li>
|
|||
|
<li><p>There are 4 actions in our environment, corresponding to the
|
|||
|
movements “right”, “up”, “left”, and “down”.</p></li>
|
|||
|
<li><p>A done signal is issued as soon as the agent has navigated to the
|
|||
|
grid cell where the target is located.</p></li>
|
|||
|
<li><p>Rewards are binary and sparse, meaning that the immediate reward is
|
|||
|
always zero, unless the agent has reached the target, then it is 1.</p></li>
|
|||
|
</ul>
|
|||
|
<p>An episode in this environment (with <code class="docutils literal notranslate"><span class="pre">size=5</span></code>) might look like this:</p>
|
|||
|
<blockquote>
|
|||
|
<div><a class="reference internal image-reference" href="../../../_images/environment-creation-example-episode.gif"><img alt="Example episode of the custom environment" src="../../../_images/environment-creation-example-episode.gif" style="width: 400px;" />
|
|||
|
</a>
|
|||
|
</div></blockquote>
|
|||
|
<p>where the blue dot is the agent and the red square represents the
|
|||
|
target.</p>
|
|||
|
<p>Let us look at the source code of <code class="docutils literal notranslate"><span class="pre">GridWorldEnv</span></code> piece by piece:</p>
|
|||
|
<section id="declaration-and-initialization">
|
|||
|
<h3>Declaration and Initialization<a class="headerlink" href="#declaration-and-initialization" title="Link to this heading">¶</a></h3>
|
|||
|
<p>Our custom environment will inherit from the abstract class
|
|||
|
<code class="docutils literal notranslate"><span class="pre">gymnasium.Env</span></code>. You shouldn’t forget to add the <code class="docutils literal notranslate"><span class="pre">metadata</span></code>
|
|||
|
attribute to your class. There, you should specify the render-modes that
|
|||
|
are supported by your environment (e.g., <code class="docutils literal notranslate"><span class="pre">"human"</span></code>, <code class="docutils literal notranslate"><span class="pre">"rgb_array"</span></code>,
|
|||
|
<code class="docutils literal notranslate"><span class="pre">"ansi"</span></code>) and the framerate at which your environment should be
|
|||
|
rendered. Every environment should support <code class="docutils literal notranslate"><span class="pre">None</span></code> as render-mode; you
|
|||
|
don’t need to add it in the metadata. In <code class="docutils literal notranslate"><span class="pre">GridWorldEnv</span></code>, we will
|
|||
|
support the modes “rgb_array” and “human” and render at 4 FPS.</p>
|
|||
|
<p>The <code class="docutils literal notranslate"><span class="pre">__init__</span></code> method of our environment will accept the integer
|
|||
|
<code class="docutils literal notranslate"><span class="pre">size</span></code>, that determines the size of the square grid. We will set up
|
|||
|
some variables for rendering and define <code class="docutils literal notranslate"><span class="pre">self.observation_space</span></code> and
|
|||
|
<code class="docutils literal notranslate"><span class="pre">self.action_space</span></code>. In our case, observations should provide
|
|||
|
information about the location of the agent and target on the
|
|||
|
2-dimensional grid. We will choose to represent observations in the form
|
|||
|
of dictionaries with keys <code class="docutils literal notranslate"><span class="pre">"agent"</span></code> and <code class="docutils literal notranslate"><span class="pre">"target"</span></code>. An observation
|
|||
|
may look like <code class="docutils literal notranslate"><span class="pre">{"agent":</span> <span class="pre">array([1,</span> <span class="pre">0]),</span> <span class="pre">"target":</span> <span class="pre">array([0,</span> <span class="pre">3])}</span></code>.
|
|||
|
Since we have 4 actions in our environment (“right”, “up”, “left”,
|
|||
|
“down”), we will use <code class="docutils literal notranslate"><span class="pre">Discrete(4)</span></code> as an action space. Here is the
|
|||
|
declaration of <code class="docutils literal notranslate"><span class="pre">GridWorldEnv</span></code> and the implementation of <code class="docutils literal notranslate"><span class="pre">__init__</span></code>:</p>
|
|||
|
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="c1"># gymnasium_env/envs/grid_world.py</span>
|
|||
|
<span class="kn">from</span><span class="w"> </span><span class="nn">enum</span><span class="w"> </span><span class="kn">import</span> <span class="n">Enum</span>
|
|||
|
|
|||
|
<span class="kn">import</span><span class="w"> </span><span class="nn">numpy</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">np</span>
|
|||
|
<span class="kn">import</span><span class="w"> </span><span class="nn">pygame</span>
|
|||
|
|
|||
|
<span class="kn">import</span><span class="w"> </span><span class="nn">gymnasium</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">gym</span>
|
|||
|
<span class="kn">from</span><span class="w"> </span><span class="nn">gymnasium</span><span class="w"> </span><span class="kn">import</span> <span class="n">spaces</span>
|
|||
|
|
|||
|
|
|||
|
<span class="k">class</span><span class="w"> </span><span class="nc">Actions</span><span class="p">(</span><span class="n">Enum</span><span class="p">):</span>
|
|||
|
<span class="n">RIGHT</span> <span class="o">=</span> <span class="mi">0</span>
|
|||
|
<span class="n">UP</span> <span class="o">=</span> <span class="mi">1</span>
|
|||
|
<span class="n">LEFT</span> <span class="o">=</span> <span class="mi">2</span>
|
|||
|
<span class="n">DOWN</span> <span class="o">=</span> <span class="mi">3</span>
|
|||
|
|
|||
|
|
|||
|
<span class="k">class</span><span class="w"> </span><span class="nc">GridWorldEnv</span><span class="p">(</span><span class="n">gym</span><span class="o">.</span><span class="n">Env</span><span class="p">):</span>
|
|||
|
<span class="n">metadata</span> <span class="o">=</span> <span class="p">{</span><span class="s2">"render_modes"</span><span class="p">:</span> <span class="p">[</span><span class="s2">"human"</span><span class="p">,</span> <span class="s2">"rgb_array"</span><span class="p">],</span> <span class="s2">"render_fps"</span><span class="p">:</span> <span class="mi">4</span><span class="p">}</span>
|
|||
|
|
|||
|
<span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="mi">5</span><span class="p">):</span>
|
|||
|
<span class="bp">self</span><span class="o">.</span><span class="n">size</span> <span class="o">=</span> <span class="n">size</span> <span class="c1"># The size of the square grid</span>
|
|||
|
<span class="bp">self</span><span class="o">.</span><span class="n">window_size</span> <span class="o">=</span> <span class="mi">512</span> <span class="c1"># The size of the PyGame window</span>
|
|||
|
|
|||
|
<span class="c1"># Observations are dictionaries with the agent's and the target's location.</span>
|
|||
|
<span class="c1"># Each location is encoded as an element of {0, ..., `size` - 1}^2, i.e. MultiDiscrete([size, size]).</span>
|
|||
|
<span class="bp">self</span><span class="o">.</span><span class="n">observation_space</span> <span class="o">=</span> <span class="n">spaces</span><span class="o">.</span><span class="n">Dict</span><span class="p">(</span>
|
|||
|
<span class="p">{</span>
|
|||
|
<span class="s2">"agent"</span><span class="p">:</span> <span class="n">spaces</span><span class="o">.</span><span class="n">Box</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">size</span> <span class="o">-</span> <span class="mi">1</span><span class="p">,</span> <span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">2</span><span class="p">,),</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">int</span><span class="p">),</span>
|
|||
|
<span class="s2">"target"</span><span class="p">:</span> <span class="n">spaces</span><span class="o">.</span><span class="n">Box</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">size</span> <span class="o">-</span> <span class="mi">1</span><span class="p">,</span> <span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">2</span><span class="p">,),</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">int</span><span class="p">),</span>
|
|||
|
<span class="p">}</span>
|
|||
|
<span class="p">)</span>
|
|||
|
<span class="bp">self</span><span class="o">.</span><span class="n">_agent_location</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="o">-</span><span class="mi">1</span><span class="p">],</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">int</span><span class="p">)</span>
|
|||
|
<span class="bp">self</span><span class="o">.</span><span class="n">_target_location</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="o">-</span><span class="mi">1</span><span class="p">],</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">int</span><span class="p">)</span>
|
|||
|
|
|||
|
<span class="c1"># We have 4 actions, corresponding to "right", "up", "left", "down"</span>
|
|||
|
<span class="bp">self</span><span class="o">.</span><span class="n">action_space</span> <span class="o">=</span> <span class="n">spaces</span><span class="o">.</span><span class="n">Discrete</span><span class="p">(</span><span class="mi">4</span><span class="p">)</span>
|
|||
|
|
|||
|
<span class="w"> </span><span class="sd">"""</span>
|
|||
|
<span class="sd"> The following dictionary maps abstract actions from `self.action_space` to</span>
|
|||
|
<span class="sd"> the direction we will walk in if that action is taken.</span>
|
|||
|
<span class="sd"> i.e. 0 corresponds to "right", 1 to "up" etc.</span>
|
|||
|
<span class="sd"> """</span>
|
|||
|
<span class="bp">self</span><span class="o">.</span><span class="n">_action_to_direction</span> <span class="o">=</span> <span class="p">{</span>
|
|||
|
<span class="n">Actions</span><span class="o">.</span><span class="n">RIGHT</span><span class="o">.</span><span class="n">value</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">]),</span>
|
|||
|
<span class="n">Actions</span><span class="o">.</span><span class="n">UP</span><span class="o">.</span><span class="n">value</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">]),</span>
|
|||
|
<span class="n">Actions</span><span class="o">.</span><span class="n">LEFT</span><span class="o">.</span><span class="n">value</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">]),</span>
|
|||
|
<span class="n">Actions</span><span class="o">.</span><span class="n">DOWN</span><span class="o">.</span><span class="n">value</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="o">-</span><span class="mi">1</span><span class="p">]),</span>
|
|||
|
<span class="p">}</span>
|
|||
|
|
|||
|
<span class="k">assert</span> <span class="n">render_mode</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">render_mode</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">metadata</span><span class="p">[</span><span class="s2">"render_modes"</span><span class="p">]</span>
|
|||
|
<span class="bp">self</span><span class="o">.</span><span class="n">render_mode</span> <span class="o">=</span> <span class="n">render_mode</span>
|
|||
|
|
|||
|
<span class="w"> </span><span class="sd">"""</span>
|
|||
|
<span class="sd"> If human-rendering is used, `self.window` will be a reference</span>
|
|||
|
<span class="sd"> to the window that we draw to. `self.clock` will be a clock that is used</span>
|
|||
|
<span class="sd"> to ensure that the environment is rendered at the correct framerate in</span>
|
|||
|
<span class="sd"> human-mode. They will remain `None` until human-mode is used for the</span>
|
|||
|
<span class="sd"> first time.</span>
|
|||
|
<span class="sd"> """</span>
|
|||
|
<span class="bp">self</span><span class="o">.</span><span class="n">window</span> <span class="o">=</span> <span class="kc">None</span>
|
|||
|
<span class="bp">self</span><span class="o">.</span><span class="n">clock</span> <span class="o">=</span> <span class="kc">None</span>
|
|||
|
</pre></div>
|
|||
|
</div>
|
|||
|
</section>
|
|||
|
<section id="constructing-observations-from-environment-states">
|
|||
|
<h3>Constructing Observations From Environment States<a class="headerlink" href="#constructing-observations-from-environment-states" title="Link to this heading">¶</a></h3>
|
|||
|
<p>Since we will need to compute observations both in <code class="docutils literal notranslate"><span class="pre">reset</span></code> and
|
|||
|
<code class="docutils literal notranslate"><span class="pre">step</span></code>, it is often convenient to have a (private) method <code class="docutils literal notranslate"><span class="pre">_get_obs</span></code>
|
|||
|
that translates the environment’s state into an observation. However,
|
|||
|
this is not mandatory and you may as well compute observations in
|
|||
|
<code class="docutils literal notranslate"><span class="pre">reset</span></code> and <code class="docutils literal notranslate"><span class="pre">step</span></code> separately:</p>
|
|||
|
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span><span class="w"> </span><span class="nf">_get_obs</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|||
|
<span class="k">return</span> <span class="p">{</span><span class="s2">"agent"</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">_agent_location</span><span class="p">,</span> <span class="s2">"target"</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">_target_location</span><span class="p">}</span>
|
|||
|
</pre></div>
|
|||
|
</div>
|
|||
|
<p>We can also implement a similar method for the auxiliary information
|
|||
|
that is returned by <code class="docutils literal notranslate"><span class="pre">step</span></code> and <code class="docutils literal notranslate"><span class="pre">reset</span></code>. In our case, we would like
|
|||
|
to provide the Manhattan distance between the agent and the target:</p>
|
|||
|
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span><span class="w"> </span><span class="nf">_get_info</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|||
|
<span class="k">return</span> <span class="p">{</span>
|
|||
|
<span class="s2">"distance"</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">linalg</span><span class="o">.</span><span class="n">norm</span><span class="p">(</span>
|
|||
|
<span class="bp">self</span><span class="o">.</span><span class="n">_agent_location</span> <span class="o">-</span> <span class="bp">self</span><span class="o">.</span><span class="n">_target_location</span><span class="p">,</span> <span class="nb">ord</span><span class="o">=</span><span class="mi">1</span>
|
|||
|
<span class="p">)</span>
|
|||
|
<span class="p">}</span>
|
|||
|
</pre></div>
|
|||
|
</div>
|
|||
|
<p>Oftentimes, info will also contain some data that is only available
|
|||
|
inside the <code class="docutils literal notranslate"><span class="pre">step</span></code> method (e.g., individual reward terms). In that case,
|
|||
|
we would have to update the dictionary that is returned by <code class="docutils literal notranslate"><span class="pre">_get_info</span></code>
|
|||
|
in <code class="docutils literal notranslate"><span class="pre">step</span></code>.</p>
|
|||
|
</section>
|
|||
|
<section id="reset">
|
|||
|
<h3>Reset<a class="headerlink" href="#reset" title="Link to this heading">¶</a></h3>
|
|||
|
<p>The <code class="docutils literal notranslate"><span class="pre">reset</span></code> method will be called to initiate a new episode. You may
|
|||
|
assume that the <code class="docutils literal notranslate"><span class="pre">step</span></code> method will not be called before <code class="docutils literal notranslate"><span class="pre">reset</span></code> has
|
|||
|
been called. Moreover, <code class="docutils literal notranslate"><span class="pre">reset</span></code> should be called whenever a done signal
|
|||
|
has been issued. Users may pass the <code class="docutils literal notranslate"><span class="pre">seed</span></code> keyword to <code class="docutils literal notranslate"><span class="pre">reset</span></code> to
|
|||
|
initialize any random number generator that is used by the environment
|
|||
|
to a deterministic state. It is recommended to use the random number
|
|||
|
generator <code class="docutils literal notranslate"><span class="pre">self.np_random</span></code> that is provided by the environment’s base
|
|||
|
class, <code class="docutils literal notranslate"><span class="pre">gymnasium.Env</span></code>. If you only use this RNG, you do not need to
|
|||
|
worry much about seeding, <em>but you need to remember to call
|
|||
|
<code class="docutils literal notranslate"><span class="pre">super().reset(seed=seed)</span></code></em> to make sure that <code class="docutils literal notranslate"><span class="pre">gymnasium.Env</span></code>
|
|||
|
correctly seeds the RNG. Once this is done, we can randomly set the
|
|||
|
state of our environment. In our case, we randomly choose the agent’s
|
|||
|
location and then randomly sample the target’s position until it does not
|
|||
|
coincide with the agent’s position.</p>
|
|||
|
<p>The <code class="docutils literal notranslate"><span class="pre">reset</span></code> method should return a tuple of the initial observation
|
|||
|
and some auxiliary information. We can use the methods <code class="docutils literal notranslate"><span class="pre">_get_obs</span></code> and
|
|||
|
<code class="docutils literal notranslate"><span class="pre">_get_info</span></code> that we implemented earlier for that:</p>
|
|||
|
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span><span class="w"> </span><span class="nf">reset</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">options</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|||
|
<span class="c1"># We need the following line to seed self.np_random</span>
|
|||
|
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
|
|||
|
|
|||
|
<span class="c1"># Choose the agent's location uniformly at random</span>
|
|||
|
<span class="bp">self</span><span class="o">.</span><span class="n">_agent_location</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">np_random</span><span class="o">.</span><span class="n">integers</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">size</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">int</span><span class="p">)</span>
|
|||
|
|
|||
|
<span class="c1"># We will sample the target's location randomly until it does not coincide with the agent's location</span>
|
|||
|
<span class="bp">self</span><span class="o">.</span><span class="n">_target_location</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_agent_location</span>
|
|||
|
<span class="k">while</span> <span class="n">np</span><span class="o">.</span><span class="n">array_equal</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_target_location</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_agent_location</span><span class="p">):</span>
|
|||
|
<span class="bp">self</span><span class="o">.</span><span class="n">_target_location</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">np_random</span><span class="o">.</span><span class="n">integers</span><span class="p">(</span>
|
|||
|
<span class="mi">0</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">size</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">int</span>
|
|||
|
<span class="p">)</span>
|
|||
|
|
|||
|
<span class="n">observation</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_obs</span><span class="p">()</span>
|
|||
|
<span class="n">info</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_info</span><span class="p">()</span>
|
|||
|
|
|||
|
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">render_mode</span> <span class="o">==</span> <span class="s2">"human"</span><span class="p">:</span>
|
|||
|
<span class="bp">self</span><span class="o">.</span><span class="n">_render_frame</span><span class="p">()</span>
|
|||
|
|
|||
|
<span class="k">return</span> <span class="n">observation</span><span class="p">,</span> <span class="n">info</span>
|
|||
|
</pre></div>
|
|||
|
</div>
|
|||
|
</section>
|
|||
|
<section id="step">
|
|||
|
<h3>Step<a class="headerlink" href="#step" title="Link to this heading">¶</a></h3>
|
|||
|
<p>The <code class="docutils literal notranslate"><span class="pre">step</span></code> method usually contains most of the logic of your
|
|||
|
environment. It accepts an <code class="docutils literal notranslate"><span class="pre">action</span></code>, computes the state of the
|
|||
|
environment after applying that action and returns the 5-tuple
|
|||
|
<code class="docutils literal notranslate"><span class="pre">(observation,</span> <span class="pre">reward,</span> <span class="pre">terminated,</span> <span class="pre">truncated,</span> <span class="pre">info)</span></code>. See
|
|||
|
<a class="reference internal" href="../../../api/env/#gymnasium.Env.step" title="gymnasium.Env.step"><code class="xref py py-meth docutils literal notranslate"><span class="pre">gymnasium.Env.step()</span></code></a>. Once the new state of the environment has
|
|||
|
been computed, we can check whether it is a terminal state and we set
|
|||
|
<code class="docutils literal notranslate"><span class="pre">done</span></code> accordingly. Since we are using sparse binary rewards in
|
|||
|
<code class="docutils literal notranslate"><span class="pre">GridWorldEnv</span></code>, computing <code class="docutils literal notranslate"><span class="pre">reward</span></code> is trivial once we know
|
|||
|
<code class="docutils literal notranslate"><span class="pre">done</span></code>. To gather <code class="docutils literal notranslate"><span class="pre">observation</span></code> and <code class="docutils literal notranslate"><span class="pre">info</span></code>, we can again make
|
|||
|
use of <code class="docutils literal notranslate"><span class="pre">_get_obs</span></code> and <code class="docutils literal notranslate"><span class="pre">_get_info</span></code>:</p>
|
|||
|
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span><span class="w"> </span><span class="nf">step</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">action</span><span class="p">):</span>
|
|||
|
<span class="c1"># Map the action (element of {0,1,2,3}) to the direction we walk in</span>
|
|||
|
<span class="n">direction</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_action_to_direction</span><span class="p">[</span><span class="n">action</span><span class="p">]</span>
|
|||
|
<span class="c1"># We use `np.clip` to make sure we don't leave the grid</span>
|
|||
|
<span class="bp">self</span><span class="o">.</span><span class="n">_agent_location</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">clip</span><span class="p">(</span>
|
|||
|
<span class="bp">self</span><span class="o">.</span><span class="n">_agent_location</span> <span class="o">+</span> <span class="n">direction</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">size</span> <span class="o">-</span> <span class="mi">1</span>
|
|||
|
<span class="p">)</span>
|
|||
|
<span class="c1"># An episode is done iff the agent has reached the target</span>
|
|||
|
<span class="n">terminated</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array_equal</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_agent_location</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_target_location</span><span class="p">)</span>
|
|||
|
<span class="n">reward</span> <span class="o">=</span> <span class="mi">1</span> <span class="k">if</span> <span class="n">terminated</span> <span class="k">else</span> <span class="mi">0</span> <span class="c1"># Binary sparse rewards</span>
|
|||
|
<span class="n">observation</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_obs</span><span class="p">()</span>
|
|||
|
<span class="n">info</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_info</span><span class="p">()</span>
|
|||
|
|
|||
|
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">render_mode</span> <span class="o">==</span> <span class="s2">"human"</span><span class="p">:</span>
|
|||
|
<span class="bp">self</span><span class="o">.</span><span class="n">_render_frame</span><span class="p">()</span>
|
|||
|
|
|||
|
<span class="k">return</span> <span class="n">observation</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="kc">False</span><span class="p">,</span> <span class="n">info</span>
|
|||
|
</pre></div>
|
|||
|
</div>
|
|||
|
</section>
|
|||
|
<section id="rendering">
|
|||
|
<h3>Rendering<a class="headerlink" href="#rendering" title="Link to this heading">¶</a></h3>
|
|||
|
<p>Here, we are using PyGame for rendering. A similar approach to rendering
|
|||
|
is used in many environments that are included with Gymnasium and you
|
|||
|
can use it as a skeleton for your own environments:</p>
|
|||
|
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span><span class="w"> </span><span class="nf">render</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|||
|
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">render_mode</span> <span class="o">==</span> <span class="s2">"rgb_array"</span><span class="p">:</span>
|
|||
|
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_render_frame</span><span class="p">()</span>
|
|||
|
|
|||
|
<span class="k">def</span><span class="w"> </span><span class="nf">_render_frame</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|||
|
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">window</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">render_mode</span> <span class="o">==</span> <span class="s2">"human"</span><span class="p">:</span>
|
|||
|
<span class="n">pygame</span><span class="o">.</span><span class="n">init</span><span class="p">()</span>
|
|||
|
<span class="n">pygame</span><span class="o">.</span><span class="n">display</span><span class="o">.</span><span class="n">init</span><span class="p">()</span>
|
|||
|
<span class="bp">self</span><span class="o">.</span><span class="n">window</span> <span class="o">=</span> <span class="n">pygame</span><span class="o">.</span><span class="n">display</span><span class="o">.</span><span class="n">set_mode</span><span class="p">(</span>
|
|||
|
<span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">window_size</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">window_size</span><span class="p">)</span>
|
|||
|
<span class="p">)</span>
|
|||
|
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">clock</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">render_mode</span> <span class="o">==</span> <span class="s2">"human"</span><span class="p">:</span>
|
|||
|
<span class="bp">self</span><span class="o">.</span><span class="n">clock</span> <span class="o">=</span> <span class="n">pygame</span><span class="o">.</span><span class="n">time</span><span class="o">.</span><span class="n">Clock</span><span class="p">()</span>
|
|||
|
|
|||
|
<span class="n">canvas</span> <span class="o">=</span> <span class="n">pygame</span><span class="o">.</span><span class="n">Surface</span><span class="p">((</span><span class="bp">self</span><span class="o">.</span><span class="n">window_size</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">window_size</span><span class="p">))</span>
|
|||
|
<span class="n">canvas</span><span class="o">.</span><span class="n">fill</span><span class="p">((</span><span class="mi">255</span><span class="p">,</span> <span class="mi">255</span><span class="p">,</span> <span class="mi">255</span><span class="p">))</span>
|
|||
|
<span class="n">pix_square_size</span> <span class="o">=</span> <span class="p">(</span>
|
|||
|
<span class="bp">self</span><span class="o">.</span><span class="n">window_size</span> <span class="o">/</span> <span class="bp">self</span><span class="o">.</span><span class="n">size</span>
|
|||
|
<span class="p">)</span> <span class="c1"># The size of a single grid square in pixels</span>
|
|||
|
|
|||
|
<span class="c1"># First we draw the target</span>
|
|||
|
<span class="n">pygame</span><span class="o">.</span><span class="n">draw</span><span class="o">.</span><span class="n">rect</span><span class="p">(</span>
|
|||
|
<span class="n">canvas</span><span class="p">,</span>
|
|||
|
<span class="p">(</span><span class="mi">255</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">),</span>
|
|||
|
<span class="n">pygame</span><span class="o">.</span><span class="n">Rect</span><span class="p">(</span>
|
|||
|
<span class="n">pix_square_size</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">_target_location</span><span class="p">,</span>
|
|||
|
<span class="p">(</span><span class="n">pix_square_size</span><span class="p">,</span> <span class="n">pix_square_size</span><span class="p">),</span>
|
|||
|
<span class="p">),</span>
|
|||
|
<span class="p">)</span>
|
|||
|
<span class="c1"># Now we draw the agent</span>
|
|||
|
<span class="n">pygame</span><span class="o">.</span><span class="n">draw</span><span class="o">.</span><span class="n">circle</span><span class="p">(</span>
|
|||
|
<span class="n">canvas</span><span class="p">,</span>
|
|||
|
<span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">255</span><span class="p">),</span>
|
|||
|
<span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_agent_location</span> <span class="o">+</span> <span class="mf">0.5</span><span class="p">)</span> <span class="o">*</span> <span class="n">pix_square_size</span><span class="p">,</span>
|
|||
|
<span class="n">pix_square_size</span> <span class="o">/</span> <span class="mi">3</span><span class="p">,</span>
|
|||
|
<span class="p">)</span>
|
|||
|
|
|||
|
<span class="c1"># Finally, add some gridlines</span>
|
|||
|
<span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">size</span> <span class="o">+</span> <span class="mi">1</span><span class="p">):</span>
|
|||
|
<span class="n">pygame</span><span class="o">.</span><span class="n">draw</span><span class="o">.</span><span class="n">line</span><span class="p">(</span>
|
|||
|
<span class="n">canvas</span><span class="p">,</span>
|
|||
|
<span class="mi">0</span><span class="p">,</span>
|
|||
|
<span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">pix_square_size</span> <span class="o">*</span> <span class="n">x</span><span class="p">),</span>
|
|||
|
<span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">window_size</span><span class="p">,</span> <span class="n">pix_square_size</span> <span class="o">*</span> <span class="n">x</span><span class="p">),</span>
|
|||
|
<span class="n">width</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span>
|
|||
|
<span class="p">)</span>
|
|||
|
<span class="n">pygame</span><span class="o">.</span><span class="n">draw</span><span class="o">.</span><span class="n">line</span><span class="p">(</span>
|
|||
|
<span class="n">canvas</span><span class="p">,</span>
|
|||
|
<span class="mi">0</span><span class="p">,</span>
|
|||
|
<span class="p">(</span><span class="n">pix_square_size</span> <span class="o">*</span> <span class="n">x</span><span class="p">,</span> <span class="mi">0</span><span class="p">),</span>
|
|||
|
<span class="p">(</span><span class="n">pix_square_size</span> <span class="o">*</span> <span class="n">x</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">window_size</span><span class="p">),</span>
|
|||
|
<span class="n">width</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span>
|
|||
|
<span class="p">)</span>
|
|||
|
|
|||
|
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">render_mode</span> <span class="o">==</span> <span class="s2">"human"</span><span class="p">:</span>
|
|||
|
<span class="c1"># The following line copies our drawings from `canvas` to the visible window</span>
|
|||
|
<span class="bp">self</span><span class="o">.</span><span class="n">window</span><span class="o">.</span><span class="n">blit</span><span class="p">(</span><span class="n">canvas</span><span class="p">,</span> <span class="n">canvas</span><span class="o">.</span><span class="n">get_rect</span><span class="p">())</span>
|
|||
|
<span class="n">pygame</span><span class="o">.</span><span class="n">event</span><span class="o">.</span><span class="n">pump</span><span class="p">()</span>
|
|||
|
<span class="n">pygame</span><span class="o">.</span><span class="n">display</span><span class="o">.</span><span class="n">update</span><span class="p">()</span>
|
|||
|
|
|||
|
<span class="c1"># We need to ensure that human-rendering occurs at the predefined framerate.</span>
|
|||
|
<span class="c1"># The following line will automatically add a delay to keep the framerate stable.</span>
|
|||
|
<span class="bp">self</span><span class="o">.</span><span class="n">clock</span><span class="o">.</span><span class="n">tick</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">metadata</span><span class="p">[</span><span class="s2">"render_fps"</span><span class="p">])</span>
|
|||
|
<span class="k">else</span><span class="p">:</span> <span class="c1"># rgb_array</span>
|
|||
|
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">transpose</span><span class="p">(</span>
|
|||
|
<span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">pygame</span><span class="o">.</span><span class="n">surfarray</span><span class="o">.</span><span class="n">pixels3d</span><span class="p">(</span><span class="n">canvas</span><span class="p">)),</span> <span class="n">axes</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="p">)</span>
|
|||
|
<span class="p">)</span>
|
|||
|
</pre></div>
|
|||
|
</div>
|
|||
|
</section>
|
|||
|
<section id="close">
|
|||
|
<h3>Close<a class="headerlink" href="#close" title="Link to this heading">¶</a></h3>
|
|||
|
<p>The <code class="docutils literal notranslate"><span class="pre">close</span></code> method should close any open resources that were used by
|
|||
|
the environment. In many cases, you don’t actually have to bother to
|
|||
|
implement this method. However, in our example <code class="docutils literal notranslate"><span class="pre">render_mode</span></code> may be
|
|||
|
<code class="docutils literal notranslate"><span class="pre">"human"</span></code> and we might need to close the window that has been opened:</p>
|
|||
|
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span><span class="w"> </span><span class="nf">close</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|||
|
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">window</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
|||
|
<span class="n">pygame</span><span class="o">.</span><span class="n">display</span><span class="o">.</span><span class="n">quit</span><span class="p">()</span>
|
|||
|
<span class="n">pygame</span><span class="o">.</span><span class="n">quit</span><span class="p">()</span>
|
|||
|
</pre></div>
|
|||
|
</div>
|
|||
|
<p>In other environments <code class="docutils literal notranslate"><span class="pre">close</span></code> might also close files that were opened
|
|||
|
or release other resources. You shouldn’t interact with the environment
|
|||
|
after having called <code class="docutils literal notranslate"><span class="pre">close</span></code>.</p>
|
|||
|
</section>
|
|||
|
</section>
|
|||
|
<section id="registering-envs">
|
|||
|
<h2>Registering Envs<a class="headerlink" href="#registering-envs" title="Link to this heading">¶</a></h2>
|
|||
|
<p>In order for the custom environments to be detected by Gymnasium, they
|
|||
|
must be registered as follows. We will choose to put this code in
|
|||
|
<code class="docutils literal notranslate"><span class="pre">gymnasium_env/__init__.py</span></code>.</p>
|
|||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span><span class="w"> </span><span class="nn">gymnasium.envs.registration</span><span class="w"> </span><span class="kn">import</span> <span class="n">register</span>
|
|||
|
|
|||
|
<span class="n">register</span><span class="p">(</span>
|
|||
|
<span class="nb">id</span><span class="o">=</span><span class="s2">"gymnasium_env/GridWorld-v0"</span><span class="p">,</span>
|
|||
|
<span class="n">entry_point</span><span class="o">=</span><span class="s2">"gymnasium_env.envs:GridWorldEnv"</span><span class="p">,</span>
|
|||
|
<span class="p">)</span>
|
|||
|
</pre></div>
|
|||
|
</div>
|
|||
|
<p>The environment ID consists of three components, two of which are
|
|||
|
optional: an optional namespace (here: <code class="docutils literal notranslate"><span class="pre">gymnasium_env</span></code>), a mandatory
|
|||
|
name (here: <code class="docutils literal notranslate"><span class="pre">GridWorld</span></code>) and an optional but recommended version
|
|||
|
(here: v0). It might have also been registered as <code class="docutils literal notranslate"><span class="pre">GridWorld-v0</span></code> (the
|
|||
|
recommended approach), <code class="docutils literal notranslate"><span class="pre">GridWorld</span></code> or <code class="docutils literal notranslate"><span class="pre">gymnasium_env/GridWorld</span></code>, and
|
|||
|
the appropriate ID should then be used during environment creation.</p>
|
|||
|
<p>Passing the keyword argument <code class="docutils literal notranslate"><span class="pre">max_episode_steps=300</span></code> to <code class="docutils literal notranslate"><span class="pre">register</span></code> will ensure that
|
|||
|
GridWorld environments that are instantiated via <code class="docutils literal notranslate"><span class="pre">gymnasium.make</span></code> will
|
|||
|
be wrapped in a <code class="docutils literal notranslate"><span class="pre">TimeLimit</span></code> wrapper (see <a class="reference external" href="/api/wrappers">the wrapper
|
|||
|
documentation</a> for more information). A done signal
|
|||
|
will then be produced if the agent has reached the target <em>or</em> 300 steps
|
|||
|
have been executed in the current episode. To distinguish truncation and
|
|||
|
termination, you can check the <code class="docutils literal notranslate"><span class="pre">truncated</span></code> value returned by <code class="docutils literal notranslate"><span class="pre">step</span></code>.</p>
|
|||
|
<p>Apart from <code class="docutils literal notranslate"><span class="pre">id</span></code> and <code class="docutils literal notranslate"><span class="pre">entry_point</span></code>, you may pass the following
|
|||
|
additional keyword arguments to <code class="docutils literal notranslate"><span class="pre">register</span></code>:</p>
|
|||
|
<div class="table-wrapper docutils container">
|
|||
|
<table class="docutils align-default">
|
|||
|
<thead>
|
|||
|
<tr class="row-odd"><th class="head"><p>Name</p></th>
|
|||
|
<th class="head"><p>Type</p></th>
|
|||
|
<th class="head"><p>Default</p></th>
|
|||
|
<th class="head"><p>Description</p></th>
|
|||
|
</tr>
|
|||
|
</thead>
|
|||
|
<tbody>
|
|||
|
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">reward_threshold</span></code></p></td>
|
|||
|
<td><p><code class="docutils literal notranslate"><span class="pre">float</span></code></p></td>
|
|||
|
<td><p><code class="docutils literal notranslate"><span class="pre">None</span></code></p></td>
|
|||
|
<td><p>The reward threshold before the task is considered solved</p></td>
|
|||
|
</tr>
|
|||
|
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">nondeterministic</span></code></p></td>
|
|||
|
<td><p><code class="docutils literal notranslate"><span class="pre">bool</span></code></p></td>
|
|||
|
<td><p><code class="docutils literal notranslate"><span class="pre">False</span></code></p></td>
|
|||
|
<td><p>Whether this environment is non-deterministic even after seeding</p></td>
|
|||
|
</tr>
|
|||
|
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">max_episode_steps</span></code></p></td>
|
|||
|
<td><p><code class="docutils literal notranslate"><span class="pre">int</span></code></p></td>
|
|||
|
<td><p><code class="docutils literal notranslate"><span class="pre">None</span></code></p></td>
|
|||
|
<td><p>The maximum number of steps that an episode can consist of. If not <code class="docutils literal notranslate"><span class="pre">None</span></code>, a <code class="docutils literal notranslate"><span class="pre">TimeLimit</span></code> wrapper is added</p></td>
|
|||
|
</tr>
|
|||
|
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">order_enforce</span></code></p></td>
|
|||
|
<td><p><code class="docutils literal notranslate"><span class="pre">bool</span></code></p></td>
|
|||
|
<td><p><code class="docutils literal notranslate"><span class="pre">True</span></code></p></td>
|
|||
|
<td><p>Whether to wrap the environment in an <code class="docutils literal notranslate"><span class="pre">OrderEnforcing</span></code> wrapper</p></td>
|
|||
|
</tr>
|
|||
|
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">kwargs</span></code></p></td>
|
|||
|
<td><p><code class="docutils literal notranslate"><span class="pre">dict</span></code></p></td>
|
|||
|
<td><p><code class="docutils literal notranslate"><span class="pre">{}</span></code></p></td>
|
|||
|
<td><p>The default kwargs to pass to the environment class</p></td>
|
|||
|
</tr>
|
|||
|
</tbody>
|
|||
|
</table>
|
|||
|
</div>
|
|||
|
<p>Most of these keywords (except for <code class="docutils literal notranslate"><span class="pre">max_episode_steps</span></code>,
|
|||
|
<code class="docutils literal notranslate"><span class="pre">order_enforce</span></code> and <code class="docutils literal notranslate"><span class="pre">kwargs</span></code>) do not alter the behavior of
|
|||
|
environment instances but merely provide some extra information about
|
|||
|
your environment. After registration, our custom <code class="docutils literal notranslate"><span class="pre">GridWorldEnv</span></code>
|
|||
|
environment can be created with
|
|||
|
<code class="docutils literal notranslate"><span class="pre">env</span> <span class="pre">=</span> <span class="pre">gymnasium.make('gymnasium_env/GridWorld-v0')</span></code>.</p>
|
|||
|
<p><code class="docutils literal notranslate"><span class="pre">gymnasium_env/envs/__init__.py</span></code> should have:</p>
|
|||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span><span class="w"> </span><span class="nn">gymnasium_env.envs.grid_world</span><span class="w"> </span><span class="kn">import</span> <span class="n">GridWorldEnv</span>
|
|||
|
</pre></div>
|
|||
|
</div>
|
|||
|
<p>If your environment is not registered, you may optionally pass a module
|
|||
|
to import, that would register your environment before creating it like
|
|||
|
this - <code class="docutils literal notranslate"><span class="pre">env</span> <span class="pre">=</span> <span class="pre">gymnasium.make('module:Env-v0')</span></code>, where <code class="docutils literal notranslate"><span class="pre">module</span></code>
|
|||
|
contains the registration code. For the GridWorld env, the registration
|
|||
|
code is run by importing <code class="docutils literal notranslate"><span class="pre">gymnasium_env</span></code> so if it were not possible to
|
|||
|
import gymnasium_env explicitly, you could register while making by
|
|||
|
<code class="docutils literal notranslate"><span class="pre">env</span> <span class="pre">=</span> <span class="pre">gymnasium.make('gymnasium_env:gymnasium_env/GridWorld-v0')</span></code>. This
|
|||
|
is especially useful when you’re allowed to pass only the environment ID
|
|||
|
into a third-party codebase (e.g. a learning library). This lets you
|
|||
|
register your environment without needing to edit the library’s source
|
|||
|
code.</p>
|
|||
|
</section>
|
|||
|
<section id="creating-a-package">
|
|||
|
<h2>Creating a Package<a class="headerlink" href="#creating-a-package" title="Link to this heading">¶</a></h2>
|
|||
|
<p>The last step is to structure our code as a Python package. This
|
|||
|
involves configuring <code class="docutils literal notranslate"><span class="pre">pyproject.toml</span></code>. A minimal example of how
|
|||
|
to do so is as follows:</p>
|
|||
|
<div class="highlight-toml notranslate"><div class="highlight"><pre><span></span><span class="k">[build-system]</span>
|
|||
|
<span class="n">requires</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">[</span><span class="s2">"hatchling"</span><span class="p">]</span>
|
|||
|
<span class="n">build-backend</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s2">"hatchling.build"</span>
|
|||
|
|
|||
|
<span class="k">[project]</span>
|
|||
|
<span class="n">name</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s2">"gymnasium_env"</span>
|
|||
|
<span class="n">version</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s2">"0.0.1"</span>
|
|||
|
<span class="n">dependencies</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">[</span>
|
|||
|
<span class="w"> </span><span class="s2">"gymnasium"</span><span class="p">,</span>
|
|||
|
<span class="w"> </span><span class="s2">"pygame==2.1.3"</span><span class="p">,</span>
|
|||
|
<span class="w"> </span><span class="s2">"pre-commit"</span><span class="p">,</span>
|
|||
|
<span class="p">]</span>
|
|||
|
</pre></div>
|
|||
|
</div>
|
|||
|
</section>
|
|||
|
<section id="creating-environment-instances">
|
|||
|
<h2>Creating Environment Instances<a class="headerlink" href="#creating-environment-instances" title="Link to this heading">¶</a></h2>
|
|||
|
<p>Now you can install your package locally with:</p>
|
|||
|
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="go">pip install -e .</span>
|
|||
|
</pre></div>
|
|||
|
</div>
|
|||
|
<p>And you can create an instance of the environment via:</p>
|
|||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># run_gymnasium_env.py</span>
|
|||
|
|
|||
|
<span class="kn">import</span><span class="w"> </span><span class="nn">gymnasium</span>
|
|||
|
<span class="kn">import</span><span class="w"> </span><span class="nn">gymnasium_env</span>
|
|||
|
<span class="n">env</span> <span class="o">=</span> <span class="n">gymnasium</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="s1">'gymnasium_env/GridWorld-v0'</span><span class="p">)</span>
|
|||
|
</pre></div>
|
|||
|
</div>
|
|||
|
<p>You can also pass keyword arguments of your environment’s constructor to
|
|||
|
<code class="docutils literal notranslate"><span class="pre">gymnasium.make</span></code> to customize the environment. In our case, we could
|
|||
|
do:</p>
|
|||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">env</span> <span class="o">=</span> <span class="n">gymnasium</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="s1">'gymnasium_env/GridWorld-v0'</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="mi">10</span><span class="p">)</span>
|
|||
|
</pre></div>
|
|||
|
</div>
|
|||
|
<p>Sometimes, you may find it more convenient to skip registration and call
|
|||
|
the environment’s constructor yourself. Some may find this approach more
|
|||
|
pythonic and environments that are instantiated like this are also
|
|||
|
perfectly fine (but remember to add wrappers as well!).</p>
|
|||
|
</section>
|
|||
|
<section id="using-wrappers">
|
|||
|
<h2>Using Wrappers<a class="headerlink" href="#using-wrappers" title="Link to this heading">¶</a></h2>
|
|||
|
<p>Oftentimes, we want to use different variants of a custom environment,
|
|||
|
or we want to modify the behavior of an environment that is provided by
|
|||
|
Gymnasium or some other party. Wrappers allow us to do this without
|
|||
|
changing the environment implementation or adding any boilerplate code.
|
|||
|
Check out the <a class="reference external" href="/api/wrappers/">wrapper documentation</a> for details on
|
|||
|
how to use wrappers and instructions for implementing your own. In our
|
|||
|
example, observations cannot be used directly in learning code because
|
|||
|
they are dictionaries. However, we don’t actually need to touch our
|
|||
|
environment implementation to fix this! We can simply add a wrapper on
|
|||
|
top of environment instances to flatten observations into a single
|
|||
|
array:</p>
|
|||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span><span class="w"> </span><span class="nn">gymnasium</span>
|
|||
|
<span class="kn">import</span><span class="w"> </span><span class="nn">gymnasium_env</span>
|
|||
|
<span class="kn">from</span><span class="w"> </span><span class="nn">gymnasium.wrappers</span><span class="w"> </span><span class="kn">import</span> <span class="n">FlattenObservation</span>
|
|||
|
|
|||
|
<span class="n">env</span> <span class="o">=</span> <span class="n">gymnasium</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="s1">'gymnasium_env/GridWorld-v0'</span><span class="p">)</span>
|
|||
|
<span class="n">wrapped_env</span> <span class="o">=</span> <span class="n">FlattenObservation</span><span class="p">(</span><span class="n">env</span><span class="p">)</span>
|
|||
|
<span class="nb">print</span><span class="p">(</span><span class="n">wrapped_env</span><span class="o">.</span><span class="n">reset</span><span class="p">())</span> <span class="c1"># E.g. [3 0 3 3], {}</span>
|
|||
|
</pre></div>
|
|||
|
</div>
|
|||
|
<p>Wrappers have the big advantage that they make environments highly
|
|||
|
modular. For instance, instead of flattening the observations from
|
|||
|
GridWorld, you might only want to look at the relative position of the
|
|||
|
target and the agent. In the section on
|
|||
|
<a class="reference external" href="/api/wrappers/observation_wrappers/#observation-wrappers">ObservationWrappers</a> we have
|
|||
|
implemented a wrapper that does this job. This wrapper is also available
|
|||
|
in <code class="docutils literal notranslate"><span class="pre">gymnasium_env/wrappers/relative_position.py</span></code>:</p>
|
|||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span><span class="w"> </span><span class="nn">gymnasium</span>
|
|||
|
<span class="kn">import</span><span class="w"> </span><span class="nn">gymnasium_env</span>
|
|||
|
<span class="kn">from</span><span class="w"> </span><span class="nn">gymnasium_env.wrappers</span><span class="w"> </span><span class="kn">import</span> <span class="n">RelativePosition</span>
|
|||
|
|
|||
|
<span class="n">env</span> <span class="o">=</span> <span class="n">gymnasium</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="s1">'gymnasium_env/GridWorld-v0'</span><span class="p">)</span>
|
|||
|
<span class="n">wrapped_env</span> <span class="o">=</span> <span class="n">RelativePosition</span><span class="p">(</span><span class="n">env</span><span class="p">)</span>
|
|||
|
<span class="nb">print</span><span class="p">(</span><span class="n">wrapped_env</span><span class="o">.</span><span class="n">reset</span><span class="p">())</span> <span class="c1"># E.g. [-3 3], {}</span>
|
|||
|
</pre></div>
|
|||
|
</div>
|
|||
|
<div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-tutorials-gymnasium-basics-environment-creation-py">
|
|||
|
<div class="sphx-glr-download sphx-glr-download-python docutils container">
|
|||
|
<p><a class="reference download internal" download="" href="../../../_downloads/56585a5841cc0f2c5a3dea777f5b14f0/environment_creation.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">environment_creation.py</span></code></a></p>
|
|||
|
</div>
|
|||
|
<div class="sphx-glr-download sphx-glr-download-jupyter docutils container">
|
|||
|
<p><a class="reference download internal" download="" href="../../../_downloads/0f28446f9f426c9833f40d61857a6f21/environment_creation.ipynb"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Jupyter</span> <span class="pre">notebook:</span> <span class="pre">environment_creation.ipynb</span></code></a></p>
|
|||
|
</div>
|
|||
|
</div>
|
|||
|
</section>
|
|||
|
</section>
|
|||
|
|
|||
|
</article>
|
|||
|
</div>
|
|||
|
<footer>
|
|||
|
|
|||
|
<div class="related-pages">
|
|||
|
<a class="next-page" href="../vector_envs_tutorial/">
|
|||
|
<div class="page-info">
|
|||
|
<div class="context">
|
|||
|
<span>Next</span>
|
|||
|
</div>
|
|||
|
<div class="title">Training A2C with Vector Envs and Domain Randomization</div>
|
|||
|
</div>
|
|||
|
<svg class="furo-related-icon">
|
|||
|
<use href="#svg-arrow-right"></use>
|
|||
|
</svg>
|
|||
|
</a>
|
|||
|
<a class="prev-page" href="../implementing_custom_wrappers/">
|
|||
|
<svg class="furo-related-icon">
|
|||
|
<use href="#svg-arrow-right"></use>
|
|||
|
</svg>
|
|||
|
<div class="page-info">
|
|||
|
<div class="context">
|
|||
|
<span>Previous</span>
|
|||
|
</div>
|
|||
|
|
|||
|
<div class="title">Implementing Custom Wrappers</div>
|
|||
|
|
|||
|
</div>
|
|||
|
</a>
|
|||
|
</div>
|
|||
|
<div class="bottom-of-page">
|
|||
|
<div class="left-details">
|
|||
|
<div class="copyright">
|
|||
|
Copyright © 2025 Farama Foundation
|
|||
|
</div>
|
|||
|
<!--
|
|||
|
Made with <a href="https://www.sphinx-doc.org/">Sphinx</a> and <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s
|
|||
|
|
|||
|
<a href="https://github.com/pradyunsg/furo">Furo</a>
|
|||
|
-->
|
|||
|
</div>
|
|||
|
<div class="right-details">
|
|||
|
<div class="icons">
|
|||
|
<a class="muted-link" href="https://github.com/Farama-Foundation/Gymnasium/"
|
|||
|
aria-label="On GitHub">
|
|||
|
<svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 16 16">
|
|||
|
<path fill-rule="evenodd"
|
|||
|
d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0 0 16 8c0-4.42-3.58-8-8-8z">
|
|||
|
</path>
|
|||
|
</svg>
|
|||
|
</a>
|
|||
|
</div>
|
|||
|
</div>
|
|||
|
</div>
|
|||
|
|
|||
|
</footer>
|
|||
|
</div>
|
|||
|
<aside class="toc-drawer">
|
|||
|
|
|||
|
|
|||
|
<div class="toc-sticky toc-scroll">
|
|||
|
<div class="toc-title-container">
|
|||
|
<span class="toc-title">
|
|||
|
On this page
|
|||
|
</span>
|
|||
|
</div>
|
|||
|
<div class="toc-tree-container">
|
|||
|
<div class="toc-tree">
|
|||
|
<ul>
|
|||
|
<li><a class="reference internal" href="#">Make your own custom environment</a><ul>
|
|||
|
<li><a class="reference internal" href="#setup">Setup</a><ul>
|
|||
|
<li><a class="reference internal" href="#recommended-solution">Recommended solution</a></li>
|
|||
|
<li><a class="reference internal" href="#alternative-solutions">Alternative solutions</a></li>
|
|||
|
</ul>
|
|||
|
</li>
|
|||
|
<li><a class="reference internal" href="#generate-your-environment">Generate your environment</a></li>
|
|||
|
<li><a class="reference internal" href="#subclassing-gymnasium-env">Subclassing gymnasium.Env</a><ul>
|
|||
|
<li><a class="reference internal" href="#declaration-and-initialization">Declaration and Initialization</a></li>
|
|||
|
<li><a class="reference internal" href="#constructing-observations-from-environment-states">Constructing Observations From Environment States</a></li>
|
|||
|
<li><a class="reference internal" href="#reset">Reset</a></li>
|
|||
|
<li><a class="reference internal" href="#step">Step</a></li>
|
|||
|
<li><a class="reference internal" href="#rendering">Rendering</a></li>
|
|||
|
<li><a class="reference internal" href="#close">Close</a></li>
|
|||
|
</ul>
|
|||
|
</li>
|
|||
|
<li><a class="reference internal" href="#registering-envs">Registering Envs</a></li>
|
|||
|
<li><a class="reference internal" href="#creating-a-package">Creating a Package</a></li>
|
|||
|
<li><a class="reference internal" href="#creating-environment-instances">Creating Environment Instances</a></li>
|
|||
|
<li><a class="reference internal" href="#using-wrappers">Using Wrappers</a></li>
|
|||
|
</ul>
|
|||
|
</li>
|
|||
|
</ul>
|
|||
|
|
|||
|
</div>
|
|||
|
</div>
|
|||
|
</div>
|
|||
|
|
|||
|
|
|||
|
</aside>
|
|||
|
</div>
|
|||
|
</div>
|
|||
|
</div>
|
|||
|
<script>
|
|||
|
/**
 * Opens or closes the Farama header menu.
 *
 * Flips the "active" class on `.farama-header-menu` and keeps the ARIA state
 * of the menu button (`aria-expanded`) and of the menu container
 * (`aria-hidden`) in step with the visibility the menu is about to have.
 */
const toggleMenu = () => {
  const button = document.querySelector(".farama-header-menu__btn");
  const container = document.querySelector(".farama-header-menu-container");
  const menu = document.querySelector(".farama-header-menu");

  // State *before* the toggle: a menu that is open now is about to close.
  const wasOpen = menu.classList.contains("active");

  button.setAttribute("aria-expanded", wasOpen ? "false" : "true");
  container.setAttribute("aria-hidden", wasOpen ? "true" : "false");
  menu.classList.toggle("active");
};
|
|||
|
|
|||
|
// Both menu controls (the header button and the element with id
// "farama-close-menu") share one click handler, so either flips the menu.
for (const control of [
  document.querySelector(".farama-header-menu__btn"),
  document.getElementById("farama-close-menu"),
]) {
  control.addEventListener("click", toggleMenu);
}
|
|||
|
</script>
|
|||
|
|
|||
|
|
|||
|
<script async src="https://www.googletagmanager.com/gtag/js?id=G-6H9C8TWXZ8"></script>
|
|||
|
<script>
|
|||
|
/**
 * Starts Google Analytics reporting for measurement id G-6H9C8TWXZ8.
 *
 * Makes sure `window.dataLayer` exists (an existing queue is reused) and
 * pushes the two bootstrap commands, `js` with the current date and `config`
 * with the measurement id, onto it.
 */
const enableGtag = () => {
  if (!window.dataLayer) {
    window.dataLayer = [];
  }

  // Each command is queued as the raw `arguments` object, not an array.
  // NOTE(review): this mirrors Google's stock snippet; confirm gtag.js
  // accepts arrays before replacing this with rest parameters.
  const gtag = function () {
    window.dataLayer.push(arguments);
  };

  gtag('js', new Date());
  gtag('config', 'G-6H9C8TWXZ8');
};
|
|||
|
// Cookie-consent gate for Google Analytics.
//
// The visitor's choice is stored in localStorage under "acceptedCookieAlert":
//   - nothing stored -> show the consent banner with "Deny" / "Allow" buttons
//   - stored "true"  -> analytics is enabled immediately
//   - anything else  -> nothing happens
(() => {
  const storedChoice = localStorage.getItem("acceptedCookieAlert");

  if (storedChoice) {
    if (storedChoice === "true") {
      enableGtag();
    }
    return;
  }

  const banner = document.createElement("div");
  banner.classList.add("cookie-alert");

  const bannerBody = document.createElement("div");
  bannerBody.classList.add("cookie-alert__container");

  const message = document.createElement("p");
  // Static markup only; the line break inside the link is part of the text.
  message.innerHTML = `This page uses <a href="https://analytics.google.com/">
Google Analytics</a> to collect statistics.`;
  bannerBody.appendChild(message);

  // Builds one banner button and attaches its click handler.
  const makeButton = (label, id, onClick) => {
    const button = document.createElement("button");
    button.innerText = label;
    button.className = "farama-btn cookie-alert__button";
    button.id = id;
    button.addEventListener("click", onClick);
    return button;
  };

  const declineBtn = makeButton("Deny", "cookie-alert__decline", () => {
    // localStorage stringifies the value, so this is stored as "false".
    localStorage.setItem("acceptedCookieAlert", false);
    banner.remove();
  });

  const acceptBtn = makeButton("Allow", "cookie-alert__accept", () => {
    localStorage.setItem("acceptedCookieAlert", true);
    banner.remove();
    enableGtag();
  });

  bannerBody.appendChild(declineBtn);
  bannerBody.appendChild(acceptBtn);
  banner.appendChild(bannerBody);
  document.body.appendChild(banner);
})();
|
|||
|
</script>
|
|||
|
|
|||
|
<script src="../../../_static/documentation_options.js?v=25d39d6f"></script>
|
|||
|
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
|||
|
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
|||
|
<script src="../../../_static/scripts/furo.js?v=7660844c"></script>
|
|||
|
|
|||
|
<script>
|
|||
|
|
|||
|
/**
 * Builds the `<ul class="farama-header-menu-list">` for one menu section.
 *
 * Every project becomes `<li><a href=project.link>[<img>] name</a></li>`.
 *
 * @param {Iterable<{name: string, link: string, image?: string}>} projects
 *   Entries to list; `image` is a path appended to `imagesBasepath`.
 * @param {boolean} displayImages
 *   When truthy, each link starts with a logo; entries without `image` fall
 *   back to "/farama_black.svg".
 * @returns {HTMLUListElement} The populated list element.
 */
const createProjectsList = (projects, displayImages) => {
  const listElem = document.createElement('ul');
  listElem.className = 'farama-header-menu-list';

  for (const project of projects) {
    const anchor = document.createElement("a");
    anchor.href = project.link;

    if (displayImages) {
      const logo = document.createElement("img");
      // `imagesBasepath` is a file-level constant defined later in this
      // script; it is only read once this function is actually called.
      logo.src = imagesBasepath + (project.image || "/farama_black.svg");
      logo.alt = `${project.name} logo`;
      logo.className = "farama-black-logo-invert";
      anchor.appendChild(logo);
    }
    anchor.appendChild(document.createTextNode(project.name));

    const item = document.createElement("li");
    item.appendChild(anchor);
    listElem.appendChild(item);
  }

  return listElem;
};
|
|||
|
|
|||
|
// Create menu with Farama projects by using the API at farama.org/api/projects.json
|
|||
|
/**
 * Creates an opened (but not yet sent) cross-origin request object.
 *
 * @param {string} method HTTP method, e.g. "GET".
 * @param {string} url    Absolute URL to request.
 * @returns {?XMLHttpRequest} An `XMLHttpRequest` with `responseType` "json"
 *   opened asynchronously when the object exposes `withCredentials`; an
 *   opened `XDomainRequest` when only that legacy global exists; `null` when
 *   neither is available.
 */
const createCORSRequest = (method, url) => {
  const request = new XMLHttpRequest();
  request.responseType = 'json';

  if ("withCredentials" in request) {
    request.open(method, url, true);
    return request;
  }

  if (typeof XDomainRequest === "undefined") {
    // CORS not supported.
    return null;
  }

  // IE8 & IE9
  const legacyRequest = new XDomainRequest();
  legacyRequest.open(method, url);
  return legacyRequest;
};
|
|||
|
|
|||
|
// Endpoint listing the Farama projects and base path their logo paths are
// appended to. `imagesBasepath` is also read by createProjectsList.
const url = 'https://farama.org/api/projects.json';
const imagesBasepath = "https://farama.org/assets/images";
const method = 'GET';
let xhr = createCORSRequest(method, url);

if (xhr === null) {
  // createCORSRequest returns null when no CORS-capable request object is
  // available; report it instead of failing on `xhr.onload = ...`.
  console.error("Unable to load projects: CORS requests are not supported");
} else {
  /**
   * Sorts the API entries into the menu sections.
   *
   * Each entry gets a `link` (its `website`, or its `github` URL when
   * `website` is null). Entries of type "core" go to "Core Projects"; type
   * "mature" and all remaining types go to "Mature Projects" and
   * "Incubating Projects" respectively, under "Documentation" when they have
   * a website and under "Repositories" otherwise. "Foundation" is a fixed
   * list of links.
   *
   * @param {Object<string, Object>} projectsByKey Parsed API response.
   * @returns {Object} Section title -> project array, or -> an object of
   *   sub-section title -> project array.
   */
  const categorizeProjects = (projectsByKey) => {
    const sections = {
      "Core Projects": [],
      "Mature Projects": {
        "Documentation": [],
        "Repositories": [],
      },
      "Incubating Projects": {
        "Documentation": [],
        "Repositories": [],
      },
      "Foundation": [
        {
          name: "About",
          link: "https://farama.org/about",
        },
        {
          name: "Standards",
          link: "https://farama.org/project_standards",
        },
        {
          name: "Donate",
          link: "https://farama.org/donations",
        },
      ],
    };

    Object.keys(projectsByKey).forEach((key) => {
      const rawProject = projectsByKey[key];
      const hasWebsite = rawProject.website !== null;
      // Work on a copy so the response object itself is left untouched.
      const project = Object.assign({}, rawProject, {
        link: hasWebsite ? rawProject.website : rawProject.github,
      });
      const subSection = hasWebsite ? "Documentation" : "Repositories";

      if (project.type === "core") {
        sections["Core Projects"].push(project);
      } else if (project.type === "mature") {
        sections["Mature Projects"][subSection].push(project);
      } else {
        sections["Incubating Projects"][subSection].push(project);
      }
    });

    return sections;
  };

  /**
   * Renders every section (title plus project list, or title plus one
   * titled list per sub-section) and appends it to `menuContainer`.
   *
   * @param {Object} sections       Result of categorizeProjects.
   * @param {Element} menuContainer Element receiving the section elements.
   */
  const renderSections = (sections, menuContainer) => {
    Object.keys(sections).forEach((key) => {
      const sectionElem = Object.assign(
        document.createElement('div'), {
          className: 'farama-header-menu__section',
        }
      );
      sectionElem.appendChild(Object.assign(document.createElement('span'),
        {
          className: 'farama-header-menu__section-title',
          innerText: key,
        }
      ));

      const content = sections[key];
      if (Array.isArray(content)) {
        sectionElem.appendChild(createProjectsList(content, true));
      } else {
        // Not a list: an object of named sub-sections laid out side by side.
        const subSectionContainerElem = Object.assign(
          document.createElement('div'), {
            className: 'farama-header-menu__subsections-container',
            style: 'display: flex',
          }
        );
        Object.keys(content).forEach((subKey) => {
          const subSectionElem = Object.assign(
            document.createElement('div'), {
              className: 'farama-header-menu__subsection',
            }
          );
          subSectionElem.appendChild(Object.assign(document.createElement('span'),
            {
              className: 'farama-header-menu__subsection-title',
              innerText: subKey,
            }
          ));
          // NOTE(review): "Foundation" is an array, so it never reaches this
          // branch and this flag is always true here; kept as-is. Confirm
          // whether Foundation links were meant to be rendered without logos.
          const ulElem = createProjectsList(content[subKey], key !== 'Foundation');
          subSectionElem.appendChild(ulElem);
          subSectionContainerElem.appendChild(subSectionElem);
        });
        sectionElem.appendChild(subSectionContainerElem);
      }

      menuContainer.appendChild(sectionElem);
    });
  };

  xhr.onload = () => {
    const jsonResponse = xhr.response;
    // With responseType "json" the response is null when the body cannot be
    // parsed, and the legacy request object has no `response` at all; either
    // way Object.keys() would throw, so bail out with a message instead.
    if (jsonResponse === null || typeof jsonResponse !== "object") {
      console.error("Unable to load projects: empty or invalid response");
      return;
    }

    renderSections(
      categorizeProjects(jsonResponse),
      document.querySelector(".farama-header-menu__body")
    );
  };

  xhr.onerror = function() {
    console.error("Unable to load projects");
  };

  xhr.send();
}
|
|||
|
</script>
|
|||
|
|
|||
|
|
|||
|
<script>
|
|||
|
// Repository coordinates for the versioning menu.
// NOTE(review): nothing in this script reads this object; presumably the
// scripts inside the injected menu HTML do. Confirm before renaming.
const versioningConfig = {
  githubUser: 'Farama-Foundation',
  githubRepo: 'Gymnasium',
};

// Load the shared versioning menu and inject it at the end of <body>.
// Failures of any kind are reported as a console warning and never as an
// unhandled promise rejection.
fetch('/main/_static/versioning/versioning_menu.html')
  .then((response) => {
    if (response.status !== 200) {
      console.warn("Unable to load versioning menu", response);
      return null;
    }
    return response.text();
  })
  .then((text) => {
    if (text === null) {
      // Already reported above.
      return;
    }

    const container = document.createElement("div");
    container.innerHTML = text;
    document.querySelector("body").appendChild(container);

    // innerHTML doesn't evaluate scripts, so each <script> is replaced by a
    // freshly created copy (same attributes, same source text), which the
    // browser does execute.
    Array.from(container.querySelectorAll("script")).forEach((oldScript) => {
      const newScript = document.createElement("script");
      Array.from(oldScript.attributes).forEach((attr) => newScript.setAttribute(attr.name, attr.value));
      newScript.appendChild(document.createTextNode(oldScript.innerHTML));
      oldScript.parentNode.replaceChild(newScript, oldScript);
    });
  })
  .catch((error) => {
    // fetch() rejects on network-level failures; without this handler the
    // rejection would go unhandled.
    console.warn("Unable to load versioning menu", error);
  });
|
|||
|
</script>
|
|||
|
|
|||
|
</body>
|
|||
|
</html>
|