mirror of
https://github.com/Farama-Foundation/Gymnasium.git
synced 2025-08-21 14:30:27 +00:00
1211 lines
93 KiB
HTML
1211 lines
93 KiB
HTML
<!doctype html>
|
||
<html class="no-js" lang="en">
|
||
<head><meta charset="utf-8"/>
|
||
<meta name="viewport" content="width=device-width,initial-scale=1"/>
|
||
<meta name="color-scheme" content="light dark">
|
||
<meta name="description" content="A standard API for reinforcement learning and a diverse set of reference environments (formerly Gym)">
|
||
<meta property="og:title" content="Gymnasium Documentation" />
|
||
<meta property="og:type" content="website" />
|
||
<meta property="og:description" content="A standard API for reinforcement learning and a diverse set of reference environments (formerly Gym)" />
|
||
<meta property="og:url" content="https://gymnasium.farama.org/tutorials/environment_creation.html" /><meta property="og:image" content="https://gymnasium.farama.org/_static/img/gymnasium-github.png" /><meta name="twitter:card" content="summary_large_image"><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||
<link rel="index" title="Index" href="../../genindex/" /><link rel="search" title="Search" href="../../search/" /><link rel="next" title="Handling Time Limits" href="../handling_time_limits/" /><link rel="prev" title="Solving Blackjack with Q-Learning" href="../blackjack_tutorial/" />
|
||
<link rel="canonical" href="https://gymnasium.farama.org/tutorials/environment_creation.html" />
|
||
|
||
<link rel="shortcut icon" href="../../_static/favicon.png"/><meta name="generator" content="sphinx-5.3.0, furo 2022.09.15.dev1"/>
|
||
<title>Make your own custom environment - Gymnasium Documentation</title>
|
||
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
|
||
<link rel="stylesheet" type="text/css" href="../../_static/styles/furo.css?digest=9ec31e2665bf879c1d47d93a8ec4893870ee1e45" />
|
||
<link rel="stylesheet" type="text/css" href="../../_static/styles/furo-extensions.css?digest=a614025deca43086db03c234d5a3a2047a0241ae" />
|
||
|
||
|
||
|
||
|
||
<style>
|
||
body {
|
||
--color-code-background: #f8f8f8;
|
||
--color-code-foreground: black;
|
||
|
||
}
|
||
@media not print {
|
||
body[data-theme="dark"] {
|
||
--color-code-background: #202020;
|
||
--color-code-foreground: #d0d0d0;
|
||
|
||
}
|
||
@media (prefers-color-scheme: dark) {
|
||
body:not([data-theme="light"]) {
|
||
--color-code-background: #202020;
|
||
--color-code-foreground: #d0d0d0;
|
||
|
||
}
|
||
}
|
||
}
|
||
</style></head>
|
||
<body>
|
||
<header class="farama-header">
|
||
<div class="farama-header__container">
|
||
<div class="farama-header__left">
|
||
<a href="../../">
|
||
<img class="farama-header__logo only-light" src="../../_static/img/gymnasium_black.svg" alt=""/>
|
||
<img class="farama-header__logo only-dark" src="../../_static/img/gymnasium_white.svg" alt=""/>
|
||
<h1 class="farama-header__title">Gymnasium Documentation</h1>
|
||
</a>
|
||
</div>
|
||
<div class="farama-header__right">
|
||
<div class="farama-header-menu">
|
||
<div class="farama-header-menu__btn">
|
||
<span class="farama-header-menu__btn-name">
|
||
Farama Foundation
|
||
</span>
|
||
<svg viewBox="0 0 32 32" xmlns="http://www.w3.org/2000/svg" xmlns:bx="https://boxy-svg.com">
|
||
<defs></defs>
|
||
<path d="M 3 4.677 C 3 3.751 3.659 3 4.474 3 L 27.526 3 C 28.341 3 29 3.751 29 4.677 C 29 5.603 28.341 6.354 27.526 6.354 L 4.474 6.354 C 3.659 6.354 3 5.603 3 4.677 Z" bx:origin="0.622825 3.875593"></path>
|
||
<path d="M 3 16 C 3 15.074 3.659 14.323 4.474 14.323 L 27.526 14.323 C 28.341 14.323 29 15.074 29 16 C 29 16.926 28.341 17.677 27.526 17.677 L 4.474 17.677 C 3.659 17.677 3 16.926 3 16 Z" bx:origin="0.622825 0.5"></path>
|
||
<path d="M 3 27.323 C 3 26.397 3.659 25.646 4.474 25.646 L 27.526 25.646 C 28.341 25.646 29 26.397 29 27.323 C 29 28.249 28.341 29 27.526 29 L 4.474 29 C 3.659 29 3 28.249 3 27.323 Z" bx:origin="0.622825 -2.875591"></path>
|
||
</svg>
|
||
</div>
|
||
<div class="farama-header-menu-container">
|
||
<div class="farama-header-menu__header">
|
||
<a href="https://farama.org">
|
||
<img class="farama-header-menu__logo" src="../../_static/img/farama_solid_white.svg" alt="Farama Foundation">
|
||
<span>Farama Foundation</span>
|
||
</a>
|
||
<!-- Icon-only button (the X glyph below). type="button" avoids the implicit "submit" default,
     and aria-label supplies the accessible name the icon cannot provide.
     NOTE(review): presumably bound by script to close the Farama menu; the handler is not visible here. -->
<button id="farama-close-menu" type="button" aria-label="Close menu">
  <svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg" fill="none" stroke="currentColor"
       stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="icon-close" aria-hidden="true">
    <line x1="3" y1="21" x2="21" y2="3"></line>
    <line x1="3" y1="3" x2="21" y2="21"></line>
  </svg>
</button>
|
||
</div>
|
||
<div class="farama-header-menu__body">
|
||
<div class="farama-header-menu__section" style="padding-left: 24px;" >
|
||
<span class="farama-header-menu__section-title">Documentation</span>
|
||
<ul class="farama-header-menu-list">
|
||
<li>
|
||
<a href="https://gymnasium.farama.org">
|
||
|
||
<img src="../../_static/img/gymnasium-white.svg" alt="">
|
||
|
||
Gymnasium
|
||
</a>
|
||
</li>
|
||
<li>
|
||
<a href="https://pettingzoo.farama.org">
|
||
|
||
<img src="../../_static/img/pettingzoo-white.svg" alt="">
|
||
|
||
PettingZoo
|
||
</a>
|
||
</li>
|
||
<li>
|
||
<a href="https://minigrid.farama.org">
|
||
|
||
<img src="../../_static/img/minigrid-white.svg" alt="">
|
||
|
||
MiniGrid
|
||
</a>
|
||
</li>
|
||
<li>
|
||
<a href="https://robotics.farama.org">
|
||
|
||
<img src="../../_static/img/gymrobotics-white.svg" alt="">
|
||
|
||
Gymnasium-Robotics
|
||
</a>
|
||
</li>
|
||
|
||
</ul>
|
||
</div>
|
||
<div class="farama-header-menu__section" style="padding-left: 24px;" >
|
||
<span class="farama-header-menu__section-title">Mature Projects</span>
|
||
<ul class="farama-header-menu-list">
|
||
<li>
|
||
<a href="https://github.com/Farama-Foundation/SuperSuit">
|
||
|
||
<img src="../../_static/img/supersuit-white.svg" alt="">
|
||
|
||
SuperSuit
|
||
</a>
|
||
</li>
|
||
<li>
|
||
<a href="https://github.com/Farama-Foundation/tinyscaler">
|
||
|
||
<img src="../../_static/img/tinyscaler-white.svg" alt="">
|
||
|
||
Tinyscaler
|
||
</a>
|
||
</li>
|
||
<li>
|
||
<a href="https://github.com/Farama-Foundation/AutoROM">
|
||
|
||
<img src="../../_static/img/autorom-white.svg" alt="">
|
||
|
||
AutoROM
|
||
</a>
|
||
</li>
|
||
<li>
|
||
<a href="https://github.com/Farama-Foundation/Jumpy">
|
||
|
||
<img src="../../_static/img/jumpy-white.svg" alt="">
|
||
|
||
JumPy
|
||
</a>
|
||
</li>
|
||
|
||
</ul>
|
||
</div>
|
||
<div class="farama-header-menu__section" style="padding-left: 24px;" >
|
||
<span class="farama-header-menu__section-title">Incubating Projects</span>
|
||
<ul class="farama-header-menu-list">
|
||
<li>
|
||
<a href="https://github.com/Farama-Foundation/MAgent2">
|
||
|
||
<img src="../../_static/img/MAgent2-white.svg" alt="">
|
||
|
||
MAgent2
|
||
</a>
|
||
</li>
|
||
<li>
|
||
<a href="https://github.com/Farama-Foundation/procgen2">
|
||
|
||
<img src="../../_static/img/procgen2-white.svg" alt="">
|
||
|
||
Procgen2
|
||
</a>
|
||
</li>
|
||
<li>
|
||
<a href="https://github.com/Farama-Foundation/MiniWorld">
|
||
|
||
<img src="../../_static/img/miniworld-white.svg" alt="">
|
||
|
||
Miniworld
|
||
</a>
|
||
</li>
|
||
<li>
|
||
<a href="https://github.com/Farama-Foundation/D4RL">
|
||
|
||
<img src="../../_static/img/d4rl-white.svg" alt="">
|
||
|
||
D4RL
|
||
</a>
|
||
</li>
|
||
<li>
|
||
<a href="https://github.com/Farama-Foundation/Kabuki">
|
||
|
||
<img src="../../_static/img/kabuki-white.svg" alt="">
|
||
|
||
Kabuki
|
||
</a>
|
||
</li>
|
||
|
||
</ul>
|
||
</div>
|
||
<div class="farama-header-menu__section" style="padding-left: 24px;" >
|
||
<span class="farama-header-menu__section-title">Foundation</span>
|
||
<ul class="farama-header-menu-list">
|
||
<li>
|
||
<a href="https://farama.org/about">
|
||
|
||
About
|
||
</a>
|
||
</li>
|
||
<li>
|
||
<a href="https://farama.org/project_standards">
|
||
|
||
Standards
|
||
</a>
|
||
</li>
|
||
<li>
|
||
<a href="https://farama.org/donations">
|
||
|
||
Donate
|
||
</a>
|
||
</li>
|
||
|
||
</ul>
|
||
</div>
|
||
|
||
|
||
</div>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
</header>
|
||
|
||
<div class="farama-header-menu__overlay"></div>
|
||
|
||
|
||
<script>
  // Set <body data-theme="..."> from the preference saved under the "theme" key.
  // Falls back to "auto" when nothing is stored, or when reading localStorage
  // throws (storage access can be blocked by browser policy).
  (function () {
    var theme = "auto";
    try {
      theme = localStorage.getItem("theme") || "auto";
    } catch (err) {
      // Storage unavailable: keep the "auto" default.
    }
    document.body.dataset.theme = theme;
  })();
</script>
|
||
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
|
||
<symbol id="svg-toc" viewBox="0 0 24 24">
|
||
<title>Contents</title>
|
||
<svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 1024 1024">
|
||
<path d="M408 442h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8zm-8 204c0 4.4 3.6 8 8 8h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56zm504-486H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zm0 632H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zM115.4 518.9L271.7 642c5.8 4.6 14.4.5 14.4-6.9V388.9c0-7.4-8.5-11.5-14.4-6.9L115.4 505.1a8.74 8.74 0 0 0 0 13.8z"/>
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-menu" viewBox="0 0 24 24">
|
||
<title>Menu</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
|
||
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-menu">
|
||
<line x1="3" y1="12" x2="21" y2="12"></line>
|
||
<line x1="3" y1="6" x2="21" y2="6"></line>
|
||
<line x1="3" y1="18" x2="21" y2="18"></line>
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-arrow-right" viewBox="0 0 24 24">
|
||
<title>Expand</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
|
||
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-chevron-right">
|
||
<polyline points="9 18 15 12 9 6"></polyline>
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-sun" viewBox="0 0 24 24">
|
||
<title>Light mode</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
|
||
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="feather-sun">
|
||
<circle cx="12" cy="12" r="5"></circle>
|
||
<line x1="12" y1="1" x2="12" y2="3"></line>
|
||
<line x1="12" y1="21" x2="12" y2="23"></line>
|
||
<line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
|
||
<line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
|
||
<line x1="1" y1="12" x2="3" y2="12"></line>
|
||
<line x1="21" y1="12" x2="23" y2="12"></line>
|
||
<line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
|
||
<line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-moon" viewBox="0 0 24 24">
|
||
<title>Dark mode</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
|
||
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-moon">
|
||
<path stroke="none" d="M0 0h24v24H0z" fill="none" />
|
||
<path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z" />
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-sun-half" viewBox="0 0 24 24">
|
||
<title>Auto light/dark mode</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
|
||
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-shadow">
|
||
<path stroke="none" d="M0 0h24v24H0z" fill="none"/>
|
||
<circle cx="12" cy="12" r="9" />
|
||
<path d="M13 12h5" />
|
||
<path d="M13 15h4" />
|
||
<path d="M13 18h1" />
|
||
<path d="M13 9h4" />
|
||
<path d="M13 6h1" />
|
||
</svg>
|
||
</symbol>
|
||
</svg>
|
||
|
||
<input type="checkbox" class="sidebar-toggle" name="__navigation" id="__navigation">
|
||
<input type="checkbox" class="sidebar-toggle" name="__toc" id="__toc">
|
||
<label class="overlay sidebar-overlay" for="__navigation">
|
||
<div class="visually-hidden">Hide navigation sidebar</div>
|
||
</label>
|
||
<label class="overlay toc-overlay" for="__toc">
|
||
<div class="visually-hidden">Hide table of contents sidebar</div>
|
||
</label>
|
||
|
||
|
||
|
||
<div class="page">
|
||
<header class="mobile-header">
|
||
<div class="header-left">
|
||
<label class="nav-overlay-icon" for="__navigation">
|
||
<div class="visually-hidden">Toggle site navigation sidebar</div>
|
||
<i class="icon"><svg><use href="#svg-menu"></use></svg></i>
|
||
</label>
|
||
</div>
|
||
<div class="header-center">
|
||
<a href="../../"><div class="brand">Gymnasium Documentation</div></a>
|
||
</div>
|
||
<div class="header-right">
|
||
<div class="theme-toggle-container theme-toggle-header">
|
||
<button class="theme-toggle">
|
||
<div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
|
||
<svg class="theme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
|
||
<svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg>
|
||
<svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
|
||
</button>
|
||
</div>
|
||
<label class="toc-overlay-icon toc-header-icon" for="__toc">
|
||
<div class="visually-hidden">Toggle table of contents sidebar</div>
|
||
<i class="icon"><svg><use href="#svg-toc"></use></svg></i>
|
||
</label>
|
||
</div>
|
||
</header>
|
||
<aside class="sidebar-drawer">
|
||
<div class="sidebar-container">
|
||
|
||
<div class="sidebar-sticky"><form class="sidebar-search-container" method="get" action="../../search/" role="search">
|
||
<input class="sidebar-search" placeholder="Search" name="q" aria-label="Search">
|
||
<input type="hidden" name="check_keywords" value="yes">
|
||
<input type="hidden" name="area" value="default">
|
||
</form>
|
||
<div id="searchbox"></div><div class="sidebar-scroll"><div class="sidebar-tree">
|
||
<p class="caption" role="heading"><span class="caption-text">Introduction</span></p>
|
||
<ul>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../content/basic_usage/">Basic Usage</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../content/gym_compatibility/">Compatibility with Gym</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../content/migration-guide/">v21 to v26 Migration Guide</a></li>
|
||
</ul>
|
||
<p class="caption" role="heading"><span class="caption-text">API</span></p>
|
||
<ul>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../api/env/">Env</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../api/registry/">Registry</a></li>
|
||
<li class="toctree-l1 has-children"><a class="reference internal" href="../../api/spaces/">Spaces</a><input class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" role="switch" type="checkbox"/><label for="toctree-checkbox-1"><div class="visually-hidden">Toggle child pages in navigation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../api/spaces/fundamental/">Fundamental Spaces</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../api/spaces/composite/">Composite Spaces</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../api/spaces/utils/">Spaces Utils</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../api/spaces/vector_utils/">Spaces Vector Utils</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l1 has-children"><a class="reference internal" href="../../api/wrappers/">Wrappers</a><input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" role="switch" type="checkbox"/><label for="toctree-checkbox-2"><div class="visually-hidden">Toggle child pages in navigation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../api/wrappers/misc_wrappers/">Misc Wrappers</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../api/wrappers/action_wrappers/">Action Wrappers</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../api/wrappers/observation_wrappers/">Observation Wrappers</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../api/wrappers/reward_wrappers/">Reward Wrappers</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../api/vector/">Vector</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../api/utils/">Utils</a></li>
|
||
</ul>
|
||
<p class="caption" role="heading"><span class="caption-text">Environments</span></p>
|
||
<ul>
|
||
<li class="toctree-l1 has-children"><a class="reference internal" href="../../environments/classic_control/">Classic Control</a><input class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" role="switch" type="checkbox"/><label for="toctree-checkbox-3"><div class="visually-hidden">Toggle child pages in navigation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/classic_control/acrobot/">Acrobot</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/classic_control/cart_pole/">Cart Pole</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/classic_control/mountain_car_continuous/">Mountain Car Continuous</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/classic_control/mountain_car/">Mountain Car</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/classic_control/pendulum/">Pendulum</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l1 has-children"><a class="reference internal" href="../../environments/box2d/">Box2D</a><input class="toctree-checkbox" id="toctree-checkbox-4" name="toctree-checkbox-4" role="switch" type="checkbox"/><label for="toctree-checkbox-4"><div class="visually-hidden">Toggle child pages in navigation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/box2d/bipedal_walker/">Bipedal Walker</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/box2d/car_racing/">Car Racing</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/box2d/lunar_lander/">Lunar Lander</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l1 has-children"><a class="reference internal" href="../../environments/toy_text/">Toy Text</a><input class="toctree-checkbox" id="toctree-checkbox-5" name="toctree-checkbox-5" role="switch" type="checkbox"/><label for="toctree-checkbox-5"><div class="visually-hidden">Toggle child pages in navigation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/toy_text/blackjack/">Blackjack</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/toy_text/taxi/">Taxi</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/toy_text/cliff_walking/">Cliff Walking</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/toy_text/frozen_lake/">Frozen Lake</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l1 has-children"><a class="reference internal" href="../../environments/mujoco/">MuJoCo</a><input class="toctree-checkbox" id="toctree-checkbox-6" name="toctree-checkbox-6" role="switch" type="checkbox"/><label for="toctree-checkbox-6"><div class="visually-hidden">Toggle child pages in navigation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/mujoco/ant/">Ant</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/mujoco/half_cheetah/">Half Cheetah</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/mujoco/hopper/">Hopper</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/mujoco/humanoid_standup/">Humanoid Standup</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/mujoco/humanoid/">Humanoid</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/mujoco/inverted_double_pendulum/">Inverted Double Pendulum</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/mujoco/inverted_pendulum/">Inverted Pendulum</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/mujoco/reacher/">Reacher</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/mujoco/swimmer/">Swimmer</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/mujoco/pusher/">Pusher</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/mujoco/walker2d/">Walker2D</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l1 has-children"><a class="reference internal" href="../../environments/atari/">Atari</a><input class="toctree-checkbox" id="toctree-checkbox-7" name="toctree-checkbox-7" role="switch" type="checkbox"/><label for="toctree-checkbox-7"><div class="visually-hidden">Toggle child pages in navigation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/adventure/">Adventure</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/air_raid/">Air Raid</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/alien/">Alien</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/amidar/">Amidar</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/assault/">Assault</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/asterix/">Asterix</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/asteroids/">Asteroids</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/atlantis/">Atlantis</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/bank_heist/">Bank Heist</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/battle_zone/">Battle Zone</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/beam_rider/">Beam Rider</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/berzerk/">Berzerk</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/bowling/">Bowling</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/boxing/">Boxing</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/breakout/">Breakout</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/carnival/">Carnival</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/centipede/">Centipede</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/chopper_command/">Chopper Command</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/crazy_climber/">Crazy Climber</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/defender/">Defender</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/demon_attack/">Demon Attack</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/double_dunk/">Double Dunk</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/elevator_action/">Elevator Action</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/enduro/">Enduro</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/fishing_derby/">FishingDerby</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/freeway/">Freeway</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/frostbite/">Frostbite</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/gopher/">Gopher</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/gravitar/">Gravitar</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/hero/">Hero</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/ice_hockey/">IceHockey</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/jamesbond/">Jamesbond</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/journey_escape/">JourneyEscape</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/kangaroo/">Kangaroo</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/krull/">Krull</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/kung_fu_master/">Kung Fu Master</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/montezuma_revenge/">Montezuma Revenge</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/ms_pacman/">Ms Pacman</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/name_this_game/">Name This Game</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/phoenix/">Phoenix</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/pitfall/">Pitfall</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/pong/">Pong</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/pooyan/">Pooyan</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/private_eye/">PrivateEye</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/qbert/">Qbert</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/riverraid/">Riverraid</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/road_runner/">Road Runner</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/robotank/">Robot Tank</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/seaquest/">Seaquest</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/skiing/">Skiing</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/solaris/">Solaris</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/space_invaders/">SpaceInvaders</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/star_gunner/">StarGunner</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/tennis/">Tennis</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/time_pilot/">TimePilot</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/tutankham/">Tutankham</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/up_n_down/">Up n’ Down</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/venture/">Venture</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/video_pinball/">Video Pinball</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/wizard_of_wor/">Wizard of Wor</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/zaxxon/">Zaxxon</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../environments/third_party_environments/">Third-Party Environments</a></li>
|
||
</ul>
|
||
<p class="caption" role="heading"><span class="caption-text">Tutorials</span></p>
|
||
<ul class="current">
|
||
<li class="toctree-l1"><a class="reference internal" href="../blackjack_tutorial/">Solving Blackjack with Q-Learning</a></li>
|
||
<li class="toctree-l1 current current-page"><a class="current reference internal" href="#">Make your own custom environment</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../handling_time_limits/">Handling Time Limits</a></li>
|
||
</ul>
|
||
<p class="caption" role="heading"><span class="caption-text">Development</span></p>
|
||
<ul>
|
||
<li class="toctree-l1"><a class="reference external" href="https://github.com/Farama-Foundation/Gymnasium">GitHub</a></li>
|
||
<li class="toctree-l1"><a class="reference external" href="https://github.com/Farama-Foundation/Gymnasium/blob/main/docs/README.md">Contribute to the Docs</a></li>
|
||
</ul>
|
||
|
||
</div>
|
||
</div>
|
||
|
||
</div>
|
||
|
||
</div>
|
||
</aside>
|
||
<div class="main">
|
||
<div class="content">
|
||
<div class="article-container">
|
||
<a href="#" class="back-to-top muted-link">
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
|
||
<path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12z"></path>
|
||
</svg>
|
||
<span>Back to top</span>
|
||
</a>
|
||
<div class="content-icon-container">
|
||
|
||
<div class="edit-this-page">
|
||
<a class="muted-link" href="https://github.com/Farama-Foundation/Gymnasium/edit/main/docs/tutorials/environment_creation.py" title="Edit this page">
|
||
<svg aria-hidden="true" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" fill="none" stroke-linecap="round" stroke-linejoin="round">
|
||
<path stroke="none" d="M0 0h24v24H0z" fill="none"/>
|
||
<path d="M4 20h4l10.5 -10.5a1.5 1.5 0 0 0 -4 -4l-10.5 10.5v4" />
|
||
<line x1="13.5" y1="6.5" x2="17.5" y2="10.5" />
|
||
</svg>
|
||
<span class="visually-hidden">Edit this page</span>
|
||
</a>
|
||
</div><div class="theme-toggle-container theme-toggle-content">
|
||
<button class="theme-toggle">
|
||
<div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
|
||
<svg class="theme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
|
||
<svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg>
|
||
<svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
|
||
</button>
|
||
</div>
|
||
<label class="toc-overlay-icon toc-content-icon" for="__toc">
|
||
<div class="visually-hidden">Toggle table of contents sidebar</div>
|
||
<i class="icon"><svg><use href="#svg-toc"></use></svg></i>
|
||
</label>
|
||
</div>
|
||
<article role="main">
|
||
|
||
<section id="make-your-own-custom-environment">
|
||
<h1>Make your own custom environment<a class="headerlink" href="#make-your-own-custom-environment" title="Permalink to this heading">#</a></h1>
|
||
<p>This documentation overviews creating new environments and relevant
|
||
useful wrappers, utilities and tests included in Gymnasium designed for
|
||
the creation of new environments. You can clone gym-examples to play
|
||
with the code that is presented here. We recommend that you use a virtual environment:</p>
|
||
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="go">git clone https://github.com/Farama-Foundation/gym-examples</span>
|
||
<span class="go">cd gym-examples</span>
|
||
<span class="go">python -m venv .env</span>
|
||
<span class="go">source .env/bin/activate</span>
|
||
<span class="go">pip install -e .</span>
|
||
</pre></div>
|
||
</div>
|
||
<section id="subclassing-gymnasium-env">
|
||
<h2>Subclassing gymnasium.Env<a class="headerlink" href="#subclassing-gymnasium-env" title="Permalink to this heading">#</a></h2>
|
||
<p>Before learning how to create your own environment you should check out
|
||
<a class="reference external" href="/api/core">the documentation of Gymnasium’s API</a>.</p>
|
||
<p>We will be concerned with a subset of gym-examples that looks like this:</p>
|
||
<div class="highlight-sh notranslate"><div class="highlight"><pre><span></span>gym-examples/
|
||
README.md
|
||
setup.py
|
||
gym_examples/
|
||
__init__.py
|
||
envs/
|
||
__init__.py
|
||
grid_world.py
|
||
wrappers/
|
||
__init__.py
|
||
relative_position.py
|
||
reacher_weighted_reward.py
|
||
discrete_action.py
|
||
clip_reward.py
|
||
</pre></div>
|
||
</div>
|
||
<p>To illustrate the process of subclassing <code class="docutils literal notranslate"><span class="pre">gymnasium.Env</span></code>, we will
|
||
implement a very simplistic game, called <code class="docutils literal notranslate"><span class="pre">GridWorldEnv</span></code>. We will write
|
||
the code for our custom environment in
|
||
<code class="docutils literal notranslate"><span class="pre">gym-examples/gym_examples/envs/grid_world.py</span></code>. The environment
|
||
consists of a 2-dimensional square grid of fixed size (specified via the
|
||
<code class="docutils literal notranslate"><span class="pre">size</span></code> parameter during construction). The agent can move vertically
|
||
or horizontally between grid cells in each timestep. The goal of the
|
||
agent is to navigate to a target on the grid that has been placed
|
||
randomly at the beginning of the episode.</p>
|
||
<ul class="simple">
|
||
<li><p>Observations provide the location of the target and agent.</p></li>
|
||
<li><p>There are 4 actions in our environment, corresponding to the
|
||
movements “right”, “up”, “left”, and “down”.</p></li>
|
||
<li><p>A done signal is issued as soon as the agent has navigated to the
|
||
grid cell where the target is located.</p></li>
|
||
<li><p>Rewards are binary and sparse, meaning that the immediate reward is
|
||
always zero, unless the agent has reached the target, then it is 1.</p></li>
|
||
</ul>
|
||
<p>An episode in this environment (with <code class="docutils literal notranslate"><span class="pre">size=5</span></code>) might look like this:</p>
|
||
<p>where the blue dot is the agent and the red square represents the
|
||
target.</p>
|
||
<p>Let us look at the source code of <code class="docutils literal notranslate"><span class="pre">GridWorldEnv</span></code> piece by piece:</p>
|
||
<section id="declaration-and-initialization">
|
||
<h3>Declaration and Initialization<a class="headerlink" href="#declaration-and-initialization" title="Permalink to this heading">#</a></h3>
|
||
<p>Our custom environment will inherit from the abstract class
|
||
<code class="docutils literal notranslate"><span class="pre">gymnasium.Env</span></code>. You shouldn’t forget to add the <code class="docutils literal notranslate"><span class="pre">metadata</span></code>
|
||
attribute to your class. There, you should specify the render-modes that
|
||
are supported by your environment (e.g. <code class="docutils literal notranslate"><span class="pre">"human"</span></code>, <code class="docutils literal notranslate"><span class="pre">"rgb_array"</span></code>,
|
||
<code class="docutils literal notranslate"><span class="pre">"ansi"</span></code>) and the framerate at which your environment should be
|
||
rendered. Every environment should support <code class="docutils literal notranslate"><span class="pre">None</span></code> as render-mode; you
|
||
don’t need to add it in the metadata. In <code class="docutils literal notranslate"><span class="pre">GridWorldEnv</span></code>, we will
|
||
support the modes “rgb_array” and “human” and render at 4 FPS.</p>
|
||
<p>The <code class="docutils literal notranslate"><span class="pre">__init__</span></code> method of our environment will accept the integer
|
||
<code class="docutils literal notranslate"><span class="pre">size</span></code>, which determines the size of the square grid. We will set up
|
||
some variables for rendering and define <code class="docutils literal notranslate"><span class="pre">self.observation_space</span></code> and
|
||
<code class="docutils literal notranslate"><span class="pre">self.action_space</span></code>. In our case, observations should provide
|
||
information about the location of the agent and target on the
|
||
2-dimensional grid. We will choose to represent observations in the form
|
||
of dictionaries with keys <code class="docutils literal notranslate"><span class="pre">"agent"</span></code> and <code class="docutils literal notranslate"><span class="pre">"target"</span></code>. An observation
|
||
may look like <code class="docutils literal notranslate"><span class="pre">{"agent":</span> <span class="pre">array([1,</span> <span class="pre">0]),</span> <span class="pre">"target":</span> <span class="pre">array([0,</span> <span class="pre">3])}</span></code>.
|
||
Since we have 4 actions in our environment (“right”, “up”, “left”,
|
||
“down”), we will use <code class="docutils literal notranslate"><span class="pre">Discrete(4)</span></code> as an action space. Here is the
|
||
declaration of <code class="docutils literal notranslate"><span class="pre">GridWorldEnv</span></code> and the implementation of <code class="docutils literal notranslate"><span class="pre">__init__</span></code>:</p>
|
||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||
<span class="kn">import</span> <span class="nn">pygame</span>
|
||
|
||
<span class="kn">import</span> <span class="nn">gymnasium</span> <span class="k">as</span> <span class="nn">gym</span>
|
||
<span class="kn">from</span> <span class="nn">gymnasium</span> <span class="kn">import</span> <span class="n">spaces</span>
|
||
|
||
|
||
<span class="k">class</span> <span class="nc">GridWorldEnv</span><span class="p">(</span><span class="n">gym</span><span class="o">.</span><span class="n">Env</span><span class="p">):</span>
|
||
<span class="n">metadata</span> <span class="o">=</span> <span class="p">{</span><span class="s2">"render_modes"</span><span class="p">:</span> <span class="p">[</span><span class="s2">"human"</span><span class="p">,</span> <span class="s2">"rgb_array"</span><span class="p">],</span> <span class="s2">"render_fps"</span><span class="p">:</span> <span class="mi">4</span><span class="p">}</span>
|
||
|
||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="mi">5</span><span class="p">):</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">size</span> <span class="o">=</span> <span class="n">size</span> <span class="c1"># The size of the square grid</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">window_size</span> <span class="o">=</span> <span class="mi">512</span> <span class="c1"># The size of the PyGame window</span>
|
||
|
||
<span class="c1"># Observations are dictionaries with the agent's and the target's location.</span>
|
||
<span class="c1"># Each location is encoded as an element of {0, ..., `size` - 1}^2, i.e. MultiDiscrete([size, size]).</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">observation_space</span> <span class="o">=</span> <span class="n">spaces</span><span class="o">.</span><span class="n">Dict</span><span class="p">(</span>
|
||
<span class="p">{</span>
|
||
<span class="s2">"agent"</span><span class="p">:</span> <span class="n">spaces</span><span class="o">.</span><span class="n">Box</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">size</span> <span class="o">-</span> <span class="mi">1</span><span class="p">,</span> <span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">2</span><span class="p">,),</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">int</span><span class="p">),</span>
|
||
<span class="s2">"target"</span><span class="p">:</span> <span class="n">spaces</span><span class="o">.</span><span class="n">Box</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">size</span> <span class="o">-</span> <span class="mi">1</span><span class="p">,</span> <span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">2</span><span class="p">,),</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">int</span><span class="p">),</span>
|
||
<span class="p">}</span>
|
||
<span class="p">)</span>
|
||
|
||
<span class="c1"># We have 4 actions, corresponding to "right", "up", "left", "down"</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">action_space</span> <span class="o">=</span> <span class="n">spaces</span><span class="o">.</span><span class="n">Discrete</span><span class="p">(</span><span class="mi">4</span><span class="p">)</span>
|
||
|
||
<span class="sd">"""</span>
|
||
<span class="sd"> The following dictionary maps abstract actions from `self.action_space` to</span>
|
||
<span class="sd"> the direction we will walk in if that action is taken.</span>
|
||
<span class="sd"> I.e. 0 corresponds to "right", 1 to "up" etc.</span>
|
||
<span class="sd"> """</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">_action_to_direction</span> <span class="o">=</span> <span class="p">{</span>
|
||
<span class="mi">0</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">]),</span>
|
||
<span class="mi">1</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">]),</span>
|
||
<span class="mi">2</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">]),</span>
|
||
<span class="mi">3</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="o">-</span><span class="mi">1</span><span class="p">]),</span>
|
||
<span class="p">}</span>
|
||
|
||
<span class="k">assert</span> <span class="n">render_mode</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">render_mode</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">metadata</span><span class="p">[</span><span class="s2">"render_modes"</span><span class="p">]</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">render_mode</span> <span class="o">=</span> <span class="n">render_mode</span>
|
||
|
||
<span class="sd">"""</span>
|
||
<span class="sd"> If human-rendering is used, `self.window` will be a reference</span>
|
||
<span class="sd"> to the window that we draw to. `self.clock` will be a clock that is used</span>
|
||
<span class="sd"> to ensure that the environment is rendered at the correct framerate in</span>
|
||
<span class="sd"> human-mode. They will remain `None` until human-mode is used for the</span>
|
||
<span class="sd"> first time.</span>
|
||
<span class="sd"> """</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">window</span> <span class="o">=</span> <span class="kc">None</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">clock</span> <span class="o">=</span> <span class="kc">None</span>
|
||
|
||
<span class="c1"># %%</span>
|
||
<span class="c1"># Constructing Observations From Environment States</span>
|
||
<span class="c1"># ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~</span>
|
||
<span class="c1">#</span>
|
||
<span class="c1"># Since we will need to compute observations both in ``reset`` and</span>
|
||
<span class="c1"># ``step``, it is often convenient to have a (private) method ``_get_obs``</span>
|
||
<span class="c1"># that translates the environment’s state into an observation. However,</span>
|
||
<span class="c1"># this is not mandatory and you may as well compute observations in</span>
|
||
<span class="c1"># ``reset`` and ``step`` separately:</span>
|
||
|
||
<span class="k">def</span> <span class="nf">_get_obs</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||
<span class="k">return</span> <span class="p">{</span><span class="s2">"agent"</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">_agent_location</span><span class="p">,</span> <span class="s2">"target"</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">_target_location</span><span class="p">}</span>
|
||
|
||
<span class="c1"># %%</span>
|
||
<span class="c1"># We can also implement a similar method for the auxiliary information</span>
|
||
<span class="c1"># that is returned by ``step`` and ``reset``. In our case, we would like</span>
|
||
<span class="c1"># to provide the Manhattan distance between the agent and the target:</span>
|
||
|
||
<span class="k">def</span> <span class="nf">_get_info</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||
<span class="k">return</span> <span class="p">{</span>
|
||
<span class="s2">"distance"</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">linalg</span><span class="o">.</span><span class="n">norm</span><span class="p">(</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">_agent_location</span> <span class="o">-</span> <span class="bp">self</span><span class="o">.</span><span class="n">_target_location</span><span class="p">,</span> <span class="nb">ord</span><span class="o">=</span><span class="mi">1</span>
|
||
<span class="p">)</span>
|
||
<span class="p">}</span>
|
||
|
||
<span class="c1"># %%</span>
|
||
<span class="c1"># Oftentimes, info will also contain some data that is only available</span>
|
||
<span class="c1"># inside the ``step`` method (e.g. individual reward terms). In that case,</span>
|
||
<span class="c1"># we would have to update the dictionary that is returned by ``_get_info``</span>
|
||
<span class="c1"># in ``step``.</span>
|
||
|
||
<span class="c1"># %%</span>
|
||
<span class="c1"># Reset</span>
|
||
<span class="c1"># ~~~~~</span>
|
||
<span class="c1">#</span>
|
||
<span class="c1"># The ``reset`` method will be called to initiate a new episode. You may</span>
|
||
<span class="c1"># assume that the ``step`` method will not be called before ``reset`` has</span>
|
||
<span class="c1"># been called. Moreover, ``reset`` should be called whenever a done signal</span>
|
||
<span class="c1"># has been issued. Users may pass the ``seed`` keyword to ``reset`` to</span>
|
||
<span class="c1"># initialize any random number generator that is used by the environment</span>
|
||
<span class="c1"># to a deterministic state. It is recommended to use the random number</span>
|
||
<span class="c1"># generator ``self.np_random`` that is provided by the environment’s base</span>
|
||
<span class="c1"># class, ``gymnasium.Env``. If you only use this RNG, you do not need to</span>
|
||
<span class="c1"># worry much about seeding, *but you need to remember to call</span>
|
||
<span class="c1"># ``super().reset(seed=seed)``* to make sure that ``gymnasium.Env``</span>
|
||
<span class="c1"># correctly seeds the RNG. Once this is done, we can randomly set the</span>
|
||
<span class="c1"># state of our environment. In our case, we randomly choose the agent’s</span>
|
||
<span class="c1"># location and then repeatedly sample the target position until it does not</span>
|
||
<span class="c1"># coincide with the agent’s position.</span>
|
||
<span class="c1">#</span>
|
||
<span class="c1"># The ``reset`` method should return a tuple of the initial observation</span>
|
||
<span class="c1"># and some auxiliary information. We can use the methods ``_get_obs`` and</span>
|
||
<span class="c1"># ``_get_info`` that we implemented earlier for that:</span>
|
||
|
||
<span class="k">def</span> <span class="nf">reset</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">options</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||
<span class="c1"># We need the following line to seed self.np_random</span>
|
||
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
|
||
|
||
<span class="c1"># Choose the agent's location uniformly at random</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">_agent_location</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">np_random</span><span class="o">.</span><span class="n">integers</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">size</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">int</span><span class="p">)</span>
|
||
|
||
<span class="c1"># We will sample the target's location randomly until it does not coincide with the agent's location</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">_target_location</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_agent_location</span>
|
||
<span class="k">while</span> <span class="n">np</span><span class="o">.</span><span class="n">array_equal</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_target_location</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_agent_location</span><span class="p">):</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">_target_location</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">np_random</span><span class="o">.</span><span class="n">integers</span><span class="p">(</span>
|
||
<span class="mi">0</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">size</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">int</span>
|
||
<span class="p">)</span>
|
||
|
||
<span class="n">observation</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_obs</span><span class="p">()</span>
|
||
<span class="n">info</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_info</span><span class="p">()</span>
|
||
|
||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">render_mode</span> <span class="o">==</span> <span class="s2">"human"</span><span class="p">:</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">_render_frame</span><span class="p">()</span>
|
||
|
||
<span class="k">return</span> <span class="n">observation</span><span class="p">,</span> <span class="n">info</span>
|
||
|
||
<span class="c1"># %%</span>
|
||
<span class="c1"># Step</span>
|
||
<span class="c1"># ~~~~</span>
|
||
<span class="c1">#</span>
|
||
<span class="c1"># The ``step`` method usually contains most of the logic of your</span>
|
||
<span class="c1"># environment. It accepts an ``action``, computes the state of the</span>
|
||
<span class="c1"># environment after applying that action and returns the 5-tuple</span>
|
||
<span class="c1"># ``(observation, reward, terminated, truncated, info)``. Once the new state of the</span>
|
||
<span class="c1"># environment has been computed, we can check whether it is a terminal</span>
|
||
<span class="c1"># state and we set ``terminated`` accordingly. Since we are using sparse binary</span>
|
||
<span class="c1"># rewards in ``GridWorldEnv``, computing ``reward`` is trivial once we</span>
|
||
<span class="c1"># know ``terminated``. To gather ``observation`` and ``info``, we can again make</span>
|
||
<span class="c1"># use of ``_get_obs`` and ``_get_info``:</span>
|
||
|
||
<span class="k">def</span> <span class="nf">step</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">action</span><span class="p">):</span>
|
||
<span class="c1"># Map the action (element of {0,1,2,3}) to the direction we walk in</span>
|
||
<span class="n">direction</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_action_to_direction</span><span class="p">[</span><span class="n">action</span><span class="p">]</span>
|
||
<span class="c1"># We use `np.clip` to make sure we don't leave the grid</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">_agent_location</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">clip</span><span class="p">(</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">_agent_location</span> <span class="o">+</span> <span class="n">direction</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">size</span> <span class="o">-</span> <span class="mi">1</span>
|
||
<span class="p">)</span>
|
||
<span class="c1"># An episode is done iff the agent has reached the target</span>
|
||
<span class="n">terminated</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array_equal</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_agent_location</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_target_location</span><span class="p">)</span>
|
||
<span class="n">reward</span> <span class="o">=</span> <span class="mi">1</span> <span class="k">if</span> <span class="n">terminated</span> <span class="k">else</span> <span class="mi">0</span> <span class="c1"># Binary sparse rewards</span>
|
||
<span class="n">observation</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_obs</span><span class="p">()</span>
|
||
<span class="n">info</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_info</span><span class="p">()</span>
|
||
|
||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">render_mode</span> <span class="o">==</span> <span class="s2">"human"</span><span class="p">:</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">_render_frame</span><span class="p">()</span>
|
||
|
||
<span class="k">return</span> <span class="n">observation</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="kc">False</span><span class="p">,</span> <span class="n">info</span>
|
||
|
||
<span class="c1"># %%</span>
|
||
<span class="c1"># Rendering</span>
|
||
<span class="c1"># ~~~~~~~~~</span>
|
||
<span class="c1">#</span>
|
||
<span class="c1"># Here, we are using PyGame for rendering. A similar approach to rendering</span>
|
||
<span class="c1"># is used in many environments that are included with Gymnasium and you</span>
|
||
<span class="c1"># can use it as a skeleton for your own environments:</span>
|
||
|
||
<span class="k">def</span> <span class="nf">render</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">render_mode</span> <span class="o">==</span> <span class="s2">"rgb_array"</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_render_frame</span><span class="p">()</span>
|
||
|
||
<span class="k">def</span> <span class="nf">_render_frame</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">window</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">render_mode</span> <span class="o">==</span> <span class="s2">"human"</span><span class="p">:</span>
|
||
<span class="n">pygame</span><span class="o">.</span><span class="n">init</span><span class="p">()</span>
|
||
<span class="n">pygame</span><span class="o">.</span><span class="n">display</span><span class="o">.</span><span class="n">init</span><span class="p">()</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">window</span> <span class="o">=</span> <span class="n">pygame</span><span class="o">.</span><span class="n">display</span><span class="o">.</span><span class="n">set_mode</span><span class="p">(</span>
|
||
<span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">window_size</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">window_size</span><span class="p">)</span>
|
||
<span class="p">)</span>
|
||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">clock</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">render_mode</span> <span class="o">==</span> <span class="s2">"human"</span><span class="p">:</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">clock</span> <span class="o">=</span> <span class="n">pygame</span><span class="o">.</span><span class="n">time</span><span class="o">.</span><span class="n">Clock</span><span class="p">()</span>
|
||
|
||
<span class="n">canvas</span> <span class="o">=</span> <span class="n">pygame</span><span class="o">.</span><span class="n">Surface</span><span class="p">((</span><span class="bp">self</span><span class="o">.</span><span class="n">window_size</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">window_size</span><span class="p">))</span>
|
||
<span class="n">canvas</span><span class="o">.</span><span class="n">fill</span><span class="p">((</span><span class="mi">255</span><span class="p">,</span> <span class="mi">255</span><span class="p">,</span> <span class="mi">255</span><span class="p">))</span>
|
||
<span class="n">pix_square_size</span> <span class="o">=</span> <span class="p">(</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">window_size</span> <span class="o">/</span> <span class="bp">self</span><span class="o">.</span><span class="n">size</span>
|
||
<span class="p">)</span> <span class="c1"># The size of a single grid square in pixels</span>
|
||
|
||
<span class="c1"># First we draw the target</span>
|
||
<span class="n">pygame</span><span class="o">.</span><span class="n">draw</span><span class="o">.</span><span class="n">rect</span><span class="p">(</span>
|
||
<span class="n">canvas</span><span class="p">,</span>
|
||
<span class="p">(</span><span class="mi">255</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">),</span>
|
||
<span class="n">pygame</span><span class="o">.</span><span class="n">Rect</span><span class="p">(</span>
|
||
<span class="n">pix_square_size</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">_target_location</span><span class="p">,</span>
|
||
<span class="p">(</span><span class="n">pix_square_size</span><span class="p">,</span> <span class="n">pix_square_size</span><span class="p">),</span>
|
||
<span class="p">),</span>
|
||
<span class="p">)</span>
|
||
<span class="c1"># Now we draw the agent</span>
|
||
<span class="n">pygame</span><span class="o">.</span><span class="n">draw</span><span class="o">.</span><span class="n">circle</span><span class="p">(</span>
|
||
<span class="n">canvas</span><span class="p">,</span>
|
||
<span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">255</span><span class="p">),</span>
|
||
<span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_agent_location</span> <span class="o">+</span> <span class="mf">0.5</span><span class="p">)</span> <span class="o">*</span> <span class="n">pix_square_size</span><span class="p">,</span>
|
||
<span class="n">pix_square_size</span> <span class="o">/</span> <span class="mi">3</span><span class="p">,</span>
|
||
<span class="p">)</span>
|
||
|
||
<span class="c1"># Finally, add some gridlines</span>
|
||
<span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">size</span> <span class="o">+</span> <span class="mi">1</span><span class="p">):</span>
|
||
<span class="n">pygame</span><span class="o">.</span><span class="n">draw</span><span class="o">.</span><span class="n">line</span><span class="p">(</span>
|
||
<span class="n">canvas</span><span class="p">,</span>
|
||
<span class="mi">0</span><span class="p">,</span>
|
||
<span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">pix_square_size</span> <span class="o">*</span> <span class="n">x</span><span class="p">),</span>
|
||
<span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">window_size</span><span class="p">,</span> <span class="n">pix_square_size</span> <span class="o">*</span> <span class="n">x</span><span class="p">),</span>
|
||
<span class="n">width</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span>
|
||
<span class="p">)</span>
|
||
<span class="n">pygame</span><span class="o">.</span><span class="n">draw</span><span class="o">.</span><span class="n">line</span><span class="p">(</span>
|
||
<span class="n">canvas</span><span class="p">,</span>
|
||
<span class="mi">0</span><span class="p">,</span>
|
||
<span class="p">(</span><span class="n">pix_square_size</span> <span class="o">*</span> <span class="n">x</span><span class="p">,</span> <span class="mi">0</span><span class="p">),</span>
|
||
<span class="p">(</span><span class="n">pix_square_size</span> <span class="o">*</span> <span class="n">x</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">window_size</span><span class="p">),</span>
|
||
<span class="n">width</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span>
|
||
<span class="p">)</span>
|
||
|
||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">render_mode</span> <span class="o">==</span> <span class="s2">"human"</span><span class="p">:</span>
|
||
<span class="c1"># The following line copies our drawings from `canvas` to the visible window</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">window</span><span class="o">.</span><span class="n">blit</span><span class="p">(</span><span class="n">canvas</span><span class="p">,</span> <span class="n">canvas</span><span class="o">.</span><span class="n">get_rect</span><span class="p">())</span>
|
||
<span class="n">pygame</span><span class="o">.</span><span class="n">event</span><span class="o">.</span><span class="n">pump</span><span class="p">()</span>
|
||
<span class="n">pygame</span><span class="o">.</span><span class="n">display</span><span class="o">.</span><span class="n">update</span><span class="p">()</span>
|
||
|
||
<span class="c1"># We need to ensure that human-rendering occurs at the predefined framerate.</span>
|
||
<span class="c1"># The following line will automatically add a delay to keep the framerate stable.</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">clock</span><span class="o">.</span><span class="n">tick</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">metadata</span><span class="p">[</span><span class="s2">"render_fps"</span><span class="p">])</span>
|
||
<span class="k">else</span><span class="p">:</span> <span class="c1"># rgb_array</span>
|
||
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">transpose</span><span class="p">(</span>
|
||
<span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">pygame</span><span class="o">.</span><span class="n">surfarray</span><span class="o">.</span><span class="n">pixels3d</span><span class="p">(</span><span class="n">canvas</span><span class="p">)),</span> <span class="n">axes</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="p">)</span>
|
||
<span class="p">)</span>
|
||
|
||
<span class="c1"># %%</span>
|
||
<span class="c1"># Close</span>
|
||
<span class="c1"># ~~~~~</span>
|
||
<span class="c1">#</span>
|
||
<span class="c1"># The ``close`` method should close any open resources that were used by</span>
|
||
<span class="c1"># the environment. In many cases, you don’t actually have to bother to</span>
|
||
<span class="c1"># implement this method. However, in our example ``render_mode`` may be</span>
|
||
<span class="c1"># ``"human"`` and we might need to close the window that has been opened:</span>
|
||
|
||
<span class="k">def</span> <span class="nf">close</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">window</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="n">pygame</span><span class="o">.</span><span class="n">display</span><span class="o">.</span><span class="n">quit</span><span class="p">()</span>
|
||
<span class="n">pygame</span><span class="o">.</span><span class="n">quit</span><span class="p">()</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>In other environments <code class="docutils literal notranslate"><span class="pre">close</span></code> might also close files that were opened
|
||
or release other resources. You shouldn’t interact with the environment
|
||
after having called <code class="docutils literal notranslate"><span class="pre">close</span></code>.</p>
|
||
</section>
|
||
</section>
|
||
<section id="registering-envs">
|
||
<h2>Registering Envs<a class="headerlink" href="#registering-envs" title="Permalink to this heading">#</a></h2>
|
||
<p>In order for the custom environments to be detected by Gymnasium, they
|
||
must be registered as follows. We will choose to put this code in
|
||
<code class="docutils literal notranslate"><span class="pre">gym-examples/gym_examples/__init__.py</span></code>.</p>
|
||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">gymnasium.envs.registration</span> <span class="kn">import</span> <span class="n">register</span>
|
||
|
||
<span class="n">register</span><span class="p">(</span>
|
||
<span class="nb">id</span><span class="o">=</span><span class="s2">"gym_examples/GridWorld-v0"</span><span class="p">,</span>
|
||
<span class="n">entry_point</span><span class="o">=</span><span class="s2">"gym_examples.envs:GridWorldEnv"</span><span class="p">,</span>
|
||
<span class="n">max_episode_steps</span><span class="o">=</span><span class="mi">300</span><span class="p">,</span>
|
||
<span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>The environment ID consists of three components, two of which are
|
||
optional: an optional namespace (here: <code class="docutils literal notranslate"><span class="pre">gym_examples</span></code>), a mandatory
|
||
name (here: <code class="docutils literal notranslate"><span class="pre">GridWorld</span></code>) and an optional but recommended version
|
||
(here: v0). It might have also been registered as <code class="docutils literal notranslate"><span class="pre">GridWorld-v0</span></code> (the
|
||
recommended approach), <code class="docutils literal notranslate"><span class="pre">GridWorld</span></code> or <code class="docutils literal notranslate"><span class="pre">gym_examples/GridWorld</span></code>, and
|
||
the appropriate ID should then be used during environment creation.</p>
|
||
<p>The keyword argument <code class="docutils literal notranslate"><span class="pre">max_episode_steps=300</span></code> will ensure that
|
||
GridWorld environments that are instantiated via <code class="docutils literal notranslate"><span class="pre">gymnasium.make</span></code> will
|
||
be wrapped in a <code class="docutils literal notranslate"><span class="pre">TimeLimit</span></code> wrapper (see <a class="reference external" href="/api/wrappers/">the wrapper
|
||
documentation</a> for more information). A done signal
|
||
will then be produced if the agent has reached the target <em>or</em> 300 steps
|
||
have been executed in the current episode. To distinguish truncation and
|
||
termination, you can check <code class="docutils literal notranslate"><span class="pre">info["TimeLimit.truncated"]</span></code>.</p>
|
||
<p>Apart from <code class="docutils literal notranslate"><span class="pre">id</span></code> and <code class="docutils literal notranslate"><span class="pre">entry_point</span></code>, you may pass the following
|
||
additional keyword arguments to <code class="docutils literal notranslate"><span class="pre">register</span></code>:</p>
|
||
<div class="table-wrapper docutils container">
|
||
<table class="docutils align-default">
|
||
<thead>
|
||
<tr class="row-odd"><th class="head"><p>Name</p></th>
|
||
<th class="head"><p>Type</p></th>
|
||
<th class="head"><p>Default</p></th>
|
||
<th class="head"><p>Description</p></th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">reward_threshold</span></code></p></td>
|
||
<td><p><code class="docutils literal notranslate"><span class="pre">float</span></code></p></td>
|
||
<td><p><code class="docutils literal notranslate"><span class="pre">None</span></code></p></td>
|
||
<td><p>The reward threshold before the task is considered solved</p></td>
|
||
</tr>
|
||
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">nondeterministic</span></code></p></td>
|
||
<td><p><code class="docutils literal notranslate"><span class="pre">bool</span></code></p></td>
|
||
<td><p><code class="docutils literal notranslate"><span class="pre">False</span></code></p></td>
|
||
<td><p>Whether this environment is non-deterministic even after seeding</p></td>
|
||
</tr>
|
||
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">max_episode_steps</span></code></p></td>
|
||
<td><p><code class="docutils literal notranslate"><span class="pre">int</span></code></p></td>
|
||
<td><p><code class="docutils literal notranslate"><span class="pre">None</span></code></p></td>
|
||
<td><p>The maximum number of steps that an episode can consist of. If not <code class="docutils literal notranslate"><span class="pre">None</span></code>, a <code class="docutils literal notranslate"><span class="pre">TimeLimit</span></code> wrapper is added</p></td>
|
||
</tr>
|
||
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">order_enforce</span></code></p></td>
|
||
<td><p><code class="docutils literal notranslate"><span class="pre">bool</span></code></p></td>
|
||
<td><p><code class="docutils literal notranslate"><span class="pre">True</span></code></p></td>
|
||
<td><p>Whether to wrap the environment in an <code class="docutils literal notranslate"><span class="pre">OrderEnforcing</span></code> wrapper</p></td>
|
||
</tr>
|
||
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">autoreset</span></code></p></td>
|
||
<td><p><code class="docutils literal notranslate"><span class="pre">bool</span></code></p></td>
|
||
<td><p><code class="docutils literal notranslate"><span class="pre">False</span></code></p></td>
|
||
<td><p>Whether to wrap the environment in an <code class="docutils literal notranslate"><span class="pre">AutoResetWrapper</span></code></p></td>
|
||
</tr>
|
||
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">kwargs</span></code></p></td>
|
||
<td><p><code class="docutils literal notranslate"><span class="pre">dict</span></code></p></td>
|
||
<td><p><code class="docutils literal notranslate"><span class="pre">{}</span></code></p></td>
|
||
<td><p>The default kwargs to pass to the environment class</p></td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
</div>
|
||
<p>Most of these keywords (except for <code class="docutils literal notranslate"><span class="pre">max_episode_steps</span></code>,
|
||
<code class="docutils literal notranslate"><span class="pre">order_enforce</span></code>, <code class="docutils literal notranslate"><span class="pre">autoreset</span></code> and <code class="docutils literal notranslate"><span class="pre">kwargs</span></code>) do not alter the behavior of
|
||
environment instances but merely provide some extra information about
|
||
your environment. After registration, our custom <code class="docutils literal notranslate"><span class="pre">GridWorldEnv</span></code>
|
||
environment can be created with
|
||
<code class="docutils literal notranslate"><span class="pre">env</span> <span class="pre">=</span> <span class="pre">gymnasium.make('gym_examples/GridWorld-v0')</span></code>.</p>
|
||
<p><code class="docutils literal notranslate"><span class="pre">gym-examples/gym_examples/envs/__init__.py</span></code> should have:</p>
|
||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">gym_examples.envs.grid_world</span> <span class="kn">import</span> <span class="n">GridWorldEnv</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>If your environment is not registered, you may optionally pass a module
|
||
to import, that would register your environment before creating it like
|
||
this - <code class="docutils literal notranslate"><span class="pre">env</span> <span class="pre">=</span> <span class="pre">gymnasium.make('module:Env-v0')</span></code>, where <code class="docutils literal notranslate"><span class="pre">module</span></code>
|
||
contains the registration code. For the GridWorld env, the registration
|
||
code is run by importing <code class="docutils literal notranslate"><span class="pre">gym_examples</span></code> so if it were not possible to
|
||
import gym_examples explicitly, you could register while making by
|
||
<code class="docutils literal notranslate"><span class="pre">env</span> <span class="pre">=</span> <span class="pre">gymnasium.make('gym_examples:gym_examples/GridWorld-v0')</span></code>. This
|
||
is especially useful when you’re allowed to pass only the environment ID
|
||
into a third-party codebase (e.g. a learning library). This lets you
|
||
register your environment without needing to edit the library’s source
|
||
code.</p>
|
||
</section>
|
||
<section id="creating-a-package">
|
||
<h2>Creating a Package<a class="headerlink" href="#creating-a-package" title="Permalink to this heading">#</a></h2>
|
||
<p>The last step is to structure our code as a Python package. This
|
||
involves configuring <code class="docutils literal notranslate"><span class="pre">gym-examples/setup.py</span></code>. A minimal example of how
|
||
to do so is as follows:</p>
|
||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">setuptools</span> <span class="kn">import</span> <span class="n">setup</span>
|
||
|
||
<span class="n">setup</span><span class="p">(</span>
|
||
<span class="n">name</span><span class="o">=</span><span class="s2">"gym_examples"</span><span class="p">,</span>
|
||
<span class="n">version</span><span class="o">=</span><span class="s2">"0.0.1"</span><span class="p">,</span>
|
||
<span class="n">install_requires</span><span class="o">=</span><span class="p">[</span><span class="s2">"gymnasium==0.26.0"</span><span class="p">,</span> <span class="s2">"pygame==2.1.0"</span><span class="p">],</span>
|
||
<span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="creating-environment-instances">
|
||
<h2>Creating Environment Instances<a class="headerlink" href="#creating-environment-instances" title="Permalink to this heading">#</a></h2>
|
||
<p>After you have installed your package locally with
|
||
<code class="docutils literal notranslate"><span class="pre">pip</span> <span class="pre">install</span> <span class="pre">-e</span> <span class="pre">gym-examples</span></code>, you can create an instance of the
|
||
environment via:</p>
|
||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">gym_examples</span>
|
||
<span class="n">env</span> <span class="o">=</span> <span class="n">gymnasium</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="s1">'gym_examples/GridWorld-v0'</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>You can also pass keyword arguments of your environment’s constructor to
|
||
<code class="docutils literal notranslate"><span class="pre">gymnasium.make</span></code> to customize the environment. In our case, we could
|
||
do:</p>
|
||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">env</span> <span class="o">=</span> <span class="n">gymnasium</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="s1">'gym_examples/GridWorld-v0'</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="mi">10</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>Sometimes, you may find it more convenient to skip registration and call
|
||
the environment’s constructor yourself. Some may find this approach more
|
||
pythonic and environments that are instantiated like this are also
|
||
perfectly fine (but remember to add wrappers as well!).</p>
|
||
</section>
|
||
<section id="using-wrappers">
|
||
<h2>Using Wrappers<a class="headerlink" href="#using-wrappers" title="Permalink to this heading">#</a></h2>
|
||
<p>Oftentimes, we want to use different variants of a custom environment,
|
||
or we want to modify the behavior of an environment that is provided by
|
||
Gymnasium or some other party. Wrappers allow us to do this without
|
||
changing the environment implementation or adding any boilerplate code.
|
||
Check out the <a class="reference external" href="/api/wrappers/">wrapper documentation</a> for details on
|
||
how to use wrappers and instructions for implementing your own. In our
|
||
example, observations cannot be used directly in learning code because
|
||
they are dictionaries. However, we don’t actually need to touch our
|
||
environment implementation to fix this! We can simply add a wrapper on
|
||
top of environment instances to flatten observations into a single
|
||
array:</p>
|
||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">gym_examples</span>
|
||
<span class="kn">from</span> <span class="nn">gymnasium.wrappers</span> <span class="kn">import</span> <span class="n">FlattenObservation</span>
|
||
|
||
<span class="n">env</span> <span class="o">=</span> <span class="n">gymnasium</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="s1">'gym_examples/GridWorld-v0'</span><span class="p">)</span>
|
||
<span class="n">wrapped_env</span> <span class="o">=</span> <span class="n">FlattenObservation</span><span class="p">(</span><span class="n">env</span><span class="p">)</span>
|
||
<span class="nb">print</span><span class="p">(</span><span class="n">wrapped_env</span><span class="o">.</span><span class="n">reset</span><span class="p">())</span> <span class="c1"># E.g. [3 0 3 3], {}</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>Wrappers have the big advantage that they make environments highly
|
||
modular. For instance, instead of flattening the observations from
|
||
GridWorld, you might only want to look at the relative position of the
|
||
target and the agent. In the section on
|
||
<a class="reference external" href="/api/wrappers/#observationwrapper">ObservationWrappers</a> we have
|
||
implemented a wrapper that does this job. This wrapper is also available
|
||
in gym-examples:</p>
|
||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">gym_examples</span>
|
||
<span class="kn">from</span> <span class="nn">gym_examples.wrappers</span> <span class="kn">import</span> <span class="n">RelativePosition</span>
|
||
|
||
<span class="n">env</span> <span class="o">=</span> <span class="n">gymnasium</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="s1">'gym_examples/GridWorld-v0'</span><span class="p">)</span>
|
||
<span class="n">wrapped_env</span> <span class="o">=</span> <span class="n">RelativePosition</span><span class="p">(</span><span class="n">env</span><span class="p">)</span>
|
||
<span class="nb">print</span><span class="p">(</span><span class="n">wrapped_env</span><span class="o">.</span><span class="n">reset</span><span class="p">())</span> <span class="c1"># E.g. [-3 3], {}</span>
|
||
</pre></div>
|
||
</div>
|
||
<div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-tutorials-environment-creation-py">
|
||
<div class="sphx-glr-download sphx-glr-download-python docutils container">
|
||
<p><a class="reference download internal" download="" href="../../_downloads/01a413564c9bff768b24ed43b946607d/environment_creation.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">environment_creation.py</span></code></a></p>
|
||
</div>
|
||
<div class="sphx-glr-download sphx-glr-download-jupyter docutils container">
|
||
<p><a class="reference download internal" download="" href="../../_downloads/3831a62128c6d96d80d039f936893259/environment_creation.ipynb"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Jupyter</span> <span class="pre">notebook:</span> <span class="pre">environment_creation.ipynb</span></code></a></p>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
</section>
|
||
|
||
</article>
|
||
</div>
|
||
<footer>
|
||
|
||
<div class="related-pages">
|
||
<a class="next-page" href="../handling_time_limits/">
|
||
<div class="page-info">
|
||
<div class="context">
|
||
<span>Next</span>
|
||
</div>
|
||
<div class="title">Handling Time Limits</div>
|
||
</div>
|
||
<svg class="furo-related-icon"><use href="#svg-arrow-right"></use></svg>
|
||
</a>
|
||
<a class="prev-page" href="../blackjack_tutorial/">
|
||
<svg class="furo-related-icon"><use href="#svg-arrow-right"></use></svg>
|
||
<div class="page-info">
|
||
<div class="context">
|
||
<span>Previous</span>
|
||
</div>
|
||
|
||
<div class="title">Solving Blackjack with Q-Learning</div>
|
||
|
||
</div>
|
||
</a>
|
||
</div>
|
||
<div class="bottom-of-page">
|
||
<div class="left-details">
|
||
<div class="copyright">
|
||
Copyright © 2022 Farama Foundation
|
||
</div>
|
||
<!--
|
||
Made with <a href="https://www.sphinx-doc.org/">Sphinx</a> and <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s
|
||
|
||
<a href="https://github.com/pradyunsg/furo">Furo</a>
|
||
-->
|
||
</div>
|
||
<div class="right-details">
|
||
<div class="icons">
|
||
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
</footer>
|
||
</div>
|
||
<aside class="toc-drawer">
|
||
|
||
|
||
<div class="toc-sticky toc-scroll">
|
||
<div class="toc-title-container">
|
||
<span class="toc-title">
|
||
On this page
|
||
</span>
|
||
</div>
|
||
<div class="toc-tree-container">
|
||
<div class="toc-tree">
|
||
<ul>
|
||
<li><a class="reference internal" href="#">Make your own custom environment</a><ul>
|
||
<li><a class="reference internal" href="#subclassing-gymnasium-env">Subclassing gymnasium.Env</a><ul>
|
||
<li><a class="reference internal" href="#declaration-and-initialization">Declaration and Initialization</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#registering-envs">Registering Envs</a></li>
|
||
<li><a class="reference internal" href="#creating-a-package">Creating a Package</a></li>
|
||
<li><a class="reference internal" href="#creating-environment-instances">Creating Environment Instances</a></li>
|
||
<li><a class="reference internal" href="#using-wrappers">Using Wrappers</a></li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
|
||
</aside>
|
||
</div>
|
||
</div>
|
||
<script>
|
||
// Flip the "active" class on the header menu and on its overlay, in that order.
let toggleMenu = () => {
  const toggledSelectors = [".farama-header-menu", ".farama-header-menu__overlay"];
  for (const selector of toggledSelectors) {
    document.querySelector(selector).classList.toggle("active");
  }
};
|
||
|
||
// A click on the menu button, the close control, or the overlay toggles the header menu.
for (const trigger of [
  document.querySelector(".farama-header-menu__btn"),
  document.getElementById("farama-close-menu"),
  document.querySelector(".farama-header-menu__overlay"),
]) {
  trigger.addEventListener("click", toggleMenu);
}
|
||
|
||
// Close the header dropdown when a click lands on anything other than the menu button.
// Registered with addEventListener rather than by assigning window.onclick, so this
// handler neither replaces another click handler nor gets replaced by a later assignment.
window.addEventListener("click", (event) => {
  const target = event.target;
  // Only Elements implement matches(); ignore any other kind of event target.
  if (!(target instanceof Element) || target.matches(".farama-header-menu__btn")) {
    return;
  }

  const dropdown = document.querySelector(".farama-header-menu-container");
  // Guard against the container being absent so a click cannot throw a TypeError.
  if (dropdown === null || !dropdown.classList.contains("active")) {
    return;
  }

  dropdown.classList.remove("active");
  const overlay = document.querySelector(".farama-header-menu__overlay");
  if (overlay !== null) {
    overlay.classList.remove("active");
  }
});
|
||
</script>
|
||
|
||
|
||
<script>
|
||
// Show a dismissible notice about Google Analytics until the visitor closes it.
// Dismissal is remembered in localStorage under the key "shownCookieAlert".
(() => {
  const STORAGE_KEY = "shownCookieAlert";

  // Reading localStorage can throw when storage access is denied; in that case
  // treat the notice as not yet dismissed instead of aborting the script.
  let alreadyDismissed = false;
  try {
    alreadyDismissed = Boolean(localStorage.getItem(STORAGE_KEY));
  } catch (error) {
    alreadyDismissed = false;
  }
  if (alreadyDismissed) {
    return;
  }

  const boxElem = document.createElement("div");
  boxElem.classList.add("cookie-alert");

  const containerElem = document.createElement("div");
  containerElem.classList.add("cookie-alert__container");

  const textElem = document.createElement("p");
  textElem.innerHTML = `This page uses <a href="https://analytics.google.com/">
Google Analytics</a> to collect statistics. You can disable it by blocking
the JavaScript coming from www.google-analytics.com.`;
  containerElem.appendChild(textElem);

  const closeBtn = document.createElement("button");
  // Explicit type so the button can never act as a form submit button.
  closeBtn.type = "button";
  // The button contains only an icon, so it needs an accessible name.
  closeBtn.setAttribute("aria-label", "Dismiss analytics notice");
  // The leading XML declaration was dropped: it is not valid inside HTML markup.
  // The icon is decorative, so it is hidden from assistive technology.
  closeBtn.innerHTML = `<svg aria-hidden="true" focusable="false" viewBox="0 0 32 32" xmlns="http://www.w3.org/2000/svg"><defs><style>.cls-1{fill:none;stroke:#000;stroke-linecap:round;stroke-linejoin:round;stroke-width:2px;}</style></defs><title/><g id="cross"><line class="cls-1" x1="7" x2="25" y1="7" y2="25"/><line class="cls-1" x1="7" x2="25" y1="25" y2="7"/></g></svg>`;
  closeBtn.addEventListener("click", () => {
    try {
      localStorage.setItem(STORAGE_KEY, "true");
    } catch (error) {
      // Storage unavailable: the notice is still hidden for this page view.
    }
    boxElem.style.display = "none";
  });
  containerElem.appendChild(closeBtn);

  boxElem.appendChild(containerElem);
  document.body.appendChild(boxElem);
})();
|
||
</script>
|
||
|
||
<script async src="https://www.googletagmanager.com/gtag/js?id=G-6H9C8TWXZ8"></script>
|
||
<script>
|
||
// Google tag bootstrap for the gtag.js loader requested by the async script above.
// Reuse window.dataLayer if it is already defined; otherwise start with an empty array.
window.dataLayer = window.dataLayer || [];
|
||
// Queue a call by pushing the raw `arguments` object (not a copied array) onto dataLayer.
function gtag(){dataLayer.push(arguments);}
|
||
// Queue a 'js' entry carrying the current time.
gtag('js', new Date());
|
||
|
||
// Queue the 'config' entry for G-6H9C8TWXZ8, the same ID the loader URL carries.
gtag('config', 'G-6H9C8TWXZ8');
|
||
</script>
|
||
|
||
<script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
|
||
<script src="../../_static/jquery.js"></script>
|
||
<script src="../../_static/underscore.js"></script>
|
||
<script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
||
<script src="../../_static/doctools.js"></script>
|
||
<script src="../../_static/sphinx_highlight.js"></script>
|
||
<script src="../../_static/scripts/furo.js"></script>
|
||
|
||
<script>
|
||
// Repository coordinates for the documentation version menu.
// NOTE(review): nothing in the visible part of this page reads versioningConfig;
// presumably the scripts inside versioning_menu.html do. Confirm before renaming it.
const versioningConfig = {
  githubUser: 'Farama-Foundation',
  githubRepo: 'Gymnasium',
};

// Fetch the version-menu fragment, append it to <body>, and re-create its <script>
// elements so that they execute.
fetch('/_static/versioning/versioning_menu.html')
  .then(response => {
    if (response.status !== 200) {
      console.warn("Unable to load versioning menu", response);
      return null;
    }
    return response.text();
  })
  .then(text => {
    // null means the non-200 branch above already reported the failure.
    if (text === null) {
      return;
    }

    const container = document.createElement("div");
    container.innerHTML = text;
    document.body.appendChild(container);

    // innerHTML doesn't evaluate scripts, so each one is replaced by a freshly
    // created <script> element carrying the same attributes and source text.
    Array.from(container.querySelectorAll("script")).forEach(oldScript => {
      const newScript = document.createElement("script");
      Array.from(oldScript.attributes).forEach(attr => newScript.setAttribute(attr.name, attr.value));
      newScript.appendChild(document.createTextNode(oldScript.innerHTML));
      oldScript.parentNode.replaceChild(newScript, oldScript);
    });
  })
  .catch(error => {
    // fetch() rejects on network-level failures; report them the same way as a
    // non-200 response instead of leaving an unhandled promise rejection.
    console.warn("Unable to load versioning menu", error);
  });
|
||
</script></body>
|
||
</html> |