mirror of
https://github.com/Farama-Foundation/Gymnasium.git
synced 2025-08-20 22:12:03 +00:00
723 lines
60 KiB
HTML
723 lines
60 KiB
HTML
<!doctype html>
|
||
<html class="no-js" lang="en">
|
||
<head><meta charset="utf-8"/>
|
||
<meta name="viewport" content="width=device-width,initial-scale=1"/>
|
||
<meta name="color-scheme" content="light dark"><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||
<link rel="index" title="Index" href="../../genindex/" /><link rel="search" title="Search" href="../../search/" /><link rel="next" title="Core" href="../../api/core/" /><link rel="prev" title="Gymnasium is a standard API for reinforcement learning, and a diverse collection of reference environments" href="../../" />
|
||
<link rel="canonical" href="https://gymnasium.farama.org/content/basic_usage.html" />
|
||
|
||
<link rel="shortcut icon" href="../../_static/favicon.png"/><meta name="generator" content="sphinx-5.2.3, furo 2022.09.15.dev1"/>
|
||
<title>Basic Usage - Gymnasium Documentation</title>
|
||
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
|
||
<link rel="stylesheet" type="text/css" href="../../_static/styles/furo.css?digest=9ec31e2665bf879c1d47d93a8ec4893870ee1e45" />
|
||
<link rel="stylesheet" type="text/css" href="../../_static/styles/furo-extensions.css?digest=dfbec4c0ec30de48fc84c6eaaf9ab0b056bb0414" />
|
||
|
||
|
||
|
||
|
||
<style>
|
||
/* Default (light) colours for code blocks, exposed as CSS custom properties on body. */
body {
|
||
--color-code-background: #f8f8f8;
|
||
--color-code-foreground: black;
|
||
|
||
}
|
||
/* Dark overrides are skipped for print media. */
@media not print {
|
||
/* Dark colours when body carries data-theme="dark". */
body[data-theme="dark"] {
|
||
--color-code-background: #202020;
|
||
--color-code-foreground: #d0d0d0;
|
||
|
||
}
|
||
/* Same dark colours when the system prefers dark, unless data-theme is "light". */
@media (prefers-color-scheme: dark) {
|
||
body:not([data-theme="light"]) {
|
||
--color-code-background: #202020;
|
||
--color-code-foreground: #d0d0d0;
|
||
|
||
}
|
||
}
|
||
}
|
||
</style></head>
|
||
<body>
|
||
|
||
<header class="farama-header">
|
||
<div class="farama-header__container">
|
||
<div class="farama-header__left">
|
||
<a href="https://farama.org/">
|
||
<img class="farama-header__logo only-light" src="../../_static/img/gymnasium_black.svg" alt="Light Logo"/>
|
||
<img class="farama-header__logo only-dark" src="../../_static/img/gymnasium_white.svg" alt="Dark Logo"/>
|
||
<h1 class="farama-header__title">Gymnasium Documentation</h1>
|
||
</a>
|
||
</div>
|
||
<div class="farama-header__right">
|
||
<div class="farama-header-menu">
|
||
<div class="farama-header-menu__btn">
|
||
<span class="farama-header-menu__btn-name">
|
||
The Farama Foundation
|
||
</span>
|
||
<svg viewBox="0 0 32 32" xmlns="http://www.w3.org/2000/svg" xmlns:bx="https://boxy-svg.com">
|
||
<defs></defs>
|
||
<path d="M 3 4.677 C 3 3.751 3.659 3 4.474 3 L 27.526 3 C 28.341 3 29 3.751 29 4.677 C 29 5.603 28.341 6.354 27.526 6.354 L 4.474 6.354 C 3.659 6.354 3 5.603 3 4.677 Z" bx:origin="0.622825 3.875593"></path>
|
||
<path d="M 3 16 C 3 15.074 3.659 14.323 4.474 14.323 L 27.526 14.323 C 28.341 14.323 29 15.074 29 16 C 29 16.926 28.341 17.677 27.526 17.677 L 4.474 17.677 C 3.659 17.677 3 16.926 3 16 Z" bx:origin="0.622825 0.5"></path>
|
||
<path d="M 3 27.323 C 3 26.397 3.659 25.646 4.474 25.646 L 27.526 25.646 C 28.341 25.646 29 26.397 29 27.323 C 29 28.249 28.341 29 27.526 29 L 4.474 29 C 3.659 29 3 28.249 3 27.323 Z" bx:origin="0.622825 -2.875591"></path>
|
||
</svg>
|
||
</div>
|
||
<div class="farama-header-menu__list">
|
||
<div class="farama-header-menu-list__header">
|
||
<img class="farama-header-menu__logo" src="../../_static/img/farama_solid_white.svg" alt="">
|
||
<span>The Farama Foundation</span>
|
||
<button id="farama-close-menu" type="button" aria-label="Close menu">
|
||
<svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg" fill="none" stroke="currentColor"
|
||
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="icon-close">
|
||
<line x1="3" y1="21" x2="21" y2="3"></line>
|
||
<line x1="3" y1="3" x2="21" y2="21"></line>
|
||
</svg>
|
||
</button>
|
||
</div>
|
||
<div class="farama-header-menu-list__body">
|
||
<span class="farama-header-menu__section-title">Projects</span>
|
||
<ul>
|
||
<li>
|
||
<a href="https://gymnasium.farama.org">Gymnasium</a>
|
||
</li>
|
||
<li>
|
||
<a href="https://pettingzoo.farama.org">PettingZoo</a>
|
||
</li>
|
||
<li>
|
||
<a href="https://github.com/Farama-Foundation/SuperSuit">SuperSuit</a>
|
||
</li>
|
||
<li>
|
||
<a href="https://github.com/Farama-Foundation/tinyscaler">Tinyscaler</a>
|
||
</li>
|
||
<li>
|
||
<a href="https://github.com/Farama-Foundation/gym-robotics">Gymnasium-Robotics
|
||
</a>
|
||
</li>
|
||
<li>
|
||
<a href="https://github.com/Farama-Foundation/AutoROM">AutoROM</a>
|
||
</li>
|
||
<li>
|
||
<a href="https://github.com/Farama-Foundation/MiniGrid">MiniGrid</a>
|
||
</li>
|
||
<li>
|
||
<a href="https://github.com/Farama-Foundation/Jumpy">JumPy</a>
|
||
</li>
|
||
</ul>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
</header>
|
||
|
||
<div class="farama-header-menu__overlay"></div>
|
||
|
||
|
||
<script>
|
||
// Set body's data-theme from the "theme" entry in localStorage, falling back to "auto" when nothing is stored.
document.body.dataset.theme = localStorage.getItem("theme") || "auto";
|
||
</script>
|
||
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
|
||
<symbol id="svg-toc" viewBox="0 0 24 24">
|
||
<title>Contents</title>
|
||
<svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 1024 1024">
|
||
<path d="M408 442h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8zm-8 204c0 4.4 3.6 8 8 8h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56zm504-486H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zm0 632H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zM115.4 518.9L271.7 642c5.8 4.6 14.4.5 14.4-6.9V388.9c0-7.4-8.5-11.5-14.4-6.9L115.4 505.1a8.74 8.74 0 0 0 0 13.8z"/>
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-menu" viewBox="0 0 24 24">
|
||
<title>Menu</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
|
||
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-menu">
|
||
<line x1="3" y1="12" x2="21" y2="12"></line>
|
||
<line x1="3" y1="6" x2="21" y2="6"></line>
|
||
<line x1="3" y1="18" x2="21" y2="18"></line>
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-arrow-right" viewBox="0 0 24 24">
|
||
<title>Expand</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
|
||
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-chevron-right">
|
||
<polyline points="9 18 15 12 9 6"></polyline>
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-sun" viewBox="0 0 24 24">
|
||
<title>Light mode</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
|
||
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="feather-sun">
|
||
<circle cx="12" cy="12" r="5"></circle>
|
||
<line x1="12" y1="1" x2="12" y2="3"></line>
|
||
<line x1="12" y1="21" x2="12" y2="23"></line>
|
||
<line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
|
||
<line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
|
||
<line x1="1" y1="12" x2="3" y2="12"></line>
|
||
<line x1="21" y1="12" x2="23" y2="12"></line>
|
||
<line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
|
||
<line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-moon" viewBox="0 0 24 24">
|
||
<title>Dark mode</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
|
||
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-moon">
|
||
<path stroke="none" d="M0 0h24v24H0z" fill="none" />
|
||
<path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z" />
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-sun-half" viewBox="0 0 24 24">
|
||
<title>Auto light/dark mode</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
|
||
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-shadow">
|
||
<path stroke="none" d="M0 0h24v24H0z" fill="none"/>
|
||
<circle cx="12" cy="12" r="9" />
|
||
<path d="M13 12h5" />
|
||
<path d="M13 15h4" />
|
||
<path d="M13 18h1" />
|
||
<path d="M13 9h4" />
|
||
<path d="M13 6h1" />
|
||
</svg>
|
||
</symbol>
|
||
</svg>
|
||
|
||
<input type="checkbox" class="sidebar-toggle" name="__navigation" id="__navigation">
|
||
<input type="checkbox" class="sidebar-toggle" name="__toc" id="__toc">
|
||
<label class="overlay sidebar-overlay" for="__navigation">
|
||
<div class="visually-hidden">Hide navigation sidebar</div>
|
||
</label>
|
||
<label class="overlay toc-overlay" for="__toc">
|
||
<div class="visually-hidden">Hide table of contents sidebar</div>
|
||
</label>
|
||
|
||
|
||
|
||
<div class="page">
|
||
<header class="mobile-header">
|
||
<div class="header-left">
|
||
<label class="nav-overlay-icon" for="__navigation">
|
||
<div class="visually-hidden">Toggle site navigation sidebar</div>
|
||
<i class="icon"><svg><use href="#svg-menu"></use></svg></i>
|
||
</label>
|
||
</div>
|
||
<div class="header-center">
|
||
<a href="../../"><div class="brand">Gymnasium Documentation</div></a>
|
||
</div>
|
||
<div class="header-right">
|
||
<div class="theme-toggle-container theme-toggle-header">
|
||
<button class="theme-toggle">
|
||
<div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
|
||
<svg class="theme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
|
||
<svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg>
|
||
<svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
|
||
</button>
|
||
</div>
|
||
<label class="toc-overlay-icon toc-header-icon" for="__toc">
|
||
<div class="visually-hidden">Toggle table of contents sidebar</div>
|
||
<i class="icon"><svg><use href="#svg-toc"></use></svg></i>
|
||
</label>
|
||
</div>
|
||
</header>
|
||
<aside class="sidebar-drawer">
|
||
<div class="sidebar-container">
|
||
|
||
<div class="sidebar-sticky"><form class="sidebar-search-container" method="get" action="../../search/" role="search">
|
||
<input class="sidebar-search" placeholder="Search" name="q" aria-label="Search">
|
||
<input type="hidden" name="check_keywords" value="yes">
|
||
<input type="hidden" name="area" value="default">
|
||
</form>
|
||
<div id="searchbox"></div><div class="sidebar-scroll"><div class="sidebar-tree">
|
||
<p class="caption" role="heading"><span class="caption-text">Introduction</span></p>
|
||
<ul class="current">
|
||
<li class="toctree-l1 current current-page"><a class="current reference internal" href="#">Basic Usage</a></li>
|
||
</ul>
|
||
<p class="caption" role="heading"><span class="caption-text">API</span></p>
|
||
<ul>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../api/core/">Core</a></li>
|
||
<li class="toctree-l1 has-children"><a class="reference internal" href="../../api/spaces/">Spaces</a><input class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" role="switch" type="checkbox"/><label for="toctree-checkbox-1"><div class="visually-hidden">Toggle child pages in navigation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../api/spaces/fundamental/">Fundamental Spaces</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../api/spaces/composite/">Composite Spaces</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../api/spaces/utils/">Spaces Utils</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../api/wrappers/">Wrappers</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../api/vector/">Vector</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../api/utils/">Utils</a></li>
|
||
</ul>
|
||
<p class="caption" role="heading"><span class="caption-text">Environments</span></p>
|
||
<ul>
|
||
<li class="toctree-l1 has-children"><a class="reference internal" href="../../environments/atari/">Atari</a><input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" role="switch" type="checkbox"/><label for="toctree-checkbox-2"><div class="visually-hidden">Toggle child pages in navigation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/adventure/">Adventure</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/air_raid/">Air Raid</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/alien/">Alien</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/amidar/">Amidar</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/assault/">Assault</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/asterix/">Asterix</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/asteroids/">Asteroids</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/atlantis/">Atlantis</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/bank_heist/">Bank Heist</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/battle_zone/">Battle Zone</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/beam_rider/">Beam Rider</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/berzerk/">Berzerk</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/bowling/">Bowling</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/boxing/">Boxing</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/breakout/">Breakout</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/carnival/">Carnival</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/centipede/">Centipede</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/chopper_command/">Chopper Command</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/crazy_climber/">Crazy Climber</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/defender/">Defender</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/demon_attack/">Demon Attack</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/double_dunk/">Double Dunk</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/elevator_action/">Elevator Action</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/enduro/">Enduro</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/fishing_derby/">FishingDerby</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/freeway/">Freeway</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/frostbite/">Frostbite</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/gopher/">Gopher</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/gravitar/">Gravitar</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/hero/">Hero</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/ice_hockey/">IceHockey</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/jamesbond/">Jamesbond</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/journey_escape/">JourneyEscape</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/kangaroo/">Kangaroo</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/krull/">Krull</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/kung_fu_master/">Kung Fu Master</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/montezuma_revenge/">Montezuma Revenge</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/ms_pacman/">Ms Pacman</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/name_this_game/">Name This Game</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/phoenix/">Phoenix</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/pitfall/">Pitfall</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/pong/">Pong</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/pooyan/">Pooyan</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/private_eye/">PrivateEye</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/qbert/">Qbert</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/riverraid/">Riverraid</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/road_runner/">Road Runner</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/robotank/">Robot Tank</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/seaquest/">Seaquest</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/skiing/">Skiing</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/solaris/">Solaris</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/space_invaders/">SpaceInvaders</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/star_gunner/">StarGunner</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/tennis/">Tennis</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/time_pilot/">TimePilot</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/tutankham/">Tutankham</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/up_n_down/">Up n’ Down</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/venture/">Venture</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/video_pinball/">Video Pinball</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/wizard_of_wor/">Wizard of Wor</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/atari/zaxxon/">Zaxxon</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l1 has-children"><a class="reference internal" href="../../environments/mujoco/">MuJoCo</a><input class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" role="switch" type="checkbox"/><label for="toctree-checkbox-3"><div class="visually-hidden">Toggle child pages in navigation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/mujoco/ant/">Ant</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/mujoco/half_cheetah/">Half Cheetah</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/mujoco/hopper/">Hopper</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/mujoco/humanoid_standup/">Humanoid Standup</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/mujoco/humanoid/">Humanoid</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/mujoco/inverted_double_pendulum/">Inverted Double Pendulum</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/mujoco/inverted_pendulum/">Inverted Pendulum</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/mujoco/reacher/">Reacher</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/mujoco/swimmer/">Swimmer</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/mujoco/walker2d/">Walker2D</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l1 has-children"><a class="reference internal" href="../../environments/toy_text/">Toy Text</a><input class="toctree-checkbox" id="toctree-checkbox-4" name="toctree-checkbox-4" role="switch" type="checkbox"/><label for="toctree-checkbox-4"><div class="visually-hidden">Toggle child pages in navigation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/toy_text/blackjack/">Blackjack</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/toy_text/taxi/">Taxi</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/toy_text/cliff_walking/">Cliff Walking</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/toy_text/frozen_lake/">Frozen Lake</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l1 has-children"><a class="reference internal" href="../../environments/classic_control/">Classic Control</a><input class="toctree-checkbox" id="toctree-checkbox-5" name="toctree-checkbox-5" role="switch" type="checkbox"/><label for="toctree-checkbox-5"><div class="visually-hidden">Toggle child pages in navigation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/classic_control/acrobot/">Acrobot</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/classic_control/cart_pole/">Cart Pole</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/classic_control/mountain_car_continuous/">Mountain Car Continuous</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/classic_control/mountain_car/">Mountain Car</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/classic_control/pendulum/">Pendulum</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l1 has-children"><a class="reference internal" href="../../environments/box2d/">Box2D</a><input class="toctree-checkbox" id="toctree-checkbox-6" name="toctree-checkbox-6" role="switch" type="checkbox"/><label for="toctree-checkbox-6"><div class="visually-hidden">Toggle child pages in navigation</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/box2d/bipedal_walker/">Bipedal Walker</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/box2d/car_racing/">Car Racing</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../environments/box2d/lunar_lander/">Lunar Lander</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../environments/third_party_environments/">Third Party Environments</a></li>
|
||
</ul>
|
||
<p class="caption" role="heading"><span class="caption-text">Tutorials</span></p>
|
||
<ul>
|
||
<li class="toctree-l1"><a class="reference internal" href="../environment_creation/">Make your own custom environment</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../vectorising/">Vectorising your environments</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../handling_timelimits/">Handling Time Limits</a></li>
|
||
</ul>
|
||
<p class="caption" role="heading"><span class="caption-text">Development</span></p>
|
||
<ul>
|
||
<li class="toctree-l1"><a class="reference external" href="https://github.com/Farama-Foundation/Gymnasium">GitHub</a></li>
|
||
<li class="toctree-l1"><a class="reference external" href="https://farama.org/donations">Donate</a></li>
|
||
</ul>
|
||
|
||
</div>
|
||
</div>
|
||
|
||
</div>
|
||
|
||
</div>
|
||
</aside>
|
||
<div class="main">
|
||
<div class="content">
|
||
<div class="article-container">
|
||
<a href="#" class="back-to-top muted-link">
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
|
||
<path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12z"></path>
|
||
</svg>
|
||
<span>Back to top</span>
|
||
</a>
|
||
<div class="content-icon-container">
|
||
|
||
|
||
<div class="edit-this-page">
|
||
<a class="muted-link" href="https://github.com/Farama-Foundation/Gymnasium/edit/master/docs/content/basic_usage.md" title="Edit this page">
|
||
<svg aria-hidden="true" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" fill="none" stroke-linecap="round" stroke-linejoin="round">
|
||
<path stroke="none" d="M0 0h24v24H0z" fill="none"/>
|
||
<path d="M4 20h4l10.5 -10.5a1.5 1.5 0 0 0 -4 -4l-10.5 10.5v4" />
|
||
<line x1="13.5" y1="6.5" x2="17.5" y2="10.5" />
|
||
</svg>
|
||
<span class="visually-hidden">Edit this page</span>
|
||
</a>
|
||
</div><div class="theme-toggle-container theme-toggle-content">
|
||
<button class="theme-toggle">
|
||
<div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
|
||
<svg class="theme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
|
||
<svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg>
|
||
<svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
|
||
</button>
|
||
</div>
|
||
<label class="toc-overlay-icon toc-content-icon" for="__toc">
|
||
<div class="visually-hidden">Toggle table of contents sidebar</div>
|
||
<i class="icon"><svg><use href="#svg-toc"></use></svg></i>
|
||
</label>
|
||
</div>
|
||
<article role="main">
|
||
|
||
<section id="basic-usage">
|
||
<h1>Basic Usage<a class="headerlink" href="#basic-usage" title="Permalink to this heading">#</a></h1>
|
||
<section id="initializing-environments">
|
||
<h2>Initializing Environments<a class="headerlink" href="#initializing-environments" title="Permalink to this heading">#</a></h2>
|
||
<p>Initializing environments is very easy in Gymnasium and can be done via:</p>
|
||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">gymnasium</span> <span class="k">as</span> <span class="nn">gym</span>
|
||
<span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="s1">'CartPole-v0'</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="interacting-with-the-environment">
|
||
<h2>Interacting with the Environment<a class="headerlink" href="#interacting-with-the-environment" title="Permalink to this heading">#</a></h2>
|
||
<p>Gymnasium implements the classic “agent-environment loop”:</p>
|
||
<a class="only-light reference internal image-reference" href="../../_images/AE_loop.png"><img alt="Diagram of the agent-environment loop" class="only-light align-center" src="../../_images/AE_loop.png" style="width: 50%;" /></a>
|
||
<a class="only-dark reference internal image-reference" href="../../_images/AE_loop_dark.png"><img alt="Diagram of the agent-environment loop" class="only-dark align-center" src="../../_images/AE_loop_dark.png" style="width: 50%;" /></a>
|
||
<p>The agent performs some actions in the environment (usually by passing some control inputs to the environment, e.g. torque inputs of motors) and observes
|
||
how the environment’s state changes. One such action-observation exchange is referred to as a <em>timestep</em>.</p>
|
||
<p>The goal in RL is to manipulate the environment in some specific way. For instance, we want the agent to navigate a robot
|
||
to a specific point in space. If it succeeds in doing this (or makes some progress towards that goal), it will receive a positive reward
|
||
alongside the observation for this timestep. The reward may also be negative or 0, if the agent did not yet succeed (or did not make any progress).
|
||
The agent will then be trained to maximize the reward it accumulates over many timesteps.</p>
|
||
<p>After some timesteps, the environment may enter a terminal state. For instance, the robot may have crashed, or the agent may have succeeded in completing a task. In that case, we want to reset the environment to a new initial state. The environment issues a terminated signal to the agent if it enters such a terminal state. Sometimes we also want to end the episode after a fixed number of timesteps; in this case, the environment issues a truncated signal.
|
||
This is a change in the API (v0.26 onwards). Previously, a single done signal was issued when an episode ended for any reason. This has been replaced by two separate signals: terminated and truncated.</p>
|
||
<p>Let’s see what the agent-environment loop looks like in Gymnasium.
|
||
This example will run an instance of the <code class="docutils literal notranslate"><span class="pre">LunarLander-v2</span></code> environment for 1000 timesteps. Since we pass <code class="docutils literal notranslate"><span class="pre">render_mode="human"</span></code>, you should see a window pop up rendering the environment.</p>
|
||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">gymnasium</span> <span class="k">as</span> <span class="nn">gym</span>
|
||
<span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="s2">"LunarLander-v2"</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="s2">"human"</span><span class="p">)</span>
|
||
<span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">seed</span><span class="p">(</span><span class="mi">42</span><span class="p">)</span>
|
||
|
||
<span class="n">observation</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="mi">42</span><span class="p">)</span>
|
||
|
||
<span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">1000</span><span class="p">):</span>
|
||
<span class="n">action</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">action_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||
<span class="n">observation</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">action</span><span class="p">)</span>
|
||
|
||
<span class="k">if</span> <span class="n">terminated</span> <span class="ow">or</span> <span class="n">truncated</span><span class="p">:</span>
|
||
<span class="n">observation</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
|
||
|
||
<span class="n">env</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>The output should look something like this</p>
|
||
<figure class="align-center">
|
||
<a class="reference internal image-reference" href="https://user-images.githubusercontent.com/15806078/153222406-af5ce6f0-4696-4a24-a683-46ad4939170c.gif"><img alt="https://user-images.githubusercontent.com/15806078/153222406-af5ce6f0-4696-4a24-a683-46ad4939170c.gif" src="https://user-images.githubusercontent.com/15806078/153222406-af5ce6f0-4696-4a24-a683-46ad4939170c.gif" style="width: 50%;" /></a>
|
||
</figure>
|
||
<p>Every environment specifies the format of valid actions by providing an <code class="docutils literal notranslate"><span class="pre">env.action_space</span></code> attribute. Similarly,
|
||
the format of valid observations is specified by <code class="docutils literal notranslate"><span class="pre">env.observation_space</span></code>.
|
||
In the example above we sampled random actions via <code class="docutils literal notranslate"><span class="pre">env.action_space.sample()</span></code>. Note that we need to seed the action space separately from the
|
||
environment to ensure reproducible samples.</p>
|
||
<section id="change-in-env-step-api">
|
||
<h3>Change in env.step API<a class="headerlink" href="#change-in-env-step-api" title="Permalink to this heading">#</a></h3>
|
||
<p>Previously, the step method returned only one boolean - <code class="docutils literal notranslate"><span class="pre">done</span></code>. This is being deprecated in favour of returning two booleans <code class="docutils literal notranslate"><span class="pre">terminated</span></code> and <code class="docutils literal notranslate"><span class="pre">truncated</span></code> (v0.26 onwards).</p>
|
||
<p><code class="docutils literal notranslate"><span class="pre">terminated</span></code> signal is set to <code class="docutils literal notranslate"><span class="pre">True</span></code> when the core environment terminates inherently because of task completion, failure, etc., i.e. a condition defined in the MDP.<br />
|
||
<code class="docutils literal notranslate"><span class="pre">truncated</span></code> signal is set to <code class="docutils literal notranslate"><span class="pre">True</span></code> when the episode ends specifically because of a time-limit or a condition not inherent to the environment (not defined in the MDP).
|
||
It is possible for <code class="docutils literal notranslate"><span class="pre">terminated=True</span></code> and <code class="docutils literal notranslate"><span class="pre">truncated=True</span></code> to occur at the same time when termination and truncation occur at the same step.</p>
|
||
<p>This is explained in detail in the <code class="docutils literal notranslate"><span class="pre">Handling</span> <span class="pre">Time</span> <span class="pre">Limits</span></code> section.</p>
|
||
<section id="backward-compatibility">
|
||
<h4>Backward compatibility<a class="headerlink" href="#backward-compatibility" title="Permalink to this heading">#</a></h4>
|
||
<p>Gymnasium will retain support for the old API through compatibility wrappers.</p>
|
||
<p>Users can toggle the old API through <code class="docutils literal notranslate"><span class="pre">make</span></code> by setting <code class="docutils literal notranslate"><span class="pre">apply_api_compatibility=True</span></code>.</p>
|
||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">env</span> <span class="o">=</span> <span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="s2">"CartPole-v1"</span><span class="p">,</span> <span class="n">apply_api_compatibility</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>This can also be done explicitly through a wrapper:</p>
|
||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">gymnasium.wrappers</span> <span class="kn">import</span> <span class="n">StepAPICompatibility</span>
|
||
<span class="n">env</span> <span class="o">=</span> <span class="n">StepAPICompatibility</span><span class="p">(</span><span class="n">CustomEnv</span><span class="p">(),</span> <span class="n">output_truncation_bool</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>For more details see the wrappers section.</p>
|
||
</section>
|
||
</section>
|
||
</section>
|
||
<section id="checking-api-conformity">
|
||
<h2>Checking API-Conformity<a class="headerlink" href="#checking-api-conformity" title="Permalink to this heading">#</a></h2>
|
||
<p>If you have implemented a custom environment and would like to perform a sanity check to make sure that it conforms to
|
||
the API, you can run:</p>
|
||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="kn">from</span> <span class="nn">gymnasium.utils.env_checker</span> <span class="kn">import</span> <span class="n">check_env</span>
|
||
<span class="gp">>>> </span><span class="n">check_env</span><span class="p">(</span><span class="n">env</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>This function will throw an exception if it seems like your environment does not follow the Gymnasium API. It will also produce
|
||
warnings if it looks like you made a mistake or do not follow a best practice (e.g. if <code class="docutils literal notranslate"><span class="pre">observation_space</span></code> looks like
|
||
an image but does not have the right dtype). Warnings can be turned off by passing <code class="docutils literal notranslate"><span class="pre">warn=False</span></code>. By default, <code class="docutils literal notranslate"><span class="pre">check_env</span></code> will
|
||
not check the <code class="docutils literal notranslate"><span class="pre">render</span></code> method. To change this behavior, you can pass <code class="docutils literal notranslate"><span class="pre">skip_render_check=False</span></code>.</p>
|
||
<blockquote>
|
||
<div><p>After running <code class="docutils literal notranslate"><span class="pre">check_env</span></code> on an environment, you should not reuse the instance that was checked, as it may have already
|
||
been closed!</p>
|
||
</div></blockquote>
|
||
</section>
|
||
<section id="spaces">
|
||
<h2>Spaces<a class="headerlink" href="#spaces" title="Permalink to this heading">#</a></h2>
|
||
<p>Spaces are usually used to specify the format of valid actions and observations.
|
||
Every environment should have the attributes <code class="docutils literal notranslate"><span class="pre">action_space</span></code> and <code class="docutils literal notranslate"><span class="pre">observation_space</span></code>, both of which should be instances
|
||
of classes that inherit from <code class="docutils literal notranslate"><span class="pre">Space</span></code>.
|
||
There are multiple <code class="docutils literal notranslate"><span class="pre">Space</span></code> types available in Gymnasium:</p>
|
||
<ul class="simple">
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">Box</span></code>: describes an n-dimensional continuous space. It’s a bounded space where we can define the upper and lower limits which describe the valid values our observations can take.</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">Discrete</span></code>: describes a discrete space where {0, 1, …, n-1} are the possible values our observation or action can take. Values can be shifted to {a, a+1, …, a+n-1} using an optional argument.</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">Dict</span></code>: represents a dictionary of simple spaces.</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">Tuple</span></code>: represents a tuple of simple spaces.</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">MultiBinary</span></code>: creates an n-shape binary space. Argument n can be a number or a <code class="docutils literal notranslate"><span class="pre">list</span></code> of numbers.</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">MultiDiscrete</span></code>: consists of a series of <code class="docutils literal notranslate"><span class="pre">Discrete</span></code> action spaces with a different number of actions in each element.</p></li>
|
||
</ul>
|
||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="kn">from</span> <span class="nn">gymnasium.spaces</span> <span class="kn">import</span> <span class="n">Box</span><span class="p">,</span> <span class="n">Discrete</span><span class="p">,</span> <span class="n">Dict</span><span class="p">,</span> <span class="n">Tuple</span><span class="p">,</span> <span class="n">MultiBinary</span><span class="p">,</span> <span class="n">MultiDiscrete</span>
|
||
<span class="gp">>>> </span>
|
||
<span class="gp">>>> </span><span class="n">observation_space</span> <span class="o">=</span> <span class="n">Box</span><span class="p">(</span><span class="n">low</span><span class="o">=-</span><span class="mf">1.0</span><span class="p">,</span> <span class="n">high</span><span class="o">=</span><span class="mf">2.0</span><span class="p">,</span> <span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">3</span><span class="p">,),</span> <span class="n">dtype</span><span class="o">=</span><span class="n">np</span><span class="o">.</span><span class="n">float32</span><span class="p">)</span>
|
||
<span class="gp">>>> </span><span class="n">observation_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||
<span class="go">[ 1.6952509 -0.4399011 -0.7981693]</span>
|
||
<span class="go">>>></span>
|
||
<span class="gp">>>> </span><span class="n">observation_space</span> <span class="o">=</span> <span class="n">Discrete</span><span class="p">(</span><span class="mi">4</span><span class="p">)</span>
|
||
<span class="gp">>>> </span><span class="n">observation_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||
<span class="go">1</span>
|
||
<span class="gp">>>> </span>
|
||
<span class="gp">>>> </span><span class="n">observation_space</span> <span class="o">=</span> <span class="n">Discrete</span><span class="p">(</span><span class="mi">5</span><span class="p">,</span> <span class="n">start</span><span class="o">=-</span><span class="mi">2</span><span class="p">)</span>
|
||
<span class="gp">>>> </span><span class="n">observation_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||
<span class="go">-2</span>
|
||
<span class="gp">>>> </span>
|
||
<span class="gp">>>> </span><span class="n">observation_space</span> <span class="o">=</span> <span class="n">Dict</span><span class="p">({</span><span class="s2">"position"</span><span class="p">:</span> <span class="n">Discrete</span><span class="p">(</span><span class="mi">2</span><span class="p">),</span> <span class="s2">"velocity"</span><span class="p">:</span> <span class="n">Discrete</span><span class="p">(</span><span class="mi">3</span><span class="p">)})</span>
|
||
<span class="gp">>>> </span><span class="n">observation_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||
<span class="go">OrderedDict([('position', 0), ('velocity', 1)])</span>
|
||
<span class="go">>>></span>
|
||
<span class="gp">>>> </span><span class="n">observation_space</span> <span class="o">=</span> <span class="n">Tuple</span><span class="p">((</span><span class="n">Discrete</span><span class="p">(</span><span class="mi">2</span><span class="p">),</span> <span class="n">Discrete</span><span class="p">(</span><span class="mi">3</span><span class="p">)))</span>
|
||
<span class="gp">>>> </span><span class="n">observation_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||
<span class="go">(1, 2)</span>
|
||
<span class="go">>>></span>
|
||
<span class="gp">>>> </span><span class="n">observation_space</span> <span class="o">=</span> <span class="n">MultiBinary</span><span class="p">(</span><span class="mi">5</span><span class="p">)</span>
|
||
<span class="gp">>>> </span><span class="n">observation_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||
<span class="go">[1 1 1 0 1]</span>
|
||
<span class="go">>>></span>
|
||
<span class="gp">>>> </span><span class="n">observation_space</span> <span class="o">=</span> <span class="n">MultiDiscrete</span><span class="p">([</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">2</span> <span class="p">])</span>
|
||
<span class="gp">>>> </span><span class="n">observation_space</span><span class="o">.</span><span class="n">sample</span><span class="p">()</span>
|
||
<span class="go">[3 0 0]</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="wrappers">
|
||
<h2>Wrappers<a class="headerlink" href="#wrappers" title="Permalink to this heading">#</a></h2>
|
||
<p>Wrappers are a convenient way to modify an existing environment without having to alter the underlying code directly.
|
||
Using wrappers will allow you to avoid a lot of boilerplate code and make your environment more modular. Wrappers can
|
||
also be chained to combine their effects. Most environments that are generated via <code class="docutils literal notranslate"><span class="pre">gymnasium.make</span></code> will already be wrapped by default.</p>
|
||
<p>In order to wrap an environment, you must first initialize a base environment. Then you can pass this environment along
|
||
with (possibly optional) parameters to the wrapper’s constructor:</p>
|
||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="kn">import</span> <span class="nn">gymnasium</span>
|
||
<span class="gp">>>> </span><span class="kn">from</span> <span class="nn">gymnasium.wrappers</span> <span class="kn">import</span> <span class="n">RescaleAction</span>
|
||
<span class="gp">>>> </span><span class="n">base_env</span> <span class="o">=</span> <span class="n">gymnasium</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="s2">"BipedalWalker-v3"</span><span class="p">)</span>
|
||
<span class="gp">>>> </span><span class="n">base_env</span><span class="o">.</span><span class="n">action_space</span>
|
||
<span class="go">Box([-1. -1. -1. -1.], [1. 1. 1. 1.], (4,), float32)</span>
|
||
<span class="gp">>>> </span><span class="n">wrapped_env</span> <span class="o">=</span> <span class="n">RescaleAction</span><span class="p">(</span><span class="n">base_env</span><span class="p">,</span> <span class="n">min_action</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">max_action</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
||
<span class="gp">>>> </span><span class="n">wrapped_env</span><span class="o">.</span><span class="n">action_space</span>
|
||
<span class="go">Box([0. 0. 0. 0.], [1. 1. 1. 1.], (4,), float32)</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>There are three very common things you might want a wrapper to do:</p>
|
||
<ul class="simple">
|
||
<li><p>Transform actions before applying them to the base environment</p></li>
|
||
<li><p>Transform observations that are returned by the base environment</p></li>
|
||
<li><p>Transform rewards that are returned by the base environment</p></li>
|
||
</ul>
|
||
<p>Such wrappers can be easily implemented by inheriting from <code class="docutils literal notranslate"><span class="pre">ActionWrapper</span></code>, <code class="docutils literal notranslate"><span class="pre">ObservationWrapper</span></code>, or <code class="docutils literal notranslate"><span class="pre">RewardWrapper</span></code> and implementing the
|
||
respective transformation.</p>
|
||
<p>However, sometimes you might need to implement a wrapper that does some more complicated modifications (e.g. modify the
|
||
reward based on data in <code class="docutils literal notranslate"><span class="pre">info</span></code>). Such wrappers
|
||
can be implemented by inheriting from <code class="docutils literal notranslate"><span class="pre">Wrapper</span></code>.
|
||
Gymnasium already provides many commonly used wrappers for you. Some examples:</p>
|
||
<ul class="simple">
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">TimeLimit</span></code>: Issue a truncated signal if a maximum number of timesteps has been exceeded (or the base environment has issued a truncated signal).</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">ClipAction</span></code>: Clip the action such that it lies in the action space (of type <code class="docutils literal notranslate"><span class="pre">Box</span></code>).</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">RescaleAction</span></code>: Rescale actions to lie in a specified interval</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">TimeAwareObservation</span></code>: Add information about the index of the timestep to the observation. In some cases this is helpful to ensure that transitions are Markov.</p></li>
|
||
</ul>
|
||
<p>If you have a wrapped environment, and you want to get the unwrapped environment underneath all of the layers of wrappers (so that you can manually call a function or change some underlying aspect of the environment), you can use the <code class="docutils literal notranslate"><span class="pre">.unwrapped</span></code> attribute. If the environment is already a base environment, the <code class="docutils literal notranslate"><span class="pre">.unwrapped</span></code> attribute will just return itself.</p>
|
||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">wrapped_env</span>
|
||
<span class="go"><RescaleAction<TimeLimit<BipedalWalker<BipedalWalker-v3>>>></span>
|
||
<span class="gp">>>> </span><span class="n">wrapped_env</span><span class="o">.</span><span class="n">unwrapped</span>
|
||
<span class="go"><gymnasium.envs.box2d.bipedal_walker.BipedalWalker object at 0x7f87d70712d0></span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="playing-within-an-environment">
|
||
<h2>Playing within an environment<a class="headerlink" href="#playing-within-an-environment" title="Permalink to this heading">#</a></h2>
|
||
<p>You can also play the environment with your keyboard using the <code class="docutils literal notranslate"><span class="pre">play</span></code> function in <code class="docutils literal notranslate"><span class="pre">gymnasium.utils.play</span></code>.</p>
|
||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">gymnasium.utils.play</span> <span class="kn">import</span> <span class="n">play</span>
|
||
<span class="n">play</span><span class="p">(</span><span class="n">gymnasium</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="s1">'Pong-v0'</span><span class="p">))</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>This opens a window of the environment and allows you to control the agent using your keyboard.</p>
|
||
<p>Playing using the keyboard requires a key-action map. This map should have type <code class="docutils literal notranslate"><span class="pre">dict[tuple[int],</span> <span class="pre">int</span> <span class="pre">|</span> <span class="pre">None]</span></code>, which maps the keys pressed to the action performed.
|
||
For example, if pressing the keys <code class="docutils literal notranslate"><span class="pre">w</span></code> and <code class="docutils literal notranslate"><span class="pre">space</span></code> at the same time is supposed to perform action <code class="docutils literal notranslate"><span class="pre">2</span></code>, then the <code class="docutils literal notranslate"><span class="pre">keys_to_action</span></code> dict should look like:</p>
|
||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
|
||
<span class="c1"># ...</span>
|
||
<span class="p">(</span><span class="nb">ord</span><span class="p">(</span><span class="s1">'w'</span><span class="p">),</span> <span class="nb">ord</span><span class="p">(</span><span class="s1">' '</span><span class="p">)):</span> <span class="mi">2</span><span class="p">,</span>
|
||
<span class="c1"># ...</span>
|
||
<span class="p">}</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>As a more complete example, let’s say we wish to play with <code class="docutils literal notranslate"><span class="pre">CartPole-v0</span></code> using our left and right arrow keys. The code would be as follows:</p>
|
||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">gymnasium</span> <span class="k">as</span> <span class="nn">gym</span>
|
||
<span class="kn">import</span> <span class="nn">pygame</span>
|
||
<span class="kn">from</span> <span class="nn">gymnasium.utils.play</span> <span class="kn">import</span> <span class="n">play</span>
|
||
<span class="n">mapping</span> <span class="o">=</span> <span class="p">{(</span><span class="n">pygame</span><span class="o">.</span><span class="n">K_LEFT</span><span class="p">,):</span> <span class="mi">0</span><span class="p">,</span> <span class="p">(</span><span class="n">pygame</span><span class="o">.</span><span class="n">K_RIGHT</span><span class="p">,):</span> <span class="mi">1</span><span class="p">}</span>
|
||
<span class="n">play</span><span class="p">(</span><span class="n">gym</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="s2">"CartPole-v0"</span><span class="p">),</span> <span class="n">keys_to_action</span><span class="o">=</span><span class="n">mapping</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>where we obtain the corresponding key ID constants from pygame. If the <code class="docutils literal notranslate"><span class="pre">keys_to_action</span></code> argument is not specified, then the default <code class="docutils literal notranslate"><span class="pre">keys_to_action</span></code> mapping for that env is used, if provided.</p>
|
||
<p>Furthermore, if you wish to plot real time statistics as you play, you can use <code class="docutils literal notranslate"><span class="pre">gymnasium.utils.play.PlayPlot</span></code>. Here’s some sample code for plotting the reward for the last 5 seconds of gameplay:</p>
|
||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">callback</span><span class="p">(</span><span class="n">obs_t</span><span class="p">,</span> <span class="n">obs_tp1</span><span class="p">,</span> <span class="n">action</span><span class="p">,</span> <span class="n">rew</span><span class="p">,</span> <span class="n">terminated</span><span class="p">,</span> <span class="n">truncated</span><span class="p">,</span> <span class="n">info</span><span class="p">):</span>
|
||
<span class="k">return</span> <span class="p">[</span><span class="n">rew</span><span class="p">,]</span>
|
||
<span class="n">plotter</span> <span class="o">=</span> <span class="n">PlayPlot</span><span class="p">(</span><span class="n">callback</span><span class="p">,</span> <span class="mi">30</span> <span class="o">*</span> <span class="mi">5</span><span class="p">,</span> <span class="p">[</span><span class="s2">"reward"</span><span class="p">])</span>
|
||
<span class="n">env</span> <span class="o">=</span> <span class="n">gymnasium</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="s2">"Pong-v0"</span><span class="p">)</span>
|
||
<span class="n">play</span><span class="p">(</span><span class="n">env</span><span class="p">,</span> <span class="n">callback</span><span class="o">=</span><span class="n">plotter</span><span class="o">.</span><span class="n">callback</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
</section>
|
||
|
||
</article>
|
||
</div>
|
||
<footer>
|
||
|
||
<div class="related-pages">
|
||
<a class="next-page" href="../../api/core/">
|
||
<div class="page-info">
|
||
<div class="context">
|
||
<span>Next</span>
|
||
</div>
|
||
<div class="title">Core</div>
|
||
</div>
|
||
<svg class="furo-related-icon"><use href="#svg-arrow-right"></use></svg>
|
||
</a>
|
||
|
||
</div>
|
||
<div class="bottom-of-page">
|
||
<div class="left-details">
|
||
<div class="copyright">
|
||
Copyright © 2022, Farama Foundation
|
||
</div>
|
||
Made with <a href="https://www.sphinx-doc.org/">Sphinx</a> and <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s
|
||
|
||
<a href="https://github.com/pradyunsg/furo">Furo</a>
|
||
|
||
</div>
|
||
<div class="right-details">
|
||
<div class="icons">
|
||
<a class="muted-link" href="https://github.com/Farama-Foundation/Gymnasium" aria-label="On GitHub">
|
||
<svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 16 16">
|
||
<path fill-rule="evenodd" d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0 0 16 8c0-4.42-3.58-8-8-8z"></path>
|
||
</svg>
|
||
</a>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
</footer>
|
||
</div>
|
||
<aside class="toc-drawer">
|
||
|
||
|
||
<div class="toc-sticky toc-scroll">
|
||
<div class="toc-title-container">
|
||
<span class="toc-title">
|
||
On this page
|
||
</span>
|
||
</div>
|
||
<div class="toc-tree-container">
|
||
<div class="toc-tree">
|
||
<ul>
|
||
<li><a class="reference internal" href="#">Basic Usage</a><ul>
|
||
<li><a class="reference internal" href="#initializing-environments">Initializing Environments</a></li>
|
||
<li><a class="reference internal" href="#interacting-with-the-environment">Interacting with the Environment</a><ul>
|
||
<li><a class="reference internal" href="#change-in-env-step-api">Change in env.step API</a><ul>
|
||
<li><a class="reference internal" href="#backward-compatibility">Backward compatibility</a></li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
<li><a class="reference internal" href="#checking-api-conformity">Checking API-Conformity</a></li>
|
||
<li><a class="reference internal" href="#spaces">Spaces</a></li>
|
||
<li><a class="reference internal" href="#wrappers">Wrappers</a></li>
|
||
<li><a class="reference internal" href="#playing-within-an-environment">Playing within an environment</a></li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
|
||
</aside>
|
||
</div>
|
||
</div>
|
||
<script>
|
||
// Flip the "active" class on the header menu element and then on its overlay,
// so a single call switches both between their active and inactive states.
let toggleMenu = () => {
  const selectors = [".farama-header-menu", ".farama-header-menu__overlay"];
  for (const selector of selectors) {
    document.querySelector(selector).classList.toggle("active");
  }
};
|
||
|
||
// Register toggleMenu as the click handler on the menu button, the element
// with id "farama-close-menu", and the menu overlay (in that order).
for (const control of [
  document.querySelector(".farama-header-menu__btn"),
  document.getElementById("farama-close-menu"),
  document.querySelector(".farama-header-menu__overlay"),
]) {
  control.addEventListener("click", toggleMenu);
}
|
||
|
||
// Window-level click handler: when the click target is not the menu button and
// the header menu list currently has the "active" class, remove "active" from
// both the list and the overlay.
window.onclick = function(event) {
  // Clicks on the menu button itself are left alone.
  if (event.target.matches('.farama-header-menu__btn')) {
    return;
  }

  const menuList = document.querySelector(".farama-header-menu__list");
  // Nothing to do unless the list is currently marked active.
  if (!menuList.classList.contains('active')) {
    return;
  }

  menuList.classList.remove('active');
  document.querySelector(".farama-header-menu__overlay").classList.remove("active");
};
|
||
</script>
|
||
|
||
|
||
|
||
|
||
<script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
|
||
<script src="../../_static/jquery.js"></script>
|
||
<script src="../../_static/underscore.js"></script>
|
||
<script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
||
<script src="../../_static/doctools.js"></script>
|
||
<script src="../../_static/sphinx_highlight.js"></script>
|
||
<script src="../../_static/scripts/furo.js"></script>
|
||
</body>
|
||
</html> |