2022-09-13 19:29:14 +00:00
<!doctype html>
< html class = "no-js" lang = "en" >
< head > < meta charset = "utf-8" / >
< meta name = "viewport" content = "width=device-width,initial-scale=1" / >
< meta name = "color-scheme" content = "light dark" > < meta name = "generator" content = "Docutils 0.18.1: http://docutils.sourceforge.net/" / >
< link rel = "index" title = "Index" href = "../../genindex/" / > < link rel = "search" title = "Search" href = "../../search/" / > < link rel = "next" title = "Core" href = "../../api/core/" / > < link rel = "prev" title = "Gymnasium is a standard API for reinforcement learning, and a diverse collection of reference environments" href = "../../" / >
< link rel = "canonical" href = "https://gymnasium.farama.org/content/basic_usage.html" / >
2022-09-16 13:02:04 +00:00
< link rel = "shortcut icon" href = "../../_static/favicon.png" / > < meta name = "generator" content = "sphinx-5.1.1, furo 2022.09.15" / >
2022-09-13 19:29:14 +00:00
< title > Basic Usage - Gymnasium Documentation< / title >
< link rel = "stylesheet" type = "text/css" href = "../../_static/pygments.css" / >
2022-09-16 13:02:04 +00:00
< link rel = "stylesheet" type = "text/css" href = "../../_static/styles/furo.css?digest=9ec31e2665bf879c1d47d93a8ec4893870ee1e45" / >
2022-09-13 19:29:14 +00:00
< link rel = "stylesheet" type = "text/css" href = "../../_static/styles/furo-extensions.css?digest=30d1aed668e5c3a91c3e3bf6a60b675221979f0e" / >
< link rel = "stylesheet" type = "text/css" href = "../../_static/css/custom.css" / >
< style >
/* Code-color custom properties, declared on the body element.
   NOTE(review): presumably read by the theme / pygments stylesheets linked
   above to color highlighted code - confirm against those files. */
/* Default values, used when none of the dark rules below match. */
body {
--color-code-background: #f8f8f8;
--color-code-foreground: black;
}
/* The dark values are never applied to the print medium. */
@media not print {
/* Case 1: body carries data-theme="dark". */
body[data-theme="dark"] {
--color-code-background: #202020;
--color-code-foreground: #d0d0d0;
}
/* Case 2: the system prefers a dark color scheme and body is not
   explicitly marked data-theme="light" (so "auto" or no value matches). */
@media (prefers-color-scheme: dark) {
body:not([data-theme="light"]) {
--color-code-background: #202020;
--color-code-foreground: #d0d0d0;
}
}
}
< / style > < / head >
< body >
< script >
document.body.dataset.theme = localStorage.getItem("theme") || "auto";
< / script >
<!-- Hidden SVG sprite: each symbol below is an icon definition that the rest
     of the page instantiates by id through a use element. -->
< svg xmlns = "http://www.w3.org/2000/svg" style = "display: none;" >
<!-- svg-toc: icon referenced by the table-of-contents sidebar toggle labels. -->
< symbol id = "svg-toc" viewBox = "0 0 24 24" >
< title > Contents< / title >
< svg stroke = "currentColor" fill = "currentColor" stroke-width = "0" viewBox = "0 0 1024 1024" >
< path d = "M408 442h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8zm-8 204c0 4.4 3.6 8 8 8h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56zm504-486H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zm0 632H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zM115.4 518.9L271.7 642c5.8 4.6 14.4.5 14.4-6.9V388.9c0-7.4-8.5-11.5-14.4-6.9L115.4 505.1a8.74 8.74 0 0 0 0 13.8z" / >
< / svg >
< / symbol >
<!-- svg-menu: three horizontal lines; referenced by the navigation sidebar toggle label. -->
< symbol id = "svg-menu" viewBox = "0 0 24 24" >
< title > Menu< / title >
< svg xmlns = "http://www.w3.org/2000/svg" viewBox = "0 0 24 24" fill = "none" stroke = "currentColor"
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-menu">
< line x1 = "3" y1 = "12" x2 = "21" y2 = "12" > < / line >
< line x1 = "3" y1 = "6" x2 = "21" y2 = "6" > < / line >
< line x1 = "3" y1 = "18" x2 = "21" y2 = "18" > < / line >
< / svg >
< / symbol >
<!-- svg-arrow-right: chevron referenced by the "Toggle child pages in navigation" labels in the sidebar tree. -->
< symbol id = "svg-arrow-right" viewBox = "0 0 24 24" >
< title > Expand< / title >
< svg xmlns = "http://www.w3.org/2000/svg" viewBox = "0 0 24 24" fill = "none" stroke = "currentColor"
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-chevron-right">
< polyline points = "9 18 15 12 9 6" > < / polyline >
< / svg >
< / symbol >
<!-- svg-sun: referenced by the theme toggle buttons as the theme-icon-when-light icon. -->
< symbol id = "svg-sun" viewBox = "0 0 24 24" >
< title > Light mode< / title >
< svg xmlns = "http://www.w3.org/2000/svg" viewBox = "0 0 24 24" fill = "none" stroke = "currentColor"
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="feather-sun">
< circle cx = "12" cy = "12" r = "5" > < / circle >
< line x1 = "12" y1 = "1" x2 = "12" y2 = "3" > < / line >
< line x1 = "12" y1 = "21" x2 = "12" y2 = "23" > < / line >
< line x1 = "4.22" y1 = "4.22" x2 = "5.64" y2 = "5.64" > < / line >
< line x1 = "18.36" y1 = "18.36" x2 = "19.78" y2 = "19.78" > < / line >
< line x1 = "1" y1 = "12" x2 = "3" y2 = "12" > < / line >
< line x1 = "21" y1 = "12" x2 = "23" y2 = "12" > < / line >
< line x1 = "4.22" y1 = "19.78" x2 = "5.64" y2 = "18.36" > < / line >
< line x1 = "18.36" y1 = "5.64" x2 = "19.78" y2 = "4.22" > < / line >
< / svg >
< / symbol >
<!-- svg-moon: referenced by the theme toggle buttons as the theme-icon-when-dark icon. -->
< symbol id = "svg-moon" viewBox = "0 0 24 24" >
< title > Dark mode< / title >
< svg xmlns = "http://www.w3.org/2000/svg" viewBox = "0 0 24 24" fill = "none" stroke = "currentColor"
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-moon">
< path stroke = "none" d = "M0 0h24v24H0z" fill = "none" / >
< path d = "M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z" / >
< / svg >
< / symbol >
<!-- svg-sun-half: referenced by the theme toggle buttons as the theme-icon-when-auto icon. -->
< symbol id = "svg-sun-half" viewBox = "0 0 24 24" >
< title > Auto light/dark mode< / title >
< svg xmlns = "http://www.w3.org/2000/svg" viewBox = "0 0 24 24" fill = "none" stroke = "currentColor"
stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-shadow">
< path stroke = "none" d = "M0 0h24v24H0z" fill = "none" / >
< circle cx = "12" cy = "12" r = "9" / >
< path d = "M13 12h5" / >
< path d = "M13 15h4" / >
< path d = "M13 18h1" / >
< path d = "M13 9h4" / >
< path d = "M13 6h1" / >
< / svg >
< / symbol >
< / svg >
< input type = "checkbox" class = "sidebar-toggle" name = "__navigation" id = "__navigation" >
< input type = "checkbox" class = "sidebar-toggle" name = "__toc" id = "__toc" >
< label class = "overlay sidebar-overlay" for = "__navigation" >
< div class = "visually-hidden" > Hide navigation sidebar< / div >
< / label >
< label class = "overlay toc-overlay" for = "__toc" >
< div class = "visually-hidden" > Hide table of contents sidebar< / div >
< / label >
< div class = "page" >
< header class = "mobile-header" >
< div class = "header-left" >
< label class = "nav-overlay-icon" for = "__navigation" >
< div class = "visually-hidden" > Toggle site navigation sidebar< / div >
< i class = "icon" > < svg > < use href = "#svg-menu" > < / use > < / svg > < / i >
< / label >
< / div >
< div class = "header-center" >
< a href = "../../" > < div class = "brand" > Gymnasium Documentation< / div > < / a >
< / div >
< div class = "header-right" >
< div class = "theme-toggle-container theme-toggle-header" >
< button class = "theme-toggle" >
< div class = "visually-hidden" > Toggle Light / Dark / Auto color theme< / div >
< svg class = "theme-icon-when-auto" > < use href = "#svg-sun-half" > < / use > < / svg >
< svg class = "theme-icon-when-dark" > < use href = "#svg-moon" > < / use > < / svg >
< svg class = "theme-icon-when-light" > < use href = "#svg-sun" > < / use > < / svg >
< / button >
< / div >
< label class = "toc-overlay-icon toc-header-icon" for = "__toc" >
< div class = "visually-hidden" > Toggle table of contents sidebar< / div >
< i class = "icon" > < svg > < use href = "#svg-toc" > < / use > < / svg > < / i >
< / label >
< / div >
< / header >
< aside class = "sidebar-drawer" >
< div class = "sidebar-container" >
< div class = "sidebar-sticky" > < a class = "sidebar-brand" href = "../../" >
< div class = "sidebar-logo-container" >
< img class = "sidebar-logo only-light" src = "../../_static/img/gymnasium_black.svg" alt = "Light Logo" / >
< img class = "sidebar-logo only-dark" src = "../../_static/img/gymnasium_white.svg" alt = "Dark Logo" / >
< / div >
< span class = "sidebar-brand-text" > Gymnasium Documentation< / span >
< / a > < form class = "sidebar-search-container" method = "get" action = "../../search/" role = "search" >
< input class = "sidebar-search" placeholder = "Search" name = "q" aria-label = "Search" >
< input type = "hidden" name = "check_keywords" value = "yes" >
< input type = "hidden" name = "area" value = "default" >
< / form >
< div id = "searchbox" > < / div > < div class = "sidebar-scroll" > < div class = "sidebar-tree" >
< p class = "caption" role = "heading" > < span class = "caption-text" > Introduction< / span > < / p >
< ul class = "current" >
< li class = "toctree-l1 current current-page" > < a class = "current reference internal" href = "#" > Basic Usage< / a > < / li >
< / ul >
< p class = "caption" role = "heading" > < span class = "caption-text" > API< / span > < / p >
< ul >
< li class = "toctree-l1" > < a class = "reference internal" href = "../../api/core/" > Core< / a > < / li >
2022-09-16 13:02:04 +00:00
< li class = "toctree-l1 has-children" > < a class = "reference internal" href = "../../api/spaces/" > Spaces< / a > < input class = "toctree-checkbox" id = "toctree-checkbox-1" name = "toctree-checkbox-1" role = "switch" type = "checkbox" / > < label for = "toctree-checkbox-1" > < div class = "visually-hidden" > Toggle child pages in navigation< / div > < i class = "icon" > < svg > < use href = "#svg-arrow-right" > < / use > < / svg > < / i > < / label > < ul >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../api/spaces/fundamental/" > Fundamental Spaces< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../api/spaces/composite/" > Composite Spaces< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../api/spaces/utils/" > Spaces Utils< / a > < / li >
< / ul >
< / li >
2022-09-13 19:29:14 +00:00
< li class = "toctree-l1" > < a class = "reference internal" href = "../../api/wrappers/" > Wrappers< / a > < / li >
< li class = "toctree-l1" > < a class = "reference internal" href = "../../api/vector/" > Vector< / a > < / li >
< li class = "toctree-l1" > < a class = "reference internal" href = "../../api/utils/" > Utils< / a > < / li >
< / ul >
< p class = "caption" role = "heading" > < span class = "caption-text" > Environments< / span > < / p >
< ul >
2022-09-16 13:02:04 +00:00
< li class = "toctree-l1 has-children" > < a class = "reference internal" href = "../../environments/atari/" > Atari< / a > < input class = "toctree-checkbox" id = "toctree-checkbox-2" name = "toctree-checkbox-2" role = "switch" type = "checkbox" / > < label for = "toctree-checkbox-2" > < div class = "visually-hidden" > Toggle child pages in navigation< / div > < i class = "icon" > < svg > < use href = "#svg-arrow-right" > < / use > < / svg > < / i > < / label > < ul >
2022-09-13 19:29:14 +00:00
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/adventure/" > Adventure< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/air_raid/" > Air Raid< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/alien/" > Alien< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/amidar/" > Amidar< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/assault/" > Assault< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/asterix/" > Asterix< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/asteroids/" > Asteroids< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/atlantis/" > Atlantis< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/bank_heist/" > Bank Heist< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/battle_zone/" > Battle Zone< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/beam_rider/" > Beam Rider< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/berzerk/" > Berzerk< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/bowling/" > Bowling< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/boxing/" > Boxing< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/breakout/" > Breakout< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/carnival/" > Carnival< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/centipede/" > Centipede< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/chopper_command/" > Chopper Command< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/crazy_climber/" > Crazy Climber< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/defender/" > Defender< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/demon_attack/" > Demon Attack< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/double_dunk/" > Double Dunk< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/elevator_action/" > Elevator Action< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/enduro/" > Enduro< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/fishing_derby/" > Fishing Derby< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/freeway/" > Freeway< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/frostbite/" > Frostbite< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/gopher/" > Gopher< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/gravitar/" > Gravitar< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/hero/" > Hero< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/ice_hockey/" > Ice Hockey< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/jamesbond/" > Jamesbond< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/journey_escape/" > Journey Escape< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/kangaroo/" > Kangaroo< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/krull/" > Krull< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/kung_fu_master/" > Kung Fu Master< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/montezuma_revenge/" > Montezuma Revenge< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/ms_pacman/" > Ms Pacman< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/name_this_game/" > Name This Game< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/phoenix/" > Phoenix< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/pitfall/" > Pitfall< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/pong/" > Pong< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/pooyan/" > Pooyan< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/private_eye/" > Private Eye< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/qbert/" > Qbert< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/riverraid/" > Riverraid< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/road_runner/" > Road Runner< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/robotank/" > Robot Tank< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/seaquest/" > Seaquest< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/skiing/" > Skiing< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/solaris/" > Solaris< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/space_invaders/" > Space Invaders< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/star_gunner/" > Star Gunner< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/tennis/" > Tennis< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/time_pilot/" > Time Pilot< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/tutankham/" > Tutankham< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/up_n_down/" > Up n’ Down< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/venture/" > Venture< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/video_pinball/" > Video Pinball< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/wizard_of_wor/" > Wizard of Wor< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/atari/zaxxon/" > Zaxxon< / a > < / li >
< / ul >
< / li >
2022-09-16 13:02:04 +00:00
< li class = "toctree-l1 has-children" > < a class = "reference internal" href = "../../environments/mujoco/" > MuJoCo< / a > < input class = "toctree-checkbox" id = "toctree-checkbox-3" name = "toctree-checkbox-3" role = "switch" type = "checkbox" / > < label for = "toctree-checkbox-3" > < div class = "visually-hidden" > Toggle child pages in navigation< / div > < i class = "icon" > < svg > < use href = "#svg-arrow-right" > < / use > < / svg > < / i > < / label > < ul >
2022-09-13 19:29:14 +00:00
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/mujoco/ant/" > Ant< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/mujoco/half_cheetah/" > Half Cheetah< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/mujoco/hopper/" > Hopper< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/mujoco/humanoid_standup/" > Humanoid Standup< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/mujoco/humanoid/" > Humanoid< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/mujoco/inverted_double_pendulum/" > Inverted Double Pendulum< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/mujoco/inverted_pendulum/" > Inverted Pendulum< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/mujoco/reacher/" > Reacher< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/mujoco/swimmer/" > Swimmer< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/mujoco/walker2d/" > Walker2D< / a > < / li >
< / ul >
< / li >
2022-09-16 13:02:04 +00:00
< li class = "toctree-l1 has-children" > < a class = "reference internal" href = "../../environments/toy_text/" > Toy Text< / a > < input class = "toctree-checkbox" id = "toctree-checkbox-4" name = "toctree-checkbox-4" role = "switch" type = "checkbox" / > < label for = "toctree-checkbox-4" > < div class = "visually-hidden" > Toggle child pages in navigation< / div > < i class = "icon" > < svg > < use href = "#svg-arrow-right" > < / use > < / svg > < / i > < / label > < ul >
2022-09-13 19:29:14 +00:00
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/toy_text/blackjack/" > Blackjack< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/toy_text/taxi/" > Taxi< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/toy_text/cliff_walking/" > Cliff Walking< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/toy_text/frozen_lake/" > Frozen Lake< / a > < / li >
< / ul >
< / li >
2022-09-16 13:02:04 +00:00
< li class = "toctree-l1 has-children" > < a class = "reference internal" href = "../../environments/classic_control/" > Classic Control< / a > < input class = "toctree-checkbox" id = "toctree-checkbox-5" name = "toctree-checkbox-5" role = "switch" type = "checkbox" / > < label for = "toctree-checkbox-5" > < div class = "visually-hidden" > Toggle child pages in navigation< / div > < i class = "icon" > < svg > < use href = "#svg-arrow-right" > < / use > < / svg > < / i > < / label > < ul >
2022-09-13 19:29:14 +00:00
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/classic_control/acrobot/" > Acrobot< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/classic_control/cart_pole/" > Cart Pole< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/classic_control/mountain_car_continuous/" > Mountain Car Continuous< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/classic_control/mountain_car/" > Mountain Car< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/classic_control/pendulum/" > Pendulum< / a > < / li >
< / ul >
< / li >
2022-09-16 13:02:04 +00:00
< li class = "toctree-l1 has-children" > < a class = "reference internal" href = "../../environments/box2d/" > Box2D< / a > < input class = "toctree-checkbox" id = "toctree-checkbox-6" name = "toctree-checkbox-6" role = "switch" type = "checkbox" / > < label for = "toctree-checkbox-6" > < div class = "visually-hidden" > Toggle child pages in navigation< / div > < i class = "icon" > < svg > < use href = "#svg-arrow-right" > < / use > < / svg > < / i > < / label > < ul >
2022-09-13 19:29:14 +00:00
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/box2d/bipedal_walker/" > Bipedal Walker< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/box2d/car_racing/" > Car Racing< / a > < / li >
< li class = "toctree-l2" > < a class = "reference internal" href = "../../environments/box2d/lunar_lander/" > Lunar Lander< / a > < / li >
< / ul >
< / li >
< li class = "toctree-l1" > < a class = "reference internal" href = "../../environments/third_party_environments/" > Third Party Environments< / a > < / li >
< / ul >
< p class = "caption" role = "heading" > < span class = "caption-text" > Tutorials< / span > < / p >
< ul >
< li class = "toctree-l1" > < a class = "reference internal" href = "../environment_creation/" > Make your own custom environment< / a > < / li >
< li class = "toctree-l1" > < a class = "reference internal" href = "../vectorising/" > Vectorising your environments< / a > < / li >
< / ul >
< p class = "caption" role = "heading" > < span class = "caption-text" > Development< / span > < / p >
< ul >
2022-09-16 13:02:04 +00:00
< li class = "toctree-l1" > < a class = "reference external" href = "https://github.com/Farama-Foundation/Gymnasium" > GitHub< / a > < / li >
2022-09-13 19:29:14 +00:00
< li class = "toctree-l1" > < a class = "reference external" href = "https://farama.org/donations" > Donate< / a > < / li >
< / ul >
< / div >
< / div >
< / div >
< / div >
< / aside >
< div class = "main" >
< div class = "content" >
< div class = "article-container" >
< a href = "#" class = "back-to-top muted-link" >
< svg xmlns = "http://www.w3.org/2000/svg" viewBox = "0 0 24 24" >
< path d = "M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12z" > < / path >
< / svg >
< span > Back to top< / span >
< / a >
< div class = "content-icon-container" >
< div class = "theme-toggle-container theme-toggle-content" >
< button class = "theme-toggle" >
< div class = "visually-hidden" > Toggle Light / Dark / Auto color theme< / div >
< svg class = "theme-icon-when-auto" > < use href = "#svg-sun-half" > < / use > < / svg >
< svg class = "theme-icon-when-dark" > < use href = "#svg-moon" > < / use > < / svg >
< svg class = "theme-icon-when-light" > < use href = "#svg-sun" > < / use > < / svg >
< / button >
< / div >
< label class = "toc-overlay-icon toc-content-icon" for = "__toc" >
< div class = "visually-hidden" > Toggle table of contents sidebar< / div >
< i class = "icon" > < svg > < use href = "#svg-toc" > < / use > < / svg > < / i >
< / label >
< / div >
< article role = "main" >
< section id = "basic-usage" >
< h1 > Basic Usage< a class = "headerlink" href = "#basic-usage" title = "Permalink to this heading" > #< / a > < / h1 >
< section id = "initializing-environments" >
< h2 > Initializing Environments< a class = "headerlink" href = "#initializing-environments" title = "Permalink to this heading" > #< / a > < / h2 >
< p > Initializing environments is very easy in Gymnasium and can be done via:< / p >
2022-09-19 20:16:35 +00:00
< div class = "highlight-python notranslate" > < div class = "highlight" > < pre > < span > < / span > < span class = "kn" > import< / span > < span class = "nn" > gymnasium< / span > < span class = "k" > as< / span > < span class = "nn" > gym< / span >
< span class = "n" > env< / span > < span class = "o" > =< / span > < span class = "n" > gym< / span > < span class = "o" > .< / span > < span class = "n" > make< / span > < span class = "p" > (< / span > < span class = "s1" > 'CartPole-v0'< / span > < span class = "p" > )< / span >
2022-09-13 19:29:14 +00:00
< / pre > < / div >
< / div >
< / section >
< section id = "interacting-with-the-environment" >
< h2 > Interacting with the Environment< a class = "headerlink" href = "#interacting-with-the-environment" title = "Permalink to this heading" > #< / a > < / h2 >
< p > Gymnasium implements the classic “agent-environment loop”:< / p >
< a class = "only-light reference internal image-reference" href = "../../_images/AE_loop.png" > < img alt = "Diagram of the agent-environment loop" class = "only-light align-center" src = "../../_images/AE_loop.png" style = "width: 50%;" / > < / a >
< a class = "only-dark reference internal image-reference" href = "../../_images/AE_loop_dark.png" > < img alt = "Diagram of the agent-environment loop" class = "only-dark align-center" src = "../../_images/AE_loop_dark.png" style = "width: 50%;" / > < / a >
< p > The agent performs some actions in the environment (usually by passing some control inputs to the environment, e.g. torque inputs of motors) and observes
how the environment’s state changes. One such action-observation exchange is referred to as a < em > timestep< / em >.< / p >
< p > The goal in RL is to manipulate the environment in some specific way. For instance, we want the agent to navigate a robot
to a specific point in space. If it succeeds in doing this (or makes some progress towards that goal), it will receive a positive reward
alongside the observation for this timestep. The reward may also be negative or 0, if the agent did not yet succeed (or did not make any progress).
The agent will then be trained to maximize the reward it accumulates over many timesteps.< / p >
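< p > After some timesteps, the environment may enter a terminal state. For instance, the robot may have crashed! In that case,
we want to reset the environment to a new initial state. The environment issues a done signal to the agent if it enters such a terminal state
(in the example below, < code class = "docutils literal notranslate" > < span class = "pre" > env.step()< / span > < / code > reports it through the < code class = "docutils literal notranslate" > < span class = "pre" > terminated< / span > < / code > and < code class = "docutils literal notranslate" > < span class = "pre" > truncated< / span > < / code > flags).
Not all done signals must be triggered by a “catastrophic failure”: sometimes we also want to issue a done signal after
a fixed number of timesteps, or if the agent has succeeded in completing some task in the environment.< / p >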
< p > Let’s see what the agent-environment loop looks like in Gymnasium.
This example will run an instance of the < code class = "docutils literal notranslate" > < span class = "pre" > LunarLander-v2< / span > < / code > environment for 1000 timesteps. Since we pass < code class = "docutils literal notranslate" > < span class = "pre" > render_mode="human"< / span > < / code >, you should see a window pop up rendering the environment.< / p >
2022-09-19 20:16:35 +00:00
< div class = "highlight-python notranslate" > < div class = "highlight" > < pre > < span > < / span > < span class = "kn" > import< / span > < span class = "nn" > gymnasium< / span > < span class = "k" > as< / span > < span class = "nn" > gym< / span >
< span class = "n" > env< / span > < span class = "o" > =< / span > < span class = "n" > gym< / span > < span class = "o" > .< / span > < span class = "n" > make< / span > < span class = "p" > (< / span > < span class = "s2" > "LunarLander-v2"< / span > < span class = "p" > ,< / span > < span class = "n" > render_mode< / span > < span class = "o" > =< / span > < span class = "s2" > "human"< / span > < span class = "p" > )< / span >
2022-09-13 19:29:14 +00:00
< span class = "n" > env< / span > < span class = "o" > .< / span > < span class = "n" > action_space< / span > < span class = "o" > .< / span > < span class = "n" > seed< / span > < span class = "p" > (< / span > < span class = "mi" > 42< / span > < span class = "p" > )< / span >
< span class = "n" > observation< / span > < span class = "p" > ,< / span > < span class = "n" > info< / span > < span class = "o" > =< / span > < span class = "n" > env< / span > < span class = "o" > .< / span > < span class = "n" > reset< / span > < span class = "p" > (< / span > < span class = "n" > seed< / span > < span class = "o" > =< / span > < span class = "mi" > 42< / span > < span class = "p" > )< / span >
< span class = "k" > for< / span > < span class = "n" > _< / span > < span class = "ow" > in< / span > < span class = "nb" > range< / span > < span class = "p" > (< / span > < span class = "mi" > 1000< / span > < span class = "p" > ):< / span >
2022-09-19 20:16:35 +00:00
    < span class = "n" > action< / span > < span class = "o" > =< / span > < span class = "n" > env< / span > < span class = "o" > .< / span > < span class = "n" > action_space< / span > < span class = "o" > .< / span > < span class = "n" > sample< / span > < span class = "p" > ()< / span >
    < span class = "n" > observation< / span > < span class = "p" > ,< / span > < span class = "n" > reward< / span > < span class = "p" > ,< / span > < span class = "n" > terminated< / span > < span class = "p" > ,< / span > < span class = "n" > truncated< / span > < span class = "p" > ,< / span > < span class = "n" > info< / span > < span class = "o" > =< / span > < span class = "n" > env< / span > < span class = "o" > .< / span > < span class = "n" > step< / span > < span class = "p" > (< / span > < span class = "n" > action< / span > < span class = "p" > )< / span >
2022-09-13 19:29:14 +00:00
< span class = "k" > if< / span > < span class = "n" > terminated< / span > < span class = "ow" > or< / span > < span class = "n" > truncated< / span > < span class = "p" > :< / span >
< span class = "n" > observation< / span > < span class = "p" > ,< / span > < span class = "n" > info< / span > < span class = "o" > =< / span > < span class = "n" > env< / span > < span class = "o" > .< / span > < span class = "n" > reset< / span > < span class = "p" > ()< / span >
< span class = "n" > env< / span > < span class = "o" > .< / span > < span class = "n" > close< / span > < span class = "p" > ()< / span >
< / pre > < / div >
< / div >
< p > The output should look something like this:< / p >
< figure class = "align-center" >
< a class = "reference internal image-reference" href = "https://user-images.githubusercontent.com/15806078/153222406-af5ce6f0-4696-4a24-a683-46ad4939170c.gif" > < img alt = "Example rendering of the LunarLander-v2 environment" src = "https://user-images.githubusercontent.com/15806078/153222406-af5ce6f0-4696-4a24-a683-46ad4939170c.gif" style = "width: 50%;" / > < / a >
< / figure >
< p > Every environment specifies the format of valid actions by providing an < code class = "docutils literal notranslate" > < span class = "pre" > env.action_space< / span > < / code > attribute. Similarly,
the format of valid observations is specified by < code class = "docutils literal notranslate" > < span class = "pre" > env.observation_space< / span > < / code > .
In the example above we sampled random actions via < code class = "docutils literal notranslate" > < span class = "pre" > env.action_space.sample()< / span > < / code > . Note that we need to seed the action space separately from the
environment to ensure reproducible samples.< / p >
< / section >
< section id = "checking-api-conformity" >
< h2 > Checking API-Conformity< a class = "headerlink" href = "#checking-api-conformity" title = "Permalink to this heading" > #< / a > < / h2 >
< p > If you have implemented a custom environment and would like to perform a sanity check to make sure that it conforms to
the API, you can run:< / p >
< div class = "highlight-python notranslate" > < div class = "highlight" > < pre > < span > < / span > < span class = "gp" > > > > < / span > < span class = "kn" > from< / span > < span class = "nn" > gymnasium.utils.env_checker< / span > < span class = "kn" > import< / span > < span class = "n" > check_env< / span >
< span class = "gp" > > > > < / span > < span class = "n" > check_env< / span > < span class = "p" > (< / span > < span class = "n" > env< / span > < span class = "p" > )< / span >
< / pre > < / div >
< / div >
< p > This function will throw an exception if it seems like your environment does not follow the Gymnasium API. It will also produce
warnings if it looks like you made a mistake or do not follow a best practice (e.g. if < code class = "docutils literal notranslate" > < span class = "pre" > observation_space< / span > < / code > looks like
an image but does not have the right dtype). Warnings can be turned off by passing < code class = "docutils literal notranslate" > < span class = "pre" > warn=False< / span > < / code > . By default, < code class = "docutils literal notranslate" > < span class = "pre" > check_env< / span > < / code > will
not check the < code class = "docutils literal notranslate" > < span class = "pre" > render< / span > < / code > method. To change this behavior, you can pass < code class = "docutils literal notranslate" > < span class = "pre" > skip_render_check=False< / span > < / code > .< / p >
< blockquote >
< div > < p > After running < code class = "docutils literal notranslate" > < span class = "pre" > check_env< / span > < / code > on an environment, you should not reuse the instance that was checked, as it may have already
been closed!< / p >
< / div > < / blockquote >
< / section >
< section id = "spaces" >
< h2 > Spaces< a class = "headerlink" href = "#spaces" title = "Permalink to this heading" > #< / a > < / h2 >
< p > Spaces are usually used to specify the format of valid actions and observations.
Every environment should have the attributes < code class = "docutils literal notranslate" > < span class = "pre" > action_space< / span > < / code > and < code class = "docutils literal notranslate" > < span class = "pre" > observation_space< / span > < / code > , both of which should be instances
of classes that inherit from < code class = "docutils literal notranslate" > < span class = "pre" > Space< / span > < / code > .
There are multiple < code class = "docutils literal notranslate" > < span class = "pre" > Space< / span > < / code > types available in Gymnasium:< / p >
< ul class = "simple" >
< li > < p > < code class = "docutils literal notranslate" > < span class = "pre" > Box< / span > < / code > : describes an n-dimensional continuous space. It’ s a bounded space where we can define the upper and lower limits which describe the valid values our observations can take.< / p > < / li >
< li > < p > < code class = "docutils literal notranslate" > < span class = "pre" > Discrete< / span > < / code > : describes a discrete space where {0, 1, …, n-1} are the possible values our observation or action can take. Values can be shifted to {a, a+1, …, a+n-1} using an optional argument.< / p > < / li >
< li > < p > < code class = "docutils literal notranslate" > < span class = "pre" > Dict< / span > < / code > : represents a dictionary of simple spaces.< / p > < / li >
< li > < p > < code class = "docutils literal notranslate" > < span class = "pre" > Tuple< / span > < / code > : represents a tuple of simple spaces.< / p > < / li >
< li > < p > < code class = "docutils literal notranslate" > < span class = "pre" > MultiBinary< / span > < / code > : creates an n-shape binary space. Argument n can be a number or a < code class = "docutils literal notranslate" > < span class = "pre" > list< / span > < / code > of numbers.< / p > < / li >
< li > < p > < code class = "docutils literal notranslate" > < span class = "pre" > MultiDiscrete< / span > < / code > : consists of a series of < code class = "docutils literal notranslate" > < span class = "pre" > Discrete< / span > < / code > action spaces with a different number of actions in each element.< / p > < / li >
< / ul >
< div class = "highlight-python notranslate" > < div class = "highlight" > < pre > < span > < / span > < span class = "gp" > > > > < / span > < span class = "kn" > from< / span > < span class = "nn" > gymnasium.spaces< / span > < span class = "kn" > import< / span > < span class = "n" > Box< / span > < span class = "p" > ,< / span > < span class = "n" > Discrete< / span > < span class = "p" > ,< / span > < span class = "n" > Dict< / span > < span class = "p" > ,< / span > < span class = "n" > Tuple< / span > < span class = "p" > ,< / span > < span class = "n" > MultiBinary< / span > < span class = "p" > ,< / span > < span class = "n" > MultiDiscrete< / span >
< span class = "gp" > > > > < / span >
< span class = "gp" > > > > < / span > < span class = "n" > observation_space< / span > < span class = "o" > =< / span > < span class = "n" > Box< / span > < span class = "p" > (< / span > < span class = "n" > low< / span > < span class = "o" > =-< / span > < span class = "mf" > 1.0< / span > < span class = "p" > ,< / span > < span class = "n" > high< / span > < span class = "o" > =< / span > < span class = "mf" > 2.0< / span > < span class = "p" > ,< / span > < span class = "n" > shape< / span > < span class = "o" > =< / span > < span class = "p" > (< / span > < span class = "mi" > 3< / span > < span class = "p" > ,),< / span > < span class = "n" > dtype< / span > < span class = "o" > =< / span > < span class = "n" > np< / span > < span class = "o" > .< / span > < span class = "n" > float32< / span > < span class = "p" > )< / span >
< span class = "gp" > > > > < / span > < span class = "n" > observation_space< / span > < span class = "o" > .< / span > < span class = "n" > sample< / span > < span class = "p" > ()< / span >
< span class = "go" > [ 1.6952509 -0.4399011 -0.7981693]< / span >
< span class = "go" > > > > < / span >
< span class = "gp" > > > > < / span > < span class = "n" > observation_space< / span > < span class = "o" > =< / span > < span class = "n" > Discrete< / span > < span class = "p" > (< / span > < span class = "mi" > 4< / span > < span class = "p" > )< / span >
< span class = "gp" > > > > < / span > < span class = "n" > observation_space< / span > < span class = "o" > .< / span > < span class = "n" > sample< / span > < span class = "p" > ()< / span >
< span class = "go" > 1< / span >
< span class = "gp" > > > > < / span >
< span class = "gp" > > > > < / span > < span class = "n" > observation_space< / span > < span class = "o" > =< / span > < span class = "n" > Discrete< / span > < span class = "p" > (< / span > < span class = "mi" > 5< / span > < span class = "p" > ,< / span > < span class = "n" > start< / span > < span class = "o" > =-< / span > < span class = "mi" > 2< / span > < span class = "p" > )< / span >
< span class = "gp" > > > > < / span > < span class = "n" > observation_space< / span > < span class = "o" > .< / span > < span class = "n" > sample< / span > < span class = "p" > ()< / span >
< span class = "go" > -2< / span >
< span class = "gp" > > > > < / span >
< span class = "gp" > > > > < / span > < span class = "n" > observation_space< / span > < span class = "o" > =< / span > < span class = "n" > Dict< / span > < span class = "p" > ({< / span > < span class = "s2" > " position" < / span > < span class = "p" > :< / span > < span class = "n" > Discrete< / span > < span class = "p" > (< / span > < span class = "mi" > 2< / span > < span class = "p" > ),< / span > < span class = "s2" > " velocity" < / span > < span class = "p" > :< / span > < span class = "n" > Discrete< / span > < span class = "p" > (< / span > < span class = "mi" > 3< / span > < span class = "p" > )})< / span >
< span class = "gp" > > > > < / span > < span class = "n" > observation_space< / span > < span class = "o" > .< / span > < span class = "n" > sample< / span > < span class = "p" > ()< / span >
< span class = "go" > OrderedDict([(' position' , 0), (' velocity' , 1)])< / span >
< span class = "go" > > > > < / span >
< span class = "gp" > > > > < / span > < span class = "n" > observation_space< / span > < span class = "o" > =< / span > < span class = "n" > Tuple< / span > < span class = "p" > ((< / span > < span class = "n" > Discrete< / span > < span class = "p" > (< / span > < span class = "mi" > 2< / span > < span class = "p" > ),< / span > < span class = "n" > Discrete< / span > < span class = "p" > (< / span > < span class = "mi" > 3< / span > < span class = "p" > )))< / span >
< span class = "gp" > > > > < / span > < span class = "n" > observation_space< / span > < span class = "o" > .< / span > < span class = "n" > sample< / span > < span class = "p" > ()< / span >
< span class = "go" > (1, 2)< / span >
< span class = "go" > > > > < / span >
< span class = "gp" > > > > < / span > < span class = "n" > observation_space< / span > < span class = "o" > =< / span > < span class = "n" > MultiBinary< / span > < span class = "p" > (< / span > < span class = "mi" > 5< / span > < span class = "p" > )< / span >
< span class = "gp" > > > > < / span > < span class = "n" > observation_space< / span > < span class = "o" > .< / span > < span class = "n" > sample< / span > < span class = "p" > ()< / span >
< span class = "go" > [1 1 1 0 1]< / span >
< span class = "go" > > > > < / span >
< span class = "gp" > > > > < / span > < span class = "n" > observation_space< / span > < span class = "o" > =< / span > < span class = "n" > MultiDiscrete< / span > < span class = "p" > ([< / span > < span class = "mi" > 5< / span > < span class = "p" > ,< / span > < span class = "mi" > 2< / span > < span class = "p" > ,< / span > < span class = "mi" > 2< / span > < span class = "p" > ])< / span >
< span class = "gp" > > > > < / span > < span class = "n" > observation_space< / span > < span class = "o" > .< / span > < span class = "n" > sample< / span > < span class = "p" > ()< / span >
< span class = "go" > [3 0 0]< / span >
< / pre > < / div >
< / div >
< / section >
< section id = "wrappers" >
< h2 > Wrappers< a class = "headerlink" href = "#wrappers" title = "Permalink to this heading" > #< / a > < / h2 >
< p > Wrappers are a convenient way to modify an existing environment without having to alter the underlying code directly.
Using wrappers will allow you to avoid a lot of boilerplate code and make your environment more modular. Wrappers can
also be chained to combine their effects. Most environments that are generated via < code class = "docutils literal notranslate" > < span class = "pre" > gymnasium.make< / span > < / code > will already be wrapped by default.< / p >
< p > In order to wrap an environment, you must first initialize a base environment. Then you can pass this environment along
with (possibly optional) parameters to the wrapper’ s constructor:< / p >
< div class = "highlight-python notranslate" > < div class = "highlight" > < pre > < span > < / span > < span class = "gp" > > > > < / span > < span class = "kn" > import< / span > < span class = "nn" > gymnasium< / span >
< span class = "gp" > > > > < / span > < span class = "kn" > from< / span > < span class = "nn" > gymnasium.wrappers< / span > < span class = "kn" > import< / span > < span class = "n" > RescaleAction< / span >
< span class = "gp" > > > > < / span > < span class = "n" > base_env< / span > < span class = "o" > =< / span > < span class = "n" > gymnasium< / span > < span class = "o" > .< / span > < span class = "n" > make< / span > < span class = "p" > (< / span > < span class = "s2" > " BipedalWalker-v3" < / span > < span class = "p" > )< / span >
< span class = "gp" > > > > < / span > < span class = "n" > base_env< / span > < span class = "o" > .< / span > < span class = "n" > action_space< / span >
< span class = "go" > Box([-1. -1. -1. -1.], [1. 1. 1. 1.], (4,), float32)< / span >
< span class = "gp" > > > > < / span > < span class = "n" > wrapped_env< / span > < span class = "o" > =< / span > < span class = "n" > RescaleAction< / span > < span class = "p" > (< / span > < span class = "n" > base_env< / span > < span class = "p" > ,< / span > < span class = "n" > min_action< / span > < span class = "o" > =< / span > < span class = "mi" > 0< / span > < span class = "p" > ,< / span > < span class = "n" > max_action< / span > < span class = "o" > =< / span > < span class = "mi" > 1< / span > < span class = "p" > )< / span >
< span class = "gp" > > > > < / span > < span class = "n" > wrapped_env< / span > < span class = "o" > .< / span > < span class = "n" > action_space< / span >
< span class = "go" > Box([0. 0. 0. 0.], [1. 1. 1. 1.], (4,), float32)< / span >
< / pre > < / div >
< / div >
< p > There are three very common things you might want a wrapper to do:< / p >
< ul class = "simple" >
< li > < p > Transform actions before applying them to the base environment< / p > < / li >
< li > < p > Transform observations that are returned by the base environment< / p > < / li >
< li > < p > Transform rewards that are returned by the base environment< / p > < / li >
< / ul >
< p > Such wrappers can be easily implemented by inheriting from < code class = "docutils literal notranslate" > < span class = "pre" > ActionWrapper< / span > < / code > , < code class = "docutils literal notranslate" > < span class = "pre" > ObservationWrapper< / span > < / code > , or < code class = "docutils literal notranslate" > < span class = "pre" > RewardWrapper< / span > < / code > and implementing the
respective transformation.< / p >
< p > However, sometimes you might need to implement a wrapper that does some more complicated modifications (e.g. modify the
reward based on data in < code class = "docutils literal notranslate" > < span class = "pre" > info< / span > < / code > ). Such wrappers
can be implemented by inheriting from < code class = "docutils literal notranslate" > < span class = "pre" > Wrapper< / span > < / code > .
Gymnasium already provides many commonly used wrappers for you. Some examples:< / p >
< ul class = "simple" >
< li > < p > < code class = "docutils literal notranslate" > < span class = "pre" > TimeLimit< / span > < / code > : Issue a truncated signal if a maximum number of timesteps has been exceeded (or the base environment has issued a truncated signal).< / p > < / li >
< li > < p > < code class = "docutils literal notranslate" > < span class = "pre" > ClipAction< / span > < / code > : Clip the action such that it lies in the action space (of type < code class = "docutils literal notranslate" > < span class = "pre" > Box< / span > < / code > ).< / p > < / li >
< li > < p > < code class = "docutils literal notranslate" > < span class = "pre" > RescaleAction< / span > < / code > : Rescale actions to lie in a specified interval.< / p > < / li >
< li > < p > < code class = "docutils literal notranslate" > < span class = "pre" > TimeAwareObservation< / span > < / code > : Add the index of the current timestep to the observation. This is helpful in some cases to ensure that transitions are Markov.< / p > < / li >
< / ul >
< p > If you have a wrapped environment, and you want to get the unwrapped environment underneath all of the layers of wrappers (so that you can manually call a function or change some underlying aspect of the environment), you can use the < code class = "docutils literal notranslate" > < span class = "pre" > .unwrapped< / span > < / code > attribute. If the environment is already a base environment, the < code class = "docutils literal notranslate" > < span class = "pre" > .unwrapped< / span > < / code > attribute will just return itself.< / p >
< div class = "highlight-python notranslate" > < div class = "highlight" > < pre > < span > < / span > < span class = "gp" > > > > < / span > < span class = "n" > wrapped_env< / span >
< span class = "go" > < RescaleAction< TimeLimit< BipedalWalker< BipedalWalker-v3> > > > < / span >
< span class = "gp" > > > > < / span > < span class = "n" > wrapped_env< / span > < span class = "o" > .< / span > < span class = "n" > unwrapped< / span >
< span class = "go" > < gymnasium.envs.box2d.bipedal_walker.BipedalWalker object at 0x7f87d70712d0> < / span >
< / pre > < / div >
< / div >
< / section >
< section id = "playing-within-an-environment" >
< h2 > Playing within an environment< a class = "headerlink" href = "#playing-within-an-environment" title = "Permalink to this heading" > #< / a > < / h2 >
< p > You can also play an environment with your keyboard via the < code class = "docutils literal notranslate" > < span class = "pre" > play< / span > < / code > function in < code class = "docutils literal notranslate" > < span class = "pre" > gymnasium.utils.play< / span > < / code > .< / p >
< div class = "highlight-python notranslate" > < div class = "highlight" > < pre > < span > < / span > < span class = "kn" > from< / span > < span class = "nn" > gymnasium.utils.play< / span > < span class = "kn" > import< / span > < span class = "n" > play< / span >
< span class = "n" > play< / span > < span class = "p" > (< / span > < span class = "n" > gymnasium< / span > < span class = "o" > .< / span > < span class = "n" > make< / span > < span class = "p" > (< / span > < span class = "s1" > ' Pong-v0' < / span > < span class = "p" > ))< / span >
< / pre > < / div >
< / div >
< p > This opens a window of the environment and allows you to control the agent using your keyboard.< / p >
< p > Playing with the keyboard requires a key-action map. This map should have type < code class = "docutils literal notranslate" > < span class = "pre" > dict[tuple[int],< / span > < span class = "pre" > int< / span > < span class = "pre" > |< / span > < span class = "pre" > None]< / span > < / code > , mapping each combination of pressed keys to the action to perform.
For example, if pressing the keys < code class = "docutils literal notranslate" > < span class = "pre" > w< / span > < / code > and < code class = "docutils literal notranslate" > < span class = "pre" > space< / span > < / code > at the same time is supposed to perform action < code class = "docutils literal notranslate" > < span class = "pre" > 2< / span > < / code > , then the < code class = "docutils literal notranslate" > < span class = "pre" > keys_to_action< / span > < / code > dict should look like this:< / p >
< div class = "highlight-python notranslate" > < div class = "highlight" > < pre > < span > < / span > < span class = "p" > {< / span >
< span class = "c1" > # ...< / span >
< span class = "p" > (< / span > < span class = "nb" > ord< / span > < span class = "p" > (< / span > < span class = "s1" > ' w' < / span > < span class = "p" > ),< / span > < span class = "nb" > ord< / span > < span class = "p" > (< / span > < span class = "s1" > ' ' < / span > < span class = "p" > )):< / span > < span class = "mi" > 2< / span > < span class = "p" > ,< / span >
< span class = "c1" > # ...< / span >
< span class = "p" > }< / span >
< / pre > < / div >
< / div >
< p > As a more complete example, let’ s say we wish to play with < code class = "docutils literal notranslate" > < span class = "pre" > CartPole-v0< / span > < / code > using our left and right arrow keys. The code would be as follows:< / p >
2022-09-19 20:16:35 +00:00
< div class = "highlight-python notranslate" > < div class = "highlight" > < pre > < span > < / span > < span class = "kn" > import< / span > < span class = "nn" > gymnasium< / span > < span class = "k" > as< / span > < span class = "nn" > gym< / span >
2022-09-13 19:29:14 +00:00
< span class = "kn" > import< / span > < span class = "nn" > pygame< / span >
< span class = "kn" > from< / span > < span class = "nn" > gymnasium.utils.play< / span > < span class = "kn" > import< / span > < span class = "n" > play< / span >
< span class = "n" > mapping< / span > < span class = "o" > =< / span > < span class = "p" > {(< / span > < span class = "n" > pygame< / span > < span class = "o" > .< / span > < span class = "n" > K_LEFT< / span > < span class = "p" > ,):< / span > < span class = "mi" > 0< / span > < span class = "p" > ,< / span > < span class = "p" > (< / span > < span class = "n" > pygame< / span > < span class = "o" > .< / span > < span class = "n" > K_RIGHT< / span > < span class = "p" > ,):< / span > < span class = "mi" > 1< / span > < span class = "p" > }< / span >
< span class = "n" > play< / span > < span class = "p" > (< / span > < span class = "n" > gym< / span > < span class = "o" > .< / span > < span class = "n" > make< / span > < span class = "p" > (< / span > < span class = "s2" > " CartPole-v0" < / span > < span class = "p" > ),< / span > < span class = "n" > keys_to_action< / span > < span class = "o" > =< / span > < span class = "n" > mapping< / span > < span class = "p" > )< / span >
< / pre > < / div >
< / div >
< p > where we obtain the corresponding key ID constants from pygame. If the < code class = "docutils literal notranslate" > < span class = "pre" > keys_to_action< / span > < / code > argument is not specified, then the default < code class = "docutils literal notranslate" > < span class = "pre" > keys_to_action< / span > < / code > mapping for that env is used, if provided.< / p >
< p > Furthermore, if you wish to plot real-time statistics as you play, you can use < code class = "docutils literal notranslate" > < span class = "pre" > gymnasium.utils.play.PlayPlot< / span > < / code > . Here’ s some sample code for plotting the reward for the last 5 seconds of gameplay:< / p >
< div class = "highlight-python notranslate" > < div class = "highlight" > < pre > < span > < / span > < span class = "k" > def< / span > < span class = "nf" > callback< / span > < span class = "p" > (< / span > < span class = "n" > obs_t< / span > < span class = "p" > ,< / span > < span class = "n" > obs_tp1< / span > < span class = "p" > ,< / span > < span class = "n" > action< / span > < span class = "p" > ,< / span > < span class = "n" > rew< / span > < span class = "p" > ,< / span > < span class = "n" > done< / span > < span class = "p" > ,< / span > < span class = "n" > info< / span > < span class = "p" > ):< / span >
< span class = "k" > return< / span > < span class = "p" > [< / span > < span class = "n" > rew< / span > < span class = "p" > ,]< / span >
< span class = "n" > plotter< / span > < span class = "o" > =< / span > < span class = "n" > PlayPlot< / span > < span class = "p" > (< / span > < span class = "n" > callback< / span > < span class = "p" > ,< / span > < span class = "mi" > 30< / span > < span class = "o" > *< / span > < span class = "mi" > 5< / span > < span class = "p" > ,< / span > < span class = "p" > [< / span > < span class = "s2" > " reward" < / span > < span class = "p" > ])< / span >
< span class = "n" > env< / span > < span class = "o" > =< / span > < span class = "n" > gymnasium< / span > < span class = "o" > .< / span > < span class = "n" > make< / span > < span class = "p" > (< / span > < span class = "s2" > " Pong-v0" < / span > < span class = "p" > )< / span >
< span class = "n" > play< / span > < span class = "p" > (< / span > < span class = "n" > env< / span > < span class = "p" > ,< / span > < span class = "n" > callback< / span > < span class = "o" > =< / span > < span class = "n" > plotter< / span > < span class = "o" > .< / span > < span class = "n" > callback< / span > < span class = "p" > )< / span >
< / pre > < / div >
< / div >
< / section >
< / section >
< / article >
< / div >
< footer >
< div class = "related-pages" >
< a class = "next-page" href = "../../api/core/" >
< div class = "page-info" >
< div class = "context" >
< span > Next< / span >
< / div >
< div class = "title" > Core< / div >
< / div >
< svg class = "furo-related-icon" > < use href = "#svg-arrow-right" > < / use > < / svg >
< / a >
< / div >
< div class = "bottom-of-page" >
< div class = "left-details" >
< div class = "copyright" >
Copyright © 2022, Farama Foundation
< / div >
Made with < a href = "https://www.sphinx-doc.org/" > Sphinx< / a > and < a class = "muted-link" href = "https://pradyunsg.me" > @pradyunsg< / a > 's
< a href = "https://github.com/pradyunsg/furo" > Furo< / a >
< / div >
< div class = "right-details" >
< div class = "icons" >
< / div >
< / div >
< / div >
< / footer >
< / div >
< aside class = "toc-drawer" >
< div class = "toc-sticky toc-scroll" >
< div class = "toc-title-container" >
< span class = "toc-title" >
On this page
< / span >
< / div >
< div class = "toc-tree-container" >
< div class = "toc-tree" >
< ul >
< li > < a class = "reference internal" href = "#" > Basic Usage< / a > < ul >
< li > < a class = "reference internal" href = "#initializing-environments" > Initializing Environments< / a > < / li >
< li > < a class = "reference internal" href = "#interacting-with-the-environment" > Interacting with the Environment< / a > < / li >
< li > < a class = "reference internal" href = "#checking-api-conformity" > Checking API-Conformity< / a > < / li >
< li > < a class = "reference internal" href = "#spaces" > Spaces< / a > < / li >
< li > < a class = "reference internal" href = "#wrappers" > Wrappers< / a > < / li >
< li > < a class = "reference internal" href = "#playing-within-an-environment" > Playing within an environment< / a > < / li >
< / ul >
< / li >
< / ul >
< / div >
< / div >
< / div >
< / aside >
< / div >
< / div > < script data-url_root = "../../" id = "documentation_options" src = "../../_static/documentation_options.js" > < / script >
< script src = "../../_static/jquery.js" > < / script >
< script src = "../../_static/underscore.js" > < / script >
< script src = "../../_static/_sphinx_javascript_frameworks_compat.js" > < / script >
< script src = "../../_static/doctools.js" > < / script >
< script src = "../../_static/scripts/furo.js" > < / script >
< script >
// Cookie / analytics notice.
//
// Builds a dismissible notice (div.cookie_alert > div.cookie_container holding
// a <p> message and a close <button>) and appends it to <body>, unless the
// "shownCookieAlert" flag is already present in localStorage. Clicking the
// close button records the flag and hides the notice.
(() => {
  const STORAGE_KEY = "shownCookieAlert";

  // localStorage access can throw (for example when the browser blocks site
  // data). Treat that as "not dismissed yet" so the notice is still shown
  // instead of aborting the script with an uncaught exception.
  const wasDismissed = () => {
    try {
      return Boolean(localStorage.getItem(STORAGE_KEY));
    } catch (err) {
      return false;
    }
  };

  // Best effort: if the flag cannot be persisted the notice will simply
  // reappear on the next page load.
  const rememberDismissal = () => {
    try {
      localStorage.setItem(STORAGE_KEY, "true");
    } catch (err) {
      // Storage unavailable or full; nothing else to do.
    }
  };

  if (wasDismissed()) {
    return;
  }

  const boxElem = document.createElement("div");
  boxElem.classList.add("cookie_alert");

  const containerElem = document.createElement("div");
  containerElem.classList.add("cookie_container");

  // Static markup only (no untrusted input), so innerHTML is acceptable here.
  // The continuation lines of the template literal are deliberately left
  // unindented so the string content stays exactly as before.
  const textElem = document.createElement("p");
  textElem.innerHTML = `This page uses < a href = "https://analytics.google.com/" >
Google Analytics< / a > to collect statistics. You can disable it by blocking
the JavaScript coming from www.google-analytics.com.`;
  containerElem.appendChild(textElem);

  const closeBtn = document.createElement("button");
  // Explicit type so the button can never act as a form submit control.
  closeBtn.type = "button";
  // The button contains only an icon (the SVG's <title> is empty), so give
  // it an accessible name for screen readers.
  closeBtn.setAttribute("aria-label", "Dismiss cookie notice");
  closeBtn.innerHTML = `<?xml version="1.0" ?> < svg viewBox = "0 0 32 32" xmlns = "http://www.w3.org/2000/svg" > < defs > < style > . cls-1 { fill : none ; stroke : #000 ; stroke-linecap : round ; stroke-linejoin : round ; stroke-width : 2 px ; } < / style > < / defs > < title / > < g id = "cross" > < line class = "cls-1" x1 = "7" x2 = "25" y1 = "7" y2 = "25" / > < line class = "cls-1" x1 = "7" x2 = "25" y1 = "25" y2 = "7" / > < / g > < / svg > `;
  closeBtn.addEventListener("click", () => {
    rememberDismissal();
    // Hide the notice even when the flag could not be stored.
    boxElem.style.display = "none";
  });
  containerElem.appendChild(closeBtn);

  boxElem.appendChild(containerElem);
  document.body.appendChild(boxElem);
})();
< / script >
<!-- Google tag (gtag.js) -->
< script async src = "https://www.googletagmanager.com/gtag/js?id=G-JGXSLW7N06" > < / script >
< script >
// Google tag bootstrap for the ID 'G-JGXSLW7N06' (the same ID that appears in
// the googletagmanager.com script URL loaded just above).
// Reuse the global dataLayer if one is already defined; otherwise start with
// an empty array.
window.dataLayer = window.dataLayer || [];
// Global helper: appends the call's `arguments` object (not a copied array)
// to dataLayer.
// NOTE(review): presumably the externally loaded gtag.js consumes these
// entries and relies on them being raw `arguments` objects; confirm before
// rewriting this with rest parameters or an arrow function.
function gtag(){dataLayer.push(arguments);}
// Queue a 'js' entry carrying the current Date.
gtag('js', new Date());
// Queue a 'config' entry for the ID.
gtag('config', 'G-JGXSLW7N06');
< / script > < / body >
< / html >