@@ -146,6 +146,8 @@ title: {title_env_name}
    if hasattr(low, "shape"):
        if len(low.shape) == 3:
            low = low[0][0][0]
+   if env_type == "mujoco":
+       low = low[0]
    low = np.round(low, 2)
    low = str(low).replace("\n", " ")
    env_table += f"| Observation Low | {low} |\n"
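For context, this hunk is in the docs-generation script that builds each environment's page: an image-style 3-D ``Box`` space shares one bound across all pixels, so the single scalar ``low[0][0][0]`` stands in for the whole array, and the added branch presumably reduces a MuJoCo bound vector to its first element for the same reason (MuJoCo bounds are typically uniform). A minimal sketch of the formatting path, assuming a toy image space in place of a real environment (``env_table`` as in the hunk above):

    import numpy as np
    from gymnasium import spaces

    # Every pixel of an image-style Box shares the same low bound,
    # so one scalar is representative of the whole array.
    space = spaces.Box(low=0, high=255, shape=(210, 160, 3), dtype=np.uint8)
    low = space.low
    if hasattr(low, "shape") and len(low.shape) == 3:
        low = low[0][0][0]
    low = np.round(low, 2)
    low = str(low).replace("\n", " ")
    env_table = f"| Observation Low | {low} |\n"  # -> "| Observation Low | 0 |"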
@@ -1,3 +1,4 @@
+# fmt: off
"""
Make your own custom environment
================================
@@ -142,23 +143,23 @@ class GridWorldEnv(gym.Env):
        self.window = None
        self.clock = None


# %%
# Constructing Observations From Environment States
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Since we will need to compute observations both in ``reset`` and
# ``step``, it is often convenient to have a (private) method ``_get_obs``
# that translates the environment’s state into an observation. However,
# this is not mandatory and you may as well compute observations in
# ``reset`` and ``step`` separately:


    def _get_obs(self):
        return {"agent": self._agent_location, "target": self._target_location}

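As a cross-reference: the dict returned by ``_get_obs`` has to match the ``observation_space`` declared in ``__init__``. That declaration sits outside this hunk; in this tutorial it is, roughly (``size`` being the grid side length, and ``spaces`` imported from ``gymnasium``):

    self.observation_space = spaces.Dict(
        {
            "agent": spaces.Box(0, size - 1, shape=(2,), dtype=int),
            "target": spaces.Box(0, size - 1, shape=(2,), dtype=int),
        }
    )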
# %%
# We can also implement a similar method for the auxiliary information
# that is returned by ``step`` and ``reset``. In our case, we would like
# to provide the Manhattan distance between the agent and the target:


    def _get_info(self):
        return {
@@ -167,34 +168,34 @@ class GridWorldEnv(gym.Env):
            )
        }

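The hunk boundary cuts out the middle of ``_get_info``; per the prose above it computes the Manhattan distance, i.e. the L1 norm, so the full method reads roughly:

    def _get_info(self):
        return {
            "distance": np.linalg.norm(
                self._agent_location - self._target_location, ord=1
            )
        }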
# %%
# Oftentimes, info will also contain some data that is only available
# inside the ``step`` method (e.g. individual reward terms). In that case,
# we would have to update the dictionary that is returned by ``_get_info``
# in ``step``.

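A minimal sketch of that pattern inside ``step`` (the ``reward_terms`` key and its value are made up for illustration):

    # Hypothetical example: expose data that only exists inside step
    info = self._get_info()
    info["reward_terms"] = {"distance": -0.1}  # made-up illustrative value
    return observation, reward, terminated, False, info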
# %%
# Reset
# ~~~~~
#
# The ``reset`` method will be called to initiate a new episode. You may
# assume that the ``step`` method will not be called before ``reset`` has
# been called. Moreover, ``reset`` should be called whenever a done signal
# has been issued. Users may pass the ``seed`` keyword to ``reset`` to
# initialize any random number generator that is used by the environment
# to a deterministic state. It is recommended to use the random number
# generator ``self.np_random`` that is provided by the environment’s base
# class, ``gymnasium.Env``. If you only use this RNG, you do not need to
# worry much about seeding, *but you need to remember to call
# ``super().reset(seed=seed)``* to make sure that ``gymnasium.Env``
# correctly seeds the RNG. Once this is done, we can randomly set the
# state of our environment. In our case, we randomly choose the agent’s
# location and randomly sample target positions until the target does not
# coincide with the agent’s position.
#
# The ``reset`` method should return a tuple of the initial observation
# and some auxiliary information. We can use the methods ``_get_obs`` and
# ``_get_info`` that we implemented earlier for that:


    def reset(self, seed=None, options=None):
        # We need the following line to seed self.np_random
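The body of ``reset`` is elided by the hunk boundary; following the prose above, it looks roughly like this (``self.size`` being the grid side length):

    super().reset(seed=seed)

    # Choose the agent's location uniformly at random
    self._agent_location = self.np_random.integers(0, self.size, size=2, dtype=int)

    # Sample the target's location until it does not coincide with the agent's
    self._target_location = self._agent_location
    while np.array_equal(self._target_location, self._agent_location):
        self._target_location = self.np_random.integers(
            0, self.size, size=2, dtype=int
        )

    observation = self._get_obs()
    info = self._get_info()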
@@ -218,19 +219,19 @@ class GridWorldEnv(gym.Env):

        return observation, info


# %%
# Step
# ~~~~
#
# The ``step`` method usually contains most of the logic of your
# environment. It accepts an ``action``, computes the state of the
# environment after applying that action and returns the 5-tuple
# ``(observation, reward, terminated, truncated, info)``. Once the new
# state of the environment has been computed, we can check whether it is a
# terminal state and we set ``terminated`` accordingly. Since we are using
# sparse binary rewards in ``GridWorldEnv``, computing ``reward`` is
# trivial once we know ``terminated``. To gather ``observation`` and
# ``info``, we can again make use of ``_get_obs`` and ``_get_info``:


    def step(self, action):
        # Map the action (element of {0,1,2,3}) to the direction we walk in
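Again the hunk elides the body; per the prose it follows this pattern, with ``self._action_to_direction`` being the action-to-vector dict from ``__init__``:

    direction = self._action_to_direction[action]
    # We use `np.clip` to make sure we don't leave the grid
    self._agent_location = np.clip(
        self._agent_location + direction, 0, self.size - 1
    )
    # An episode is done iff the agent has reached the target
    terminated = np.array_equal(self._agent_location, self._target_location)
    reward = 1 if terminated else 0  # Binary sparse rewards
    observation = self._get_obs()
    info = self._get_info()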
@@ -250,90 +251,90 @@ class GridWorldEnv(gym.Env):

        return observation, reward, terminated, False, info


# %%
# Rendering
# ~~~~~~~~~
#
# Here, we are using PyGame for rendering. A similar approach to rendering
# is used in many environments that are included with Gymnasium and you
# can use it as a skeleton for your own environments:

    def render(self):
        if self.render_mode == "rgb_array":
            return self._render_frame()

    def _render_frame(self):
        if self.window is None and self.render_mode == "human":
            pygame.init()
            pygame.display.init()
            self.window = pygame.display.set_mode(
                (self.window_size, self.window_size)
            )
        if self.clock is None and self.render_mode == "human":
            self.clock = pygame.time.Clock()

        canvas = pygame.Surface((self.window_size, self.window_size))
        canvas.fill((255, 255, 255))
        pix_square_size = (
            self.window_size / self.size
        )  # The size of a single grid square in pixels

        # First we draw the target
        pygame.draw.rect(
            canvas,
            (255, 0, 0),
            pygame.Rect(
                pix_square_size * self._target_location,
                (pix_square_size, pix_square_size),
            ),
        )
        # Now we draw the agent
        pygame.draw.circle(
            canvas,
            (0, 0, 255),
            (self._agent_location + 0.5) * pix_square_size,
            pix_square_size / 3,
        )

        # Finally, add some gridlines
        for x in range(self.size + 1):
            pygame.draw.line(
                canvas,
                0,
                (0, pix_square_size * x),
                (self.window_size, pix_square_size * x),
                width=3,
            )
            pygame.draw.line(
                canvas,
                0,
                (pix_square_size * x, 0),
                (pix_square_size * x, self.window_size),
                width=3,
            )

        if self.render_mode == "human":
            # The following line copies our drawings from `canvas` to the visible window
            self.window.blit(canvas, canvas.get_rect())
            pygame.event.pump()
            pygame.display.update()

            # We need to ensure that human-rendering occurs at the predefined framerate.
            # The following line will automatically add a delay to keep the framerate stable.
            self.clock.tick(self.metadata["render_fps"])
        else:  # rgb_array
            return np.transpose(
                np.array(pygame.surfarray.pixels3d(canvas)), axes=(1, 0, 2)
            )


# %%
# Close
# ~~~~~
#
# The ``close`` method should close any open resources that were used by
# the environment. In many cases, you don’t actually have to bother to
# implement this method. However, in our example ``render_mode`` may be
# ``"human"`` and we might need to close the window that has been opened:


    def close(self):
        if self.window is not None:
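The diff ends mid-method; following the prose above, the body performs the standard PyGame teardown, as a sketch:

    def close(self):
        if self.window is not None:
            pygame.display.quit()
            pygame.quit()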