mirror of
https://github.com/Farama-Foundation/Gymnasium.git
synced 2025-08-31 18:12:53 +00:00
* Updated cartpole-v0 to v1 to prevent warning and added pytest.mark.filterwarnings for tests where warnings are unavoidable * Change np.bool to bool as numpy raises a warning and bool is the suggested solution * Seeding randint will be deprecated in the future; integers is the new solution * Fixed errors thrown when the video recorder is deleted but not closed * spaces.Box expects a floating array, updated all cases where this was not true and modified float32 to float64 as float arrays default to float64. Otherwise spaces.Box raises a warning that dtype precision (float32) is lower than array precision (float64). * Added pytest.mark.filterwarnings to prevent the raising of an intended warning * Added comment to explain why a warning is raised that can't be prevented without a version update to the environment * Added comment to explain why the warning is raised * Changed values to float as expected by the box, which defaults to float64 * Removed --forked from pytest as the pytest-forked project is no longer being maintained and was not raising warnings as expected * When AsyncVectorEnv has shared_memory=True then a ValueError is raised before _state is initialised. Therefore, on the destruction of the env an error is thrown in .close_extra as _state does not exist * Possible fix for what was causing an error in test_call_async_vector_env by ensuring that pygame resources are released * Pygame throws an error with ALSA when closed, using a fix from PettingZoo (https://github.com/Farama-Foundation/PettingZoo/blob/master/pettingzoo/__init__.py). 
We use the dsp audiodriver to prevent this issue * Modification due to running pre-commit locally * Updated cartpole-v0 to v1 to prevent warning and added pytest.mark.filterwarnings for tests where warnings are unavoidable * Change np.bool to bool as numpy raises a warning and bool is the suggested solution * Seeding randint will be deprecated in the future; integers is the new solution * Fixed errors thrown when the video recorder is deleted but not closed * spaces.Box expects a floating array, updated all cases where this was not true and modified float32 to float64 as float arrays default to float64. Otherwise spaces.Box raises a warning that dtype precision (float32) is lower than array precision (float64). * Added pytest.mark.filterwarnings to prevent the raising of an intended warning * Added comment to explain why a warning is raised that can't be prevented without a version update to the environment * Added comment to explain why the warning is raised * Changed values to float as expected by the box, which defaults to float64 * Removed --forked from pytest as the pytest-forked project is no longer being maintained and was not raising warnings as expected * When AsyncVectorEnv has shared_memory=True then a ValueError is raised before _state is initialised. Therefore, on the destruction of the env an error is thrown in .close_extra as _state does not exist * Possible fix for what was causing an error in test_call_async_vector_env by ensuring that pygame resources are released * Pygame throws an error with ALSA when closed, using a fix from PettingZoo (https://github.com/Farama-Foundation/PettingZoo/blob/master/pettingzoo/__init__.py). We use the dsp audiodriver to prevent this issue * Modification due to running pre-commit locally
230 lines
7.9 KiB
Python
230 lines
7.9 KiB
Python
__credits__ = ["Carlos Luis"]
|
|
|
|
from typing import Optional
|
|
from os import path
|
|
|
|
import numpy as np
|
|
import pygame
|
|
from pygame import gfxdraw
|
|
|
|
import gym
|
|
from gym import spaces
|
|
from gym.utils import seeding
|
|
|
|
|
|
class PendulumEnv(gym.Env):
    """
    ### Description

    The inverted pendulum swingup problem is based on the classic problem in control theory. The system consists of a pendulum attached at one end to a fixed point, and the other end being free. The pendulum starts in a random position and the goal is to apply torque on the free end to swing it into an upright position, with its center of gravity right above the fixed point.

    The diagram below specifies the coordinate system used for the implementation of the pendulum's
    dynamic equations.

    

    - `x-y`: cartesian coordinates of the pendulum's end in meters.
    - `theta` : angle in radians.
    - `tau`: torque in `N m`. Defined as positive _counter-clockwise_.

    ### Action Space

    The action is a `ndarray` with shape `(1,)` representing the torque applied to the free end of the pendulum.

    | Num | Action | Min  | Max |
    |-----|--------|------|-----|
    | 0   | Torque | -2.0 | 2.0 |

    ### Observation Space

    The observation is a `ndarray` with shape `(3,)` representing the x-y coordinates of the pendulum's free end and its angular velocity.

    | Num | Observation      | Min  | Max |
    |-----|------------------|------|-----|
    | 0   | x = cos(theta)   | -1.0 | 1.0 |
    | 1   | y = sin(theta)   | -1.0 | 1.0 |
    | 2   | Angular Velocity | -8.0 | 8.0 |

    ### Rewards

    The reward function is defined as:

    *r = -(theta<sup>2</sup> + 0.1 * theta_dt<sup>2</sup> + 0.001 * torque<sup>2</sup>)*

    where `theta` is the pendulum's angle normalized between *[-pi, pi]* (with 0 being in the upright position).
    Based on the above equation, the minimum reward that can be obtained is *-(pi<sup>2</sup> + 0.1 * 8<sup>2</sup> + 0.001 * 2<sup>2</sup>) = -16.2736044*, while the maximum reward is zero (pendulum is
    upright with zero velocity and no torque applied).

    ### Starting State

    The starting state is a random angle in *[-pi, pi]* and a random angular velocity in *[-1,1]*.

    ### Episode Termination

    The episode terminates at 200 time steps.

    ### Arguments

    - `g`: acceleration of gravity measured in *(m s<sup>-2</sup>)* used to calculate the pendulum dynamics. The default value is g = 10.0 .

    ```
    gym.make('Pendulum-v1', g=9.81)
    ```

    ### Version History

    * v1: Simplify the math equations, no difference in behavior.
    * v0: Initial versions release (1.0.0)

    """

    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 30}

    def __init__(self, g=10.0):
        """Set the physical constants, rendering handles and the action/observation spaces.

        Args:
            g: acceleration of gravity used by the dynamics in `step`.
        """
        self.max_speed = 8
        self.max_torque = 2.0
        self.dt = 0.05
        self.g = g
        self.m = 1.0
        self.l = 1.0
        # Rendering handles are created lazily on the first call to `render`.
        self.screen = None
        self.clock = None
        self.isopen = True

        self.screen_dim = 500

        high = np.array([1.0, 1.0, self.max_speed], dtype=np.float32)
        # This will throw a warning in tests/envs/test_envs in utils/env_checker.py as the space is not symmetric
        #   or normalised as max_torque == 2 by default. Ignoring the issue here as the default settings are too old
        #   to update to follow the openai gym api
        self.action_space = spaces.Box(
            low=-self.max_torque, high=self.max_torque, shape=(1,), dtype=np.float32
        )
        self.observation_space = spaces.Box(low=-high, high=high, dtype=np.float32)

    def step(self, u):
        """Apply a torque and integrate the pendulum dynamics for one step of `self.dt`.

        Args:
            u: indexable array-like whose first element is the requested torque;
                it is clipped to `[-max_torque, max_torque]` before use.

        Returns:
            A tuple `(observation, reward, done, info)`. The reward is the negated
            cost evaluated on the state *before* the update, `done` is always
            `False` and `info` is an empty dict.
        """
        th, thdot = self.state  # th := theta

        g = self.g
        m = self.m
        length = self.l
        dt = self.dt

        u = np.clip(u, -self.max_torque, self.max_torque)[0]
        self.last_u = u  # for rendering
        costs = angle_normalize(th) ** 2 + 0.1 * thdot ** 2 + 0.001 * (u ** 2)

        newthdot = (
            thdot
            + (3 * g / (2 * length) * np.sin(th) + 3.0 / (m * length ** 2) * u) * dt
        )
        newthdot = np.clip(newthdot, -self.max_speed, self.max_speed)
        # The angle is advanced with the already-updated (and clipped) velocity.
        newth = th + newthdot * dt

        self.state = np.array([newth, newthdot])
        return self._get_obs(), -costs, False, {}

    def reset(
        self,
        *,
        seed: Optional[int] = None,
        return_info: bool = False,
        options: Optional[dict] = None
    ):
        """Sample a new start state and clear the last applied torque.

        Args:
            seed: forwarded to `super().reset`.
            return_info: when true, an (empty) info dict is returned alongside
                the observation.
            options: accepted for API compatibility; not used by this method.

        Returns:
            The initial observation, or `(observation, {})` if `return_info` is true.
        """
        super().reset(seed=seed)
        high = np.array([np.pi, 1])
        self.state = self.np_random.uniform(low=-high, high=high)
        self.last_u = None
        if not return_info:
            return self._get_obs()
        else:
            return self._get_obs(), {}

    def _get_obs(self):
        """Return `[cos(theta), sin(theta), theta_dot]` for the current state as float32."""
        theta, thetadot = self.state
        return np.array([np.cos(theta), np.sin(theta), thetadot], dtype=np.float32)

    def render(self, mode="human"):
        """Draw the pendulum (and the last applied torque, if any) with pygame.

        Args:
            mode: `"human"` updates the pygame display at `metadata["render_fps"]`;
                `"rgb_array"` returns the frame as an array.

        Returns:
            For `"rgb_array"`, the screen pixels transposed with axes `(1, 0, 2)`;
            for any other mode, `self.isopen`.
        """
        if self.screen is None:
            pygame.init()
            pygame.display.init()
            self.screen = pygame.display.set_mode((self.screen_dim, self.screen_dim))
        if self.clock is None:
            self.clock = pygame.time.Clock()

        self.surf = pygame.Surface((self.screen_dim, self.screen_dim))
        self.surf.fill((255, 255, 255))

        bound = 2.2
        scale = self.screen_dim / (bound * 2)
        offset = self.screen_dim // 2

        rod_length = 1 * scale
        rod_width = 0.2 * scale
        left, right, top, bottom = 0, rod_length, rod_width / 2, -rod_width / 2
        coords = [(left, bottom), (left, top), (right, top), (right, bottom)]
        transformed_coords = []
        for c in coords:
            # Rotate each corner by the pendulum angle, then move it to the screen centre.
            c = pygame.math.Vector2(c).rotate_rad(self.state[0] + np.pi / 2)
            c = (c[0] + offset, c[1] + offset)
            transformed_coords.append(c)
        gfxdraw.aapolygon(self.surf, transformed_coords, (204, 77, 77))
        gfxdraw.filled_polygon(self.surf, transformed_coords, (204, 77, 77))

        gfxdraw.aacircle(self.surf, offset, offset, int(rod_width / 2), (204, 77, 77))
        gfxdraw.filled_circle(
            self.surf, offset, offset, int(rod_width / 2), (204, 77, 77)
        )

        rod_end = (rod_length, 0)
        rod_end = pygame.math.Vector2(rod_end).rotate_rad(self.state[0] + np.pi / 2)
        rod_end = (int(rod_end[0] + offset), int(rod_end[1] + offset))
        gfxdraw.aacircle(
            self.surf, rod_end[0], rod_end[1], int(rod_width / 2), (204, 77, 77)
        )
        gfxdraw.filled_circle(
            self.surf, rod_end[0], rod_end[1], int(rod_width / 2), (204, 77, 77)
        )

        if self.last_u is not None:
            # The torque indicator is only needed once a torque has been applied,
            # so the image is not read from disk on frames that would not use it.
            fname = path.join(path.dirname(__file__), "assets/clockwise.png")
            img = pygame.image.load(fname)
            scale_img = pygame.transform.smoothscale(
                img, (scale * np.abs(self.last_u) / 2, scale * np.abs(self.last_u) / 2)
            )
            # Mirror the arrow horizontally when the torque is positive.
            is_flip = bool(self.last_u > 0)
            scale_img = pygame.transform.flip(scale_img, is_flip, True)
            self.surf.blit(
                scale_img,
                (
                    offset - scale_img.get_rect().centerx,
                    offset - scale_img.get_rect().centery,
                ),
            )

        # drawing axle
        gfxdraw.aacircle(self.surf, offset, offset, int(0.05 * scale), (0, 0, 0))
        gfxdraw.filled_circle(self.surf, offset, offset, int(0.05 * scale), (0, 0, 0))

        # Flip vertically before blitting the finished frame onto the screen.
        self.surf = pygame.transform.flip(self.surf, False, True)
        self.screen.blit(self.surf, (0, 0))
        if mode == "human":
            pygame.event.pump()
            self.clock.tick(self.metadata["render_fps"])
            pygame.display.flip()

        if mode == "rgb_array":
            return np.transpose(
                np.array(pygame.surfarray.pixels3d(self.screen)), axes=(1, 0, 2)
            )
        else:
            return self.isopen

    def close(self):
        """Shut down pygame if a screen was created and mark the environment as closed."""
        if self.screen is not None:
            pygame.display.quit()
            pygame.quit()
            self.isopen = False
|
|
|
|
|
|
def angle_normalize(x):
    """Wrap an angle given in radians into the half-open interval [-pi, pi)."""
    full_turn = 2 * np.pi
    shifted = x + np.pi
    return (shifted % full_turn) - np.pi
|