mirror of
https://github.com/Farama-Foundation/Gymnasium.git
synced 2025-08-17 20:39:12 +00:00
* initial draft of optional info dict in reset function, implemented for cartpole, tests seem to be passing * merged core.py * updated return type annotation for reset function in core.py * optional metadata with return_info from reset added for all first party environments, with corresponding tests. Incomplete implementation for wrappers and vector wrappers * removed Optional type for return_info arguments * added tests for return_info to normalize wrapper and sync_vector_env * autoformatted using black * added optional reset metadata tests to several wrappers * added return_info capability to async_vector_env.py and test to verify functionality * added optional return_info test for record_video.py * removed tests for mujoco environments * autoformatted * improved test coverage for optional reset return_info * re-removed unit test envs accidentally reintroduced in merge * removed unnecessary import * changes based on code-review * small fix to core wrapper typing and autoformatted record_episode_stats * small change to pass flake8 style
166 lines
5.3 KiB
Python
166 lines
5.3 KiB
Python
# Module-level credits attribute naming the credited contributor(s).
__credits__ = ["Carlos Luis"]
|
|
|
|
from typing import Optional
|
|
|
|
import gym
|
|
from gym import spaces
|
|
from gym.utils import seeding
|
|
import numpy as np
|
|
from os import path
|
|
|
|
|
|
class PendulumEnv(gym.Env):
    """
    ## Description

    The inverted pendulum swingup problem is a classic problem in the control literature. In this
    version of the problem, the pendulum starts in a random position, and the goal is to swing it up so
    it stays upright.

    The diagram below specifies the coordinate system used for the implementation of the pendulum's
    dynamic equations.

    - `x-y`: cartesian coordinates of the pendulum's end in meters.
    - `theta`: angle in radians.
    - `tau`: torque in `N * m`. Defined as positive _counter-clockwise_.

    ## Action Space
    The action is the torque applied to the pendulum.

    | Num | Action | Min  | Max |
    |-----|--------|------|-----|
    | 0   | Torque | -2.0 | 2.0 |


    ## Observation Space
    The observations correspond to the x-y coordinate of the pendulum's end, and its angular velocity.

    | Num | Observation      | Min  | Max |
    |-----|------------------|------|-----|
    | 0   | x = cos(theta)   | -1.0 | 1.0 |
    | 1   | y = sin(theta)   | -1.0 | 1.0 |
    | 2   | Angular Velocity | -8.0 | 8.0 |

    ## Rewards
    The reward is defined as:
    ```
    r = -(theta^2 + 0.1*theta_dt^2 + 0.001*torque^2)
    ```
    where `theta` is the pendulum's angle normalized between `[-pi, pi]`.
    Based on the above equation, the minimum reward that can be obtained is `-(pi^2 + 0.1*8^2 +
    0.001*2^2) = -16.2736044`, while the maximum reward is zero (pendulum is
    upright with zero velocity and no torque being applied).

    ## Starting State
    The starting state is a random angle in `[-pi, pi]` and a random angular velocity in `[-1,1]`.

    ## Episode Termination
    An episode terminates after 200 steps. There's no other criteria for termination.

    ## Arguments
    - `g`: acceleration of gravity measured in `(m/s^2)` used to calculate the pendulum dynamics. The default is
    `g=10.0`.

    ```
    gym.make('Pendulum-v1', g=9.81)
    ```

    ## Version History

    * v1: Simplify the math equations, no difference in behavior.
    * v0: Initial versions release (1.0.0)
    """

    metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 30}

    def __init__(self, g=10.0):
        """Set up the physical constants and the action/observation spaces.

        Args:
            g: Acceleration of gravity used by the dynamics in `step`.
        """
        self.max_speed = 8  # bound applied to the angular velocity
        self.max_torque = 2.0  # bound applied to the action
        self.dt = 0.05  # integration time step
        self.g = g
        self.m = 1.0
        self.l = 1.0
        self.viewer = None  # created lazily on the first render() call

        # Both are assigned by reset(); declaring them here keeps the set of
        # instance attributes the same before and after the first reset.
        self.state = None
        self.last_u = None

        high = np.array([1.0, 1.0, self.max_speed], dtype=np.float32)
        self.action_space = spaces.Box(
            low=-self.max_torque, high=self.max_torque, shape=(1,), dtype=np.float32
        )
        self.observation_space = spaces.Box(low=-high, high=high, dtype=np.float32)

    def step(self, u):
        """Advance the pendulum by one time step of length `self.dt`.

        Args:
            u: Indexable action; its first element is the torque, which is
                clipped to `[-max_torque, max_torque]` before use.

        Returns:
            A tuple `(observation, reward, done, info)` where `observation`
            comes from `_get_obs()`, `reward` is the negated cost, `done` is
            always `False` (this method never ends the episode itself) and
            `info` is an empty dict.
        """
        th, thdot = self.state  # th := theta

        g = self.g
        m = self.m
        l = self.l
        dt = self.dt

        u = np.clip(u, -self.max_torque, self.max_torque)[0]
        self.last_u = u  # for rendering
        # The cost is evaluated on the state *before* the update.
        costs = angle_normalize(th) ** 2 + 0.1 * thdot ** 2 + 0.001 * (u ** 2)

        newthdot = thdot + (3 * g / (2 * l) * np.sin(th) + 3.0 / (m * l ** 2) * u) * dt
        newthdot = np.clip(newthdot, -self.max_speed, self.max_speed)
        # The angle is integrated with the already-updated (clipped) velocity.
        newth = th + newthdot * dt

        self.state = np.array([newth, newthdot])
        return self._get_obs(), -costs, False, {}

    def reset(
        self,
        *,
        seed: Optional[int] = None,
        return_info: bool = False,
        options: Optional[dict] = None
    ):
        """Sample a new starting state and return the first observation.

        Args:
            seed: Forwarded to `super().reset(seed=seed)`.
            return_info: When true, an (empty) info dict is returned alongside
                the observation.
            options: Accepted for interface compatibility; not used here.

        Returns:
            The observation, or `(observation, {})` if `return_info` is true.
        """
        super().reset(seed=seed)
        # Angle is drawn from [-pi, pi], angular velocity from [-1, 1].
        high = np.array([np.pi, 1])
        self.state = self.np_random.uniform(low=-high, high=high)
        self.last_u = None
        if not return_info:
            return self._get_obs()
        else:
            return self._get_obs(), {}

    def _get_obs(self):
        """Return `[cos(theta), sin(theta), theta_dot]` as a float32 array."""
        theta, thetadot = self.state
        return np.array([np.cos(theta), np.sin(theta), thetadot], dtype=np.float32)

    def render(self, mode="human"):
        """Draw the current state, building the viewer on first use.

        Args:
            mode: `"rgb_array"` asks the viewer to return the frame as an
                array; any other value renders without that request.

        Returns:
            Whatever `self.viewer.render(...)` returns for the chosen mode.
        """
        if self.viewer is None:
            # Imported lazily so the rendering backend is only needed when
            # render() is actually called.
            from gym.utils import pyglet_rendering

            self.viewer = pyglet_rendering.Viewer(500, 500)
            self.viewer.set_bounds(-2.2, 2.2, -2.2, 2.2)
            rod = pyglet_rendering.make_capsule(1, 0.2)
            rod.set_color(0.8, 0.3, 0.3)
            self.pole_transform = pyglet_rendering.Transform()
            rod.add_attr(self.pole_transform)
            self.viewer.add_geom(rod)
            axle = pyglet_rendering.make_circle(0.05)
            axle.set_color(0, 0, 0)
            self.viewer.add_geom(axle)
            fname = path.join(path.dirname(__file__), "assets/clockwise.png")
            self.img = pyglet_rendering.Image(fname, 1.0, 1.0)
            self.imgtrans = pyglet_rendering.Transform()
            self.img.add_attr(self.imgtrans)

        # The image is re-added on every frame as a one-time geometry.
        self.viewer.add_onetime(self.img)
        self.pole_transform.set_rotation(self.state[0] + np.pi / 2)
        if self.last_u is not None:
            # Scale the image with the last torque; the x scale carries the
            # opposite sign of the torque, the y scale its magnitude.
            self.imgtrans.scale = (-self.last_u / 2, np.abs(self.last_u) / 2)

        return self.viewer.render(return_rgb_array=mode == "rgb_array")

    def close(self):
        """Close the viewer, if one was created, and forget it."""
        if self.viewer:
            self.viewer.close()
            self.viewer = None
|
|
|
|
|
|
def angle_normalize(x):
    """Wrap the angle `x` (radians) into the interval `[-pi, pi)`.

    The angle is shifted by `pi`, reduced modulo one full turn, and shifted
    back, so the result differs from `x` by a whole number of turns.
    """
    full_turn = 2 * np.pi
    shifted = x + np.pi
    return (shifted % full_turn) - np.pi
|