2016-04-27 08:00:58 -07:00
|
|
|
"""
|
|
|
|
Classic cart-pole system implemented by Rich Sutton et al.
|
2017-06-14 16:27:42 -04:00
|
|
|
Copied from http://incompleteideas.net/sutton/book/code/pole.c
|
|
|
|
permalink: https://perma.cc/C9ZM-652R
|
2016-04-27 08:00:58 -07:00
|
|
|
"""
|
|
|
|
|
|
|
|
import math
|
2021-12-08 22:14:15 +01:00
|
|
|
from typing import Optional
|
|
|
|
|
2016-04-27 08:00:58 -07:00
|
|
|
import gym
|
Cleanup, removal of unmaintained code (#836)
* add dtype to Box
* remove board_game, debugging, safety, parameter_tuning environments
* massive set of breaking changes
- remove python logging module
- _step, _reset, _seed, _close => non underscored method
- remove benchmark and scoring folder
* Improve render("human"), now resizable, closable window.
* get rid of default step and reset in wrappers, so it doesn’t silently fail for people with underscore methods
* CubeCrash unit test environment
* followup fixes
* MemorizeDigits unit test environment
* refactored spaces a bit
fixed indentation
disabled test_env_semantics
* fix unit tests
* fixes
* CubeCrash, MemorizeDigits tested
* gym backwards compatibility patch
* gym backwards compatibility, followup fixes
* changelist, add spaces to main namespaces
* undo_logger_setup for backwards compat
* remove configuration.py
2018-01-25 18:20:14 -08:00
|
|
|
from gym import spaces, logger
|
2016-05-29 09:07:09 -07:00
|
|
|
from gym.utils import seeding
|
2016-04-27 08:00:58 -07:00
|
|
|
import numpy as np
|
|
|
|
|
2020-05-08 22:03:48 +02:00
|
|
|
|
2016-04-27 08:00:58 -07:00
|
|
|
class CartPoleEnv(gym.Env):
    """
    Description:
        A pole is attached by an un-actuated joint to a cart, which moves along
        a frictionless track. The pendulum starts upright, and the goal is to
        prevent it from falling over by increasing and reducing the cart's
        velocity.

    Source:
        This environment corresponds to the version of the cart-pole problem
        described by Barto, Sutton, and Anderson

    Observation:
        Type: Box(4)
        Num     Observation               Min                     Max
        0       Cart Position             -4.8                    4.8
        1       Cart Velocity             -Inf                    Inf
        2       Pole Angle                -0.418 rad (-24 deg)    0.418 rad (24 deg)
        3       Pole Angular Velocity     -Inf                    Inf

        Note: The position and angle limits above are the bounds of the
        observation space, which are set to twice the termination thresholds
        (2.4 and 12 degrees) so that the observation returned on the failing
        step is still within bounds. The velocity limits are stored as the
        largest finite float32 value rather than a true infinity.

    Actions:
        Type: Discrete(2)
        Num   Action
        0     Push cart to the left
        1     Push cart to the right

        Note: The amount by which the velocity is reduced or increased is not
        fixed; it depends on the angle the pole is pointing at. This is because
        the center of gravity of the pole increases the amount of energy needed
        to move the cart underneath it

    Reward:
        Reward is 1 for every step taken, including the termination step

    Starting State:
        All observations are assigned a uniform random value in [-0.05..0.05]

    Episode Termination:
        Pole Angle is more than 12 degrees (in either direction).
        Cart Position is more than 2.4 in either direction (center of the cart
        reaches the edge of the display).
        Episode length is greater than 200.
        NOTE(review): the episode-length limit is not checked anywhere in this
        class; presumably it is applied by a time-limit wrapper where the
        environment is registered -- confirm.

    Solved Requirements:
        Considered solved when the average return is greater than or equal to
        195.0 over 100 consecutive trials.
    """

    metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 50}

    def __init__(self):
        """Set the physical constants, the action/observation spaces and the
        initial (empty) simulation and rendering state."""
        self.gravity = 9.8
        self.masscart = 1.0
        self.masspole = 0.1
        self.total_mass = self.masspole + self.masscart
        self.length = 0.5  # actually half the pole's length
        self.polemass_length = self.masspole * self.length
        self.force_mag = 10.0
        self.tau = 0.02  # seconds between state updates
        # "euler" selects explicit Euler in step(); any other value selects
        # the semi-implicit variant.
        self.kinematics_integrator = "euler"

        # Angle at which to fail the episode
        self.theta_threshold_radians = 12 * 2 * math.pi / 360
        self.x_threshold = 2.4

        # Angle limit set to 2 * theta_threshold_radians so failing observation
        # is still within bounds.
        high = np.array(
            [
                self.x_threshold * 2,
                np.finfo(np.float32).max,
                self.theta_threshold_radians * 2,
                np.finfo(np.float32).max,
            ],
            dtype=np.float32,
        )

        self.action_space = spaces.Discrete(2)
        self.observation_space = spaces.Box(-high, high, dtype=np.float32)

        self.viewer = None  # created lazily on the first render() call
        self.state = None  # (x, x_dot, theta, theta_dot); set by reset()

        # None until the episode first terminates; afterwards counts the
        # step() calls made past termination.
        self.steps_beyond_done = None

    def step(self, action):
        """Advance the simulation by one time step of ``self.tau`` seconds.

        Args:
            action: 1 pushes the cart with ``+force_mag``; any other value
                accepted by ``action_space`` pushes with ``-force_mag``.

        Returns:
            A 4-tuple ``(observation, reward, done, info)`` where
            ``observation`` is the new state as a float32 array, ``reward`` is
            1.0 on every step up to and including the first terminating step
            and 0.0 afterwards, ``done`` is True once the cart position or
            pole angle exceeds its threshold, and ``info`` is an empty dict.

        Raises:
            AssertionError: if ``action`` is not contained in
                ``action_space``, or if ``reset()`` has not been called yet.
        """
        err_msg = f"{action!r} ({type(action)}) invalid"
        assert self.action_space.contains(action), err_msg
        # Without this guard a step() before reset() fails with an opaque
        # "cannot unpack non-iterable NoneType" TypeError on the next line.
        assert self.state is not None, "Call reset before using step method."

        x, x_dot, theta, theta_dot = self.state
        force = self.force_mag if action == 1 else -self.force_mag
        costheta = math.cos(theta)
        sintheta = math.sin(theta)

        # For the interested reader:
        # https://coneural.org/florian/papers/05_cart_pole.pdf
        temp = (
            force + self.polemass_length * theta_dot ** 2 * sintheta
        ) / self.total_mass
        thetaacc = (self.gravity * sintheta - costheta * temp) / (
            self.length * (4.0 / 3.0 - self.masspole * costheta ** 2 / self.total_mass)
        )
        xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass

        if self.kinematics_integrator == "euler":
            # Explicit Euler: positions advance using the old velocities.
            x = x + self.tau * x_dot
            x_dot = x_dot + self.tau * xacc
            theta = theta + self.tau * theta_dot
            theta_dot = theta_dot + self.tau * thetaacc
        else:  # semi-implicit euler
            # Velocities are updated first and the new values move positions.
            x_dot = x_dot + self.tau * xacc
            x = x + self.tau * x_dot
            theta_dot = theta_dot + self.tau * thetaacc
            theta = theta + self.tau * theta_dot

        self.state = (x, x_dot, theta, theta_dot)

        done = bool(
            x < -self.x_threshold
            or x > self.x_threshold
            or theta < -self.theta_threshold_radians
            or theta > self.theta_threshold_radians
        )

        if not done:
            reward = 1.0
        elif self.steps_beyond_done is None:
            # Pole just fell!
            self.steps_beyond_done = 0
            reward = 1.0
        else:
            # Warn only on the first step taken past termination.
            if self.steps_beyond_done == 0:
                logger.warn(
                    "You are calling 'step()' even though this "
                    "environment has already returned done = True. You "
                    "should always call 'reset()' once you receive 'done = "
                    "True' -- any further steps are undefined behavior."
                )
            self.steps_beyond_done += 1
            reward = 0.0

        return np.array(self.state, dtype=np.float32), reward, done, {}

    def reset(self, seed: Optional[int] = None):
        """Start a new episode and return its first observation.

        Args:
            seed: forwarded unchanged to ``super().reset(seed=seed)``
                (presumably used there to seed ``self.np_random`` -- confirm
                against ``gym.Env``).

        Returns:
            The initial state as a float32 array of four values, each drawn
            uniformly between -0.05 and 0.05.
        """
        super().reset(seed=seed)
        self.state = self.np_random.uniform(low=-0.05, high=0.05, size=(4,))
        self.steps_beyond_done = None
        return np.array(self.state, dtype=np.float32)

    def render(self, mode="human"):
        """Draw the current state, creating the viewer on first use.

        Args:
            mode: when equal to ``"rgb_array"`` the viewer is asked to return
                the frame as an RGB array; any other value renders without
                requesting the array.

        Returns:
            ``None`` if no state exists yet (``reset()`` not called), otherwise
            whatever ``self.viewer.render(...)`` returns.
        """
        screen_width = 600
        screen_height = 400

        world_width = self.x_threshold * 2
        scale = screen_width / world_width
        # Vertical centre of the cart: the cart polygon is centred on its own
        # origin and translated to this height, which is also the track line.
        carty = 100
        polewidth = 10.0
        polelen = scale * (2 * self.length)
        cartwidth = 50.0
        cartheight = 30.0

        # Pole rectangle in its own frame, with the pivot at the origin.
        # Computed once and used both to build the polygon and to refresh it.
        pole_left = -polewidth / 2
        pole_right = polewidth / 2
        pole_top = polelen - polewidth / 2
        pole_bottom = -polewidth / 2
        pole_vertices = [
            (pole_left, pole_bottom),
            (pole_left, pole_top),
            (pole_right, pole_top),
            (pole_right, pole_bottom),
        ]

        if self.viewer is None:
            # Imported lazily so the rendering module is only loaded when
            # something is actually drawn.
            from gym.envs.classic_control import rendering

            self.viewer = rendering.Viewer(screen_width, screen_height)

            cart_left = -cartwidth / 2
            cart_right = cartwidth / 2
            cart_top = cartheight / 2
            cart_bottom = -cartheight / 2
            axleoffset = cartheight / 4.0
            cart = rendering.FilledPolygon(
                [
                    (cart_left, cart_bottom),
                    (cart_left, cart_top),
                    (cart_right, cart_top),
                    (cart_right, cart_bottom),
                ]
            )
            self.carttrans = rendering.Transform()
            cart.add_attr(self.carttrans)
            self.viewer.add_geom(cart)

            pole = rendering.FilledPolygon(list(pole_vertices))
            pole.set_color(0.8, 0.6, 0.4)
            # The pole rotates about the axle, then moves with the cart.
            self.poletrans = rendering.Transform(translation=(0, axleoffset))
            pole.add_attr(self.poletrans)
            pole.add_attr(self.carttrans)
            self.viewer.add_geom(pole)

            self.axle = rendering.make_circle(polewidth / 2)
            self.axle.add_attr(self.poletrans)
            self.axle.add_attr(self.carttrans)
            self.axle.set_color(0.5, 0.5, 0.8)
            self.viewer.add_geom(self.axle)

            self.track = rendering.Line((0, carty), (screen_width, carty))
            self.track.set_color(0, 0, 0)
            self.viewer.add_geom(self.track)

            self._pole_geom = pole

        if self.state is None:
            return None

        # Edit the pole polygon vertex
        self._pole_geom.v = pole_vertices

        state = self.state
        cartx = state[0] * scale + screen_width / 2.0  # MIDDLE OF CART
        self.carttrans.set_translation(cartx, carty)
        self.poletrans.set_rotation(-state[2])

        return self.viewer.render(return_rgb_array=mode == "rgb_array")

    def close(self):
        """Close the viewer, if one was created, and forget it."""
        if self.viewer:
            self.viewer.close()
            self.viewer = None
|