2020-04-24 23:10:27 +02:00
|
|
|
"""
|
|
|
|
Rocket trajectory optimization is a classic topic in Optimal Control.
|
|
|
|
|
|
|
|
According to Pontryagin's maximum principle it's optimal to fire engine full throttle or
|
|
|
|
turn it off. That's the reason this environment is OK to have discreet actions (engine on or off).
|
|
|
|
|
|
|
|
The landing pad is always at coordinates (0,0). The coordinates are the first two numbers in the state vector.
|
|
|
|
Reward for moving from the top of the screen to the landing pad and zero speed is about 100..140 points.
|
|
|
|
If the lander moves away from the landing pad it loses reward. The episode finishes if the lander crashes or
|
|
|
|
comes to rest, receiving an additional -100 or +100 points. Each leg with ground contact is +10 points.
|
|
|
|
Firing the main engine is -0.3 points each frame. Firing the side engine is -0.03 points each frame.
|
|
|
|
Solved is 200 points.
|
|
|
|
|
|
|
|
Landing outside the landing pad is possible. Fuel is infinite, so an agent can learn to fly and then land
|
|
|
|
on its first attempt. Please see the source code for details.
|
|
|
|
|
|
|
|
To see a heuristic landing, run:
|
|
|
|
|
|
|
|
python gym/envs/box2d/lunar_lander.py
|
|
|
|
|
|
|
|
To play yourself, run:
|
|
|
|
|
|
|
|
python examples/agents/keyboard_agent.py LunarLander-v2
|
|
|
|
|
|
|
|
Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
2016-05-03 22:27:42 +03:00
|
|
|
import sys, math
|
|
|
|
import numpy as np
|
|
|
|
|
|
|
|
import Box2D
|
|
|
|
from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener)
|
|
|
|
|
|
|
|
import gym
|
|
|
|
from gym import spaces
|
2018-09-24 13:16:46 -07:00
|
|
|
from gym.utils import seeding, EzPickle
|
2016-05-03 22:27:42 +03:00
|
|
|
|
2020-04-24 23:10:27 +02:00
|
|
|
# Simulation rate: the physics world is advanced by 1/FPS seconds per step, and
# the same value is advertised as the video frame rate in the env metadata.
FPS = 50
SCALE = 30.0   # affects how fast-paced the game is, forces should be adjusted as well

# Multipliers applied to the engine impulses in LunarLander.step().
MAIN_ENGINE_POWER = 13.0
SIDE_ENGINE_POWER = 0.6

INITIAL_RANDOM = 1000.0   # Set 1500 to make game harder

# Lander hull outline; each vertex is divided by SCALE when the body is created.
LANDER_POLY = [
    (-14, +17), (-17, 0), (-17, -10),
    (+17, -10), (+17, 0), (+14, +17),
]
# Leg geometry and joint parameters (divided by SCALE where used as lengths).
LEG_AWAY = 20
LEG_DOWN = 18
LEG_W, LEG_H = 2, 8
LEG_SPRING_TORQUE = 40

# Offsets used to place the side-engine impulse relative to the lander position.
SIDE_ENGINE_HEIGHT = 14.0
SIDE_ENGINE_AWAY = 12.0

# Render window size in pixels; VIEWPORT_*/SCALE gives the world extents.
VIEWPORT_W = 600
VIEWPORT_H = 400
|
|
|
|
|
2020-04-24 23:10:27 +02:00
|
|
|
|
2016-05-03 22:27:42 +03:00
|
|
|
class ContactDetector(contactListener):
    """Box2D contact listener that mirrors collisions onto the environment.

    When a contact involving the lander body begins, ``env.game_over`` is set.
    Each leg body carries a ``ground_contact`` flag that is raised when a
    contact involving that leg begins and cleared when such a contact ends.
    """

    def __init__(self, env):
        """Remember ``env``, the environment whose state the callbacks update."""
        contactListener.__init__(self)
        self.env = env

    def _set_leg_contact(self, contact, touching):
        """Set ``ground_contact`` to ``touching`` on every leg taking part in ``contact``."""
        bodies = [contact.fixtureA.body, contact.fixtureB.body]
        for leg in self.env.legs:
            if leg in bodies:
                leg.ground_contact = touching

    def BeginContact(self, contact):
        """Flag game over if the lander body is involved, then mark touching legs."""
        if self.env.lander == contact.fixtureA.body or self.env.lander == contact.fixtureB.body:
            self.env.game_over = True
        self._set_leg_contact(contact, True)

    def EndContact(self, contact):
        """Clear the contact flag on any leg involved in the ended contact."""
        self._set_leg_contact(contact, False)
|
2016-05-03 22:27:42 +03:00
|
|
|
|
2020-04-24 23:10:27 +02:00
|
|
|
|
2018-09-24 13:16:46 -07:00
|
|
|
class LunarLander(gym.Env, EzPickle):
    """Box2D lunar-lander environment with discrete actions.

    Observation: 8 floats (see ``step``) -- scaled x/y position relative to the
    landing pad, scaled x/y velocity, angle, scaled angular velocity and the
    two leg-contact flags.

    Action: ``Discrete(4)`` -- nop, fire left engine, main engine, right
    engine.  When the class attribute ``continuous`` is true the action space
    is a ``Box(-1, +1, (2,))`` instead.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': FPS
    }

    continuous = False

    def __init__(self):
        """Create the physics world, declare the spaces and build the first episode."""
        EzPickle.__init__(self)
        self.seed()
        self.viewer = None

        self.world = Box2D.b2World()
        self.moon = None
        self.lander = None
        self.particles = []

        self.prev_reward = None

        # useful range is -1 .. +1, but spikes can be higher
        self.observation_space = spaces.Box(-np.inf, np.inf, shape=(8,), dtype=np.float32)

        if self.continuous:
            # Action is two floats [main engine, left-right engines].
            # Main engine: -1..0 off, 0..+1 throttle from 50% to 100% power. Engine can't work with less than 50% power.
            # Left-right:  -1.0..-0.5 fire left engine, +0.5..+1.0 fire right engine, -0.5..0.5 off
            self.action_space = spaces.Box(-1, +1, (2,), dtype=np.float32)
        else:
            # Nop, fire left engine, main engine, right engine
            self.action_space = spaces.Discrete(4)

        self.reset()

    def seed(self, seed=None):
        """(Re)create ``self.np_random`` from ``seed`` and return ``[seed]``."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _destroy(self):
        """Remove the bodies of the previous episode from the world, if any exist."""
        if not self.moon:
            return
        # Detach the listener first so no callback fires while bodies are destroyed.
        self.world.contactListener = None
        self._clean_particles(True)
        self.world.DestroyBody(self.moon)
        self.moon = None
        self.world.DestroyBody(self.lander)
        self.lander = None
        self.world.DestroyBody(self.legs[0])
        self.world.DestroyBody(self.legs[1])

    def reset(self):
        """Start a new episode and return its first observation.

        Rebuilds the terrain, the lander and its two legs, gives the lander a
        random initial push, then performs one no-op ``step`` and returns the
        observation from it.
        """
        self._destroy()
        # The listener is also stored in a separate attribute (``_keepref``).
        self.world.contactListener_keepref = ContactDetector(self)
        self.world.contactListener = self.world.contactListener_keepref
        self.game_over = False
        self.prev_shaping = None

        W = VIEWPORT_W/SCALE
        H = VIEWPORT_H/SCALE

        # terrain
        CHUNKS = 11
        height = self.np_random.uniform(0, H/2, size=(CHUNKS+1,))
        chunk_x = [W/(CHUNKS-1)*i for i in range(CHUNKS)]
        self.helipad_x1 = chunk_x[CHUNKS//2-1]
        self.helipad_x2 = chunk_x[CHUNKS//2+1]
        self.helipad_y = H/4
        # Flatten the five chunks around the centre to the helipad height.
        height[CHUNKS//2-2:CHUNKS//2+3] = self.helipad_y
        # i-1 is -1 for i == 0 and therefore reads the last element of ``height``.
        smooth_y = [0.33*(height[i-1] + height[i+0] + height[i+1]) for i in range(CHUNKS)]

        self.moon = self.world.CreateStaticBody(shapes=edgeShape(vertices=[(0, 0), (W, 0)]))
        self.sky_polys = []
        for i in range(CHUNKS-1):
            p1 = (chunk_x[i], smooth_y[i])
            p2 = (chunk_x[i+1], smooth_y[i+1])
            self.moon.CreateEdgeFixture(
                vertices=[p1, p2],
                density=0,
                friction=0.1)
            # Quad from the terrain edge up to the top of the viewport, used by render().
            self.sky_polys.append([p1, p2, (p2[0], H), (p1[0], H)])

        self.moon.color1 = (0.0, 0.0, 0.0)
        self.moon.color2 = (0.0, 0.0, 0.0)

        initial_y = VIEWPORT_H/SCALE
        self.lander = self.world.CreateDynamicBody(
            position=(VIEWPORT_W/SCALE/2, initial_y),
            angle=0.0,
            fixtures=fixtureDef(
                shape=polygonShape(vertices=[(x/SCALE, y/SCALE) for x, y in LANDER_POLY]),
                density=5.0,
                friction=0.1,
                categoryBits=0x0010,
                maskBits=0x001,   # collide only with ground
                restitution=0.0)  # 0.99 bouncy
        )
        self.lander.color1 = (0.5, 0.4, 0.9)
        self.lander.color2 = (0.3, 0.3, 0.5)
        # Random initial push; the x component is drawn before the y component.
        self.lander.ApplyForceToCenter((
            self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM),
            self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM)
        ), True)

        self.legs = []
        for i in [-1, +1]:
            leg = self.world.CreateDynamicBody(
                position=(VIEWPORT_W/SCALE/2 - i*LEG_AWAY/SCALE, initial_y),
                angle=(i * 0.05),
                fixtures=fixtureDef(
                    shape=polygonShape(box=(LEG_W/SCALE, LEG_H/SCALE)),
                    density=1.0,
                    restitution=0.0,
                    categoryBits=0x0020,
                    maskBits=0x001)
            )
            leg.ground_contact = False
            leg.color1 = (0.5, 0.4, 0.9)
            leg.color2 = (0.3, 0.3, 0.5)
            rjd = revoluteJointDef(
                bodyA=self.lander,
                bodyB=leg,
                localAnchorA=(0, 0),
                localAnchorB=(i * LEG_AWAY/SCALE, LEG_DOWN/SCALE),
                enableMotor=True,
                enableLimit=True,
                maxMotorTorque=LEG_SPRING_TORQUE,
                motorSpeed=+0.3 * i  # low enough not to jump back into the sky
            )
            if i == -1:
                rjd.lowerAngle = +0.9 - 0.5  # The most esoteric numbers here, angled legs have freedom to travel within
                rjd.upperAngle = +0.9
            else:
                rjd.lowerAngle = -0.9
                rjd.upperAngle = -0.9 + 0.5
            leg.joint = self.world.CreateJoint(rjd)
            self.legs.append(leg)

        self.drawlist = [self.lander] + self.legs

        return self.step(np.array([0, 0]) if self.continuous else 0)[0]

    def _create_particle(self, mass, x, y, ttl):
        """Create one exhaust particle body at ``(x, y)`` and return it.

        ``mass`` is used as the fixture density and ``ttl`` is stored on the
        body as ``ttl``.  Expired particles are cleaned up after the new one
        has been appended.
        """
        p = self.world.CreateDynamicBody(
            position=(x, y),
            angle=0.0,
            fixtures=fixtureDef(
                shape=circleShape(radius=2/SCALE, pos=(0, 0)),
                density=mass,
                friction=0.1,
                categoryBits=0x0100,
                maskBits=0x001,  # collide only with ground
                restitution=0.3)
        )
        p.ttl = ttl
        self.particles.append(p)
        self._clean_particles(False)
        return p

    def _clean_particles(self, all):
        """Destroy particles from the front of the list.

        Stops at the first particle whose ``ttl`` is not negative, unless
        ``all`` is true, in which case every particle is destroyed.
        """
        while self.particles and (all or self.particles[0].ttl < 0):
            self.world.DestroyBody(self.particles.pop(0))

    def step(self, action):
        """Apply ``action``, advance the simulation one frame and score it.

        Args:
            action: an element of ``Discrete(4)``, or (continuous variant) two
                floats ``[main, lateral]`` which are clipped to ``[-1, +1]``.

        Returns:
            ``(observation, reward, done, info)`` where ``observation`` is a
            float32 array of 8 values and ``info`` is an empty dict.
        """
        if self.continuous:
            action = np.clip(action, -1, +1).astype(np.float32)
        else:
            assert self.action_space.contains(action), "%r (%s) invalid " % (action, type(action))

        # Engines
        tip = (math.sin(self.lander.angle), math.cos(self.lander.angle))
        side = (-tip[1], tip[0])
        dispersion = [self.np_random.uniform(-1.0, +1.0) / SCALE for _ in range(2)]

        m_power = 0.0
        if (self.continuous and action[0] > 0.0) or (not self.continuous and action == 2):
            # Main engine
            if self.continuous:
                m_power = (np.clip(action[0], 0.0, 1.0) + 1.0)*0.5   # 0.5..1.0
                assert 0.5 <= m_power <= 1.0
            else:
                m_power = 1.0
            ox = (tip[0] * (4/SCALE + 2 * dispersion[0]) +
                  side[0] * dispersion[1])  # 4 is move a bit downwards, +-2 for randomness
            oy = -tip[1] * (4/SCALE + 2 * dispersion[0]) - side[1] * dispersion[1]
            impulse_pos = (self.lander.position[0] + ox, self.lander.position[1] + oy)
            p = self._create_particle(3.5,  # 3.5 is here to make particle speed adequate
                                      impulse_pos[0],
                                      impulse_pos[1],
                                      m_power)  # particles are just a decoration
            p.ApplyLinearImpulse((ox * MAIN_ENGINE_POWER * m_power, oy * MAIN_ENGINE_POWER * m_power),
                                 impulse_pos,
                                 True)
            # Equal and opposite impulse on the lander itself.
            self.lander.ApplyLinearImpulse((-ox * MAIN_ENGINE_POWER * m_power, -oy * MAIN_ENGINE_POWER * m_power),
                                           impulse_pos,
                                           True)

        s_power = 0.0
        if (self.continuous and np.abs(action[1]) > 0.5) or (not self.continuous and action in [1, 3]):
            # Orientation engines
            if self.continuous:
                direction = np.sign(action[1])
                s_power = np.clip(np.abs(action[1]), 0.5, 1.0)
                assert 0.5 <= s_power <= 1.0
            else:
                direction = action-2
                s_power = 1.0
            lateral = 3 * dispersion[1] + direction * SIDE_ENGINE_AWAY/SCALE
            ox = tip[0] * dispersion[0] + side[0] * lateral
            oy = -tip[1] * dispersion[0] - side[1] * lateral
            impulse_pos = (self.lander.position[0] + ox - tip[0] * 17/SCALE,
                           self.lander.position[1] + oy + tip[1] * SIDE_ENGINE_HEIGHT/SCALE)
            p = self._create_particle(0.7, impulse_pos[0], impulse_pos[1], s_power)
            p.ApplyLinearImpulse((ox * SIDE_ENGINE_POWER * s_power, oy * SIDE_ENGINE_POWER * s_power),
                                 impulse_pos,
                                 True)
            self.lander.ApplyLinearImpulse((-ox * SIDE_ENGINE_POWER * s_power, -oy * SIDE_ENGINE_POWER * s_power),
                                           impulse_pos,
                                           True)

        self.world.Step(1.0/FPS, 6*30, 2*30)

        pos = self.lander.position
        vel = self.lander.linearVelocity
        state = [
            (pos.x - VIEWPORT_W/SCALE/2) / (VIEWPORT_W/SCALE/2),
            (pos.y - (self.helipad_y+LEG_DOWN/SCALE)) / (VIEWPORT_H/SCALE/2),
            vel.x*(VIEWPORT_W/SCALE/2)/FPS,
            vel.y*(VIEWPORT_H/SCALE/2)/FPS,
            self.lander.angle,
            20.0*self.lander.angularVelocity/FPS,
            1.0 if self.legs[0].ground_contact else 0.0,
            1.0 if self.legs[1].ground_contact else 0.0
        ]
        assert len(state) == 8

        reward = 0
        shaping = \
            - 100*np.sqrt(state[0]*state[0] + state[1]*state[1]) \
            - 100*np.sqrt(state[2]*state[2] + state[3]*state[3]) \
            - 100*abs(state[4]) + 10*state[6] + 10*state[7]  # And ten points for legs contact, the idea is if you
                                                             # lose contact again after landing, you get negative reward
        # The reward is the change in shaping since the previous step.
        if self.prev_shaping is not None:
            reward = shaping - self.prev_shaping
        self.prev_shaping = shaping

        reward -= m_power*0.30  # less fuel spent is better, about -30 for heuristic landing
        reward -= s_power*0.03

        done = False
        # Lander body touched something, or the lander left the horizontal bounds.
        if self.game_over or abs(state[0]) >= 1.0:
            done = True
            reward = -100
        # Lander body is no longer awake (it has come to rest).
        if not self.lander.awake:
            done = True
            reward = +100
        return np.array(state, dtype=np.float32), reward, done, {}

    def render(self, mode='human'):
        """Draw the current frame and return the viewer's render result.

        For ``mode == 'rgb_array'`` the viewer is asked to return an RGB array.
        Each call also ages every exhaust particle by 0.15 ``ttl`` and removes
        expired ones.
        """
        from gym.envs.classic_control import rendering
        if self.viewer is None:
            self.viewer = rendering.Viewer(VIEWPORT_W, VIEWPORT_H)
            self.viewer.set_bounds(0, VIEWPORT_W/SCALE, 0, VIEWPORT_H/SCALE)

        for obj in self.particles:
            obj.ttl -= 0.15
            fade = (max(0.2, 0.2+obj.ttl), max(0.2, 0.5*obj.ttl), max(0.2, 0.5*obj.ttl))
            obj.color1 = fade
            obj.color2 = fade

        self._clean_particles(False)

        for p in self.sky_polys:
            self.viewer.draw_polygon(p, color=(0, 0, 0))

        for obj in self.particles + self.drawlist:
            for f in obj.fixtures:
                trans = f.body.transform
                if isinstance(f.shape, circleShape):
                    t = rendering.Transform(translation=trans*f.shape.pos)
                    self.viewer.draw_circle(f.shape.radius, 20, color=obj.color1).add_attr(t)
                    self.viewer.draw_circle(f.shape.radius, 20, color=obj.color2, filled=False, linewidth=2).add_attr(t)
                else:
                    path = [trans*v for v in f.shape.vertices]
                    self.viewer.draw_polygon(path, color=obj.color1)
                    path.append(path[0])  # close the outline
                    self.viewer.draw_polyline(path, color=obj.color2, linewidth=2)

        # One flag at each end of the helipad.
        for x in [self.helipad_x1, self.helipad_x2]:
            flagy1 = self.helipad_y
            flagy2 = flagy1 + 50/SCALE
            self.viewer.draw_polyline([(x, flagy1), (x, flagy2)], color=(1, 1, 1))
            self.viewer.draw_polygon([(x, flagy2), (x, flagy2-10/SCALE), (x + 25/SCALE, flagy2 - 5/SCALE)],
                                     color=(0.8, 0.8, 0))

        return self.viewer.render(return_rgb_array=mode == 'rgb_array')

    def close(self):
        """Close the viewer window, if one was opened."""
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None
|
|
|
|
|
2020-04-24 23:10:27 +02:00
|
|
|
|
2016-08-25 02:08:32 +03:00
|
|
|
class LunarLanderContinuous(LunarLander):
    """Variant of ``LunarLander`` that takes continuous two-float actions."""

    continuous = True
|
|
|
|
|
|
|
|
def heuristic(env, s):
    """
    The heuristic for
    1. Testing
    2. Demonstration rollout.

    Args:
        env: The environment (only ``env.continuous`` is read)
        s (list): The state. Attributes:
                  s[0] is the horizontal coordinate
                  s[1] is the vertical coordinate
                  s[2] is the horizontal speed
                  s[3] is the vertical speed
                  s[4] is the angle
                  s[5] is the angular speed
                  s[6] 1 if first leg has contact, else 0
                  s[7] 1 if second leg has contact, else 0
    returns:
         a: The action to be fed into the step function defined above. A
            2-element array clipped to [-1, +1] when ``env.continuous`` is
            true, otherwise an int in {0, 1, 2, 3}.
    """

    angle_targ = s[0]*0.5 + s[2]*1.0         # angle should point towards center
    if angle_targ > 0.4:
        angle_targ = 0.4    # more than 0.4 radians (22 degrees) is bad
    if angle_targ < -0.4:
        angle_targ = -0.4
    hover_targ = 0.55*np.abs(s[0])           # target y should be proportional to horizontal offset

    angle_todo = (angle_targ - s[4]) * 0.5 - (s[5])*1.0
    hover_todo = (hover_targ - s[1])*0.5 - (s[3])*0.5

    if s[6] or s[7]:  # legs have contact
        angle_todo = 0
        hover_todo = -(s[3])*0.5  # override to reduce fall speed, that's all we need after contact

    if env.continuous:
        a = np.array([hover_todo*20 - 1, -angle_todo*20])
        a = np.clip(a, -1, +1)
    else:
        a = 0
        if hover_todo > np.abs(angle_todo) and hover_todo > 0.05:
            a = 2
        elif angle_todo < -0.05:
            a = 3
        elif angle_todo > +0.05:
            a = 1
    return a
|
2018-08-28 14:12:10 -07:00
|
|
|
|
|
|
|
def demo_heuristic_lander(env, seed=None, render=False):
    """Run one episode driven by ``heuristic`` and return the total reward.

    Args:
        env: The environment to roll out.
        seed: Passed to ``env.seed`` before the episode is reset.
        render: If true, ``env.render()`` is called after every step and the
            rollout stops early when it returns ``False``.

    Observations and the running reward are printed every 20 steps and on the
    final step.
    """
    env.seed(seed)
    total_reward = 0
    steps = 0
    s = env.reset()
    while True:
        a = heuristic(env, s)
        s, r, done, info = env.step(a)
        total_reward += r

        if render:
            still_open = env.render()
            # Identity check: any other return value keeps the rollout going.
            if still_open is False:
                break

        if steps % 20 == 0 or done:
            print("observations:", " ".join(["{:+0.2f}".format(x) for x in s]))
            print("step {} total_reward {:+0.2f}".format(steps, total_reward))
        steps += 1
        if done:
            break
    return total_reward
|
|
|
|
|
|
|
|
|
|
|
|
# Script entry point: show one heuristic landing in a render window.
if __name__ == '__main__':
    demo_heuristic_lander(LunarLander(), render=True)
|