Mirror of https://github.com/Farama-Foundation/Gymnasium.git, synced 2025-08-20 05:52:03 +00:00

LunarLanderContinuous (#307)

* New LunarLanderContinuous environment. LunarLander-v2 remains exactly the same, so there is no version bump.
* keyboard_agent.py works again.

Committed by Greg Brockman
parent c97551e8e5
commit ee2c0243c0
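The change in one picture: the new class exposes a two-float Box action space while keeping the discrete LunarLander untouched. A minimal usage sketch, assuming only what this diff itself shows (the gym-era reset()/step() API returning (s, r, done, info), and the import added to gym/envs/box2d/__init__.py below):

    import numpy as np
    from gym.envs.box2d import LunarLanderContinuous

    env = LunarLanderContinuous()
    s = env.reset()
    # Action is two floats: [main engine, left-right engines].
    s, r, done, info = env.step(np.array([+1.0, 0.0]))  # full main throttle, side engines off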
examples/agents/keyboard_agent.py

@@ -22,13 +22,13 @@ def key_press(key, mod):
     global human_agent_action, human_wants_restart, human_sets_pause
     if key==0xff0d: human_wants_restart = True
     if key==32: human_sets_pause = not human_sets_pause
-    a = key - ord('0')
+    a = int( key - ord('0') )
     if a <= 0 or a >= ACTIONS: return
     human_agent_action = a
 
 def key_release(key, mod):
     global human_agent_action
-    a = key - ord('0')
+    a = int( key - ord('0') )
     if a <= 0 or a >= ACTIONS: return
     if human_agent_action == a:
         human_agent_action = 0
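The int() cast is the whole keyboard_agent.py fix; the surrounding bounds check does the filtering. A standalone sketch of the digit-key mapping (action_for_key is a hypothetical helper, not part of the file): ord('0') is 48, so the '2' key selects action 2, while out-of-range digits and non-digit keysyms such as Return (0xff0d) fall outside 1..ACTIONS-1 and are ignored.

    ACTIONS = 4  # e.g. LunarLander's four discrete actions

    def action_for_key(key):
        a = int(key - ord('0'))
        if a <= 0 or a >= ACTIONS:
            return None       # not a usable digit key for this env
        return a

    assert action_for_key(ord('2')) == 2
    assert action_for_key(ord('9')) is None   # only actions 1..3 exist here
    assert action_for_key(0xff0d) is None     # Return keysym, far out of range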
gym/envs/__init__.py

@@ -100,6 +100,13 @@ register(
     reward_threshold=200,
 )
 
+register(
+    id='LunarLanderContinuous-v2',
+    entry_point='gym.envs.box2d:LunarLanderContinuous',
+    timestep_limit=1000,
+    reward_threshold=200,
+)
+
 register(
     id='BipedalWalker-v2',
     entry_point='gym.envs.box2d:BipedalWalker',
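Once registered, the environment should also be constructible through the registry by id; a minimal sketch, assuming a gym install that includes this commit:

    import gym

    env = gym.make('LunarLanderContinuous-v2')
    print(env.action_space)       # expected: Box(2,) bounded to [-1, +1]
    print(env.observation_space)  # expected: Box(8,)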
gym/envs/box2d/__init__.py

@@ -1,3 +1,4 @@
 from gym.envs.box2d.lunar_lander import LunarLander
+from gym.envs.box2d.lunar_lander import LunarLanderContinuous
 from gym.envs.box2d.bipedal_walker import BipedalWalker, BipedalWalkerHardcore
 from gym.envs.box2d.car_racing import CarRacing
gym/envs/box2d/lunar_lander.py

@@ -76,6 +76,8 @@ class LunarLander(gym.Env):
         'video.frames_per_second' : FPS
     }
 
+    continuous = False
+
     def __init__(self):
         self._seed()
         self.viewer = None
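That single class attribute is the whole switch: LunarLanderContinuous, defined near the end of this diff, overrides it and inherits everything else. The pattern in miniature (stand-in classes, not the real ones):

    class Lander:                    # stand-in for the real gym.Env subclass
        continuous = False

    class LanderContinuous(Lander):  # flips behavior without touching __init__
        continuous = True

    assert Lander().continuous is False
    assert LanderContinuous().continuous is True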
@@ -87,12 +89,18 @@ class LunarLander(gym.Env):
 
         self.prev_reward = None
 
-        # useful range is -1 .. +1
-        high = np.array([np.inf]*8)
-        # nop, fire left engine, main engine, right engine
-        self.action_space = spaces.Discrete(4)
+        high = np.array([np.inf]*8)  # useful range is -1 .. +1, but spikes can be higher
         self.observation_space = spaces.Box(-high, high)
 
+        if self.continuous:
+            # Action is two floats [main engine, left-right engines].
+            # Main engine: -1..0 off, 0..+1 throttle from 50% to 100% power. Engine can't work with less than 50% power.
+            # Left-right: -1.0..-0.5 fire left engine, +0.5..+1.0 fire right engine, -0.5..0.5 off
+            self.action_space = spaces.Box(-1, +1, (2,))
+        else:
+            # Nop, fire left engine, main engine, right engine
+            self.action_space = spaces.Discrete(4)
+
         self._reset()
 
     def _seed(self, seed=None):
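A worked example of the main-engine encoding the new comments describe, mirroring the clipping _step performs further down (a sketch of just the mapping, not the class):

    import numpy as np

    def main_engine_power(a0):
        # action[0] in [-1, 1]: at or below 0 the engine is off,
        # above 0 it maps linearly onto 50%..100% power.
        if a0 <= 0.0:
            return 0.0
        return (np.clip(a0, 0.0, 1.0) + 1.0)*0.5

    assert main_engine_power(-0.3) == 0.0              # off
    assert abs(main_engine_power(0.5) - 0.75) < 1e-12  # mid-stick = 75% power
    assert main_engine_power(1.0) == 1.0               # full power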
@@ -203,9 +211,9 @@ class LunarLander(gym.Env):
 
         self.drawlist = [self.lander] + self.legs
 
-        return self._step(0)[0]
+        return self._step(np.array([0,0]) if self.continuous else 0)[0]
 
-    def _create_particle(self, mass, x, y):
+    def _create_particle(self, mass, x, y, ttl):
         p = self.world.CreateDynamicBody(
             position = (x,y),
             angle=0.0,
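The reset path now has to issue a no-op whose type matches the action space. Why np.array([0,0]) is the continuous no-op, given the firing conditions introduced in the _step hunk below:

    import numpy as np

    noop = np.array([0, 0])
    assert not (noop[0] > 0.0)          # main engine fires only for action[0] > 0
    assert not (np.abs(noop[1]) > 0.5)  # side engines fire only outside the +-0.5 dead zone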
@@ -217,7 +225,7 @@ class LunarLander(gym.Env):
                 maskBits=0x001,  # collide only with ground
                 restitution=0.3)
                 )
-        p.ttl = 1
+        p.ttl = ttl
         self.particles.append(p)
         self._clean_particles(False)
         return p
@@ -233,22 +241,38 @@ class LunarLander(gym.Env):
         tip = (math.sin(self.lander.angle), math.cos(self.lander.angle))
         side = (-tip[1], tip[0]);
         dispersion = [self.np_random.uniform(-1.0, +1.0) / SCALE for _ in range(2)]
-        if action==2: # Main engine
+        m_power = 0.0
+        if (self.continuous and action[0] > 0.0) or (not self.continuous and action==2):
+            # Main engine
+            if self.continuous:
+                m_power = (np.clip(action[0], 0.0,1.0) + 1.0)*0.5   # 0.5..1.0
+                assert m_power>=0.5 and m_power <= 1.0
+            else:
+                m_power = 1.0
             ox = tip[0]*(4/SCALE + 2*dispersion[0]) + side[0]*dispersion[1] # 4 is move a bit downwards, +-2 for randomness
             oy = -tip[1]*(4/SCALE + 2*dispersion[0]) - side[1]*dispersion[1]
             impulse_pos = (self.lander.position[0] + ox, self.lander.position[1] + oy)
-            p = self._create_particle(3.5, *impulse_pos) # particles are just a decoration, 3.5 is here to make particle speed adequate
-            p.ApplyLinearImpulse( ( ox*MAIN_ENGINE_POWER, oy*MAIN_ENGINE_POWER), impulse_pos, True)
-            self.lander.ApplyLinearImpulse( (-ox*MAIN_ENGINE_POWER, -oy*MAIN_ENGINE_POWER), impulse_pos, True)
+            p = self._create_particle(3.5, impulse_pos[0], impulse_pos[1], m_power) # particles are just a decoration, 3.5 is here to make particle speed adequate
+            p.ApplyLinearImpulse( ( ox*MAIN_ENGINE_POWER*m_power, oy*MAIN_ENGINE_POWER*m_power), impulse_pos, True)
+            self.lander.ApplyLinearImpulse( (-ox*MAIN_ENGINE_POWER*m_power, -oy*MAIN_ENGINE_POWER*m_power), impulse_pos, True)
 
-        if action==1 or action==3: # Orientation engines
-            direction = action-2
+        s_power = 0.0
+        if (self.continuous and np.abs(action[1]) > 0.5) or (not self.continuous and action in [1,3]):
+            # Orientation engines
+            if self.continuous:
+                direction = np.sign(action[1])
+                s_power = np.clip(np.abs(action[1]), 0.5,1.0)
+                assert s_power>=0.5 and s_power <= 1.0
+            else:
+                direction = action-2
+                s_power = 1.0
             ox = tip[0]*dispersion[0] + side[0]*(3*dispersion[1]+direction*SIDE_ENGINE_AWAY/SCALE)
             oy = -tip[1]*dispersion[0] - side[1]*(3*dispersion[1]+direction*SIDE_ENGINE_AWAY/SCALE)
             impulse_pos = (self.lander.position[0] + ox - tip[0]*17/SCALE, self.lander.position[1] + oy + tip[1]*SIDE_ENGINE_HEIGHT/SCALE)
-            p = self._create_particle(0.7, *impulse_pos)
-            p.ApplyLinearImpulse( ( ox*SIDE_ENGINE_POWER, oy*SIDE_ENGINE_POWER), impulse_pos, True)
-            self.lander.ApplyLinearImpulse( (-ox*SIDE_ENGINE_POWER, -oy*SIDE_ENGINE_POWER), impulse_pos, True)
+            p = self._create_particle(0.7, impulse_pos[0], impulse_pos[1], s_power)
+            p.ApplyLinearImpulse( ( ox*SIDE_ENGINE_POWER*s_power, oy*SIDE_ENGINE_POWER*s_power), impulse_pos, True)
+            self.lander.ApplyLinearImpulse( (-ox*SIDE_ENGINE_POWER*s_power, -oy*SIDE_ENGINE_POWER*s_power), impulse_pos, True)
 
         self.world.Step(1.0/FPS, 6*30, 2*30)
 
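The orientation-engine branch implements a banded encoding: a dead zone across [-0.5, +0.5], then power growing with |action[1]| up to 1.0. A sketch of just that mapping (side_engine is a hypothetical helper):

    import numpy as np

    def side_engine(a1):
        if np.abs(a1) <= 0.5:
            return 0.0, 0.0                      # dead zone: both side engines off
        direction = np.sign(a1)                  # -1 fires left engine, +1 fires right
        s_power = np.clip(np.abs(a1), 0.5, 1.0)  # 0.5..1.0
        return direction, s_power

    assert side_engine(0.2)  == (0.0, 0.0)
    assert side_engine(-0.8) == (-1.0, 0.8)
    assert side_engine(1.0)  == (1.0, 1.0)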
@@ -276,10 +300,8 @@ class LunarLander(gym.Env):
             reward = shaping - self.prev_shaping
         self.prev_shaping = shaping
 
-        if action==2: # main engine
-            reward -= 0.30 # less fuel spent is better, about -30 for heurisic landing
-        elif action != 0:
-            reward -= 0.03
+        reward -= m_power*0.30 # less fuel spent is better, about -30 for heurisic landing
+        reward -= s_power*0.03
 
         done = False
         if self.game_over or abs(state[0]) >= 1.0:
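Fuel cost now scales with the power actually used instead of a flat per-firing charge. The "-30 for heurisic landing" comment is consistent with roughly 100 frames of full main-engine burn; the frame count here is an assumption for illustration (FPS is 50 in this file):

    frames_burning = 100                 # assumed: ~2 seconds of burn at 50 FPS
    per_frame = 0.30                     # full-power main-engine cost, from above
    print(-per_frame * frames_burning)   # -30.0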
@@ -333,21 +355,13 @@ class LunarLander(gym.Env):
 
         return self.viewer.render(return_rgb_array = mode=='rgb_array')
 
-if __name__=="__main__":
-    # Heuristic for testing.
-    env = LunarLander()
-    env.reset()
-    steps = 0
-    total_reward = 0
-    a = 0
-    while True:
-        s, r, done, info = env.step(a)
-        total_reward += r
-        if steps % 20 == 0 or done:
-            print(["{:+0.2f}".format(x) for x in s])
-            print("step {} total_reward {:+0.2f}".format(steps, total_reward))
-        steps += 1
+class LunarLanderContinuous(LunarLander):
+    continuous = True
 
+def heuristic(env, s):
+    # Heuristic for:
+    # 1. Testing.
+    # 2. Demonstration rollout.
     angle_targ = s[0]*0.5 + s[2]*1.0 # angle should point towards center (s[0] is horizontal coordinate, s[2] hor speed)
     if angle_targ > 0.4: angle_targ = 0.4 # more than 0.4 radians (22 degrees) is bad
     if angle_targ < -0.4: angle_targ = -0.4
@@ -365,10 +379,29 @@ if __name__=="__main__":
         angle_todo = 0
         hover_todo = -(s[3])*0.5 # override to reduce fall speed, that's all we need after contact
 
+    if env.continuous:
+        a = np.array( [hover_todo*20 - 1, -angle_todo*20] )
+        a = np.clip(a, -1, +1)
+    else:
         a = 0
         if hover_todo > np.abs(angle_todo) and hover_todo > 0.05: a = 2
         elif angle_todo < -0.05: a = 3
         elif angle_todo > +0.05: a = 1
+    return a
+
+if __name__=="__main__":
+    #env = LunarLander()
+    env = LunarLanderContinuous()
+    s = env.reset()
+    total_reward = 0
+    steps = 0
+    while True:
+        a = heuristic(env, s)
+        s, r, done, info = env.step(a)
         env.render()
+        total_reward += r
+        if steps % 20 == 0 or done:
+            print(["{:+0.2f}".format(x) for x in s])
+            print("step {} total_reward {:+0.2f}".format(steps, total_reward))
+        steps += 1
         if done: break
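Worked numbers for the continuous branch of the heuristic above, showing how the PD-style targets turn into an action vector:

    import numpy as np

    hover_todo, angle_todo = 0.1, 0.02
    a = np.array([hover_todo*20 - 1, -angle_todo*20])   # [1.0, -0.4]
    a = np.clip(a, -1, +1)
    # a[0] = 1.0  -> main engine at full power
    # a[1] = -0.4 -> inside the +-0.5 dead zone, so side engines stay off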
gym/scoreboard/__init__.py

@@ -303,6 +303,27 @@ comes to rest, receiving additional -100 or +100 points. Each leg ground contact
 engine is -0.3 points each frame. Solved is 200 points.
 Landing outside landing pad is possible. Fuel is infinite, so an agent can learn to fly and then land
 on its first attempt.
+Four discrete actions available: do nothing, fire left orientation engine, fire main engine, fire
+right orientation engine.
+""")
+
+add_task(
+    id='LunarLanderContinuous-v2',
+    group='box2d',
+    experimental=True,
+    contributor='olegklimov',
+    summary='Navigate a lander to its landing pad.',
+    description="""
+Landing pad is always at coordinates (0,0). Coordinates are the first two numbers in state vector.
+Reward for moving from the top of the screen to landing pad and zero speed is about 100..140 points.
+If lander moves away from landing pad it loses reward back. Episode finishes if the lander crashes or
+comes to rest, receiving additional -100 or +100 points. Each leg ground contact is +10. Firing main
+engine is -0.3 points each frame. Solved is 200 points.
+Landing outside landing pad is possible. Fuel is infinite, so an agent can learn to fly and then land
+on its first attempt.
+Action is two real values vector from -1 to +1. First controls main engine, -1..0 off, 0..+1 throttle
+from 50% to 100% power. Engine can't work with less than 50% power. Second value -1.0..-0.5 fire left
+engine, +0.5..+1.0 fire right engine, -0.5..0.5 off.
 """)
 
 add_task(
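A rough budget for how a good episode clears the 200-point "solved" bar, using only figures quoted in the descriptions above (the 120-point shaping value is an assumed midpoint of the stated 100..140 range):

    shaping = 120    # top of screen to pad at zero speed: "about 100..140 points"
    legs    = 2*10   # each leg ground contact is +10
    rest    = 100    # comes to rest: +100
    fuel    = -30    # main engine burn: "about -30" per the code comment
    print(shaping + legs + rest + fuel)   # 210, comfortably over 200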