mirror of
https://github.com/Farama-Foundation/Gymnasium.git
synced 2025-08-20 05:52:03 +00:00
LunarLanderContinuous (#307)
* New LunarLanderContinuous, LunarLander-v2 remains exactly the same, no version bump. * keyboard_agent.py works again.
This commit is contained in:
committed by
Greg Brockman
parent
c97551e8e5
commit
ee2c0243c0
@@ -22,13 +22,13 @@ def key_press(key, mod):
|
||||
global human_agent_action, human_wants_restart, human_sets_pause
|
||||
if key==0xff0d: human_wants_restart = True
|
||||
if key==32: human_sets_pause = not human_sets_pause
|
||||
a = key - ord('0')
|
||||
a = int( key - ord('0') )
|
||||
if a <= 0 or a >= ACTIONS: return
|
||||
human_agent_action = a
|
||||
|
||||
def key_release(key, mod):
|
||||
global human_agent_action
|
||||
a = key - ord('0')
|
||||
a = int( key - ord('0') )
|
||||
if a <= 0 or a >= ACTIONS: return
|
||||
if human_agent_action == a:
|
||||
human_agent_action = 0
|
||||
|
@@ -100,6 +100,13 @@ register(
|
||||
reward_threshold=200,
|
||||
)
|
||||
|
||||
register(
|
||||
id='LunarLanderContinuous-v2',
|
||||
entry_point='gym.envs.box2d:LunarLanderContinuous',
|
||||
timestep_limit=1000,
|
||||
reward_threshold=200,
|
||||
)
|
||||
|
||||
register(
|
||||
id='BipedalWalker-v2',
|
||||
entry_point='gym.envs.box2d:BipedalWalker',
|
||||
|
@@ -1,3 +1,4 @@
|
||||
from gym.envs.box2d.lunar_lander import LunarLander
|
||||
from gym.envs.box2d.lunar_lander import LunarLanderContinuous
|
||||
from gym.envs.box2d.bipedal_walker import BipedalWalker, BipedalWalkerHardcore
|
||||
from gym.envs.box2d.car_racing import CarRacing
|
||||
|
@@ -76,6 +76,8 @@ class LunarLander(gym.Env):
|
||||
'video.frames_per_second' : FPS
|
||||
}
|
||||
|
||||
continuous = False
|
||||
|
||||
def __init__(self):
|
||||
self._seed()
|
||||
self.viewer = None
|
||||
@@ -87,12 +89,18 @@ class LunarLander(gym.Env):
|
||||
|
||||
self.prev_reward = None
|
||||
|
||||
# useful range is -1 .. +1
|
||||
high = np.array([np.inf]*8)
|
||||
# nop, fire left engine, main engine, right engine
|
||||
self.action_space = spaces.Discrete(4)
|
||||
high = np.array([np.inf]*8) # useful range is -1 .. +1, but spikes can be higher
|
||||
self.observation_space = spaces.Box(-high, high)
|
||||
|
||||
|
||||
if self.continuous:
|
||||
# Action is two floats [main engine, left-right engines].
|
||||
# Main engine: -1..0 off, 0..+1 throttle from 50% to 100% power. Engine can't work with less than 50% power.
|
||||
# Left-right: -1.0..-0.5 fire left engine, +0.5..+1.0 fire right engine, -0.5..0.5 off
|
||||
self.action_space = spaces.Box(-1, +1, (2,))
|
||||
else:
|
||||
# Nop, fire left engine, main engine, right engine
|
||||
self.action_space = spaces.Discrete(4)
|
||||
|
||||
self._reset()
|
||||
|
||||
def _seed(self, seed=None):
|
||||
@@ -203,9 +211,9 @@ class LunarLander(gym.Env):
|
||||
|
||||
self.drawlist = [self.lander] + self.legs
|
||||
|
||||
return self._step(0)[0]
|
||||
return self._step(np.array([0,0]) if self.continuous else 0)[0]
|
||||
|
||||
def _create_particle(self, mass, x, y):
|
||||
def _create_particle(self, mass, x, y, ttl):
|
||||
p = self.world.CreateDynamicBody(
|
||||
position = (x,y),
|
||||
angle=0.0,
|
||||
@@ -217,7 +225,7 @@ class LunarLander(gym.Env):
|
||||
maskBits=0x001, # collide only with ground
|
||||
restitution=0.3)
|
||||
)
|
||||
p.ttl = 1
|
||||
p.ttl = ttl
|
||||
self.particles.append(p)
|
||||
self._clean_particles(False)
|
||||
return p
|
||||
@@ -233,22 +241,38 @@ class LunarLander(gym.Env):
|
||||
tip = (math.sin(self.lander.angle), math.cos(self.lander.angle))
|
||||
side = (-tip[1], tip[0]);
|
||||
dispersion = [self.np_random.uniform(-1.0, +1.0) / SCALE for _ in range(2)]
|
||||
if action==2: # Main engine
|
||||
|
||||
m_power = 0.0
|
||||
if (self.continuous and action[0] > 0.0) or (not self.continuous and action==2):
|
||||
# Main engine
|
||||
if self.continuous:
|
||||
m_power = (np.clip(action[0], 0.0,1.0) + 1.0)*0.5 # 0.5..1.0
|
||||
assert m_power>=0.5 and m_power <= 1.0
|
||||
else:
|
||||
m_power = 1.0
|
||||
ox = tip[0]*(4/SCALE + 2*dispersion[0]) + side[0]*dispersion[1] # 4 is move a bit downwards, +-2 for randomness
|
||||
oy = -tip[1]*(4/SCALE + 2*dispersion[0]) - side[1]*dispersion[1]
|
||||
impulse_pos = (self.lander.position[0] + ox, self.lander.position[1] + oy)
|
||||
p = self._create_particle(3.5, *impulse_pos) # particles are just a decoration, 3.5 is here to make particle speed adequate
|
||||
p.ApplyLinearImpulse( ( ox*MAIN_ENGINE_POWER, oy*MAIN_ENGINE_POWER), impulse_pos, True)
|
||||
self.lander.ApplyLinearImpulse( (-ox*MAIN_ENGINE_POWER, -oy*MAIN_ENGINE_POWER), impulse_pos, True)
|
||||
p = self._create_particle(3.5, impulse_pos[0], impulse_pos[1], m_power) # particles are just a decoration, 3.5 is here to make particle speed adequate
|
||||
p.ApplyLinearImpulse( ( ox*MAIN_ENGINE_POWER*m_power, oy*MAIN_ENGINE_POWER*m_power), impulse_pos, True)
|
||||
self.lander.ApplyLinearImpulse( (-ox*MAIN_ENGINE_POWER*m_power, -oy*MAIN_ENGINE_POWER*m_power), impulse_pos, True)
|
||||
|
||||
if action==1 or action==3: # Orientation engines
|
||||
direction = action-2
|
||||
s_power = 0.0
|
||||
if (self.continuous and np.abs(action[1]) > 0.5) or (not self.continuous and action in [1,3]):
|
||||
# Orientation engines
|
||||
if self.continuous:
|
||||
direction = np.sign(action[1])
|
||||
s_power = np.clip(np.abs(action[1]), 0.5,1.0)
|
||||
assert s_power>=0.5 and s_power <= 1.0
|
||||
else:
|
||||
direction = action-2
|
||||
s_power = 1.0
|
||||
ox = tip[0]*dispersion[0] + side[0]*(3*dispersion[1]+direction*SIDE_ENGINE_AWAY/SCALE)
|
||||
oy = -tip[1]*dispersion[0] - side[1]*(3*dispersion[1]+direction*SIDE_ENGINE_AWAY/SCALE)
|
||||
impulse_pos = (self.lander.position[0] + ox - tip[0]*17/SCALE, self.lander.position[1] + oy + tip[1]*SIDE_ENGINE_HEIGHT/SCALE)
|
||||
p = self._create_particle(0.7, *impulse_pos)
|
||||
p.ApplyLinearImpulse( ( ox*SIDE_ENGINE_POWER, oy*SIDE_ENGINE_POWER), impulse_pos, True)
|
||||
self.lander.ApplyLinearImpulse( (-ox*SIDE_ENGINE_POWER, -oy*SIDE_ENGINE_POWER), impulse_pos, True)
|
||||
p = self._create_particle(0.7, impulse_pos[0], impulse_pos[1], s_power)
|
||||
p.ApplyLinearImpulse( ( ox*SIDE_ENGINE_POWER*s_power, oy*SIDE_ENGINE_POWER*s_power), impulse_pos, True)
|
||||
self.lander.ApplyLinearImpulse( (-ox*SIDE_ENGINE_POWER*s_power, -oy*SIDE_ENGINE_POWER*s_power), impulse_pos, True)
|
||||
|
||||
self.world.Step(1.0/FPS, 6*30, 2*30)
|
||||
|
||||
@@ -276,10 +300,8 @@ class LunarLander(gym.Env):
|
||||
reward = shaping - self.prev_shaping
|
||||
self.prev_shaping = shaping
|
||||
|
||||
if action==2: # main engine
|
||||
reward -= 0.30 # less fuel spent is better, about -30 for heuristic landing
|
||||
elif action != 0:
|
||||
reward -= 0.03
|
||||
reward -= m_power*0.30 # less fuel spent is better, about -30 for heuristic landing
|
||||
reward -= s_power*0.03
|
||||
|
||||
done = False
|
||||
if self.game_over or abs(state[0]) >= 1.0:
|
||||
@@ -333,42 +355,53 @@ class LunarLander(gym.Env):
|
||||
|
||||
return self.viewer.render(return_rgb_array = mode=='rgb_array')
|
||||
|
||||
class LunarLanderContinuous(LunarLander):
    # Same environment as LunarLander; the base class reads this flag to
    # choose the two-float action format instead of the discrete actions.
    continuous = True
|
||||
|
||||
def heuristic(env, s):
    """Hand-written controller used for testing and demonstration rollouts.

    Args:
        env: Environment object; only its ``continuous`` attribute is read,
            to select the action format.
        s: State sequence indexed 0..7: s[0] horizontal coordinate,
            s[1] vertical coordinate, s[2] horizontal speed, s[3] vertical
            speed, s[4] angle, s[5] angular speed, s[6] and s[7] leg contact
            flags.

    Returns:
        If ``env.continuous`` is true, a 2-element array
        ``[main engine, left-right engines]`` clipped to -1..+1.
        Otherwise an int action: 0 nop, 1 fire left engine, 2 main engine,
        3 fire right engine.
    """
    if s[6] or s[7]:
        # Legs have contact: stop steering and only reduce the fall speed,
        # that is all that is needed after touchdown.
        steer = 0
        thrust = -(s[3])*0.5
    else:
        # Angle should point towards the center; more than 0.4 radians
        # (22 degrees) is bad, so the target is limited to +-0.4.
        lean = s[0]*0.5 + s[2]*1.0
        if lean > 0.4:
            lean = 0.4
        elif lean < -0.4:
            lean = -0.4

        # Target height is proportional to the horizontal offset.
        altitude = 0.55*np.abs(s[0])

        # Proportional + derivative terms on angle / angular speed.
        steer = (lean - s[4])*0.5 - (s[5])*1.0
        # Proportional + derivative terms on height / vertical speed.
        thrust = (altitude - s[1])*0.5 - (s[3])*0.5

    if env.continuous:
        command = np.array([thrust*20 - 1, -steer*20])
        return np.clip(command, -1, +1)

    # Discrete actions: the main engine wins when the height error dominates,
    # otherwise fire the side engine that corrects the angle error.
    if thrust > np.abs(steer) and thrust > 0.05:
        return 2
    if steer < -0.05:
        return 3
    if steer > +0.05:
        return 1
    return 0
|
||||
|
||||
if __name__=="__main__":
|
||||
# Heuristic for testing.
|
||||
env = LunarLander()
|
||||
env.reset()
|
||||
steps = 0
|
||||
#env = LunarLander()
|
||||
env = LunarLanderContinuous()
|
||||
s = env.reset()
|
||||
total_reward = 0
|
||||
a = 0
|
||||
steps = 0
|
||||
while True:
|
||||
a = heuristic(env, s)
|
||||
s, r, done, info = env.step(a)
|
||||
env.render()
|
||||
total_reward += r
|
||||
if steps % 20 == 0 or done:
|
||||
print(["{:+0.2f}".format(x) for x in s])
|
||||
print("step {} total_reward {:+0.2f}".format(steps, total_reward))
|
||||
steps += 1
|
||||
|
||||
angle_targ = s[0]*0.5 + s[2]*1.0 # angle should point towards center (s[0] is horizontal coordinate, s[2] hor speed)
|
||||
if angle_targ > 0.4: angle_targ = 0.4 # more than 0.4 radians (22 degrees) is bad
|
||||
if angle_targ < -0.4: angle_targ = -0.4
|
||||
hover_targ = 0.55*np.abs(s[0]) # target y should be proportional to horizontal offset
|
||||
|
||||
# PID controller: s[4] angle, s[5] angularSpeed
|
||||
angle_todo = (angle_targ - s[4])*0.5 - (s[5])*1.0
|
||||
#print("angle_targ=%0.2f, angle_todo=%0.2f" % (angle_targ, angle_todo))
|
||||
|
||||
# PID controller: s[1] vertical coordinate s[3] vertical speed
|
||||
hover_todo = (hover_targ - s[1])*0.5 - (s[3])*0.5
|
||||
#print("hover_targ=%0.2f, hover_todo=%0.2f" % (hover_targ, hover_todo))
|
||||
|
||||
if s[6] or s[7]: # legs have contact
|
||||
angle_todo = 0
|
||||
hover_todo = -(s[3])*0.5 # override to reduce fall speed, that's all we need after contact
|
||||
|
||||
a = 0
|
||||
if hover_todo > np.abs(angle_todo) and hover_todo > 0.05: a = 2
|
||||
elif angle_todo < -0.05: a = 3
|
||||
elif angle_todo > +0.05: a = 1
|
||||
|
||||
env.render()
|
||||
if done: break
|
||||
|
@@ -303,6 +303,27 @@ comes to rest, receiving additional -100 or +100 points. Each leg ground contact
|
||||
engine is -0.3 points each frame. Solved is 200 points.
|
||||
Landing outside landing pad is possible. Fuel is infinite, so an agent can learn to fly and then land
|
||||
on its first attempt.
|
||||
Four discrete actions available: do nothing, fire left orientation engine, fire main engine, fire
|
||||
right orientation engine.
|
||||
""")
|
||||
|
||||
add_task(
|
||||
id='LunarLanderContinuous-v2',
|
||||
group='box2d',
|
||||
experimental=True,
|
||||
contributor='olegklimov',
|
||||
summary='Navigate a lander to its landing pad.',
|
||||
description="""
|
||||
Landing pad is always at coordinates (0,0). Coordinates are the first two numbers in state vector.
|
||||
Reward for moving from the top of the screen to landing pad and zero speed is about 100..140 points.
|
||||
If lander moves away from landing pad it loses reward back. Episode finishes if the lander crashes or
|
||||
comes to rest, receiving additional -100 or +100 points. Each leg ground contact is +10. Firing main
|
||||
engine is -0.3 points each frame. Solved is 200 points.
|
||||
Landing outside landing pad is possible. Fuel is infinite, so an agent can learn to fly and then land
|
||||
on its first attempt.
|
||||
Action is two real values vector from -1 to +1. First controls main engine, -1..0 off, 0..+1 throttle
|
||||
from 50% to 100% power. Engine can't work with less than 50% power. Second value -1.0..-0.5 fire left
|
||||
engine, +0.5..+1.0 fire right engine, -0.5..0.5 off.
|
||||
""")
|
||||
|
||||
add_task(
|
||||
|
Reference in New Issue
Block a user