Mirror of https://github.com/Farama-Foundation/Gymnasium.git
Clean some docstrings (#1854)
* add type of argument
* fix typos
* split lines for formatting
* reformat string, add ellipsis, remove r string
* make docstring stylistically consistent
* make docstrings a little more elaborate
* reduce by 1 space
* make line wrap 120
* remove unnecessary line
* add returns to docstring
* add docstring, make code more pep8 and delete some unused print functions
* more pep8
* file docstring instead of comments
* delete unused variables, add file docstring and add some pep8 spring cleaning
* add file docstring, fix typos and add some pep8 corrections

Co-authored-by: Dan <daniel.timbrell@ing.com>
@@ -11,12 +11,20 @@ def cem(f, th_mean, batch_size, n_iter, elite_frac, initial_std=1.0):
     """
     Generic implementation of the cross-entropy method for maximizing a black-box function

     Args:
         f: a function mapping from vector -> scalar
-        th_mean: initial mean over input distribution
-        batch_size: number of samples of theta to evaluate per batch
-        n_iter: number of batches
-        elite_frac: each batch, select this fraction of the top-performing samples
-        initial_std: initial standard deviation over parameter vectors
+        th_mean (np.array): initial mean over input distribution
+        batch_size (int): number of samples of theta to evaluate per batch
+        n_iter (int): number of batches
+        elite_frac (float): each batch, select this fraction of the top-performing samples
+        initial_std (float): initial standard deviation over parameter vectors
+
+    returns:
+        A generator of dicts. Subsequent dicts correspond to iterations of CEM algorithm.
+        The dicts contain the following values:
+        'ys' : numpy array with values of function evaluated at current population
+        'ys_mean': mean value of function over current population
+        'theta_mean': mean value of the parameter vector over current population
     """
     n_elite = int(np.round(batch_size*elite_frac))
     th_std = np.ones_like(th_mean) * initial_std
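For orientation, a minimal sketch of how the generator contract documented above could be consumed; the toy objective f below is a made-up example, not code from this commit:

    import numpy as np

    def f(th):
        # hypothetical black-box objective: maximum at th = 0
        return -np.sum(np.square(th))

    for it in cem(f, th_mean=np.zeros(5), batch_size=50, n_iter=10, elite_frac=0.2):
        print("ys_mean=%.3f" % it['ys_mean'])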
gym/core.py
@@ -6,7 +6,7 @@ env_closer = closer.Closer()


 class Env(object):
-    r"""The main OpenAI Gym class. It encapsulates an environment with
+    """The main OpenAI Gym class. It encapsulates an environment with
     arbitrary behind-the-scenes dynamics. An environment can be
     partially or fully observed.
@@ -26,9 +26,7 @@ class Env(object):

     Note: a default reward range set to [-inf,+inf] already exists. Set it if you want a narrower range.

-    The methods are accessed publicly as "step", "reset", etc.. The
-    non-underscored versions are wrapper methods to which we may add
-    functionality over time.
+    The methods are accessed publicly as "step", "reset", etc...
     """
     # Set this in SOME subclasses
     metadata = {'render.modes': []}
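As the docstring says, user environments subclass Env and fill in the public methods; a minimal hypothetical subclass (illustration only, not part of this commit):

    import random
    import gym
    from gym import spaces

    class CoinFlipEnv(gym.Env):
        """Hypothetical one-step guessing environment."""
        metadata = {'render.modes': []}

        def __init__(self):
            self.action_space = spaces.Discrete(2)       # guess 0 or 1
            self.observation_space = spaces.Discrete(1)  # single dummy observation

        def reset(self):
            self._coin = random.randrange(2)
            return 0

        def step(self, action):
            reward = 1.0 if action == self._coin else 0.0
            return 0, reward, True, {}                   # obs, reward, done, info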
@@ -174,9 +172,9 @@ class GoalEnv(Env):

     def compute_reward(self, achieved_goal, desired_goal, info):
         """Compute the step reward. This externalizes the reward function and makes
-        it dependent on an a desired goal and the one that was achieved. If you wish to include
+        it dependent on a desired goal and the one that was achieved. If you wish to include
         additional rewards that are independent of the goal, you can include the necessary values
-        to derive it in info and compute it accordingly.
+        to derive it in 'info' and compute it accordingly.

         Args:
             achieved_goal (object): the goal that was achieved during execution
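To illustrate the contract this docstring describes, a goal-based subclass might override compute_reward roughly like this (a sketch assuming numpy-array goals; not code from this commit):

    import numpy as np

    class MyGoalEnv(gym.GoalEnv):
        def compute_reward(self, achieved_goal, desired_goal, info):
            # sparse reward: 0 when the achieved goal matches the desired one, else -1
            return 0.0 if np.array_equal(achieved_goal, desired_goal) else -1.0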
@@ -194,7 +192,7 @@ class GoalEnv(Env):


 class Wrapper(Env):
-    r"""Wraps the environment to allow a modular transformation.
+    """Wraps the environment to allow a modular transformation.

     This class is the base class for all wrappers. The subclass could override
     some methods to change the behavior of the original environment without touching the
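In the spirit of that description, a hypothetical wrapper that changes one behavior without touching the underlying environment:

    class ClipReward(gym.Wrapper):
        def step(self, action):
            obs, reward, done, info = self.env.step(action)
            return obs, max(-1.0, min(1.0, reward)), done, info  # clip reward to [-1, 1]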
gym/envs/box2d/car_dynamics.py
@@ -1,15 +1,17 @@
+"""
+Top-down car dynamics simulation.
+
+Some ideas are taken from this great tutorial http://www.iforce2d.net/b2dtut/top-down-car by Chris Campbell.
+This simulation is a bit more detailed, with wheels rotation.
+
+Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
+"""
+
 import numpy as np
 import math
 import Box2D
 from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener, shape)

-# Top-down car dynamics simulation.
-#
-# Some ideas are taken from this great tutorial http://www.iforce2d.net/b2dtut/top-down-car by Chris Campbell.
-# This simulation is a bit more detailed, with wheels rotation.
-#
-# Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
-
 SIZE = 0.02
 ENGINE_POWER = 100000000*SIZE*SIZE
 WHEEL_MOMENT_OF_INERTIA = 4000*SIZE*SIZE
@@ -17,18 +19,18 @@ FRICTION_LIMIT = 1000000*SIZE*SIZE  # friction ~= mass ~= size^2 (ca
 WHEEL_R = 27
 WHEEL_W = 14
 WHEELPOS = [
-    (-55,+80), (+55,+80),
-    (-55,-82), (+55,-82)
+    (-55, +80), (+55, +80),
+    (-55, -82), (+55, -82)
     ]
-HULL_POLY1 =[
-    (-60,+130), (+60,+130),
-    (+60,+110), (-60,+110)
+HULL_POLY1 = [
+    (-60, +130), (+60, +130),
+    (+60, +110), (-60, +110)
     ]
-HULL_POLY2 =[
-    (-15,+120), (+15,+120),
+HULL_POLY2 = [
+    (-15, +120), (+15, +120),
     (+20, +20), (-20, 20)
     ]
-HULL_POLY3 =[
+HULL_POLY3 = [
     (+25, +20),
     (+50, -10),
     (+50, -40),
@@ -38,41 +40,42 @@ HULL_POLY3 =[
     (-50, -10),
     (-25, +20)
     ]
-HULL_POLY4 =[
-    (-50,-120), (+50,-120),
-    (+50,-90), (-50,-90)
+HULL_POLY4 = [
+    (-50, -120), (+50, -120),
+    (+50, -90), (-50, -90)
     ]
-WHEEL_COLOR = (0.0,0.0,0.0)
-WHEEL_WHITE = (0.3,0.3,0.3)
-MUD_COLOR = (0.4,0.4,0.0)
+WHEEL_COLOR = (0.0, 0.0, 0.0)
+WHEEL_WHITE = (0.3, 0.3, 0.3)
+MUD_COLOR = (0.4, 0.4, 0.0)


 class Car:
     def __init__(self, world, init_angle, init_x, init_y):
         self.world = world
         self.hull = self.world.CreateDynamicBody(
-            position = (init_x, init_y),
-            angle = init_angle,
-            fixtures = [
-                fixtureDef(shape = polygonShape(vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY1 ]), density=1.0),
-                fixtureDef(shape = polygonShape(vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY2 ]), density=1.0),
-                fixtureDef(shape = polygonShape(vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY3 ]), density=1.0),
-                fixtureDef(shape = polygonShape(vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY4 ]), density=1.0)
+            position=(init_x, init_y),
+            angle=init_angle,
+            fixtures=[
+                fixtureDef(shape=polygonShape(vertices=[(x*SIZE, y*SIZE) for x, y in HULL_POLY1]), density=1.0),
+                fixtureDef(shape=polygonShape(vertices=[(x*SIZE, y*SIZE) for x, y in HULL_POLY2]), density=1.0),
+                fixtureDef(shape=polygonShape(vertices=[(x*SIZE, y*SIZE) for x, y in HULL_POLY3]), density=1.0),
+                fixtureDef(shape=polygonShape(vertices=[(x*SIZE, y*SIZE) for x, y in HULL_POLY4]), density=1.0)
                 ]
             )
-        self.hull.color = (0.8,0.0,0.0)
+        self.hull.color = (0.8, 0.0, 0.0)
         self.wheels = []
         self.fuel_spent = 0.0
         WHEEL_POLY = [
-            (-WHEEL_W,+WHEEL_R), (+WHEEL_W,+WHEEL_R),
-            (+WHEEL_W,-WHEEL_R), (-WHEEL_W,-WHEEL_R)
+            (-WHEEL_W, +WHEEL_R), (+WHEEL_W, +WHEEL_R),
+            (+WHEEL_W, -WHEEL_R), (-WHEEL_W, -WHEEL_R)
             ]
-        for wx,wy in WHEELPOS:
+        for wx, wy in WHEELPOS:
             front_k = 1.0 if wy > 0 else 1.0
             w = self.world.CreateDynamicBody(
-                position = (init_x+wx*SIZE, init_y+wy*SIZE),
-                angle = init_angle,
-                fixtures = fixtureDef(
-                    shape=polygonShape(vertices=[ (x*front_k*SIZE,y*front_k*SIZE) for x,y in WHEEL_POLY ]),
+                position=(init_x+wx*SIZE, init_y+wy*SIZE),
+                angle=init_angle,
+                fixtures=fixtureDef(
+                    shape=polygonShape(vertices=[(x*front_k*SIZE,y*front_k*SIZE) for x, y in WHEEL_POLY]),
                     density=0.1,
                     categoryBits=0x0020,
                     maskBits=0x001,
@@ -90,14 +93,14 @@ class Car:
             rjd = revoluteJointDef(
                 bodyA=self.hull,
                 bodyB=w,
-                localAnchorA=(wx*SIZE,wy*SIZE),
+                localAnchorA=(wx*SIZE, wy*SIZE),
                 localAnchorB=(0,0),
                 enableMotor=True,
                 enableLimit=True,
                 maxMotorTorque=180*900*SIZE*SIZE,
-                motorSpeed = 0,
-                lowerAngle = -0.4,
-                upperAngle = +0.4,
+                motorSpeed=0,
+                lowerAngle=-0.4,
+                upperAngle=+0.4,
                 )
             w.joint = self.world.CreateJoint(rjd)
             w.tiles = set()
@@ -107,7 +110,11 @@ class Car:
         self.particles = []

     def gas(self, gas):
-        'control: rear wheel drive'
+        """control: rear wheel drive
+
+        Args:
+            gas (float): How much gas gets applied. Gets clipped between 0 and 1.
+        """
         gas = np.clip(gas, 0, 1)
         for w in self.wheels[2:4]:
             diff = gas - w.gas
@@ -115,12 +122,18 @@ class Car:
             w.gas += diff

     def brake(self, b):
-        'control: brake b=0..1, more than 0.9 blocks wheels to zero rotation'
+        """control: brake
+
+        Args:
+            b (0..1): Degree to which the brakes are applied. More than 0.9 blocks the wheels to zero rotation"""
         for w in self.wheels:
             w.brake = b

     def steer(self, s):
-        'control: steer s=-1..1, it takes time to rotate steering wheel from side to side, s is target position'
+        """control: steer
+
+        Args:
+            s (-1..1): target position, it takes time to rotate steering wheel from side-to-side"""
         self.wheels[0].steer = s
         self.wheels[1].steer = s

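Taken together, the three control methods sketched in these docstrings would be driven roughly like this (hypothetical values, assuming a Box2D world is already set up and that the car is advanced by a per-frame step):

    car = Car(world, init_angle=0.0, init_x=0.0, init_y=0.0)
    car.gas(0.5)      # half throttle on the rear wheels, clipped to [0, 1]
    car.brake(0.0)    # 0..1; values >= 0.9 lock the wheels
    car.steer(-0.3)   # target steering position, reached over several frames
    car.step(1.0/50)  # advance the dynamics by one frame at 50 FPS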
@@ -148,7 +161,9 @@ class Car:
             # WHEEL_MOMENT_OF_INERTIA*np.square(w.omega)/2 = E -- energy
             # WHEEL_MOMENT_OF_INERTIA*w.omega * domega/dt = dE/dt = W -- power
             # domega = dt*W/WHEEL_MOMENT_OF_INERTIA/w.omega
-            w.omega += dt*ENGINE_POWER*w.gas/WHEEL_MOMENT_OF_INERTIA/(abs(w.omega)+5.0)  # small coef not to divide by zero
+
+            # add small coef not to divide by zero
+            w.omega += dt*ENGINE_POWER*w.gas/WHEEL_MOMENT_OF_INERTIA/(abs(w.omega)+5.0)
             self.fuel_spent += dt*ENGINE_POWER*w.gas

             if w.brake >= 0.9:
@@ -167,13 +182,15 @@ class Car:

             # Physically correct is to always apply friction_limit until speed is equal.
             # But dt is finite, that will lead to oscillations if difference is already near zero.
-            f_force *= 205000*SIZE*SIZE  # Random coefficient to cut oscillations in few steps (have no effect on friction_limit)
+
+            # Random coefficient to cut oscillations in few steps (have no effect on friction_limit)
+            f_force *= 205000*SIZE*SIZE
             p_force *= 205000*SIZE*SIZE
             force = np.sqrt(np.square(f_force) + np.square(p_force))

             # Skid trace
             if abs(force) > 2.0*friction_limit:
-                if w.skid_particle and w.skid_particle.grass==grass and len(w.skid_particle.poly) < 30:
+                if w.skid_particle and w.skid_particle.grass == grass and len(w.skid_particle.poly) < 30:
                     w.skid_particle.poly.append( (w.position[0], w.position[1]) )
                 elif w.skid_start is None:
                     w.skid_start = w.position
@@ -213,9 +230,9 @@ class Car:
             s2 = math.sin(a2)
             c1 = math.cos(a1)
             c2 = math.cos(a2)
-            if s1>0 and s2>0: continue
-            if s1>0: c1 = np.sign(c1)
-            if s2>0: c2 = np.sign(c2)
+            if s1 > 0 and s2 > 0: continue
+            if s1 > 0: c1 = np.sign(c1)
+            if s2 > 0: c2 = np.sign(c2)
             white_poly = [
                 (-WHEEL_W*SIZE, +WHEEL_R*c1*SIZE), (+WHEEL_W*SIZE, +WHEEL_R*c1*SIZE),
                 (+WHEEL_W*SIZE, +WHEEL_R*c2*SIZE), (-WHEEL_W*SIZE, +WHEEL_R*c2*SIZE)
@@ -228,7 +245,7 @@ class Car:
         p = Particle()
         p.color = WHEEL_COLOR if not grass else MUD_COLOR
         p.ttl = 1
-        p.poly = [(point1[0],point1[1]), (point2[0],point2[1])]
+        p.poly = [(point1[0], point1[1]), (point2[0], point2[1])]
         p.grass = grass
         self.particles.append(p)
         while len(self.particles) > 30:
gym/envs/box2d/car_racing.py
@@ -1,3 +1,33 @@
+"""
+Easiest continuous control task to learn from pixels, a top-down racing environment.
+Discrete control is reasonable in this environment as well, on/off discretization is
+fine.
+
+State consists of STATE_W x STATE_H pixels.
+
+The reward is -0.1 every frame and +1000/N for every track tile visited, where N is
+the total number of tiles visited in the track. For example, if you have finished in 732 frames,
+your reward is 1000 - 0.1*732 = 926.8 points.
+
+The game is solved when the agent consistently gets 900+ points. The generated track is random every episode.
+
+The episode finishes when all the tiles are visited. The car also can go outside of the PLAYFIELD - that
+is far off the track, then it will get -100 and die.
+
+Some indicators are shown at the bottom of the window along with the state RGB buffer. From
+left to right: the true speed, four ABS sensors, the steering wheel position and gyroscope.
+
+To play yourself (it's rather fast for humans), type:
+
+python gym/envs/box2d/car_racing.py
+
+Remember it's a powerful rear-wheel drive car - don't press the accelerator and turn at the
+same time.
+
+Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
+"""
+
 import sys, math
 import numpy as np
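The interaction loop implied by this description is the standard gym one; roughly (a sketch assuming the registered CarRacing-v0 id):

    import gym
    import numpy as np

    env = gym.make("CarRacing-v0")
    s = env.reset()                 # STATE_W x STATE_H x 3 uint8 pixels
    a = np.array([0.0, 1.0, 0.0])   # steer, gas, brake
    s, r, done, info = env.step(a)  # r includes the -0.1 per-frame penalty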
@@ -12,33 +42,6 @@ from gym.utils import colorize, seeding, EzPickle
 import pyglet
 from pyglet import gl

-# Easiest continuous control task to learn from pixels, a top-down racing environment.
-# Discrete control is reasonable in this environment as well, on/off discretization is
-# fine.
-#
-# State consists of STATE_W x STATE_H pixels.
-#
-# Reward is -0.1 every frame and +1000/N for every track tile visited, where N is
-# the total number of tiles visited in the track. For example, if you have finished in 732 frames,
-# your reward is 1000 - 0.1*732 = 926.8 points.
-#
-# Game is solved when agent consistently gets 900+ points. Track generated is random every episode.
-#
-# Episode finishes when all tiles are visited. Car also can go outside of PLAYFIELD, that
-# is far off the track, then it will get -100 and die.
-#
-# Some indicators shown at the bottom of the window and the state RGB buffer. From
-# left to right: true speed, four ABS sensors, steering wheel position and gyroscope.
-#
-# To play yourself (it's rather fast for humans), type:
-#
-# python gym/envs/box2d/car_racing.py
-#
-# Remember it's powerful rear-wheel drive car, don't press accelerator and turn at the
-# same time.
-#
-# Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
-
 STATE_W = 96   # less than Atari 160x192
 STATE_H = 96
 VIDEO_W = 600
@@ -62,14 +65,18 @@ BORDER_MIN_COUNT = 4

 ROAD_COLOR = [0.4, 0.4, 0.4]


 class FrictionDetector(contactListener):
     def __init__(self, env):
         contactListener.__init__(self)
         self.env = env

     def BeginContact(self, contact):
         self._contact(contact, True)

     def EndContact(self, contact):
         self._contact(contact, False)

     def _contact(self, contact, begin):
         tile = None
         obj = None
@@ -91,14 +98,12 @@ class FrictionDetector(contactListener):
             return
         if begin:
             obj.tiles.add(tile)
-            # print tile.road_friction, "ADD", len(obj.tiles)
             if not tile.road_visited:
                 tile.road_visited = True
                 self.env.reward += 1000.0/len(self.env.track)
                 self.env.tile_visited_count += 1
         else:
             obj.tiles.remove(tile)
-            # print tile.road_friction, "DEL", len(obj.tiles) -- should delete to zero when on grass (this works)


 class CarRacing(gym.Env, EzPickle):
     metadata = {
@@ -120,10 +125,12 @@ class CarRacing(gym.Env, EzPickle):
         self.prev_reward = 0.0
         self.verbose = verbose
         self.fd_tile = fixtureDef(
-                shape = polygonShape(vertices=
-                    [(0, 0),(1, 0),(1, -1),(0, -1)]))
+                shape=polygonShape(vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]))

-        self.action_space = spaces.Box( np.array([-1,0,0]), np.array([+1,+1,+1]), dtype=np.float32)  # steer, gas, brake
+        self.action_space = spaces.Box(np.array([-1, 0, 0]),
+                                       np.array([+1, +1, +1]),
+                                       dtype=np.float32)  # steer, gas, brake
         self.observation_space = spaces.Box(low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8)

     def seed(self, seed=None):
@@ -146,19 +153,14 @@ class CarRacing(gym.Env, EzPickle):
         for c in range(CHECKPOINTS):
             alpha = 2*math.pi*c/CHECKPOINTS + self.np_random.uniform(0, 2*math.pi*1/CHECKPOINTS)
             rad = self.np_random.uniform(TRACK_RAD/3, TRACK_RAD)
-            if c==0:
+            if c == 0:
                 alpha = 0
                 rad = 1.5*TRACK_RAD
-            if c==CHECKPOINTS-1:
+            if c == CHECKPOINTS-1:
                 alpha = 2*math.pi*c/CHECKPOINTS
                 self.start_alpha = 2*math.pi*(-0.5)/CHECKPOINTS
                 rad = 1.5*TRACK_RAD
-            checkpoints.append( (alpha, rad*math.cos(alpha), rad*math.sin(alpha)) )
-
-        # print "\n".join(str(h) for h in checkpoints)
-        # self.road_poly = [ (  # uncomment this to see checkpoints
-        #    [ (tx,ty) for a,tx,ty in checkpoints ],
-        #    (0.7,0.7,0.9) ) ]
+            checkpoints.append((alpha, rad*math.cos(alpha), rad*math.sin(alpha)))
         self.road = []

         # Go from one checkpoint to another to create track
@@ -209,31 +211,30 @@ class CarRacing(gym.Env, EzPickle):
             beta += min(TRACK_TURN_RATE, abs(0.001*proj))
             x += p1x*TRACK_DETAIL_STEP
             y += p1y*TRACK_DETAIL_STEP
-            track.append( (alpha,prev_beta*0.5 + beta*0.5,x,y) )
+            track.append((alpha,prev_beta*0.5 + beta*0.5,x,y))
             if laps > 4:
                 break
             no_freeze -= 1
-            if no_freeze==0:
+            if no_freeze == 0:
                 break
-        # print "\n".join([str(t) for t in enumerate(track)])

         # Find closed loop range i1..i2, first loop should be ignored, second is OK
         i1, i2 = -1, -1
         i = len(track)
         while True:
             i -= 1
-            if i==0:
+            if i == 0:
                 return False  # Failed
             pass_through_start = track[i][0] > self.start_alpha and track[i-1][0] <= self.start_alpha
-            if pass_through_start and i2==-1:
+            if pass_through_start and i2 == -1:
                 i2 = i
-            elif pass_through_start and i1==-1:
+            elif pass_through_start and i1 == -1:
                 i1 = i
                 break
         if self.verbose == 1:
             print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2-i1))
-        assert i1!=-1
-        assert i2!=-1
+        assert i1 != -1
+        assert i2 != -1

         track = track[i1:i2-1]

@@ -242,8 +243,8 @@ class CarRacing(gym.Env, EzPickle):
         first_perp_y = math.sin(first_beta)
         # Length of perpendicular jump to put together head and tail
         well_glued_together = np.sqrt(
-            np.square( first_perp_x*(track[0][2] - track[-1][2]) ) +
-            np.square( first_perp_y*(track[0][3] - track[-1][3]) ))
+            np.square(first_perp_x*(track[0][2] - track[-1][2])) +
+            np.square(first_perp_y*(track[0][3] - track[-1][3])))
         if well_glued_together > TRACK_DETAIL_STEP:
             return False

@@ -284,11 +285,13 @@ class CarRacing(gym.Env, EzPickle):
             self.road.append(t)
             if border[i]:
                 side = np.sign(beta2 - beta1)
-                b1_l = (x1 + side* TRACK_WIDTH *math.cos(beta1), y1 + side* TRACK_WIDTH *math.sin(beta1))
-                b1_r = (x1 + side*(TRACK_WIDTH+BORDER)*math.cos(beta1), y1 + side*(TRACK_WIDTH+BORDER)*math.sin(beta1))
-                b2_l = (x2 + side* TRACK_WIDTH *math.cos(beta2), y2 + side* TRACK_WIDTH *math.sin(beta2))
-                b2_r = (x2 + side*(TRACK_WIDTH+BORDER)*math.cos(beta2), y2 + side*(TRACK_WIDTH+BORDER)*math.sin(beta2))
-                self.road_poly.append(( [b1_l, b1_r, b2_r, b2_l], (1,1,1) if i%2==0 else (1,0,0) ))
+                b1_l = (x1 + side * TRACK_WIDTH * math.cos(beta1), y1 + side * TRACK_WIDTH * math.sin(beta1))
+                b1_r = (x1 + side * (TRACK_WIDTH+BORDER) * math.cos(beta1),
+                        y1 + side * (TRACK_WIDTH+BORDER)*math.sin(beta1))
+                b2_l = (x2 + side * TRACK_WIDTH * math.cos(beta2), y2 + side * TRACK_WIDTH * math.sin(beta2))
+                b2_r = (x2 + side * (TRACK_WIDTH+BORDER) * math.cos(beta2),
+                        y2 + side * (TRACK_WIDTH+BORDER) * math.sin(beta2))
+                self.road_poly.append(([b1_l, b1_r, b2_r, b2_l], (1, 1, 1) if i % 2 == 0 else (1, 0, 0)))
         self.track = track
         return True

@@ -305,7 +308,7 @@ class CarRacing(gym.Env, EzPickle):
             if success:
                 break
             if self.verbose == 1:
-                print("retry to generate track (normal if there are not many of this messages)")
+                print("retry to generate track (normal if there are not many instances of this message)")
         self.car = Car(self.world, *self.track[0][1:4])

         return self.step(None)[0]
@@ -331,7 +334,7 @@ class CarRacing(gym.Env, EzPickle):
             self.car.fuel_spent = 0.0
             step_reward = self.reward - self.prev_reward
             self.prev_reward = self.reward
-            if self.tile_visited_count==len(self.track):
+            if self.tile_visited_count == len(self.track):
                 done = True
             x, y = self.car.hull.position
             if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
@@ -353,8 +356,6 @@ class CarRacing(gym.Env, EzPickle):
         if "t" not in self.__dict__: return  # reset() not called yet

         zoom = 0.1*SCALE*max(1-self.t, 0) + ZOOM*SCALE*min(self.t, 1)   # Animate zoom first second
-        zoom_state = ZOOM*SCALE*STATE_W/WINDOW_W
-        zoom_video = ZOOM*SCALE*VIDEO_W/WINDOW_W
         scroll_x = self.car.hull.position[0]
         scroll_y = self.car.hull.position[1]
         angle = -self.car.hull.angle
@@ -364,10 +365,10 @@ class CarRacing(gym.Env, EzPickle):
         self.transform.set_scale(zoom, zoom)
         self.transform.set_translation(
             WINDOW_W/2 - (scroll_x*zoom*math.cos(angle) - scroll_y*zoom*math.sin(angle)),
-            WINDOW_H/4 - (scroll_x*zoom*math.sin(angle) + scroll_y*zoom*math.cos(angle)) )
+            WINDOW_H/4 - (scroll_x*zoom*math.sin(angle) + scroll_y*zoom*math.cos(angle)))
         self.transform.set_rotation(angle)

-        self.car.draw(self.viewer, mode!="state_pixels")
+        self.car.draw(self.viewer, mode != "state_pixels")

         arr = None
         win = self.viewer.window
@@ -376,7 +377,7 @@ class CarRacing(gym.Env, EzPickle):

         win.clear()
         t = self.transform
-        if mode=='rgb_array':
+        if mode == 'rgb_array':
             VP_W = VIDEO_W
             VP_H = VIDEO_H
         elif mode == 'state_pixels':
@@ -439,17 +440,19 @@ class CarRacing(gym.Env, EzPickle):
         gl.glBegin(gl.GL_QUADS)
         s = W/40.0
         h = H/40.0
-        gl.glColor4f(0,0,0,1)
+        gl.glColor4f(0, 0, 0, 1)
         gl.glVertex3f(W, 0, 0)
         gl.glVertex3f(W, 5*h, 0)
         gl.glVertex3f(0, 5*h, 0)
         gl.glVertex3f(0, 0, 0)

         def vertical_ind(place, val, color):
             gl.glColor4f(color[0], color[1], color[2], 1)
             gl.glVertex3f((place+0)*s, h + h*val, 0)
             gl.glVertex3f((place+1)*s, h + h*val, 0)
             gl.glVertex3f((place+1)*s, h, 0)
             gl.glVertex3f((place+0)*s, h, 0)

         def horiz_ind(place, val, color):
             gl.glColor4f(color[0], color[1], color[2], 1)
             gl.glVertex3f((place+0)*s, 4*h , 0)
@@ -457,13 +460,13 @@ class CarRacing(gym.Env, EzPickle):
             gl.glVertex3f((place+val)*s, 2*h, 0)
             gl.glVertex3f((place+0)*s, 2*h, 0)
         true_speed = np.sqrt(np.square(self.car.hull.linearVelocity[0]) + np.square(self.car.hull.linearVelocity[1]))
-        vertical_ind(5, 0.02*true_speed, (1,1,1))
-        vertical_ind(7, 0.01*self.car.wheels[0].omega, (0.0,0,1))  # ABS sensors
-        vertical_ind(8, 0.01*self.car.wheels[1].omega, (0.0,0,1))
-        vertical_ind(9, 0.01*self.car.wheels[2].omega, (0.2,0,1))
-        vertical_ind(10,0.01*self.car.wheels[3].omega, (0.2,0,1))
-        horiz_ind(20, -10.0*self.car.wheels[0].joint.angle, (0,1,0))
-        horiz_ind(30, -0.8*self.car.hull.angularVelocity, (1,0,0))
+        vertical_ind(5, 0.02*true_speed, (1, 1, 1))
+        vertical_ind(7, 0.01*self.car.wheels[0].omega, (0.0, 0, 1))  # ABS sensors
+        vertical_ind(8, 0.01*self.car.wheels[1].omega, (0.0, 0, 1))
+        vertical_ind(9, 0.01*self.car.wheels[2].omega, (0.2, 0, 1))
+        vertical_ind(10,0.01*self.car.wheels[3].omega, (0.2, 0, 1))
+        horiz_ind(20, -10.0*self.car.wheels[0].joint.angle, (0, 1, 0))
+        horiz_ind(30, -0.8*self.car.hull.angularVelocity, (1, 0, 0))
         gl.glEnd()
         self.score_label.text = "%04i" % self.reward
         self.score_label.draw()
@@ -471,19 +474,21 @@ class CarRacing(gym.Env, EzPickle):

 if __name__=="__main__":
     from pyglet.window import key
-    a = np.array( [0.0, 0.0, 0.0] )
+    a = np.array([0.0, 0.0, 0.0])
+
     def key_press(k, mod):
         global restart
-        if k==0xff0d: restart = True
-        if k==key.LEFT:  a[0] = -1.0
-        if k==key.RIGHT: a[0] = +1.0
-        if k==key.UP:    a[1] = +1.0
-        if k==key.DOWN:  a[2] = +0.8   # set 1.0 for wheels to block to zero rotation
+        if k == 0xff0d: restart = True
+        if k == key.LEFT:  a[0] = -1.0
+        if k == key.RIGHT: a[0] = +1.0
+        if k == key.UP:    a[1] = +1.0
+        if k == key.DOWN:  a[2] = +0.8   # set 1.0 for wheels to block to zero rotation
+
     def key_release(k, mod):
-        if k==key.LEFT  and a[0]==-1.0: a[0] = 0
-        if k==key.RIGHT and a[0]==+1.0: a[0] = 0
-        if k==key.UP:    a[1] = 0
-        if k==key.DOWN:  a[2] = 0
+        if k == key.LEFT  and a[0] == -1.0: a[0] = 0
+        if k == key.RIGHT and a[0] == +1.0: a[0] = 0
+        if k == key.UP:    a[1] = 0
+        if k == key.DOWN:  a[2] = 0
     env = CarRacing()
     env.render()
     env.viewer.window.on_key_press = key_press
@@ -504,9 +509,6 @@ if __name__=="__main__":
             if steps % 200 == 0 or done:
                 print("\naction " + str(["{:+0.2f}".format(x) for x in a]))
                 print("step {} total_reward {:+0.2f}".format(steps, total_reward))
-                #import matplotlib.pyplot as plt
-                #plt.imshow(s)
-                #plt.savefig("test.jpeg")
             steps += 1
             isopen = env.render()
             if done or restart or isopen == False:
gym/envs/box2d/lunar_lander.py
@@ -1,3 +1,31 @@
+"""
+Rocket trajectory optimization is a classic topic in Optimal Control.
+
+According to Pontryagin's maximum principle it's optimal to fire engine full throttle or
+turn it off. That's the reason this environment is OK to have discreet actions (engine on or off).
+
+The landing pad is always at coordinates (0,0). The coordinates are the first two numbers in the state vector.
+Reward for moving from the top of the screen to the landing pad and zero speed is about 100..140 points.
+If the lander moves away from the landing pad it loses reward. The episode finishes if the lander crashes or
+comes to rest, receiving an additional -100 or +100 points. Each leg with ground contact is +10 points.
+Firing the main engine is -0.3 points each frame. Firing the side engine is -0.03 points each frame.
+Solved is 200 points.
+
+Landing outside the landing pad is possible. Fuel is infinite, so an agent can learn to fly and then land
+on its first attempt. Please see the source code for details.
+
+To see a heuristic landing, run:
+
+python gym/envs/box2d/lunar_lander.py
+
+To play yourself, run:
+
+python examples/agents/keyboard_agent.py LunarLander-v2
+
+Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
+"""
+

 import sys, math
 import numpy as np
@@ -8,30 +36,6 @@ import gym
 from gym import spaces
 from gym.utils import seeding, EzPickle

-# Rocket trajectory optimization is a classic topic in Optimal Control.
-#
-# According to Pontryagin's maximum principle it's optimal to fire engine full throttle or
-# turn it off. That's the reason this environment is OK to have discreet actions (engine on or off).
-#
-# Landing pad is always at coordinates (0,0). Coordinates are the first two numbers in state vector.
-# Reward for moving from the top of the screen to landing pad and zero speed is about 100..140 points.
-# If lander moves away from landing pad it loses reward back. Episode finishes if the lander crashes or
-# comes to rest, receiving additional -100 or +100 points. Each leg ground contact is +10. Firing main
-# engine is -0.3 points each frame. Firing side engine is -0.03 points each frame. Solved is 200 points.
-#
-# Landing outside landing pad is possible. Fuel is infinite, so an agent can learn to fly and then land
-# on its first attempt. Please see source code for details.
-#
-# To see heuristic landing, run:
-#
-# python gym/envs/box2d/lunar_lander.py
-#
-# To play yourself, run:
-#
-# python examples/agents/keyboard_agent.py LunarLander-v2
-#
-# Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
-
 FPS = 50
 SCALE = 30.0   # affects how fast-paced the game is, forces should be adjusted as well

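The reward scheme described in that docstring can be observed with the usual loop; a sketch assuming the registered LunarLander-v2 id:

    import gym

    env = gym.make("LunarLander-v2")
    s = env.reset()
    total, done = 0.0, False
    while not done:
        a = env.action_space.sample()   # 0: noop, 1: left engine, 2: main, 3: right engine
        s, r, done, info = env.step(a)
        total += r                      # around +200 indicates a solved landing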
@@ -41,8 +45,8 @@ SIDE_ENGINE_POWER = 0.6
 INITIAL_RANDOM = 1000.0   # Set 1500 to make game harder

 LANDER_POLY =[
-    (-14,+17), (-17,0), (-17,-10),
-    (+17,-10), (+17,0), (+14,+17)
+    (-14, +17), (-17, 0), (-17, -10),
+    (+17, -10), (+17, 0), (+14, +17)
     ]
 LEG_AWAY = 20
 LEG_DOWN = 18
@@ -55,21 +59,25 @@ SIDE_ENGINE_AWAY = 12.0
 VIEWPORT_W = 600
 VIEWPORT_H = 400


 class ContactDetector(contactListener):
     def __init__(self, env):
         contactListener.__init__(self)
         self.env = env

     def BeginContact(self, contact):
-        if self.env.lander==contact.fixtureA.body or self.env.lander==contact.fixtureB.body:
+        if self.env.lander == contact.fixtureA.body or self.env.lander == contact.fixtureB.body:
             self.env.game_over = True
         for i in range(2):
             if self.env.legs[i] in [contact.fixtureA.body, contact.fixtureB.body]:
                 self.env.legs[i].ground_contact = True

     def EndContact(self, contact):
         for i in range(2):
             if self.env.legs[i] in [contact.fixtureA.body, contact.fixtureB.body]:
                 self.env.legs[i].ground_contact = False


 class LunarLander(gym.Env, EzPickle):
     metadata = {
         'render.modes': ['human', 'rgb_array'],
@@ -131,7 +139,7 @@ class LunarLander(gym.Env, EzPickle):

         # terrain
         CHUNKS = 11
-        height = self.np_random.uniform(0, H/2, size=(CHUNKS+1,) )
+        height = self.np_random.uniform(0, H/2, size=(CHUNKS+1,))
         chunk_x = [W/(CHUNKS-1)*i for i in range(CHUNKS)]
         self.helipad_x1 = chunk_x[CHUNKS//2-1]
         self.helipad_x2 = chunk_x[CHUNKS//2+1]
@@ -143,7 +151,7 @@ class LunarLander(gym.Env, EzPickle):
         height[CHUNKS//2+2] = self.helipad_y
         smooth_y = [0.33*(height[i-1] + height[i+0] + height[i+1]) for i in range(CHUNKS)]

-        self.moon = self.world.CreateStaticBody( shapes=edgeShape(vertices=[(0, 0), (W, 0)]) )
+        self.moon = self.world.CreateStaticBody(shapes=edgeShape(vertices=[(0, 0), (W, 0)]))
         self.sky_polys = []
         for i in range(CHUNKS-1):
             p1 = (chunk_x[i], smooth_y[i])
@@ -152,36 +160,36 @@ class LunarLander(gym.Env, EzPickle):
                 vertices=[p1,p2],
                 density=0,
                 friction=0.1)
-            self.sky_polys.append( [p1, p2, (p2[0],H), (p1[0],H)] )
+            self.sky_polys.append([p1, p2, (p2[0], H), (p1[0], H)])

-        self.moon.color1 = (0.0,0.0,0.0)
-        self.moon.color2 = (0.0,0.0,0.0)
+        self.moon.color1 = (0.0, 0.0, 0.0)
+        self.moon.color2 = (0.0, 0.0, 0.0)

         initial_y = VIEWPORT_H/SCALE
         self.lander = self.world.CreateDynamicBody(
-            position = (VIEWPORT_W/SCALE/2, initial_y),
+            position=(VIEWPORT_W/SCALE/2, initial_y),
             angle=0.0,
             fixtures = fixtureDef(
-                shape=polygonShape(vertices=[ (x/SCALE,y/SCALE) for x,y in LANDER_POLY ]),
+                shape=polygonShape(vertices=[(x/SCALE, y/SCALE) for x, y in LANDER_POLY]),
                 density=5.0,
                 friction=0.1,
                 categoryBits=0x0010,
                 maskBits=0x001,   # collide only with ground
                 restitution=0.0)  # 0.99 bouncy
             )
-        self.lander.color1 = (0.5,0.4,0.9)
-        self.lander.color2 = (0.3,0.3,0.5)
+        self.lander.color1 = (0.5, 0.4, 0.9)
+        self.lander.color2 = (0.3, 0.3, 0.5)
         self.lander.ApplyForceToCenter( (
             self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM),
             self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM)
             ), True)

         self.legs = []
-        for i in [-1,+1]:
+        for i in [-1, +1]:
             leg = self.world.CreateDynamicBody(
-                position = (VIEWPORT_W/SCALE/2 - i*LEG_AWAY/SCALE, initial_y),
-                angle = (i*0.05),
-                fixtures = fixtureDef(
+                position=(VIEWPORT_W/SCALE/2 - i*LEG_AWAY/SCALE, initial_y),
+                angle=(i * 0.05),
+                fixtures=fixtureDef(
                     shape=polygonShape(box=(LEG_W/SCALE, LEG_H/SCALE)),
                     density=1.0,
                     restitution=0.0,
@@ -189,20 +197,20 @@ class LunarLander(gym.Env, EzPickle):
                     maskBits=0x001)
                 )
             leg.ground_contact = False
-            leg.color1 = (0.5,0.4,0.9)
-            leg.color2 = (0.3,0.3,0.5)
+            leg.color1 = (0.5, 0.4, 0.9)
+            leg.color2 = (0.3, 0.3, 0.5)
             rjd = revoluteJointDef(
                 bodyA=self.lander,
                 bodyB=leg,
                 localAnchorA=(0, 0),
-                localAnchorB=(i*LEG_AWAY/SCALE, LEG_DOWN/SCALE),
+                localAnchorB=(i * LEG_AWAY/SCALE, LEG_DOWN/SCALE),
                 enableMotor=True,
                 enableLimit=True,
                 maxMotorTorque=LEG_SPRING_TORQUE,
-                motorSpeed=+0.3*i  # low enough not to jump back into the sky
+                motorSpeed=+0.3 * i  # low enough not to jump back into the sky
                 )
-            if i==-1:
-                rjd.lowerAngle = +0.9 - 0.5  # Yes, the most esoteric numbers here, angles legs have freedom to travel within
+            if i == -1:
+                rjd.lowerAngle = +0.9 - 0.5  # The most esoteric numbers here, angled legs have freedom to travel within
                 rjd.upperAngle = +0.9
             else:
                 rjd.lowerAngle = -0.9
@@ -212,14 +220,14 @@ class LunarLander(gym.Env, EzPickle):

         self.drawlist = [self.lander] + self.legs

-        return self.step(np.array([0,0]) if self.continuous else 0)[0]
+        return self.step(np.array([0, 0]) if self.continuous else 0)[0]

     def _create_particle(self, mass, x, y, ttl):
         p = self.world.CreateDynamicBody(
-            position = (x,y),
+            position = (x, y),
             angle=0.0,
             fixtures = fixtureDef(
-                shape=circleShape(radius=2/SCALE, pos=(0,0)),
+                shape=circleShape(radius=2/SCALE, pos=(0, 0)),
                 density=mass,
                 friction=0.1,
                 categoryBits=0x0100,
@@ -232,7 +240,7 @@ class LunarLander(gym.Env, EzPickle):
         return p

     def _clean_particles(self, all):
-        while self.particles and (all or self.particles[0].ttl<0):
+        while self.particles and (all or self.particles[0].ttl < 0):
             self.world.DestroyBody(self.particles.pop(0))

     def step(self, action):
@@ -243,40 +251,53 @@ class LunarLander(gym.Env, EzPickle):

         # Engines
         tip = (math.sin(self.lander.angle), math.cos(self.lander.angle))
-        side = (-tip[1], tip[0]);
+        side = (-tip[1], tip[0])
         dispersion = [self.np_random.uniform(-1.0, +1.0) / SCALE for _ in range(2)]

         m_power = 0.0
-        if (self.continuous and action[0] > 0.0) or (not self.continuous and action==2):
+        if (self.continuous and action[0] > 0.0) or (not self.continuous and action == 2):
             # Main engine
             if self.continuous:
                 m_power = (np.clip(action[0], 0.0,1.0) + 1.0)*0.5   # 0.5..1.0
-                assert m_power>=0.5 and m_power <= 1.0
+                assert m_power >= 0.5 and m_power <= 1.0
             else:
                 m_power = 1.0
-            ox = tip[0]*(4/SCALE + 2*dispersion[0]) + side[0]*dispersion[1]  # 4 is move a bit downwards, +-2 for randomness
-            oy = -tip[1]*(4/SCALE + 2*dispersion[0]) - side[1]*dispersion[1]
+            ox = (tip[0] * (4/SCALE + 2 * dispersion[0]) +
+                  side[0] * dispersion[1])  # 4 is move a bit downwards, +-2 for randomness
+            oy = -tip[1] * (4/SCALE + 2 * dispersion[0]) - side[1] * dispersion[1]
             impulse_pos = (self.lander.position[0] + ox, self.lander.position[1] + oy)
-            p = self._create_particle(3.5, impulse_pos[0], impulse_pos[1], m_power)  # particles are just a decoration, 3.5 is here to make particle speed adequate
-            p.ApplyLinearImpulse( ( ox*MAIN_ENGINE_POWER*m_power, oy*MAIN_ENGINE_POWER*m_power), impulse_pos, True)
-            self.lander.ApplyLinearImpulse( (-ox*MAIN_ENGINE_POWER*m_power, -oy*MAIN_ENGINE_POWER*m_power), impulse_pos, True)
+            p = self._create_particle(3.5,  # 3.5 is here to make particle speed adequate
+                                      impulse_pos[0],
+                                      impulse_pos[1],
+                                      m_power)  # particles are just a decoration
+            p.ApplyLinearImpulse((ox * MAIN_ENGINE_POWER * m_power, oy * MAIN_ENGINE_POWER * m_power),
+                                 impulse_pos,
+                                 True)
+            self.lander.ApplyLinearImpulse((-ox * MAIN_ENGINE_POWER * m_power, -oy * MAIN_ENGINE_POWER * m_power),
+                                           impulse_pos,
+                                           True)

         s_power = 0.0
-        if (self.continuous and np.abs(action[1]) > 0.5) or (not self.continuous and action in [1,3]):
+        if (self.continuous and np.abs(action[1]) > 0.5) or (not self.continuous and action in [1, 3]):
             # Orientation engines
             if self.continuous:
                 direction = np.sign(action[1])
-                s_power = np.clip(np.abs(action[1]), 0.5,1.0)
-                assert s_power>=0.5 and s_power <= 1.0
+                s_power = np.clip(np.abs(action[1]), 0.5, 1.0)
+                assert s_power >= 0.5 and s_power <= 1.0
             else:
                 direction = action-2
                 s_power = 1.0
-            ox = tip[0]*dispersion[0] + side[0]*(3*dispersion[1]+direction*SIDE_ENGINE_AWAY/SCALE)
-            oy = -tip[1]*dispersion[0] - side[1]*(3*dispersion[1]+direction*SIDE_ENGINE_AWAY/SCALE)
-            impulse_pos = (self.lander.position[0] + ox - tip[0]*17/SCALE, self.lander.position[1] + oy + tip[1]*SIDE_ENGINE_HEIGHT/SCALE)
+            ox = tip[0] * dispersion[0] + side[0] * (3 * dispersion[1] + direction * SIDE_ENGINE_AWAY/SCALE)
+            oy = -tip[1] * dispersion[0] - side[1] * (3 * dispersion[1] + direction * SIDE_ENGINE_AWAY/SCALE)
+            impulse_pos = (self.lander.position[0] + ox - tip[0] * 17/SCALE,
+                           self.lander.position[1] + oy + tip[1] * SIDE_ENGINE_HEIGHT/SCALE)
             p = self._create_particle(0.7, impulse_pos[0], impulse_pos[1], s_power)
-            p.ApplyLinearImpulse( ( ox*SIDE_ENGINE_POWER*s_power,  oy*SIDE_ENGINE_POWER*s_power), impulse_pos, True)
-            self.lander.ApplyLinearImpulse( (-ox*SIDE_ENGINE_POWER*s_power, -oy*SIDE_ENGINE_POWER*s_power), impulse_pos, True)
+            p.ApplyLinearImpulse((ox * SIDE_ENGINE_POWER * s_power, oy * SIDE_ENGINE_POWER * s_power),
+                                 impulse_pos
+                                 , True)
+            self.lander.ApplyLinearImpulse((-ox * SIDE_ENGINE_POWER * s_power, -oy * SIDE_ENGINE_POWER * s_power),
+                                           impulse_pos,
+                                           True)

         self.world.Step(1.0/FPS, 6*30, 2*30)
@@ -292,7 +313,7 @@ class LunarLander(gym.Env, EzPickle):
             1.0 if self.legs[0].ground_contact else 0.0,
             1.0 if self.legs[1].ground_contact else 0.0
             ]
-        assert len(state)==8
+        assert len(state) == 8

         reward = 0
         shaping = \
@@ -304,7 +325,7 @@ class LunarLander(gym.Env, EzPickle):
         reward = shaping - self.prev_shaping
         self.prev_shaping = shaping

-        reward -= m_power*0.30  # less fuel spent is better, about -30 for heurisic landing
+        reward -= m_power*0.30  # less fuel spent is better, about -30 for heuristic landing
         reward -= s_power*0.03

         done = False
@@ -324,13 +345,13 @@ class LunarLander(gym.Env, EzPickle):

         for obj in self.particles:
             obj.ttl -= 0.15
-            obj.color1 = (max(0.2,0.2+obj.ttl), max(0.2,0.5*obj.ttl), max(0.2,0.5*obj.ttl))
-            obj.color2 = (max(0.2,0.2+obj.ttl), max(0.2,0.5*obj.ttl), max(0.2,0.5*obj.ttl))
+            obj.color1 = (max(0.2, 0.2+obj.ttl), max(0.2, 0.5*obj.ttl), max(0.2, 0.5*obj.ttl))
+            obj.color2 = (max(0.2, 0.2+obj.ttl), max(0.2, 0.5*obj.ttl), max(0.2, 0.5*obj.ttl))

         self._clean_particles(False)

         for p in self.sky_polys:
-            self.viewer.draw_polygon(p, color=(0,0,0))
+            self.viewer.draw_polygon(p, color=(0, 0, 0))

         for obj in self.particles + self.drawlist:
             for f in obj.fixtures:
@@ -348,42 +369,56 @@ class LunarLander(gym.Env, EzPickle):
         for x in [self.helipad_x1, self.helipad_x2]:
             flagy1 = self.helipad_y
             flagy2 = flagy1 + 50/SCALE
-            self.viewer.draw_polyline( [(x, flagy1), (x, flagy2)], color=(1,1,1) )
-            self.viewer.draw_polygon( [(x, flagy2), (x, flagy2-10/SCALE), (x+25/SCALE, flagy2-5/SCALE)], color=(0.8,0.8,0) )
+            self.viewer.draw_polyline([(x, flagy1), (x, flagy2)], color=(1, 1, 1))
+            self.viewer.draw_polygon([(x, flagy2), (x, flagy2-10/SCALE), (x + 25/SCALE, flagy2 - 5/SCALE)],
+                                     color=(0.8, 0.8, 0))

-        return self.viewer.render(return_rgb_array = mode=='rgb_array')
+        return self.viewer.render(return_rgb_array=mode == 'rgb_array')

     def close(self):
         if self.viewer is not None:
             self.viewer.close()
             self.viewer = None


 class LunarLanderContinuous(LunarLander):
     continuous = True

 def heuristic(env, s):
-    # Heuristic for:
-    # 1. Testing.
-    # 2. Demonstration rollout.
-    angle_targ = s[0]*0.5 + s[2]*1.0         # angle should point towards center (s[0] is horizontal coordinate, s[2] hor speed)
+    """
+    The heuristic for
+    1. Testing
+    2. Demonstration rollout.
+
+    Args:
+        env: The environment
+        s (list): The state. Attributes:
+                  s[0] is the horizontal coordinate
+                  s[1] is the vertical coordinate
+                  s[2] is the horizontal speed
+                  s[3] is the vertical speed
+                  s[4] is the angle
+                  s[5] is the angular speed
+                  s[6] 1 if first leg has contact, else 0
+                  s[7] 1 if second leg has contact, else 0
+    returns:
+         a: The heuristic to be fed into the step function defined above to determine the next step and reward.
+    """
+
+    angle_targ = s[0]*0.5 + s[2]*1.0         # angle should point towards center
     if angle_targ > 0.4: angle_targ = 0.4    # more than 0.4 radians (22 degrees) is bad
     if angle_targ < -0.4: angle_targ = -0.4
-    hover_targ = 0.55*np.abs(s[0])           # target y should be proporional to horizontal offset
+    hover_targ = 0.55*np.abs(s[0])           # target y should be proportional to horizontal offset

     # PID controller: s[4] angle, s[5] angularSpeed
-    angle_todo = (angle_targ - s[4])*0.5 - (s[5])*1.0
+    angle_todo = (angle_targ - s[4]) * 0.5 - (s[5])*1.0
     #print("angle_targ=%0.2f, angle_todo=%0.2f" % (angle_targ, angle_todo))

     # PID controller: s[1] vertical coordinate s[3] vertical speed
     hover_todo = (hover_targ - s[1])*0.5 - (s[3])*0.5
     #print("hover_targ=%0.2f, hover_todo=%0.2f" % (hover_targ, hover_todo))

     if s[6] or s[7]:  # legs have contact
         angle_todo = 0
         hover_todo = -(s[3])*0.5  # override to reduce fall speed, that's all we need after contact

     if env.continuous:
-        a = np.array( [hover_todo*20 - 1, -angle_todo*20] )
+        a = np.array([hover_todo*20 - 1, -angle_todo*20])
         a = np.clip(a, -1, +1)
     else:
         a = 0
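The documented contract composes with step() in the obvious way; a condensed sketch of what demo_heuristic_lander (next hunk) does:

    env = LunarLander()
    s = env.reset()
    done = False
    while not done:
        a = heuristic(env, s)           # PID-style control of angle and altitude
        s, r, done, info = env.step(a)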
@@ -416,5 +451,3 @@ def demo_heuristic_lander(env, seed=None, render=False):

 if __name__ == '__main__':
     demo_heuristic_lander(LunarLander(), render=True)
-
-
gym/envs/classic_control/acrobot.py
@@ -214,13 +214,17 @@ class AcrobotEnv(core.Env):
         self.viewer = None


 def wrap(x, m, M):
-    """
-    :param x: a scalar
-    :param m: minimum possible value in range
-    :param M: maximum possible value in range
-    Wraps ``x`` so m <= x <= M; but unlike ``bound()`` which
+    """Wraps ``x`` so m <= x <= M; but unlike ``bound()`` which
     truncates, ``wrap()`` wraps x around the coordinate system defined by m,M.\n
     For example, m = -180, M = 180 (degrees), x = 360 --> returns 0.
+
+    Args:
+        x: a scalar
+        m: minimum possible value in range
+        M: maximum possible value in range
+
+    Returns:
+        x: a scalar, wrapped
     """
     diff = M - m
     while x > M:
@@ -230,10 +234,14 @@ def wrap(x, m, M):
     return x


 def bound(x, m, M=None):
-    """
-    :param x: scalar
-    Either have m as scalar, so bound(x,m,M) which returns m <= x <= M *OR*
+    """Either have m as scalar, so bound(x,m,M) which returns m <= x <= M *OR*
     have m as length 2 vector, bound(x,m, <IGNORED>) returns m[0] <= x <= m[1].
+
+    Args:
+        x: scalar
+
+    Returns:
+        x: scalar, bound between min (m) and Max (M)
     """
     if M is None:
         M = m[1]
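The behavior the two docstrings promise, written out as a few hypothetical doctest-style checks:

    assert wrap(360, -180, 180) == 0  # the docstring's own degrees example
    assert bound(5, 0, 3) == 3        # scalar m, M form
    assert bound(5, [0, 3]) == 3      # length-2 vector form, M ignored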
@@ -248,17 +256,14 @@ def rk4(derivs, y0, t, *args, **kwargs):
     This is a toy implementation which may be useful if you find
     yourself stranded on a system w/o scipy.  Otherwise use
     :func:`scipy.integrate`.
-    *y0*
-        initial state vector
-    *t*
-        sample times
-    *derivs*
-        returns the derivative of the system and has the
-        signature ``dy = derivs(yi, ti)``
-    *args*
-        additional arguments passed to the derivative function
-    *kwargs*
-        additional keyword arguments passed to the derivative function
+
+    Args:
+        derivs: the derivative of the system and has the signature ``dy = derivs(yi, ti)``
+        y0: initial state vector
+        t: sample times
+        args: additional arguments passed to the derivative function
+        kwargs: additional keyword arguments passed to the derivative function

     Example 1 ::
         ## 2D system
         def derivs6(x,t):
@@ -278,6 +283,9 @@ def rk4(derivs, y0, t, *args, **kwargs):
         yout = rk4(derivs, y0, t)
     If you have access to scipy, you should probably be using the
     scipy.integrate tools rather than this function.
+
+    Returns:
+        yout: Runge-Kutta approximation of the ODE
     """

     try:
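Beyond the docstring's 2D example, a single scalar ODE fits the same signature (hypothetical values; dy/dt = -y decays to exp(-1) at t = 1):

    import numpy as np

    t = np.linspace(0, 1, 101)
    yout = rk4(lambda y, ti: -y, 1.0, t)
    # yout[-1] should be close to np.exp(-1) ~= 0.3679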
gym/envs/classic_control/cartpole.py
@@ -13,7 +13,8 @@ import numpy as np

 class CartPoleEnv(gym.Env):
     """
     Description:
-        A pole is attached by an un-actuated joint to a cart, which moves along a frictionless track. The pendulum starts upright, and the goal is to prevent it from falling over by increasing and reducing the cart's velocity.
+        A pole is attached by an un-actuated joint to a cart, which moves along a frictionless track. The pendulum
+        starts upright, and the goal is to prevent it from falling over by increasing and reducing the cart's velocity.

     Source:
         This environment corresponds to the version of the cart-pole problem described by Barto, Sutton, and Anderson
@@ -32,7 +33,9 @@ class CartPoleEnv(gym.Env):
         0   Push cart to the left
         1   Push cart to the right

-        Note: The amount the velocity that is reduced or increased is not fixed; it depends on the angle the pole is pointing. This is because the center of gravity of the pole increases the amount of energy needed to move the cart underneath it
+        Note: The amount the velocity that is reduced or increased is not fixed; it depends on the angle the pole is
+        pointing. This is because the center of gravity of the pole increases the amount of energy needed to move the
+        cart underneath it

     Reward:
         Reward is 1 for every step taken, including the termination step
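The two-action interface documented above supports a trivial hand-written controller; a sketch assuming the registered CartPole-v1 id:

    import gym

    env = gym.make("CartPole-v1")
    obs = env.reset()
    done = False
    while not done:
        a = 1 if obs[2] > 0 else 0        # push toward the side the pole leans
        obs, r, done, info = env.step(a)  # reward is 1 on every step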