Clean some docstrings (#1854)

* add type of argument

* fix typos

* split lines for formatting

* reformat string, add ellipsis, remove r string

* make docstring stylistically consistent

* make docstrings a little more elaborate

* reduce by 1 space

* make line wrap 120

* remove unnecessary line

* add returns to docstring

* add docstring, make code more pep8 and delete some unused print functions

* more pep8

* file docstring instead of comments

* delete unused variables, add file docstring and do some pep8 spring cleaning

* add file docstring, fix typos and add some pep8 corrections

Co-authored-by: Dan <daniel.timbrell@ing.com>
Author: Dan Timbrell
Date: 2020-04-24 23:10:27 +02:00
Committed by: GitHub
Parent: f2c9793eb7
Commit: 3bd5ef71c2
7 changed files with 360 additions and 291 deletions

View File

@@ -11,12 +11,20 @@ def cem(f, th_mean, batch_size, n_iter, elite_frac, initial_std=1.0):
"""
Generic implementation of the cross-entropy method for maximizing a black-box function
Args:
f: a function mapping from vector -> scalar
th_mean: initial mean over input distribution
batch_size: number of samples of theta to evaluate per batch
n_iter: number of batches
elite_frac: each batch, select this fraction of the top-performing samples
initial_std: initial standard deviation over parameter vectors
th_mean (np.array): initial mean over input distribution
batch_size (int): number of samples of theta to evaluate per batch
n_iter (int): number of batches
elite_frac (float): each batch, select this fraction of the top-performing samples
initial_std (float): initial standard deviation over parameter vectors
returns:
A generator of dicts. Subsequent dicts correspond to iterations of CEM algorithm.
The dicts contain the following values:
'ys' : numpy array with values of function evaluated at current population
'ys_mean': mean value of function over current population
'theta_mean': mean value of the parameter vector over current population
"""
n_elite = int(np.round(batch_size*elite_frac))
th_std = np.ones_like(th_mean) * initial_std
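As an editorial aside (not part of this commit): a minimal sketch of how the documented generator might be consumed, assuming a toy quadratic objective; the dict keys follow the docstring above.

```python
import numpy as np

def f(theta):
    # toy black-box objective: maximize negative squared distance to a fixed target
    return -np.sum((theta - np.array([1.0, -2.0, 3.0])) ** 2)

for i, iterdata in enumerate(cem(f, th_mean=np.zeros(3), batch_size=50, n_iter=20, elite_frac=0.2)):
    print("iter %2i  ys_mean %8.3f  theta_mean %s" % (i, iterdata['ys_mean'], iterdata['theta_mean']))
```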

View File

@@ -6,7 +6,7 @@ env_closer = closer.Closer()
class Env(object):
r"""The main OpenAI Gym class. It encapsulates an environment with
"""The main OpenAI Gym class. It encapsulates an environment with
arbitrary behind-the-scenes dynamics. An environment can be
partially or fully observed.
@@ -26,9 +26,7 @@ class Env(object):
Note: a default reward range set to [-inf,+inf] already exists. Set it if you want a narrower range.
The methods are accessed publicly as "step", "reset", etc.. The
non-underscored versions are wrapper methods to which we may add
functionality over time.
The methods are accessed publicly as "step", "reset", etc...
"""
# Set this in SOME subclasses
metadata = {'render.modes': []}
@@ -174,9 +172,9 @@ class GoalEnv(Env):
def compute_reward(self, achieved_goal, desired_goal, info):
"""Compute the step reward. This externalizes the reward function and makes
it dependent on an a desired goal and the one that was achieved. If you wish to include
it dependent on a desired goal and the one that was achieved. If you wish to include
additional rewards that are independent of the goal, you can include the necessary values
to derive it in info and compute it accordingly.
to derive it in 'info' and compute it accordingly.
Args:
achieved_goal (object): the goal that was achieved during execution
@@ -194,7 +192,7 @@ class GoalEnv(Env):
class Wrapper(Env):
r"""Wraps the environment to allow a modular transformation.
"""Wraps the environment to allow a modular transformation.
This class is the base class for all wrappers. The subclass could override
some methods to change the behavior of the original environment without touching the
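As an editorial aside (not part of this commit): a hypothetical `GoalEnv` subclass illustrating the `compute_reward` contract described in the docstring above — the reward depends only on the achieved and desired goals, with anything else passed through `info`.

```python
import numpy as np
import gym

class ReachEnv(gym.GoalEnv):
    """Hypothetical illustration of the compute_reward contract; not part of this commit."""

    def compute_reward(self, achieved_goal, desired_goal, info):
        # sparse reward: 0 when the achieved goal is within tolerance of the desired goal, -1 otherwise
        d = np.linalg.norm(np.asarray(achieved_goal) - np.asarray(desired_goal), axis=-1)
        return -(d > 0.05).astype(np.float32)
```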

View File

@@ -1,15 +1,17 @@
"""
Top-down car dynamics simulation.
Some ideas are taken from this great tutorial http://www.iforce2d.net/b2dtut/top-down-car by Chris Campbell.
This simulation is a bit more detailed, with wheels rotation.
Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
"""
import numpy as np
import math
import Box2D
from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener, shape)
# Top-down car dynamics simulation.
#
# Some ideas are taken from this great tutorial http://www.iforce2d.net/b2dtut/top-down-car by Chris Campbell.
# This simulation is a bit more detailed, with wheels rotation.
#
# Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
SIZE = 0.02
ENGINE_POWER = 100000000*SIZE*SIZE
WHEEL_MOMENT_OF_INERTIA = 4000*SIZE*SIZE
@@ -17,18 +19,18 @@ FRICTION_LIMIT = 1000000*SIZE*SIZE # friction ~= mass ~= size^2 (ca
WHEEL_R = 27
WHEEL_W = 14
WHEELPOS = [
(-55,+80), (+55,+80),
(-55,-82), (+55,-82)
(-55, +80), (+55, +80),
(-55, -82), (+55, -82)
]
HULL_POLY1 =[
(-60,+130), (+60,+130),
(+60,+110), (-60,+110)
HULL_POLY1 = [
(-60, +130), (+60, +130),
(+60, +110), (-60, +110)
]
HULL_POLY2 =[
(-15,+120), (+15,+120),
HULL_POLY2 = [
(-15, +120), (+15, +120),
(+20, +20), (-20, 20)
]
HULL_POLY3 =[
HULL_POLY3 = [
(+25, +20),
(+50, -10),
(+50, -40),
@@ -38,41 +40,42 @@ HULL_POLY3 =[
(-50, -10),
(-25, +20)
]
HULL_POLY4 =[
(-50,-120), (+50,-120),
(+50,-90), (-50,-90)
HULL_POLY4 = [
(-50, -120), (+50, -120),
(+50, -90), (-50, -90)
]
WHEEL_COLOR = (0.0,0.0,0.0)
WHEEL_WHITE = (0.3,0.3,0.3)
MUD_COLOR = (0.4,0.4,0.0)
WHEEL_COLOR = (0.0, 0.0, 0.0)
WHEEL_WHITE = (0.3, 0.3, 0.3)
MUD_COLOR = (0.4, 0.4, 0.0)
class Car:
def __init__(self, world, init_angle, init_x, init_y):
self.world = world
self.hull = self.world.CreateDynamicBody(
position = (init_x, init_y),
angle = init_angle,
fixtures = [
fixtureDef(shape = polygonShape(vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY1 ]), density=1.0),
fixtureDef(shape = polygonShape(vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY2 ]), density=1.0),
fixtureDef(shape = polygonShape(vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY3 ]), density=1.0),
fixtureDef(shape = polygonShape(vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY4 ]), density=1.0)
position=(init_x, init_y),
angle=init_angle,
fixtures=[
fixtureDef(shape=polygonShape(vertices=[(x*SIZE, y*SIZE) for x, y in HULL_POLY1]), density=1.0),
fixtureDef(shape=polygonShape(vertices=[(x*SIZE, y*SIZE) for x, y in HULL_POLY2]), density=1.0),
fixtureDef(shape=polygonShape(vertices=[(x*SIZE, y*SIZE) for x, y in HULL_POLY3]), density=1.0),
fixtureDef(shape=polygonShape(vertices=[(x*SIZE, y*SIZE) for x, y in HULL_POLY4]), density=1.0)
]
)
self.hull.color = (0.8,0.0,0.0)
self.hull.color = (0.8, 0.0, 0.0)
self.wheels = []
self.fuel_spent = 0.0
WHEEL_POLY = [
(-WHEEL_W,+WHEEL_R), (+WHEEL_W,+WHEEL_R),
(+WHEEL_W,-WHEEL_R), (-WHEEL_W,-WHEEL_R)
(-WHEEL_W, +WHEEL_R), (+WHEEL_W, +WHEEL_R),
(+WHEEL_W, -WHEEL_R), (-WHEEL_W, -WHEEL_R)
]
for wx,wy in WHEELPOS:
for wx, wy in WHEELPOS:
front_k = 1.0 if wy > 0 else 1.0
w = self.world.CreateDynamicBody(
position = (init_x+wx*SIZE, init_y+wy*SIZE),
angle = init_angle,
fixtures = fixtureDef(
shape=polygonShape(vertices=[ (x*front_k*SIZE,y*front_k*SIZE) for x,y in WHEEL_POLY ]),
position=(init_x+wx*SIZE, init_y+wy*SIZE),
angle=init_angle,
fixtures=fixtureDef(
shape=polygonShape(vertices=[(x*front_k*SIZE,y*front_k*SIZE) for x, y in WHEEL_POLY]),
density=0.1,
categoryBits=0x0020,
maskBits=0x001,
@@ -90,14 +93,14 @@ class Car:
rjd = revoluteJointDef(
bodyA=self.hull,
bodyB=w,
localAnchorA=(wx*SIZE,wy*SIZE),
localAnchorA=(wx*SIZE, wy*SIZE),
localAnchorB=(0,0),
enableMotor=True,
enableLimit=True,
maxMotorTorque=180*900*SIZE*SIZE,
motorSpeed = 0,
lowerAngle = -0.4,
upperAngle = +0.4,
motorSpeed=0,
lowerAngle=-0.4,
upperAngle=+0.4,
)
w.joint = self.world.CreateJoint(rjd)
w.tiles = set()
@@ -107,7 +110,11 @@ class Car:
self.particles = []
def gas(self, gas):
'control: rear wheel drive'
"""control: rear wheel drive
Args:
gas (float): How much gas gets applied. Gets clipped between 0 and 1.
"""
gas = np.clip(gas, 0, 1)
for w in self.wheels[2:4]:
diff = gas - w.gas
@@ -115,12 +122,18 @@ class Car:
w.gas += diff
def brake(self, b):
'control: brake b=0..1, more than 0.9 blocks wheels to zero rotation'
"""control: brake
Args:
b (0..1): Degree to which the brakes are applied. More than 0.9 blocks the wheels to zero rotation"""
for w in self.wheels:
w.brake = b
def steer(self, s):
'control: steer s=-1..1, it takes time to rotate steering wheel from side to side, s is target position'
"""control: steer
Args:
s (-1..1): target position, it takes time to rotate steering wheel from side-to-side"""
self.wheels[0].steer = s
self.wheels[1].steer = s
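As an editorial aside (not part of this commit): a sketch of how these control methods are typically driven, assuming a standalone Box2D world and a `Car` built as above; the value ranges follow the new docstrings.

```python
import Box2D

world = Box2D.b2World((0, 0))   # top-down view, so no gravity
car = Car(world, init_angle=0.0, init_x=0.0, init_y=0.0)

car.gas(0.5)        # gas is clipped to [0, 1]
car.steer(-0.3)     # target steering position in [-1, 1]
car.brake(0.0)      # values above 0.9 lock the wheels
car.step(1.0 / 50)  # advance the car dynamics by one 50 FPS frame
```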
@@ -148,7 +161,9 @@ class Car:
# WHEEL_MOMENT_OF_INERTIA*np.square(w.omega)/2 = E -- energy
# WHEEL_MOMENT_OF_INERTIA*w.omega * domega/dt = dE/dt = W -- power
# domega = dt*W/WHEEL_MOMENT_OF_INERTIA/w.omega
w.omega += dt*ENGINE_POWER*w.gas/WHEEL_MOMENT_OF_INERTIA/(abs(w.omega)+5.0) # small coef not to divide by zero
# add small coef not to divide by zero
w.omega += dt*ENGINE_POWER*w.gas/WHEEL_MOMENT_OF_INERTIA/(abs(w.omega)+5.0)
self.fuel_spent += dt*ENGINE_POWER*w.gas
if w.brake >= 0.9:
@@ -167,13 +182,15 @@ class Car:
# Physically correct is to always apply friction_limit until speed is equal.
# But dt is finite, that will lead to oscillations if difference is already near zero.
f_force *= 205000*SIZE*SIZE # Random coefficient to cut oscillations in few steps (have no effect on friction_limit)
# Random coefficient to cut oscillations in few steps (have no effect on friction_limit)
f_force *= 205000*SIZE*SIZE
p_force *= 205000*SIZE*SIZE
force = np.sqrt(np.square(f_force) + np.square(p_force))
# Skid trace
if abs(force) > 2.0*friction_limit:
if w.skid_particle and w.skid_particle.grass==grass and len(w.skid_particle.poly) < 30:
if w.skid_particle and w.skid_particle.grass == grass and len(w.skid_particle.poly) < 30:
w.skid_particle.poly.append( (w.position[0], w.position[1]) )
elif w.skid_start is None:
w.skid_start = w.position
@@ -213,9 +230,9 @@ class Car:
s2 = math.sin(a2)
c1 = math.cos(a1)
c2 = math.cos(a2)
if s1>0 and s2>0: continue
if s1>0: c1 = np.sign(c1)
if s2>0: c2 = np.sign(c2)
if s1 > 0 and s2 > 0: continue
if s1 > 0: c1 = np.sign(c1)
if s2 > 0: c2 = np.sign(c2)
white_poly = [
(-WHEEL_W*SIZE, +WHEEL_R*c1*SIZE), (+WHEEL_W*SIZE, +WHEEL_R*c1*SIZE),
(+WHEEL_W*SIZE, +WHEEL_R*c2*SIZE), (-WHEEL_W*SIZE, +WHEEL_R*c2*SIZE)
@@ -228,7 +245,7 @@ class Car:
p = Particle()
p.color = WHEEL_COLOR if not grass else MUD_COLOR
p.ttl = 1
p.poly = [(point1[0],point1[1]), (point2[0],point2[1])]
p.poly = [(point1[0], point1[1]), (point2[0], point2[1])]
p.grass = grass
self.particles.append(p)
while len(self.particles) > 30:

View File

@@ -1,3 +1,33 @@
"""
Easiest continuous control task to learn from pixels, a top-down racing environment.
Discrete control is reasonable in this environment as well, on/off discretization is
fine.
State consists of STATE_W x STATE_H pixels.
The reward is -0.1 every frame and +1000/N for every track tile visited, where N is
the total number of tiles visited in the track. For example, if you have finished in 732 frames,
your reward is 1000 - 0.1*732 = 926.8 points.
The game is solved when the agent consistently gets 900+ points. The generated track is random every episode.
The episode finishes when all the tiles are visited. The car also can go outside of the PLAYFIELD - that
is far off the track, then it will get -100 and die.
Some indicators are shown at the bottom of the window along with the state RGB buffer. From
left to right: the true speed, four ABS sensors, the steering wheel position and gyroscope.
To play yourself (it's rather fast for humans), type:
python gym/envs/box2d/car_racing.py
Remember it's a powerful rear-wheel drive car - don't press the accelerator and turn at the
same time.
Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
"""
import sys, math
import numpy as np
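As an editorial aside (not part of this commit): the reward scheme described in the docstring above can be observed with the usual Gym loop; a minimal sketch assuming the environment is registered as `CarRacing-v0`.

```python
import gym

env = gym.make("CarRacing-v0")
obs = env.reset()
done, total_reward = False, 0.0
while not done:
    action = env.action_space.sample()           # [steer, gas, brake]
    obs, reward, done, info = env.step(action)   # -0.1 per frame, +1000/N per newly visited tile
    total_reward += reward
env.close()
```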
@@ -12,33 +42,6 @@ from gym.utils import colorize, seeding, EzPickle
import pyglet
from pyglet import gl
# Easiest continuous control task to learn from pixels, a top-down racing environment.
# Discrete control is reasonable in this environment as well, on/off discretization is
# fine.
#
# State consists of STATE_W x STATE_H pixels.
#
# Reward is -0.1 every frame and +1000/N for every track tile visited, where N is
# the total number of tiles visited in the track. For example, if you have finished in 732 frames,
# your reward is 1000 - 0.1*732 = 926.8 points.
#
# Game is solved when agent consistently gets 900+ points. Track generated is random every episode.
#
# Episode finishes when all tiles are visited. Car also can go outside of PLAYFIELD, that
# is far off the track, then it will get -100 and die.
#
# Some indicators shown at the bottom of the window and the state RGB buffer. From
# left to right: true speed, four ABS sensors, steering wheel position and gyroscope.
#
# To play yourself (it's rather fast for humans), type:
#
# python gym/envs/box2d/car_racing.py
#
# Remember it's powerful rear-wheel drive car, don't press accelerator and turn at the
# same time.
#
# Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
STATE_W = 96 # less than Atari 160x192
STATE_H = 96
VIDEO_W = 600
@@ -62,14 +65,18 @@ BORDER_MIN_COUNT = 4
ROAD_COLOR = [0.4, 0.4, 0.4]
class FrictionDetector(contactListener):
def __init__(self, env):
contactListener.__init__(self)
self.env = env
def BeginContact(self, contact):
self._contact(contact, True)
def EndContact(self, contact):
self._contact(contact, False)
def _contact(self, contact, begin):
tile = None
obj = None
@@ -91,14 +98,12 @@ class FrictionDetector(contactListener):
return
if begin:
obj.tiles.add(tile)
# print tile.road_friction, "ADD", len(obj.tiles)
if not tile.road_visited:
tile.road_visited = True
self.env.reward += 1000.0/len(self.env.track)
self.env.tile_visited_count += 1
else:
obj.tiles.remove(tile)
# print tile.road_friction, "DEL", len(obj.tiles) -- should delete to zero when on grass (this works)
class CarRacing(gym.Env, EzPickle):
metadata = {
@@ -120,10 +125,12 @@ class CarRacing(gym.Env, EzPickle):
self.prev_reward = 0.0
self.verbose = verbose
self.fd_tile = fixtureDef(
shape = polygonShape(vertices=
[(0, 0),(1, 0),(1, -1),(0, -1)]))
shape=polygonShape(vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]))
self.action_space = spaces.Box(np.array([-1, 0, 0]),
np.array([+1, +1, +1]),
dtype=np.float32) # steer, gas, brake
self.action_space = spaces.Box( np.array([-1,0,0]), np.array([+1,+1,+1]), dtype=np.float32) # steer, gas, brake
self.observation_space = spaces.Box(low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8)
def seed(self, seed=None):
@@ -146,19 +153,14 @@ class CarRacing(gym.Env, EzPickle):
for c in range(CHECKPOINTS):
alpha = 2*math.pi*c/CHECKPOINTS + self.np_random.uniform(0, 2*math.pi*1/CHECKPOINTS)
rad = self.np_random.uniform(TRACK_RAD/3, TRACK_RAD)
if c==0:
if c == 0:
alpha = 0
rad = 1.5*TRACK_RAD
if c==CHECKPOINTS-1:
if c == CHECKPOINTS-1:
alpha = 2*math.pi*c/CHECKPOINTS
self.start_alpha = 2*math.pi*(-0.5)/CHECKPOINTS
rad = 1.5*TRACK_RAD
checkpoints.append( (alpha, rad*math.cos(alpha), rad*math.sin(alpha)) )
# print "\n".join(str(h) for h in checkpoints)
# self.road_poly = [ ( # uncomment this to see checkpoints
# [ (tx,ty) for a,tx,ty in checkpoints ],
# (0.7,0.7,0.9) ) ]
checkpoints.append((alpha, rad*math.cos(alpha), rad*math.sin(alpha)))
self.road = []
# Go from one checkpoint to another to create track
@@ -209,31 +211,30 @@ class CarRacing(gym.Env, EzPickle):
beta += min(TRACK_TURN_RATE, abs(0.001*proj))
x += p1x*TRACK_DETAIL_STEP
y += p1y*TRACK_DETAIL_STEP
track.append( (alpha,prev_beta*0.5 + beta*0.5,x,y) )
track.append((alpha,prev_beta*0.5 + beta*0.5,x,y))
if laps > 4:
break
no_freeze -= 1
if no_freeze==0:
if no_freeze == 0:
break
# print "\n".join([str(t) for t in enumerate(track)])
# Find closed loop range i1..i2, first loop should be ignored, second is OK
i1, i2 = -1, -1
i = len(track)
while True:
i -= 1
if i==0:
if i == 0:
return False # Failed
pass_through_start = track[i][0] > self.start_alpha and track[i-1][0] <= self.start_alpha
if pass_through_start and i2==-1:
if pass_through_start and i2 == -1:
i2 = i
elif pass_through_start and i1==-1:
elif pass_through_start and i1 == -1:
i1 = i
break
if self.verbose == 1:
print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2-i1))
assert i1!=-1
assert i2!=-1
assert i1 != -1
assert i2 != -1
track = track[i1:i2-1]
@@ -242,8 +243,8 @@ class CarRacing(gym.Env, EzPickle):
first_perp_y = math.sin(first_beta)
# Length of perpendicular jump to put together head and tail
well_glued_together = np.sqrt(
np.square( first_perp_x*(track[0][2] - track[-1][2]) ) +
np.square( first_perp_y*(track[0][3] - track[-1][3]) ))
np.square(first_perp_x*(track[0][2] - track[-1][2])) +
np.square(first_perp_y*(track[0][3] - track[-1][3])))
if well_glued_together > TRACK_DETAIL_STEP:
return False
@@ -284,11 +285,13 @@ class CarRacing(gym.Env, EzPickle):
self.road.append(t)
if border[i]:
side = np.sign(beta2 - beta1)
b1_l = (x1 + side* TRACK_WIDTH *math.cos(beta1), y1 + side* TRACK_WIDTH *math.sin(beta1))
b1_r = (x1 + side*(TRACK_WIDTH+BORDER)*math.cos(beta1), y1 + side*(TRACK_WIDTH+BORDER)*math.sin(beta1))
b2_l = (x2 + side* TRACK_WIDTH *math.cos(beta2), y2 + side* TRACK_WIDTH *math.sin(beta2))
b2_r = (x2 + side*(TRACK_WIDTH+BORDER)*math.cos(beta2), y2 + side*(TRACK_WIDTH+BORDER)*math.sin(beta2))
self.road_poly.append(( [b1_l, b1_r, b2_r, b2_l], (1,1,1) if i%2==0 else (1,0,0) ))
b1_l = (x1 + side * TRACK_WIDTH * math.cos(beta1), y1 + side * TRACK_WIDTH * math.sin(beta1))
b1_r = (x1 + side * (TRACK_WIDTH+BORDER) * math.cos(beta1),
y1 + side * (TRACK_WIDTH+BORDER)*math.sin(beta1))
b2_l = (x2 + side * TRACK_WIDTH * math.cos(beta2), y2 + side * TRACK_WIDTH * math.sin(beta2))
b2_r = (x2 + side * (TRACK_WIDTH+BORDER) * math.cos(beta2),
y2 + side * (TRACK_WIDTH+BORDER) * math.sin(beta2))
self.road_poly.append(([b1_l, b1_r, b2_r, b2_l], (1, 1, 1) if i % 2 == 0 else (1, 0, 0)))
self.track = track
return True
@@ -305,7 +308,7 @@ class CarRacing(gym.Env, EzPickle):
if success:
break
if self.verbose == 1:
print("retry to generate track (normal if there are not many of this messages)")
print("retry to generate track (normal if there are not many instances of this message)")
self.car = Car(self.world, *self.track[0][1:4])
return self.step(None)[0]
@@ -331,7 +334,7 @@ class CarRacing(gym.Env, EzPickle):
self.car.fuel_spent = 0.0
step_reward = self.reward - self.prev_reward
self.prev_reward = self.reward
if self.tile_visited_count==len(self.track):
if self.tile_visited_count == len(self.track):
done = True
x, y = self.car.hull.position
if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
@@ -353,8 +356,6 @@ class CarRacing(gym.Env, EzPickle):
if "t" not in self.__dict__: return # reset() not called yet
zoom = 0.1*SCALE*max(1-self.t, 0) + ZOOM*SCALE*min(self.t, 1) # Animate zoom first second
zoom_state = ZOOM*SCALE*STATE_W/WINDOW_W
zoom_video = ZOOM*SCALE*VIDEO_W/WINDOW_W
scroll_x = self.car.hull.position[0]
scroll_y = self.car.hull.position[1]
angle = -self.car.hull.angle
@@ -364,10 +365,10 @@ class CarRacing(gym.Env, EzPickle):
self.transform.set_scale(zoom, zoom)
self.transform.set_translation(
WINDOW_W/2 - (scroll_x*zoom*math.cos(angle) - scroll_y*zoom*math.sin(angle)),
WINDOW_H/4 - (scroll_x*zoom*math.sin(angle) + scroll_y*zoom*math.cos(angle)) )
WINDOW_H/4 - (scroll_x*zoom*math.sin(angle) + scroll_y*zoom*math.cos(angle)))
self.transform.set_rotation(angle)
self.car.draw(self.viewer, mode!="state_pixels")
self.car.draw(self.viewer, mode != "state_pixels")
arr = None
win = self.viewer.window
@@ -376,7 +377,7 @@ class CarRacing(gym.Env, EzPickle):
win.clear()
t = self.transform
if mode=='rgb_array':
if mode == 'rgb_array':
VP_W = VIDEO_W
VP_H = VIDEO_H
elif mode == 'state_pixels':
@@ -439,17 +440,19 @@ class CarRacing(gym.Env, EzPickle):
gl.glBegin(gl.GL_QUADS)
s = W/40.0
h = H/40.0
gl.glColor4f(0,0,0,1)
gl.glColor4f(0, 0, 0, 1)
gl.glVertex3f(W, 0, 0)
gl.glVertex3f(W, 5*h, 0)
gl.glVertex3f(0, 5*h, 0)
gl.glVertex3f(0, 0, 0)
def vertical_ind(place, val, color):
gl.glColor4f(color[0], color[1], color[2], 1)
gl.glVertex3f((place+0)*s, h + h*val, 0)
gl.glVertex3f((place+1)*s, h + h*val, 0)
gl.glVertex3f((place+1)*s, h, 0)
gl.glVertex3f((place+0)*s, h, 0)
def horiz_ind(place, val, color):
gl.glColor4f(color[0], color[1], color[2], 1)
gl.glVertex3f((place+0)*s, 4*h , 0)
@@ -457,13 +460,13 @@ class CarRacing(gym.Env, EzPickle):
gl.glVertex3f((place+val)*s, 2*h, 0)
gl.glVertex3f((place+0)*s, 2*h, 0)
true_speed = np.sqrt(np.square(self.car.hull.linearVelocity[0]) + np.square(self.car.hull.linearVelocity[1]))
vertical_ind(5, 0.02*true_speed, (1,1,1))
vertical_ind(7, 0.01*self.car.wheels[0].omega, (0.0,0,1)) # ABS sensors
vertical_ind(8, 0.01*self.car.wheels[1].omega, (0.0,0,1))
vertical_ind(9, 0.01*self.car.wheels[2].omega, (0.2,0,1))
vertical_ind(10,0.01*self.car.wheels[3].omega, (0.2,0,1))
horiz_ind(20, -10.0*self.car.wheels[0].joint.angle, (0,1,0))
horiz_ind(30, -0.8*self.car.hull.angularVelocity, (1,0,0))
vertical_ind(5, 0.02*true_speed, (1, 1, 1))
vertical_ind(7, 0.01*self.car.wheels[0].omega, (0.0, 0, 1)) # ABS sensors
vertical_ind(8, 0.01*self.car.wheels[1].omega, (0.0, 0, 1))
vertical_ind(9, 0.01*self.car.wheels[2].omega, (0.2, 0, 1))
vertical_ind(10,0.01*self.car.wheels[3].omega, (0.2, 0, 1))
horiz_ind(20, -10.0*self.car.wheels[0].joint.angle, (0, 1, 0))
horiz_ind(30, -0.8*self.car.hull.angularVelocity, (1, 0, 0))
gl.glEnd()
self.score_label.text = "%04i" % self.reward
self.score_label.draw()
@@ -471,19 +474,21 @@ class CarRacing(gym.Env, EzPickle):
if __name__=="__main__":
from pyglet.window import key
a = np.array( [0.0, 0.0, 0.0] )
a = np.array([0.0, 0.0, 0.0])
def key_press(k, mod):
global restart
if k==0xff0d: restart = True
if k==key.LEFT: a[0] = -1.0
if k==key.RIGHT: a[0] = +1.0
if k==key.UP: a[1] = +1.0
if k==key.DOWN: a[2] = +0.8 # set 1.0 for wheels to block to zero rotation
if k == 0xff0d: restart = True
if k == key.LEFT: a[0] = -1.0
if k == key.RIGHT: a[0] = +1.0
if k == key.UP: a[1] = +1.0
if k == key.DOWN: a[2] = +0.8 # set 1.0 for wheels to block to zero rotation
def key_release(k, mod):
if k==key.LEFT and a[0]==-1.0: a[0] = 0
if k==key.RIGHT and a[0]==+1.0: a[0] = 0
if k==key.UP: a[1] = 0
if k==key.DOWN: a[2] = 0
if k == key.LEFT and a[0] == -1.0: a[0] = 0
if k == key.RIGHT and a[0] == +1.0: a[0] = 0
if k == key.UP: a[1] = 0
if k == key.DOWN: a[2] = 0
env = CarRacing()
env.render()
env.viewer.window.on_key_press = key_press
@@ -504,9 +509,6 @@ if __name__=="__main__":
if steps % 200 == 0 or done:
print("\naction " + str(["{:+0.2f}".format(x) for x in a]))
print("step {} total_reward {:+0.2f}".format(steps, total_reward))
#import matplotlib.pyplot as plt
#plt.imshow(s)
#plt.savefig("test.jpeg")
steps += 1
isopen = env.render()
if done or restart or isopen == False:

View File

@@ -1,3 +1,31 @@
"""
Rocket trajectory optimization is a classic topic in Optimal Control.
According to Pontryagin's maximum principle it's optimal to fire engine full throttle or
turn it off. That's the reason this environment is OK to have discreet actions (engine on or off).
The landing pad is always at coordinates (0,0). The coordinates are the first two numbers in the state vector.
Reward for moving from the top of the screen to the landing pad and zero speed is about 100..140 points.
If the lander moves away from the landing pad it loses reward. The episode finishes if the lander crashes or
comes to rest, receiving an additional -100 or +100 points. Each leg with ground contact is +10 points.
Firing the main engine is -0.3 points each frame. Firing the side engine is -0.03 points each frame.
Solved is 200 points.
Landing outside the landing pad is possible. Fuel is infinite, so an agent can learn to fly and then land
on its first attempt. Please see the source code for details.
To see a heuristic landing, run:
python gym/envs/box2d/lunar_lander.py
To play yourself, run:
python examples/agents/keyboard_agent.py LunarLander-v2
Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
"""
import sys, math
import numpy as np
@@ -8,30 +36,6 @@ import gym
from gym import spaces
from gym.utils import seeding, EzPickle
# Rocket trajectory optimization is a classic topic in Optimal Control.
#
# According to Pontryagin's maximum principle it's optimal to fire engine full throttle or
# turn it off. That's the reason this environment is OK to have discreet actions (engine on or off).
#
# Landing pad is always at coordinates (0,0). Coordinates are the first two numbers in state vector.
# Reward for moving from the top of the screen to landing pad and zero speed is about 100..140 points.
# If lander moves away from landing pad it loses reward back. Episode finishes if the lander crashes or
# comes to rest, receiving additional -100 or +100 points. Each leg ground contact is +10. Firing main
# engine is -0.3 points each frame. Firing side engine is -0.03 points each frame. Solved is 200 points.
#
# Landing outside landing pad is possible. Fuel is infinite, so an agent can learn to fly and then land
# on its first attempt. Please see source code for details.
#
# To see heuristic landing, run:
#
# python gym/envs/box2d/lunar_lander.py
#
# To play yourself, run:
#
# python examples/agents/keyboard_agent.py LunarLander-v2
#
# Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
FPS = 50
SCALE = 30.0 # affects how fast-paced the game is, forces should be adjusted as well
@@ -41,8 +45,8 @@ SIDE_ENGINE_POWER = 0.6
INITIAL_RANDOM = 1000.0 # Set 1500 to make game harder
LANDER_POLY =[
(-14,+17), (-17,0), (-17,-10),
(+17,-10), (+17,0), (+14,+17)
(-14, +17), (-17, 0), (-17 ,-10),
(+17, -10), (+17, 0), (+14, +17)
]
LEG_AWAY = 20
LEG_DOWN = 18
@@ -55,21 +59,25 @@ SIDE_ENGINE_AWAY = 12.0
VIEWPORT_W = 600
VIEWPORT_H = 400
class ContactDetector(contactListener):
def __init__(self, env):
contactListener.__init__(self)
self.env = env
def BeginContact(self, contact):
if self.env.lander==contact.fixtureA.body or self.env.lander==contact.fixtureB.body:
if self.env.lander == contact.fixtureA.body or self.env.lander == contact.fixtureB.body:
self.env.game_over = True
for i in range(2):
if self.env.legs[i] in [contact.fixtureA.body, contact.fixtureB.body]:
self.env.legs[i].ground_contact = True
def EndContact(self, contact):
for i in range(2):
if self.env.legs[i] in [contact.fixtureA.body, contact.fixtureB.body]:
self.env.legs[i].ground_contact = False
class LunarLander(gym.Env, EzPickle):
metadata = {
'render.modes': ['human', 'rgb_array'],
@@ -131,7 +139,7 @@ class LunarLander(gym.Env, EzPickle):
# terrain
CHUNKS = 11
height = self.np_random.uniform(0, H/2, size=(CHUNKS+1,) )
height = self.np_random.uniform(0, H/2, size=(CHUNKS+1,))
chunk_x = [W/(CHUNKS-1)*i for i in range(CHUNKS)]
self.helipad_x1 = chunk_x[CHUNKS//2-1]
self.helipad_x2 = chunk_x[CHUNKS//2+1]
@@ -143,7 +151,7 @@ class LunarLander(gym.Env, EzPickle):
height[CHUNKS//2+2] = self.helipad_y
smooth_y = [0.33*(height[i-1] + height[i+0] + height[i+1]) for i in range(CHUNKS)]
self.moon = self.world.CreateStaticBody( shapes=edgeShape(vertices=[(0, 0), (W, 0)]) )
self.moon = self.world.CreateStaticBody(shapes=edgeShape(vertices=[(0, 0), (W, 0)]))
self.sky_polys = []
for i in range(CHUNKS-1):
p1 = (chunk_x[i], smooth_y[i])
@@ -152,36 +160,36 @@ class LunarLander(gym.Env, EzPickle):
vertices=[p1,p2],
density=0,
friction=0.1)
self.sky_polys.append( [p1, p2, (p2[0],H), (p1[0],H)] )
self.sky_polys.append([p1, p2, (p2[0], H), (p1[0], H)])
self.moon.color1 = (0.0,0.0,0.0)
self.moon.color2 = (0.0,0.0,0.0)
self.moon.color1 = (0.0, 0.0, 0.0)
self.moon.color2 = (0.0, 0.0, 0.0)
initial_y = VIEWPORT_H/SCALE
self.lander = self.world.CreateDynamicBody(
position = (VIEWPORT_W/SCALE/2, initial_y),
position=(VIEWPORT_W/SCALE/2, initial_y),
angle=0.0,
fixtures = fixtureDef(
shape=polygonShape(vertices=[ (x/SCALE,y/SCALE) for x,y in LANDER_POLY ]),
shape=polygonShape(vertices=[(x/SCALE, y/SCALE) for x, y in LANDER_POLY]),
density=5.0,
friction=0.1,
categoryBits=0x0010,
maskBits=0x001, # collide only with ground
restitution=0.0) # 0.99 bouncy
)
self.lander.color1 = (0.5,0.4,0.9)
self.lander.color2 = (0.3,0.3,0.5)
self.lander.color1 = (0.5, 0.4, 0.9)
self.lander.color2 = (0.3, 0.3, 0.5)
self.lander.ApplyForceToCenter( (
self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM),
self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM)
), True)
self.legs = []
for i in [-1,+1]:
for i in [-1, +1]:
leg = self.world.CreateDynamicBody(
position = (VIEWPORT_W/SCALE/2 - i*LEG_AWAY/SCALE, initial_y),
angle = (i*0.05),
fixtures = fixtureDef(
position=(VIEWPORT_W/SCALE/2 - i*LEG_AWAY/SCALE, initial_y),
angle=(i * 0.05),
fixtures=fixtureDef(
shape=polygonShape(box=(LEG_W/SCALE, LEG_H/SCALE)),
density=1.0,
restitution=0.0,
@@ -189,20 +197,20 @@ class LunarLander(gym.Env, EzPickle):
maskBits=0x001)
)
leg.ground_contact = False
leg.color1 = (0.5,0.4,0.9)
leg.color2 = (0.3,0.3,0.5)
leg.color1 = (0.5, 0.4, 0.9)
leg.color2 = (0.3, 0.3, 0.5)
rjd = revoluteJointDef(
bodyA=self.lander,
bodyB=leg,
localAnchorA=(0, 0),
localAnchorB=(i*LEG_AWAY/SCALE, LEG_DOWN/SCALE),
localAnchorB=(i * LEG_AWAY/SCALE, LEG_DOWN/SCALE),
enableMotor=True,
enableLimit=True,
maxMotorTorque=LEG_SPRING_TORQUE,
motorSpeed=+0.3*i # low enough not to jump back into the sky
motorSpeed=+0.3 * i # low enough not to jump back into the sky
)
if i==-1:
rjd.lowerAngle = +0.9 - 0.5 # Yes, the most esoteric numbers here, angles legs have freedom to travel within
if i == -1:
rjd.lowerAngle = +0.9 - 0.5 # The most esoteric numbers here, angled legs have freedom to travel within
rjd.upperAngle = +0.9
else:
rjd.lowerAngle = -0.9
@@ -212,14 +220,14 @@ class LunarLander(gym.Env, EzPickle):
self.drawlist = [self.lander] + self.legs
return self.step(np.array([0,0]) if self.continuous else 0)[0]
return self.step(np.array([0, 0]) if self.continuous else 0)[0]
def _create_particle(self, mass, x, y, ttl):
p = self.world.CreateDynamicBody(
position = (x,y),
position = (x, y),
angle=0.0,
fixtures = fixtureDef(
shape=circleShape(radius=2/SCALE, pos=(0,0)),
shape=circleShape(radius=2/SCALE, pos=(0, 0)),
density=mass,
friction=0.1,
categoryBits=0x0100,
@@ -232,7 +240,7 @@ class LunarLander(gym.Env, EzPickle):
return p
def _clean_particles(self, all):
while self.particles and (all or self.particles[0].ttl<0):
while self.particles and (all or self.particles[0].ttl < 0):
self.world.DestroyBody(self.particles.pop(0))
def step(self, action):
@@ -243,40 +251,53 @@ class LunarLander(gym.Env, EzPickle):
# Engines
tip = (math.sin(self.lander.angle), math.cos(self.lander.angle))
side = (-tip[1], tip[0]);
side = (-tip[1], tip[0])
dispersion = [self.np_random.uniform(-1.0, +1.0) / SCALE for _ in range(2)]
m_power = 0.0
if (self.continuous and action[0] > 0.0) or (not self.continuous and action==2):
if (self.continuous and action[0] > 0.0) or (not self.continuous and action == 2):
# Main engine
if self.continuous:
m_power = (np.clip(action[0], 0.0,1.0) + 1.0)*0.5 # 0.5..1.0
assert m_power>=0.5 and m_power <= 1.0
assert m_power >= 0.5 and m_power <= 1.0
else:
m_power = 1.0
ox = tip[0]*(4/SCALE + 2*dispersion[0]) + side[0]*dispersion[1] # 4 is move a bit downwards, +-2 for randomness
oy = -tip[1]*(4/SCALE + 2*dispersion[0]) - side[1]*dispersion[1]
ox = (tip[0] * (4/SCALE + 2 * dispersion[0]) +
side[0] * dispersion[1]) # 4 is move a bit downwards, +-2 for randomness
oy = -tip[1] * (4/SCALE + 2 * dispersion[0]) - side[1] * dispersion[1]
impulse_pos = (self.lander.position[0] + ox, self.lander.position[1] + oy)
p = self._create_particle(3.5, impulse_pos[0], impulse_pos[1], m_power) # particles are just a decoration, 3.5 is here to make particle speed adequate
p.ApplyLinearImpulse( ( ox*MAIN_ENGINE_POWER*m_power, oy*MAIN_ENGINE_POWER*m_power), impulse_pos, True)
self.lander.ApplyLinearImpulse( (-ox*MAIN_ENGINE_POWER*m_power, -oy*MAIN_ENGINE_POWER*m_power), impulse_pos, True)
p = self._create_particle(3.5, # 3.5 is here to make particle speed adequate
impulse_pos[0],
impulse_pos[1],
m_power) # particles are just a decoration
p.ApplyLinearImpulse((ox * MAIN_ENGINE_POWER * m_power, oy * MAIN_ENGINE_POWER * m_power),
impulse_pos,
True)
self.lander.ApplyLinearImpulse((-ox * MAIN_ENGINE_POWER * m_power, -oy * MAIN_ENGINE_POWER * m_power),
impulse_pos,
True)
s_power = 0.0
if (self.continuous and np.abs(action[1]) > 0.5) or (not self.continuous and action in [1,3]):
if (self.continuous and np.abs(action[1]) > 0.5) or (not self.continuous and action in [1, 3]):
# Orientation engines
if self.continuous:
direction = np.sign(action[1])
s_power = np.clip(np.abs(action[1]), 0.5,1.0)
assert s_power>=0.5 and s_power <= 1.0
s_power = np.clip(np.abs(action[1]), 0.5, 1.0)
assert s_power >= 0.5 and s_power <= 1.0
else:
direction = action-2
s_power = 1.0
ox = tip[0]*dispersion[0] + side[0]*(3*dispersion[1]+direction*SIDE_ENGINE_AWAY/SCALE)
oy = -tip[1]*dispersion[0] - side[1]*(3*dispersion[1]+direction*SIDE_ENGINE_AWAY/SCALE)
impulse_pos = (self.lander.position[0] + ox - tip[0]*17/SCALE, self.lander.position[1] + oy + tip[1]*SIDE_ENGINE_HEIGHT/SCALE)
ox = tip[0] * dispersion[0] + side[0] * (3 * dispersion[1] + direction * SIDE_ENGINE_AWAY/SCALE)
oy = -tip[1] * dispersion[0] - side[1] * (3 * dispersion[1] + direction * SIDE_ENGINE_AWAY/SCALE)
impulse_pos = (self.lander.position[0] + ox - tip[0] * 17/SCALE,
self.lander.position[1] + oy + tip[1] * SIDE_ENGINE_HEIGHT/SCALE)
p = self._create_particle(0.7, impulse_pos[0], impulse_pos[1], s_power)
p.ApplyLinearImpulse( ( ox*SIDE_ENGINE_POWER*s_power, oy*SIDE_ENGINE_POWER*s_power), impulse_pos, True)
self.lander.ApplyLinearImpulse( (-ox*SIDE_ENGINE_POWER*s_power, -oy*SIDE_ENGINE_POWER*s_power), impulse_pos, True)
p.ApplyLinearImpulse((ox * SIDE_ENGINE_POWER * s_power, oy * SIDE_ENGINE_POWER * s_power),
impulse_pos
, True)
self.lander.ApplyLinearImpulse((-ox * SIDE_ENGINE_POWER * s_power, -oy * SIDE_ENGINE_POWER * s_power),
impulse_pos,
True)
self.world.Step(1.0/FPS, 6*30, 2*30)
@@ -292,7 +313,7 @@ class LunarLander(gym.Env, EzPickle):
1.0 if self.legs[0].ground_contact else 0.0,
1.0 if self.legs[1].ground_contact else 0.0
]
assert len(state)==8
assert len(state) == 8
reward = 0
shaping = \
@@ -304,7 +325,7 @@ class LunarLander(gym.Env, EzPickle):
reward = shaping - self.prev_shaping
self.prev_shaping = shaping
reward -= m_power*0.30 # less fuel spent is better, about -30 for heurisic landing
reward -= m_power*0.30 # less fuel spent is better, about -30 for heuristic landing
reward -= s_power*0.03
done = False
@@ -324,13 +345,13 @@ class LunarLander(gym.Env, EzPickle):
for obj in self.particles:
obj.ttl -= 0.15
obj.color1 = (max(0.2,0.2+obj.ttl), max(0.2,0.5*obj.ttl), max(0.2,0.5*obj.ttl))
obj.color2 = (max(0.2,0.2+obj.ttl), max(0.2,0.5*obj.ttl), max(0.2,0.5*obj.ttl))
obj.color1 = (max(0.2, 0.2+obj.ttl), max(0.2, 0.5*obj.ttl), max(0.2, 0.5*obj.ttl))
obj.color2 = (max(0.2, 0.2+obj.ttl), max(0.2, 0.5*obj.ttl), max(0.2, 0.5*obj.ttl))
self._clean_particles(False)
for p in self.sky_polys:
self.viewer.draw_polygon(p, color=(0,0,0))
self.viewer.draw_polygon(p, color=(0, 0, 0))
for obj in self.particles + self.drawlist:
for f in obj.fixtures:
@@ -348,42 +369,56 @@ class LunarLander(gym.Env, EzPickle):
for x in [self.helipad_x1, self.helipad_x2]:
flagy1 = self.helipad_y
flagy2 = flagy1 + 50/SCALE
self.viewer.draw_polyline( [(x, flagy1), (x, flagy2)], color=(1,1,1) )
self.viewer.draw_polygon( [(x, flagy2), (x, flagy2-10/SCALE), (x+25/SCALE, flagy2-5/SCALE)], color=(0.8,0.8,0) )
self.viewer.draw_polyline([(x, flagy1), (x, flagy2)], color=(1, 1, 1))
self.viewer.draw_polygon([(x, flagy2), (x, flagy2-10/SCALE), (x + 25/SCALE, flagy2 - 5/SCALE)],
color=(0.8, 0.8, 0))
return self.viewer.render(return_rgb_array = mode=='rgb_array')
return self.viewer.render(return_rgb_array=mode == 'rgb_array')
def close(self):
if self.viewer is not None:
self.viewer.close()
self.viewer = None
class LunarLanderContinuous(LunarLander):
continuous = True
def heuristic(env, s):
# Heuristic for:
# 1. Testing.
# 2. Demonstration rollout.
angle_targ = s[0]*0.5 + s[2]*1.0 # angle should point towards center (s[0] is horizontal coordinate, s[2] hor speed)
"""
The heuristic for
1. Testing
2. Demonstration rollout.
Args:
env: The environment
s (list): The state. Attributes:
s[0] is the horizontal coordinate
s[1] is the vertical coordinate
s[2] is the horizontal speed
s[3] is the vertical speed
s[4] is the angle
s[5] is the angular speed
s[6] 1 if first leg has contact, else 0
s[7] 1 if second leg has contact, else 0
returns:
a: The heuristic to be fed into the step function defined above to determine the next step and reward.
"""
angle_targ = s[0]*0.5 + s[2]*1.0 # angle should point towards center
if angle_targ > 0.4: angle_targ = 0.4 # more than 0.4 radians (22 degrees) is bad
if angle_targ < -0.4: angle_targ = -0.4
hover_targ = 0.55*np.abs(s[0]) # target y should be proporional to horizontal offset
hover_targ = 0.55*np.abs(s[0]) # target y should be proportional to horizontal offset
# PID controller: s[4] angle, s[5] angularSpeed
angle_todo = (angle_targ - s[4])*0.5 - (s[5])*1.0
#print("angle_targ=%0.2f, angle_todo=%0.2f" % (angle_targ, angle_todo))
# PID controller: s[1] vertical coordinate s[3] vertical speed
angle_todo = (angle_targ - s[4]) * 0.5 - (s[5])*1.0
hover_todo = (hover_targ - s[1])*0.5 - (s[3])*0.5
#print("hover_targ=%0.2f, hover_todo=%0.2f" % (hover_targ, hover_todo))
if s[6] or s[7]: # legs have contact
angle_todo = 0
hover_todo = -(s[3])*0.5 # override to reduce fall speed, that's all we need after contact
if env.continuous:
a = np.array( [hover_todo*20 - 1, -angle_todo*20] )
a = np.array([hover_todo*20 - 1, -angle_todo*20])
a = np.clip(a, -1, +1)
else:
a = 0
@@ -416,5 +451,3 @@ def demo_heuristic_lander(env, seed=None, render=False):
if __name__ == '__main__':
demo_heuristic_lander(LunarLander(), render=True)
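As an editorial aside (not part of this commit): the heuristic documented above can also be driven outside `demo_heuristic_lander`; a sketch assuming the module is importable as `gym.envs.box2d.lunar_lander`.

```python
import gym
from gym.envs.box2d.lunar_lander import heuristic

env = gym.make("LunarLander-v2")
s = env.reset()
total_reward, done = 0.0, False
while not done:
    a = heuristic(env.unwrapped, s)   # maps the 8-dimensional state to an action
    s, r, done, info = env.step(a)
    total_reward += r
env.close()
print("total reward %+0.2f" % total_reward)
```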

View File

@@ -214,13 +214,17 @@ class AcrobotEnv(core.Env):
self.viewer = None
def wrap(x, m, M):
"""
:param x: a scalar
:param m: minimum possible value in range
:param M: maximum possible value in range
Wraps ``x`` so m <= x <= M; but unlike ``bound()`` which
"""Wraps ``x`` so m <= x <= M; but unlike ``bound()`` which
truncates, ``wrap()`` wraps x around the coordinate system defined by m,M.\n
For example, m = -180, M = 180 (degrees), x = 360 --> returns 0.
Args:
x: a scalar
m: minimum possible value in range
M: maximum possible value in range
Returns:
x: a scalar, wrapped
"""
diff = M - m
while x > M:
@@ -230,10 +234,14 @@ def wrap(x, m, M):
return x
def bound(x, m, M=None):
"""
:param x: scalar
Either have m as scalar, so bound(x,m,M) which returns m <= x <= M *OR*
"""Either have m as scalar, so bound(x,m,M) which returns m <= x <= M *OR*
have m as length 2 vector, bound(x,m, <IGNORED>) returns m[0] <= x <= m[1].
Args:
x: scalar
Returns:
x: scalar, bound between min (m) and Max (M)
"""
if M is None:
M = m[1]
@@ -248,17 +256,14 @@ def rk4(derivs, y0, t, *args, **kwargs):
This is a toy implementation which may be useful if you find
yourself stranded on a system w/o scipy. Otherwise use
:func:`scipy.integrate`.
*y0*
initial state vector
*t*
sample times
*derivs*
returns the derivative of the system and has the
signature ``dy = derivs(yi, ti)``
*args*
additional arguments passed to the derivative function
*kwargs*
additional keyword arguments passed to the derivative function
Args:
derivs: the derivative of the system and has the signature ``dy = derivs(yi, ti)``
y0: initial state vector
t: sample times
args: additional arguments passed to the derivative function
kwargs: additional keyword arguments passed to the derivative function
Example 1 ::
## 2D system
def derivs6(x,t):
@@ -278,6 +283,9 @@ def rk4(derivs, y0, t, *args, **kwargs):
yout = rk4(derivs, y0, t)
If you have access to scipy, you should probably be using the
scipy.integrate tools rather than this function.
Returns:
yout: Runge-Kutta approximation of the ODE
"""
try:
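As an editorial aside (not part of this commit): a small sketch exercising the helpers whose docstrings are cleaned up above, assuming they are imported from `gym.envs.classic_control.acrobot`.

```python
import numpy as np
from gym.envs.classic_control.acrobot import wrap, bound, rk4

print(wrap(360.0, -180.0, 180.0))   # 0.0: wrapped back into [-180, 180]
print(bound(1.7, 0.0, 1.0))         # 1.0: clipped to [m, M]

# integrate dy/dt = -y from y(0) = 1 over t in [0, 5]; yout approximates exp(-t)
t = np.linspace(0, 5, 101)
yout = rk4(lambda y, ti: -y, [1.0], t)
print(yout[-1])                      # close to exp(-5) ~= 0.0067
```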

View File

@@ -13,7 +13,8 @@ import numpy as np
class CartPoleEnv(gym.Env):
"""
Description:
A pole is attached by an un-actuated joint to a cart, which moves along a frictionless track. The pendulum starts upright, and the goal is to prevent it from falling over by increasing and reducing the cart's velocity.
A pole is attached by an un-actuated joint to a cart, which moves along a frictionless track. The pendulum
starts upright, and the goal is to prevent it from falling over by increasing and reducing the cart's velocity.
Source:
This environment corresponds to the version of the cart-pole problem described by Barto, Sutton, and Anderson
@@ -32,7 +33,9 @@ class CartPoleEnv(gym.Env):
0 Push cart to the left
1 Push cart to the right
Note: The amount the velocity that is reduced or increased is not fixed; it depends on the angle the pole is pointing. This is because the center of gravity of the pole increases the amount of energy needed to move the cart underneath it
Note: The amount the velocity that is reduced or increased is not fixed; it depends on the angle the pole is
pointing. This is because the center of gravity of the pole increases the amount of energy needed to move the
cart underneath it
Reward:
Reward is 1 for every step taken, including the termination step