Blacken the codebase (#2265)

Christian Clauss
2021-07-29 02:26:34 +02:00
committed by GitHub
parent 2f3133461c
commit bb81e141ea
162 changed files with 5503 additions and 3338 deletions


@@ -8,10 +8,10 @@ jobs:
- uses: actions/setup-python@v2
- run: pip install bandit black codespell flake8 isort mypy pytest pyupgrade safety
- run: bandit --recursive --skip B101,B108,B301,B403,B404,B603 .
- run: black --check . || true
- run: black --check .
- run: codespell --ignore-words-list=nd,reacher,thist,ths -w
- run: flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
- run: flake8 . --count --exit-zero --max-complexity=10 --max-line-length=88 --show-source --statistics
- run: flake8 --ignore=E203,E402,E712,E722,E731,E741,F401,F403,F405,F524,F841,W503
--max-complexity=30 --max-line-length=456 --show-source --statistics
- run: isort --check-only --profile black . || true
- run: pip install -e .[nomujoco]
- run: mypy --install-types --non-interactive . || true


@@ -3,9 +3,15 @@ import argparse
import gym
parser = argparse.ArgumentParser(description='Renders a Gym environment for quick inspection.')
parser.add_argument('env_id', type=str, help='the ID of the environment to be rendered (e.g. HalfCheetah-v2')
parser.add_argument('--step', type=int, default=1)
parser = argparse.ArgumentParser(
description="Renders a Gym environment for quick inspection."
)
parser.add_argument(
"env_id",
type=str,
help="the ID of the environment to be rendered (e.g. HalfCheetah-v2",
)
parser.add_argument("--step", type=int, default=1)
args = parser.parse_args()
env = gym.make(args.env_id)


@@ -1,19 +1,23 @@
# Support code for cem.py
class BinaryActionLinearPolicy(object):
def __init__(self, theta):
self.w = theta[:-1]
self.b = theta[-1]
def act(self, ob):
y = ob.dot(self.w) + self.b
a = int(y < 0)
return a
class ContinuousActionLinearPolicy(object):
def __init__(self, theta, n_in, n_out):
assert len(theta) == (n_in + 1) * n_out
self.W = theta[0 : n_in * n_out].reshape(n_in, n_out)
self.b = theta[n_in * n_out : None].reshape(1, n_out)
def act(self, ob):
a = ob.dot(self.W) + self.b
return a
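These policies pack the weights and the bias into a single flat parameter vector theta. A minimal usage sketch (illustrative only, not part of this diff, and assuming it runs alongside _policies.py):

import numpy as np
from _policies import BinaryActionLinearPolicy

# theta = [w_0, ..., w_{n-1}, b]; the last entry is the bias.
theta = np.array([0.5, -0.25, 1.0])
policy = BinaryActionLinearPolicy(theta)

ob = np.array([1.0, 2.0])
print(policy.act(ob))  # int(ob.dot(w) + b < 0) -> prints 0 here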


@@ -1,11 +1,16 @@
import argparse
import json
import os
import pickle
import sys
from os import path
import numpy as np
import gym
from gym import wrappers, logger
import numpy as np
import pickle
import json, sys, os
from os import path
from _policies import BinaryActionLinearPolicy # Different file so it can be unpickled
import argparse
from _policies import BinaryActionLinearPolicy # Different file so it can be unpickled
def cem(f, th_mean, batch_size, n_iter, elite_frac, initial_std=1.0):
"""
@@ -26,17 +31,23 @@ def cem(f, th_mean, batch_size, n_iter, elite_frac, initial_std=1.0):
'ys_mean': mean value of function over current population
'theta_mean': mean value of the parameter vector over current population
"""
n_elite = int(np.round(batch_size*elite_frac))
n_elite = int(np.round(batch_size * elite_frac))
th_std = np.ones_like(th_mean) * initial_std
for _ in range(n_iter):
ths = np.array([th_mean + dth for dth in th_std[None,:]*np.random.randn(batch_size, th_mean.size)])
ths = np.array(
[
th_mean + dth
for dth in th_std[None, :] * np.random.randn(batch_size, th_mean.size)
]
)
ys = np.array([f(th) for th in ths])
elite_inds = ys.argsort()[::-1][:n_elite]
elite_ths = ths[elite_inds]
th_mean = elite_ths.mean(axis=0)
th_std = elite_ths.std(axis=0)
yield {'ys' : ys, 'theta_mean' : th_mean, 'y_mean' : ys.mean()}
yield {"ys": ys, "theta_mean": th_mean, "y_mean": ys.mean()}
def do_rollout(agent, env, num_steps, render=False):
total_rew = 0
@@ -45,16 +56,19 @@ def do_rollout(agent, env, num_steps, render=False):
a = agent.act(ob)
(ob, reward, done, _info) = env.step(a)
total_rew += reward
if render and t%3==0: env.render()
if done: break
return total_rew, t+1
if render and t % 3 == 0:
env.render()
if done:
break
return total_rew, t + 1
if __name__ == '__main__':
if __name__ == "__main__":
logger.set_level(logger.INFO)
parser = argparse.ArgumentParser()
parser.add_argument('--display', action='store_true')
parser.add_argument('target', nargs="?", default="CartPole-v0")
parser.add_argument("--display", action="store_true")
parser.add_argument("target", nargs="?", default="CartPole-v0")
args = parser.parse_args()
env = gym.make(args.target)
@@ -66,17 +80,19 @@ if __name__ == '__main__':
# You provide the directory to write to (can be an existing
# directory, but can't contain previous monitor results. You can
# also dump to a tempdir if you'd like: tempfile.mkdtemp().
outdir = '/tmp/cem-agent-results'
outdir = "/tmp/cem-agent-results"
env = wrappers.Monitor(env, outdir, force=True)
# Prepare snapshotting
# ----------------------------------------
def writefile(fname, s):
with open(path.join(outdir, fname), 'w') as fh: fh.write(s)
with open(path.join(outdir, fname), "w") as fh:
fh.write(s)
info = {}
info['params'] = params
info['argv'] = sys.argv
info['env_id'] = env.spec.id
info["params"] = params
info["argv"] = sys.argv
info["env_id"] = env.spec.id
# ------------------------------------------
def noisy_evaluation(theta):
@@ -86,14 +102,16 @@ if __name__ == '__main__':
# Train the agent, and snapshot each stage
for (i, iterdata) in enumerate(
cem(noisy_evaluation, np.zeros(env.observation_space.shape[0]+1), **params)):
print('Iteration %2i. Episode mean reward: %7.3f'%(i, iterdata['y_mean']))
agent = BinaryActionLinearPolicy(iterdata['theta_mean'])
if args.display: do_rollout(agent, env, 200, render=True)
writefile('agent-%.4i.pkl'%i, str(pickle.dumps(agent, -1)))
cem(noisy_evaluation, np.zeros(env.observation_space.shape[0] + 1), **params)
):
print("Iteration %2i. Episode mean reward: %7.3f" % (i, iterdata["y_mean"]))
agent = BinaryActionLinearPolicy(iterdata["theta_mean"])
if args.display:
do_rollout(agent, env, 200, render=True)
writefile("agent-%.4i.pkl" % i, str(pickle.dumps(agent, -1)))
# Write out the env at the end so we store the parameters of this
# environment.
writefile('info.json', json.dumps(info))
writefile("info.json", json.dumps(info))
env.close()
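The cem generator above has no Gym dependency and can be exercised on a toy objective. An illustrative sketch (not part of this diff), with cem defined as in this file:

import numpy as np

# Maximize f(theta) = -||theta - 3||^2; the optimum is theta = [3, 3].
def f(theta):
    return -np.sum((theta - 3.0) ** 2)

for i, data in enumerate(
    cem(f, th_mean=np.zeros(2), batch_size=50, n_iter=10, elite_frac=0.2)
):
    print(i, data["y_mean"], data["theta_mean"])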


@@ -1,5 +1,8 @@
#!/usr/bin/env python
import sys, gym, time
import sys
import time
import gym
#
# Test yourself as a learning agent! Pass environment name as a command-line argument, for example:
@@ -7,37 +10,45 @@ import sys, gym, time
# python keyboard_agent.py SpaceInvadersNoFrameskip-v4
#
env = gym.make('LunarLander-v2' if len(sys.argv)<2 else sys.argv[1])
env = gym.make("LunarLander-v2" if len(sys.argv) < 2 else sys.argv[1])
if not hasattr(env.action_space, 'n'):
raise Exception('Keyboard agent only supports discrete action spaces')
if not hasattr(env.action_space, "n"):
raise Exception("Keyboard agent only supports discrete action spaces")
ACTIONS = env.action_space.n
SKIP_CONTROL = 0 # Use previous control decision SKIP_CONTROL times, that's how you
# can test what skip is still usable.
SKIP_CONTROL = 0 # Use previous control decision SKIP_CONTROL times, that's how you
# can test what skip is still usable.
human_agent_action = 0
human_wants_restart = False
human_sets_pause = False
def key_press(key, mod):
global human_agent_action, human_wants_restart, human_sets_pause
if key==0xff0d: human_wants_restart = True
if key==32: human_sets_pause = not human_sets_pause
a = int( key - ord('0') )
if a <= 0 or a >= ACTIONS: return
if key == 0xFF0D:
human_wants_restart = True
if key == 32:
human_sets_pause = not human_sets_pause
a = int(key - ord("0"))
if a <= 0 or a >= ACTIONS:
return
human_agent_action = a
def key_release(key, mod):
global human_agent_action
a = int( key - ord('0') )
if a <= 0 or a >= ACTIONS: return
a = int(key - ord("0"))
if a <= 0 or a >= ACTIONS:
return
if human_agent_action == a:
human_agent_action = 0
env.render()
env.unwrapped.viewer.window.on_key_press = key_press
env.unwrapped.viewer.window.on_key_release = key_release
def rollout(env):
global human_agent_action, human_wants_restart, human_sets_pause
human_wants_restart = False
@@ -47,7 +58,7 @@ def rollout(env):
total_timesteps = 0
while 1:
if not skip:
#print("taking action {}".format(human_agent_action))
# print("taking action {}".format(human_agent_action))
a = human_agent_action
total_timesteps += 1
skip = SKIP_CONTROL
@@ -59,20 +70,24 @@ def rollout(env):
print("reward %0.3f" % r)
total_reward += r
window_still_open = env.render()
if window_still_open==False: return False
if done: break
if human_wants_restart: break
if window_still_open == False:
return False
if done:
break
if human_wants_restart:
break
while human_sets_pause:
env.render()
time.sleep(0.1)
time.sleep(0.1)
print("timesteps %i reward %0.2f" % (total_timesteps, total_reward))
print("ACTIONS={}".format(ACTIONS))
print("Press keys 1 2 3 ... to take actions 1 2 3 ...")
print("No keys pressed is taking action 0")
while 1:
window_still_open = rollout(env)
if window_still_open==False: break
if window_still_open == False:
break


@@ -4,17 +4,22 @@ import sys
import gym
from gym import wrappers, logger
class RandomAgent(object):
"""The world's simplest agent!"""
def __init__(self, action_space):
self.action_space = action_space
def act(self, observation, reward, done):
return self.action_space.sample()
if __name__ == '__main__':
if __name__ == "__main__":
parser = argparse.ArgumentParser(description=None)
parser.add_argument('env_id', nargs='?', default='CartPole-v0', help='Select the environment to run')
parser.add_argument(
"env_id", nargs="?", default="CartPole-v0", help="Select the environment to run"
)
args = parser.parse_args()
# You can set the level to logger.DEBUG or logger.WARN if you
@@ -27,7 +32,7 @@ if __name__ == '__main__':
# directory, including one with existing data -- all monitor files
# will be namespaced). You can also dump to a tempdir if you'd
# like: tempfile.mkdtemp().
outdir = '/tmp/random-agent-results'
outdir = "/tmp/random-agent-results"
env = wrappers.Monitor(env, directory=outdir, force=True)
env.seed(0)
agent = RandomAgent(env.action_space)


@@ -6,7 +6,14 @@ import warnings
from gym import error
from gym.version import VERSION as __version__
from gym.core import Env, GoalEnv, Wrapper, ObservationWrapper, ActionWrapper, RewardWrapper
from gym.core import (
Env,
GoalEnv,
Wrapper,
ObservationWrapper,
ActionWrapper,
RewardWrapper,
)
from gym.spaces import Space
from gym.envs import make, spec, register
from gym import logger


@@ -28,9 +28,10 @@ class Env(object):
The methods are accessed publicly as "step", "reset", etc...
"""
# Set this in SOME subclasses
metadata = {'render.modes': []}
reward_range = (-float('inf'), float('inf'))
metadata = {"render.modes": []}
reward_range = (-float("inf"), float("inf"))
spec = None
# Set these in ALL subclasses
@@ -70,7 +71,7 @@ class Env(object):
"""
raise NotImplementedError
def render(self, mode='human'):
def render(self, mode="human"):
"""Renders the environment.
The set of supported modes varies per environment. (And some
@@ -145,16 +146,16 @@ class Env(object):
def __str__(self):
if self.spec is None:
return '<{} instance>'.format(type(self).__name__)
return "<{} instance>".format(type(self).__name__)
else:
return '<{}<{}>>'.format(type(self).__name__, self.spec.id)
return "<{}<{}>>".format(type(self).__name__, self.spec.id)
def __enter__(self):
"""Support with-statement for the environment. """
"""Support with-statement for the environment."""
return self
def __exit__(self, *args):
"""Support with-statement for the environment. """
"""Support with-statement for the environment."""
self.close()
# propagate exception
return False
@@ -172,10 +173,16 @@ class GoalEnv(Env):
def reset(self):
# Enforce that each GoalEnv uses a Goal-compatible observation space.
if not isinstance(self.observation_space, gym.spaces.Dict):
raise error.Error('GoalEnv requires an observation space of type gym.spaces.Dict')
for key in ['observation', 'achieved_goal', 'desired_goal']:
raise error.Error(
"GoalEnv requires an observation space of type gym.spaces.Dict"
)
for key in ["observation", "achieved_goal", "desired_goal"]:
if key not in self.observation_space.spaces:
raise error.Error('GoalEnv requires the "{}" key to be part of the observation dictionary.'.format(key))
raise error.Error(
'GoalEnv requires the "{}" key to be part of the observation dictionary.'.format(
key
)
)
def compute_reward(self, achieved_goal, desired_goal, info):
"""Compute the step reward. This externalizes the reward function and makes
@@ -210,6 +217,7 @@ class Wrapper(Env):
Don't forget to call ``super().__init__(env)`` if the subclass overrides :meth:`__init__`.
"""
def __init__(self, env):
self.env = env
self.action_space = self.env.action_space
@@ -218,8 +226,10 @@ class Wrapper(Env):
self.metadata = self.env.metadata
def __getattr__(self, name):
if name.startswith('_'):
raise AttributeError("attempted to get missing private attribute '{}'".format(name))
if name.startswith("_"):
raise AttributeError(
"attempted to get missing private attribute '{}'".format(name)
)
return getattr(self.env, name)
@property
@@ -236,7 +246,7 @@ class Wrapper(Env):
def reset(self, **kwargs):
return self.env.reset(**kwargs)
def render(self, mode='human', **kwargs):
def render(self, mode="human", **kwargs):
return self.env.render(mode, **kwargs)
def close(self):
@@ -249,7 +259,7 @@ class Wrapper(Env):
return self.env.compute_reward(achieved_goal, desired_goal, info)
def __str__(self):
return '<{}{}>'.format(type(self).__name__, self.env)
return "<{}{}>".format(type(self).__name__, self.env)
def __repr__(self):
return str(self)
@@ -295,4 +305,4 @@ class ActionWrapper(Wrapper):
raise NotImplementedError
def reverse_action(self, action):
raise NotImplementedError
raise NotImplementedError
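For orientation, a minimal Env subclass in the style shown above (illustrative only, not part of this commit; the class name is made up):

import numpy as np
import gym
from gym import spaces

class ConstantEnv(gym.Env):
    # Follows the metadata / render(mode="human") conventions above.
    metadata = {"render.modes": ["human"]}

    def __init__(self):
        self.observation_space = spaces.Box(-1.0, 1.0, shape=(1,), dtype=np.float32)
        self.action_space = spaces.Discrete(2)

    def reset(self):
        return np.zeros(1, dtype=np.float32)

    def step(self, action):
        obs = np.zeros(1, dtype=np.float32)
        return obs, 0.0, True, {}  # observation, reward, done, info

    def render(self, mode="human"):
        print("<ConstantEnv instance>")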

File diff suppressed because it is too large.


@@ -42,7 +42,7 @@ from io import StringIO
class AlgorithmicEnv(Env):
metadata = {'render.modes': ['human', 'ansi']}
metadata = {"render.modes": ["human", "ansi"]}
# Only 'promote' the length of generated input strings if the worst of the
# last n episodes was no more than this far from the maximum reward
MIN_REWARD_SHORTFALL_FOR_PROMOTION = -1.0
@@ -64,10 +64,10 @@ class AlgorithmicEnv(Env):
# earn and we got 8, we'd append -2
self.reward_shortfalls = []
if chars:
self.charmap = [chr(ord('A')+i) for i in range(base)]
self.charmap = [chr(ord("A") + i) for i in range(base)]
else:
self.charmap = [str(i) for i in range(base)]
self.charmap.append(' ')
self.charmap.append(" ")
self.min_length = starting_min_length
# Three sub-actions:
# 1. Move read head left or right (or up/down)
@@ -111,17 +111,18 @@ class AlgorithmicEnv(Env):
"""Return a string representation of the input tape/grid."""
raise NotImplementedError
def render(self, mode='human'):
outfile = StringIO() if mode == 'ansi' else sys.stdout
def render(self, mode="human"):
outfile = StringIO() if mode == "ansi" else sys.stdout
inp = "Total length of input instance: %d, step: %d\n" % (
self.input_width, self.time
self.input_width,
self.time,
)
outfile.write(inp)
y, action = self.write_head_position, self.last_action
if action is not None:
inp_act, out_act, pred = action
outfile.write("=" * (len(inp) - 1) + "\n")
y_str = "Output Tape : "
y_str = "Output Tape : "
target_str = "Targets : "
if action is not None:
pred_str = self.charmap[pred]
@@ -132,7 +133,7 @@ class AlgorithmicEnv(Env):
y_str += self._get_str_target(i)
elif i == (y - 1):
if action is not None and out_act == 1:
color = 'green' if pred == self.target[i] else 'red'
color = "green" if pred == self.target[i] else "red"
y_str += colorize(pred_str, color, highlight=True)
else:
y_str += self._get_str_target(i)
@@ -146,12 +147,15 @@ class AlgorithmicEnv(Env):
move = self.MOVEMENTS[inp_act]
outfile.write("Action : Tuple(move over input: %s,\n" % move)
out_act = out_act == 1
outfile.write(" write to the output tape: %s,\n" % out_act)
outfile.write(
" write to the output tape: %s,\n"
% out_act
)
outfile.write(" prediction: %s)\n" % pred_str)
else:
outfile.write("\n" * 5)
if mode != 'human':
if mode != "human":
with closing(outfile):
return outfile.getvalue()
@@ -175,7 +179,8 @@ class AlgorithmicEnv(Env):
"It looks like you're calling step() even though this "
"environment has already returned done=True. You should "
"always call reset() once you receive done=True. Any "
"further steps are undefined behaviour.")
"further steps are undefined behaviour."
)
correct = False
if correct:
reward = 1.0
@@ -209,10 +214,12 @@ class AlgorithmicEnv(Env):
# This is before the first episode/call to reset(). Nothing to do.
return
self.reward_shortfalls.append(self.episode_total_reward - len(self.target))
self.reward_shortfalls = self.reward_shortfalls[-self.last:]
if len(self.reward_shortfalls) == self.last and \
min(self.reward_shortfalls) >= self.MIN_REWARD_SHORTFALL_FOR_PROMOTION and \
self.min_length < 30:
self.reward_shortfalls = self.reward_shortfalls[-self.last :]
if (
len(self.reward_shortfalls) == self.last
and min(self.reward_shortfalls) >= self.MIN_REWARD_SHORTFALL_FOR_PROMOTION
and self.min_length < 30
):
self.min_length += 1
self.reward_shortfalls = []
@@ -241,12 +248,13 @@ class AlgorithmicEnv(Env):
class TapeAlgorithmicEnv(AlgorithmicEnv):
"""An algorithmic env with a 1-d input tape."""
MOVEMENTS = ['left', 'right']
MOVEMENTS = ["left", "right"]
READ_HEAD_START = 0
def _move(self, movement):
named = self.MOVEMENTS[movement]
self.read_head_position += 1 if named == 'right' else -1
self.read_head_position += 1 if named == "right" else -1
def _get_obs(self, pos=None):
if pos is None:
@@ -269,7 +277,7 @@ class TapeAlgorithmicEnv(AlgorithmicEnv):
for i in range(-2, self.input_width + 2):
if i == x:
x_str += colorize(
self._get_str_obs(np.array([i])), 'green', highlight=True
self._get_str_obs(np.array([i])), "green", highlight=True
)
else:
x_str += self._get_str_obs(np.array([i]))
@@ -279,7 +287,8 @@ class TapeAlgorithmicEnv(AlgorithmicEnv):
class GridAlgorithmicEnv(AlgorithmicEnv):
"""An algorithmic env with a 2-d input grid."""
MOVEMENTS = ['left', 'right', 'up', 'down']
MOVEMENTS = ["left", "right", "up", "down"]
READ_HEAD_START = (0, 0)
def __init__(self, rows, *args, **kwargs):
@@ -289,13 +298,13 @@ class GridAlgorithmicEnv(AlgorithmicEnv):
def _move(self, movement):
named = self.MOVEMENTS[movement]
x, y = self.read_head_position
if named == 'left':
if named == "left":
x -= 1
elif named == 'right':
elif named == "right":
x += 1
elif named == 'up':
elif named == "up":
y -= 1
elif named == 'down':
elif named == "down":
y += 1
else:
raise ValueError("Unrecognized direction: {}".format(named))
@@ -322,13 +331,13 @@ class GridAlgorithmicEnv(AlgorithmicEnv):
x = self.read_head_position
label = "Observation Grid : "
x_str = ""
for j in range(-1, self.rows+1):
for j in range(-1, self.rows + 1):
if j != -1:
x_str += " " * len(label)
for i in range(-2, self.input_width + 2):
if i == x[0] and j == x[1]:
x_str += colorize(
self._get_str_obs((i, j)), 'green', highlight=True
self._get_str_obs((i, j)), "green", highlight=True
)
else:
x_str += self._get_str_obs((i, j))


@@ -21,6 +21,4 @@ class DuplicatedInputEnv(algorithmic_env.TapeAlgorithmicEnv):
return res
def target_from_input_data(self, input_data):
return [
input_data[i] for i in range(0, len(input_data), self.duplication)
]
return [input_data[i] for i in range(0, len(input_data), self.duplication)]


@@ -6,7 +6,7 @@ from gym.envs.algorithmic import algorithmic_env
class RepeatCopyEnv(algorithmic_env.TapeAlgorithmicEnv):
MIN_REWARD_SHORTFALL_FOR_PROMOTION = -.1
MIN_REWARD_SHORTFALL_FOR_PROMOTION = -0.1
def __init__(self, base=5):
super(RepeatCopyEnv, self).__init__(base=base, chars=True)


@@ -6,12 +6,10 @@ from gym.envs.algorithmic import algorithmic_env
class ReverseEnv(algorithmic_env.TapeAlgorithmicEnv):
MIN_REWARD_SHORTFALL_FOR_PROMOTION = -.1
MIN_REWARD_SHORTFALL_FOR_PROMOTION = -0.1
def __init__(self, base=2):
super(ReverseEnv, self).__init__(
base=base, chars=True, starting_min_length=1
)
super(ReverseEnv, self).__init__(base=base, chars=True, starting_min_length=1)
self.last = 50
def target_from_input_data(self, input_str):


@@ -3,9 +3,7 @@ from gym.envs.algorithmic import algorithmic_env
class ReversedAdditionEnv(algorithmic_env.GridAlgorithmicEnv):
def __init__(self, rows=2, base=3):
super(ReversedAdditionEnv, self).__init__(
rows=rows, base=base, chars=False
)
super(ReversedAdditionEnv, self).__init__(rows=rows, base=base, chars=False)
def target_from_input_data(self, input_strings):
curry = 0
@@ -28,4 +26,4 @@ class ReversedAdditionEnv(algorithmic_env.GridAlgorithmicEnv):
# Addition3-v0 unsolvable, since agents aren't even given enough time
# steps to look at all the digits. (The solutions on the scoreboard
# seem to only work by save-scumming.)
return self.input_width*2 + 4
return self.input_width * 2 + 4


@@ -11,12 +11,10 @@ ALL_ENVS = [
alg.reversed_addition.ReversedAdditionEnv,
]
ALL_TAPE_ENVS = [
env for env in ALL_ENVS
if issubclass(env, alg.algorithmic_env.TapeAlgorithmicEnv)
env for env in ALL_ENVS if issubclass(env, alg.algorithmic_env.TapeAlgorithmicEnv)
]
ALL_GRID_ENVS = [
env for env in ALL_ENVS
if issubclass(env, alg.algorithmic_env.GridAlgorithmicEnv)
env for env in ALL_ENVS if issubclass(env, alg.algorithmic_env.GridAlgorithmicEnv)
]
@@ -30,9 +28,10 @@ def imprint(env, input_arr):
class TestAlgorithmicEnvInteractions(unittest.TestCase):
"""Test some generic behaviour not specific to any particular algorithmic
environment. Movement, allocation of rewards, etc."""
CANNED_INPUT = [0, 1]
ENV_KLS = alg.copy_.CopyEnv
LEFT, RIGHT = ENV_KLS._movement_idx('left'), ENV_KLS._movement_idx('right')
LEFT, RIGHT = ENV_KLS._movement_idx("left"), ENV_KLS._movement_idx("right")
def setUp(self):
self.env = self.ENV_KLS(base=2, chars=True)
@@ -66,11 +65,11 @@ class TestAlgorithmicEnvInteractions(unittest.TestCase):
obs, reward, done, _ = self.env.step([self.RIGHT, 1, 1])
self.assertTrue(done)
self.env.reset()
if i < self.env.last-1:
self.assertEqual(len(self.env.reward_shortfalls), i+1)
if i < self.env.last - 1:
self.assertEqual(len(self.env.reward_shortfalls), i + 1)
else:
# Should have leveled up on the last iteration
self.assertEqual(self.env.min_length, min_length+1)
self.assertEqual(self.env.min_length, min_length + 1)
self.assertEqual(len(self.env.reward_shortfalls), 0)
def test_walk_off_the_end(self):
@@ -95,7 +94,7 @@ class TestAlgorithmicEnvInteractions(unittest.TestCase):
env = alg.reversed_addition.ReversedAdditionEnv(rows=2, base=6)
N, S, E, W = [
env._movement_idx(named_dir)
for named_dir in ['up', 'down', 'right', 'left']
for named_dir in ["up", "down", "right", "left"]
]
# Corresponds to a grid that looks like...
# 0 1 2
@@ -105,7 +104,15 @@ class TestAlgorithmicEnvInteractions(unittest.TestCase):
obs = env.reset()
self.assertEqual(obs, 0)
navigation = [
(S, 3), (N, 0), (E, 1), (S, 4), (S, 6), (E, 6), (N, 5), (N, 2), (W, 1)
(S, 3),
(N, 0),
(E, 1),
(S, 4),
(S, 6),
(E, 6),
(N, 5),
(N, 2),
(W, 1),
]
for (movement, expected_obs) in navigation:
obs, reward, done, _ = env.step([movement, 0, 0])
@@ -138,22 +145,23 @@ class TestAlgorithmicEnvInteractions(unittest.TestCase):
def test_rendering(self):
env = self.env
env.reset()
self.assertEqual(env._get_str_obs(), 'A')
self.assertEqual(env._get_str_obs(1), 'B')
self.assertEqual(env._get_str_obs(-1), ' ')
self.assertEqual(env._get_str_obs(2), ' ')
self.assertEqual(env._get_str_target(0), 'A')
self.assertEqual(env._get_str_target(1), 'B')
self.assertEqual(env._get_str_obs(), "A")
self.assertEqual(env._get_str_obs(1), "B")
self.assertEqual(env._get_str_obs(-1), " ")
self.assertEqual(env._get_str_obs(2), " ")
self.assertEqual(env._get_str_target(0), "A")
self.assertEqual(env._get_str_target(1), "B")
# Test numerical alphabet rendering
env = self.ENV_KLS(base=3, chars=False)
imprint(env, self.CANNED_INPUT)
env.reset()
self.assertEqual(env._get_str_obs(), '0')
self.assertEqual(env._get_str_obs(1), '1')
self.assertEqual(env._get_str_obs(), "0")
self.assertEqual(env._get_str_obs(1), "1")
class TestTargets(unittest.TestCase):
"""Test the rules mapping input strings/grids to target outputs."""
def test_reverse_target(self):
input_expected = [
([0], [0]),
@@ -175,9 +183,7 @@ class TestTargets(unittest.TestCase):
([[2, 1], [1, 1], [1, 1], [1, 0]], [0, 0, 0, 2]),
]
for (input_grid, expected_target) in input_expected:
self.assertEqual(
env.target_from_input_data(input_grid), expected_target
)
self.assertEqual(env.target_from_input_data(input_grid), expected_target)
def test_reversed_addition_3rows(self):
env = alg.reversed_addition.ReversedAdditionEnv(base=3, rows=3)
@@ -186,9 +192,7 @@ class TestTargets(unittest.TestCase):
([[1, 1, 2], [0, 1, 1]], [1, 0, 1]),
]
for (input_grid, expected_target) in input_expected:
self.assertEqual(
env.target_from_input_data(input_grid), expected_target
)
self.assertEqual(env.target_from_input_data(input_grid), expected_target)
def test_copy_target(self):
env = alg.copy_.CopyEnv()
@@ -196,9 +200,7 @@ class TestTargets(unittest.TestCase):
def test_duplicated_input_target(self):
env = alg.duplicated_input.DuplicatedInputEnv(duplication=2)
self.assertEqual(
env.target_from_input_data([0, 0, 0, 0, 1, 1]), [0, 0, 1]
)
self.assertEqual(env.target_from_input_data([0, 0, 0, 0, 1, 1]), [0, 0, 1])
def test_repeat_copy_target(self):
env = alg.repeat_copy.RepeatCopyEnv()
@@ -208,8 +210,8 @@ class TestTargets(unittest.TestCase):
class TestInputGeneration(unittest.TestCase):
"""Test random input generation.
"""
"""Test random input generation."""
def test_tape_inputs(self):
for env_kls in ALL_TAPE_ENVS:
env = env_kls()
@@ -217,9 +219,7 @@ class TestInputGeneration(unittest.TestCase):
input_tape = env.generate_input_data(size)
self.assertTrue(
all(0 <= x <= env.base for x in input_tape),
"Invalid input tape from env {}: {}".format(
env_kls, input_tape
)
"Invalid input tape from env {}: {}".format(env_kls, input_tape),
)
# DuplicatedInput needs to generate inputs with even length,
# so it may be short one
@@ -260,5 +260,5 @@ class TestInputGeneration(unittest.TestCase):
self.assertEqual(input_tape[1], input_tape[2])
if __name__ == '__main__':
if __name__ == "__main__":
unittest.main()
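The algorithmic environments above take a three-part action: move the read head, an optional write flag, and a predicted symbol. An illustrative sketch (not part of this diff):

from gym.envs.algorithmic import copy_

env = copy_.CopyEnv(base=5, chars=True)
obs = env.reset()

right = env._movement_idx("right")  # index of the "right" movement
action = [right, 0, 0]              # move right, write nothing, dummy prediction
obs, reward, done, info = env.step(action)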


@@ -9,8 +9,9 @@ try:
import atari_py
except ImportError as e:
raise error.DependencyNotInstalled(
"{}. (HINT: you can install Atari dependencies by running "
"'pip install gym[atari]'.)".format(e))
"{}. (HINT: you can install Atari dependencies by running "
"'pip install gym[atari]'.)".format(e)
)
def to_ram(ale):
@@ -21,30 +22,32 @@ def to_ram(ale):
class AtariEnv(gym.Env, utils.EzPickle):
metadata = {'render.modes': ['human', 'rgb_array']}
metadata = {"render.modes": ["human", "rgb_array"]}
def __init__(
self,
game='pong',
mode=None,
difficulty=None,
obs_type='ram',
frameskip=(2, 5),
repeat_action_probability=0.,
full_action_space=False):
self,
game="pong",
mode=None,
difficulty=None,
obs_type="ram",
frameskip=(2, 5),
repeat_action_probability=0.0,
full_action_space=False,
):
"""Frameskip should be either a tuple (indicating a random range to
choose from, with the top value exclude), or an int."""
utils.EzPickle.__init__(
self,
game,
mode,
difficulty,
obs_type,
frameskip,
repeat_action_probability,
full_action_space)
assert obs_type in ('ram', 'image')
self,
game,
mode,
difficulty,
obs_type,
frameskip,
repeat_action_probability,
full_action_space,
)
assert obs_type in ("ram", "image")
self.game = game
self.game_path = atari_py.get_game_path(game)
@@ -52,7 +55,7 @@ class AtariEnv(gym.Env, utils.EzPickle):
self.game_difficulty = difficulty
if not os.path.exists(self.game_path):
msg = 'You asked for game %s but path %s does not exist'
msg = "You asked for game %s but path %s does not exist"
raise IOError(msg % (game, self.game_path))
self._obs_type = obs_type
self.frameskip = frameskip
@@ -61,41 +64,51 @@ class AtariEnv(gym.Env, utils.EzPickle):
# Tune (or disable) ALE's action repeat:
# https://github.com/openai/gym/issues/349
assert isinstance(repeat_action_probability, (float, int)), \
"Invalid repeat_action_probability: {!r}".format(repeat_action_probability)
assert isinstance(
repeat_action_probability, (float, int)
), "Invalid repeat_action_probability: {!r}".format(repeat_action_probability)
self.ale.setFloat(
'repeat_action_probability'.encode('utf-8'),
repeat_action_probability)
"repeat_action_probability".encode("utf-8"), repeat_action_probability
)
self.seed()
self._action_set = (self.ale.getLegalActionSet() if full_action_space
else self.ale.getMinimalActionSet())
self._action_set = (
self.ale.getLegalActionSet()
if full_action_space
else self.ale.getMinimalActionSet()
)
self.action_space = spaces.Discrete(len(self._action_set))
(screen_width, screen_height) = self.ale.getScreenDims()
if self._obs_type == 'ram':
self.observation_space = spaces.Box(low=0, high=255, dtype=np.uint8, shape=(128,))
elif self._obs_type == 'image':
self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3), dtype=np.uint8)
if self._obs_type == "ram":
self.observation_space = spaces.Box(
low=0, high=255, dtype=np.uint8, shape=(128,)
)
elif self._obs_type == "image":
self.observation_space = spaces.Box(
low=0, high=255, shape=(screen_height, screen_width, 3), dtype=np.uint8
)
else:
raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))
raise error.Error(
"Unrecognized observation type: {}".format(self._obs_type)
)
def seed(self, seed=None):
self.np_random, seed1 = seeding.np_random(seed)
# Derive a random seed. This gets passed as a uint, but gets
# checked as an int elsewhere, so we need to keep it below
# 2**31.
seed2 = seeding.hash_seed(seed1 + 1) % 2**31
seed2 = seeding.hash_seed(seed1 + 1) % 2 ** 31
# Empirically, we need to seed before loading the ROM.
self.ale.setInt(b'random_seed', seed2)
self.ale.setInt(b"random_seed", seed2)
self.ale.loadROM(self.game_path)
if self.game_mode is not None:
modes = self.ale.getAvailableModes()
assert self.game_mode in modes, (
"Invalid game mode \"{}\" for game {}.\nAvailable modes are: {}"
'Invalid game mode "{}" for game {}.\nAvailable modes are: {}'
).format(self.game_mode, self.game, modes)
self.ale.setMode(self.game_mode)
@@ -103,7 +116,7 @@ class AtariEnv(gym.Env, utils.EzPickle):
difficulties = self.ale.getAvailableDifficulties()
assert self.game_difficulty in difficulties, (
"Invalid game difficulty \"{}\" for game {}.\nAvailable difficulties are: {}"
'Invalid game difficulty "{}" for game {}.\nAvailable difficulties are: {}'
).format(self.game_difficulty, self.game, difficulties)
self.ale.setDifficulty(self.game_difficulty)
@@ -134,9 +147,9 @@ class AtariEnv(gym.Env, utils.EzPickle):
return len(self._action_set)
def _get_obs(self):
if self._obs_type == 'ram':
if self._obs_type == "ram":
return self._get_ram()
elif self._obs_type == 'image':
elif self._obs_type == "image":
img = self._get_image()
return img
@@ -145,12 +158,13 @@ class AtariEnv(gym.Env, utils.EzPickle):
self.ale.reset_game()
return self._get_obs()
def render(self, mode='human'):
def render(self, mode="human"):
img = self._get_image()
if mode == 'rgb_array':
if mode == "rgb_array":
return img
elif mode == 'human':
elif mode == "human":
from gym.envs.classic_control import rendering
if self.viewer is None:
self.viewer = rendering.SimpleImageViewer()
self.viewer.imshow(img)
@@ -166,11 +180,11 @@ class AtariEnv(gym.Env, utils.EzPickle):
def get_keys_to_action(self):
KEYWORD_TO_KEY = {
'UP': ord('w'),
'DOWN': ord('s'),
'LEFT': ord('a'),
'RIGHT': ord('d'),
'FIRE': ord(' '),
"UP": ord("w"),
"DOWN": ord("s"),
"LEFT": ord("a"),
"RIGHT": ord("d"),
"FIRE": ord(" "),
}
keys_to_action = {}


@@ -3,7 +3,14 @@ import math
import numpy as np
import Box2D
from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener)
from Box2D.b2 import (
edgeShape,
circleShape,
fixtureDef,
polygonShape,
revoluteJointDef,
contactListener,
)
import gym
from gym import spaces
@@ -37,75 +44,79 @@ from gym.utils import colorize, seeding, EzPickle
#
# Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
FPS = 50
SCALE = 30.0 # affects how fast-paced the game is, forces should be adjusted as well
FPS = 50
SCALE = 30.0 # affects how fast-paced the game is, forces should be adjusted as well
MOTORS_TORQUE = 80
SPEED_HIP = 4
SPEED_KNEE = 6
LIDAR_RANGE = 160/SCALE
SPEED_HIP = 4
SPEED_KNEE = 6
LIDAR_RANGE = 160 / SCALE
INITIAL_RANDOM = 5
HULL_POLY =[
(-30,+9), (+6,+9), (+34,+1),
(+34,-8), (-30,-8)
]
LEG_DOWN = -8/SCALE
LEG_W, LEG_H = 8/SCALE, 34/SCALE
HULL_POLY = [(-30, +9), (+6, +9), (+34, +1), (+34, -8), (-30, -8)]
LEG_DOWN = -8 / SCALE
LEG_W, LEG_H = 8 / SCALE, 34 / SCALE
VIEWPORT_W = 600
VIEWPORT_H = 400
TERRAIN_STEP = 14/SCALE
TERRAIN_LENGTH = 200 # in steps
TERRAIN_HEIGHT = VIEWPORT_H/SCALE/4
TERRAIN_GRASS = 10 # low long are grass spots, in steps
TERRAIN_STARTPAD = 20 # in steps
TERRAIN_STEP = 14 / SCALE
TERRAIN_LENGTH = 200 # in steps
TERRAIN_HEIGHT = VIEWPORT_H / SCALE / 4
TERRAIN_GRASS = 10 # low long are grass spots, in steps
TERRAIN_STARTPAD = 20 # in steps
FRICTION = 2.5
HULL_FD = fixtureDef(
shape=polygonShape(vertices=[ (x/SCALE,y/SCALE) for x,y in HULL_POLY ]),
density=5.0,
friction=0.1,
categoryBits=0x0020,
maskBits=0x001, # collide only with ground
restitution=0.0) # 0.99 bouncy
shape=polygonShape(vertices=[(x / SCALE, y / SCALE) for x, y in HULL_POLY]),
density=5.0,
friction=0.1,
categoryBits=0x0020,
maskBits=0x001, # collide only with ground
restitution=0.0,
) # 0.99 bouncy
LEG_FD = fixtureDef(
shape=polygonShape(box=(LEG_W/2, LEG_H/2)),
density=1.0,
restitution=0.0,
categoryBits=0x0020,
maskBits=0x001)
shape=polygonShape(box=(LEG_W / 2, LEG_H / 2)),
density=1.0,
restitution=0.0,
categoryBits=0x0020,
maskBits=0x001,
)
LOWER_FD = fixtureDef(
shape=polygonShape(box=(0.8*LEG_W/2, LEG_H/2)),
density=1.0,
restitution=0.0,
categoryBits=0x0020,
maskBits=0x001)
shape=polygonShape(box=(0.8 * LEG_W / 2, LEG_H / 2)),
density=1.0,
restitution=0.0,
categoryBits=0x0020,
maskBits=0x001,
)
class ContactDetector(contactListener):
def __init__(self, env):
contactListener.__init__(self)
self.env = env
def BeginContact(self, contact):
if self.env.hull==contact.fixtureA.body or self.env.hull==contact.fixtureB.body:
if (
self.env.hull == contact.fixtureA.body
or self.env.hull == contact.fixtureB.body
):
self.env.game_over = True
for leg in [self.env.legs[1], self.env.legs[3]]:
if leg in [contact.fixtureA.body, contact.fixtureB.body]:
leg.ground_contact = True
def EndContact(self, contact):
for leg in [self.env.legs[1], self.env.legs[3]]:
if leg in [contact.fixtureA.body, contact.fixtureB.body]:
leg.ground_contact = False
class BipedalWalker(gym.Env, EzPickle):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second' : FPS
}
metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": FPS}
hardcore = False
@@ -121,25 +132,23 @@ class BipedalWalker(gym.Env, EzPickle):
self.prev_shaping = None
self.fd_polygon = fixtureDef(
shape = polygonShape(vertices=
[(0, 0),
(1, 0),
(1, -1),
(0, -1)]),
friction = FRICTION)
shape=polygonShape(vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]),
friction=FRICTION,
)
self.fd_edge = fixtureDef(
shape = edgeShape(vertices=
[(0, 0),
(1, 1)]),
friction = FRICTION,
categoryBits=0x0001,
)
shape=edgeShape(vertices=[(0, 0), (1, 1)]),
friction=FRICTION,
categoryBits=0x0001,
)
self.reset()
high = np.array([np.inf] * 24).astype(np.float32)
self.action_space = spaces.Box(np.array([-1, -1, -1, -1]).astype(np.float32), np.array([1, 1, 1, 1]).astype(np.float32))
self.action_space = spaces.Box(
np.array([-1, -1, -1, -1]).astype(np.float32),
np.array([1, 1, 1, 1]).astype(np.float32),
)
self.observation_space = spaces.Box(-high, high)
def seed(self, seed=None):
@@ -147,7 +156,8 @@ class BipedalWalker(gym.Env, EzPickle):
return [seed]
def _destroy(self):
if not self.terrain: return
if not self.terrain:
return
self.world.contactListener = None
for t in self.terrain:
self.world.DestroyBody(t)
@@ -161,94 +171,105 @@ class BipedalWalker(gym.Env, EzPickle):
def _generate_terrain(self, hardcore):
GRASS, STUMP, STAIRS, PIT, _STATES_ = range(5)
state = GRASS
state = GRASS
velocity = 0.0
y = TERRAIN_HEIGHT
counter = TERRAIN_STARTPAD
oneshot = False
self.terrain = []
y = TERRAIN_HEIGHT
counter = TERRAIN_STARTPAD
oneshot = False
self.terrain = []
self.terrain_x = []
self.terrain_y = []
for i in range(TERRAIN_LENGTH):
x = i*TERRAIN_STEP
x = i * TERRAIN_STEP
self.terrain_x.append(x)
if state==GRASS and not oneshot:
velocity = 0.8*velocity + 0.01*np.sign(TERRAIN_HEIGHT - y)
if i > TERRAIN_STARTPAD: velocity += self.np_random.uniform(-1, 1)/SCALE #1
if state == GRASS and not oneshot:
velocity = 0.8 * velocity + 0.01 * np.sign(TERRAIN_HEIGHT - y)
if i > TERRAIN_STARTPAD:
velocity += self.np_random.uniform(-1, 1) / SCALE # 1
y += velocity
elif state==PIT and oneshot:
elif state == PIT and oneshot:
counter = self.np_random.randint(3, 5)
poly = [
(x, y),
(x+TERRAIN_STEP, y),
(x+TERRAIN_STEP, y-4*TERRAIN_STEP),
(x, y-4*TERRAIN_STEP),
]
self.fd_polygon.shape.vertices=poly
t = self.world.CreateStaticBody(
fixtures = self.fd_polygon)
t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
(x, y),
(x + TERRAIN_STEP, y),
(x + TERRAIN_STEP, y - 4 * TERRAIN_STEP),
(x, y - 4 * TERRAIN_STEP),
]
self.fd_polygon.shape.vertices = poly
t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
t.color1, t.color2 = (1, 1, 1), (0.6, 0.6, 0.6)
self.terrain.append(t)
self.fd_polygon.shape.vertices=[(p[0]+TERRAIN_STEP*counter,p[1]) for p in poly]
t = self.world.CreateStaticBody(
fixtures = self.fd_polygon)
t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
self.fd_polygon.shape.vertices = [
(p[0] + TERRAIN_STEP * counter, p[1]) for p in poly
]
t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
t.color1, t.color2 = (1, 1, 1), (0.6, 0.6, 0.6)
self.terrain.append(t)
counter += 2
original_y = y
elif state==PIT and not oneshot:
elif state == PIT and not oneshot:
y = original_y
if counter > 1:
y -= 4*TERRAIN_STEP
y -= 4 * TERRAIN_STEP
elif state==STUMP and oneshot:
elif state == STUMP and oneshot:
counter = self.np_random.randint(1, 3)
poly = [
(x, y),
(x+counter*TERRAIN_STEP, y),
(x+counter*TERRAIN_STEP, y+counter*TERRAIN_STEP),
(x, y+counter*TERRAIN_STEP),
]
self.fd_polygon.shape.vertices=poly
t = self.world.CreateStaticBody(
fixtures = self.fd_polygon)
t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
(x, y),
(x + counter * TERRAIN_STEP, y),
(x + counter * TERRAIN_STEP, y + counter * TERRAIN_STEP),
(x, y + counter * TERRAIN_STEP),
]
self.fd_polygon.shape.vertices = poly
t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
t.color1, t.color2 = (1, 1, 1), (0.6, 0.6, 0.6)
self.terrain.append(t)
elif state==STAIRS and oneshot:
elif state == STAIRS and oneshot:
stair_height = +1 if self.np_random.rand() > 0.5 else -1
stair_width = self.np_random.randint(4, 5)
stair_steps = self.np_random.randint(3, 5)
original_y = y
for s in range(stair_steps):
poly = [
(x+( s*stair_width)*TERRAIN_STEP, y+( s*stair_height)*TERRAIN_STEP),
(x+((1+s)*stair_width)*TERRAIN_STEP, y+( s*stair_height)*TERRAIN_STEP),
(x+((1+s)*stair_width)*TERRAIN_STEP, y+(-1+s*stair_height)*TERRAIN_STEP),
(x+( s*stair_width)*TERRAIN_STEP, y+(-1+s*stair_height)*TERRAIN_STEP),
]
self.fd_polygon.shape.vertices=poly
t = self.world.CreateStaticBody(
fixtures = self.fd_polygon)
t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
(
x + (s * stair_width) * TERRAIN_STEP,
y + (s * stair_height) * TERRAIN_STEP,
),
(
x + ((1 + s) * stair_width) * TERRAIN_STEP,
y + (s * stair_height) * TERRAIN_STEP,
),
(
x + ((1 + s) * stair_width) * TERRAIN_STEP,
y + (-1 + s * stair_height) * TERRAIN_STEP,
),
(
x + (s * stair_width) * TERRAIN_STEP,
y + (-1 + s * stair_height) * TERRAIN_STEP,
),
]
self.fd_polygon.shape.vertices = poly
t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
t.color1, t.color2 = (1, 1, 1), (0.6, 0.6, 0.6)
self.terrain.append(t)
counter = stair_steps*stair_width
counter = stair_steps * stair_width
elif state==STAIRS and not oneshot:
s = stair_steps*stair_width - counter - stair_height
n = s/stair_width
y = original_y + (n*stair_height)*TERRAIN_STEP
elif state == STAIRS and not oneshot:
s = stair_steps * stair_width - counter - stair_height
n = s / stair_width
y = original_y + (n * stair_height) * TERRAIN_STEP
oneshot = False
self.terrain_y.append(y)
counter -= 1
if counter==0:
counter = self.np_random.randint(TERRAIN_GRASS/2, TERRAIN_GRASS)
if state==GRASS and hardcore:
if counter == 0:
counter = self.np_random.randint(TERRAIN_GRASS / 2, TERRAIN_GRASS)
if state == GRASS and hardcore:
state = self.np_random.randint(1, _STATES_)
oneshot = True
else:
@@ -256,36 +277,42 @@ class BipedalWalker(gym.Env, EzPickle):
oneshot = True
self.terrain_poly = []
for i in range(TERRAIN_LENGTH-1):
for i in range(TERRAIN_LENGTH - 1):
poly = [
(self.terrain_x[i], self.terrain_y[i]),
(self.terrain_x[i+1], self.terrain_y[i+1])
]
self.fd_edge.shape.vertices=poly
t = self.world.CreateStaticBody(
fixtures = self.fd_edge)
color = (0.3, 1.0 if i%2==0 else 0.8, 0.3)
(self.terrain_x[i], self.terrain_y[i]),
(self.terrain_x[i + 1], self.terrain_y[i + 1]),
]
self.fd_edge.shape.vertices = poly
t = self.world.CreateStaticBody(fixtures=self.fd_edge)
color = (0.3, 1.0 if i % 2 == 0 else 0.8, 0.3)
t.color1 = color
t.color2 = color
self.terrain.append(t)
color = (0.4, 0.6, 0.3)
poly += [ (poly[1][0], 0), (poly[0][0], 0) ]
self.terrain_poly.append( (poly, color) )
poly += [(poly[1][0], 0), (poly[0][0], 0)]
self.terrain_poly.append((poly, color))
self.terrain.reverse()
def _generate_clouds(self):
# Sorry for the clouds, couldn't resist
self.cloud_poly = []
for i in range(TERRAIN_LENGTH//20):
x = self.np_random.uniform(0, TERRAIN_LENGTH)*TERRAIN_STEP
y = VIEWPORT_H/SCALE*3/4
self.cloud_poly = []
for i in range(TERRAIN_LENGTH // 20):
x = self.np_random.uniform(0, TERRAIN_LENGTH) * TERRAIN_STEP
y = VIEWPORT_H / SCALE * 3 / 4
poly = [
(x+15*TERRAIN_STEP*math.sin(3.14*2*a/5)+self.np_random.uniform(0,5*TERRAIN_STEP),
y+ 5*TERRAIN_STEP*math.cos(3.14*2*a/5)+self.np_random.uniform(0,5*TERRAIN_STEP) )
for a in range(5) ]
x1 = min( [p[0] for p in poly] )
x2 = max( [p[0] for p in poly] )
self.cloud_poly.append( (poly,x1,x2) )
(
x
+ 15 * TERRAIN_STEP * math.sin(3.14 * 2 * a / 5)
+ self.np_random.uniform(0, 5 * TERRAIN_STEP),
y
+ 5 * TERRAIN_STEP * math.cos(3.14 * 2 * a / 5)
+ self.np_random.uniform(0, 5 * TERRAIN_STEP),
)
for a in range(5)
]
x1 = min([p[0] for p in poly])
x2 = max([p[0] for p in poly])
self.cloud_poly.append((poly, x1, x2))
def reset(self):
self._destroy()
@@ -296,66 +323,67 @@ class BipedalWalker(gym.Env, EzPickle):
self.scroll = 0.0
self.lidar_render = 0
W = VIEWPORT_W/SCALE
H = VIEWPORT_H/SCALE
W = VIEWPORT_W / SCALE
H = VIEWPORT_H / SCALE
self._generate_terrain(self.hardcore)
self._generate_clouds()
init_x = TERRAIN_STEP*TERRAIN_STARTPAD/2
init_y = TERRAIN_HEIGHT+2*LEG_H
init_x = TERRAIN_STEP * TERRAIN_STARTPAD / 2
init_y = TERRAIN_HEIGHT + 2 * LEG_H
self.hull = self.world.CreateDynamicBody(
position = (init_x, init_y),
fixtures = HULL_FD
)
self.hull.color1 = (0.5,0.4,0.9)
self.hull.color2 = (0.3,0.3,0.5)
self.hull.ApplyForceToCenter((self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), 0), True)
position=(init_x, init_y), fixtures=HULL_FD
)
self.hull.color1 = (0.5, 0.4, 0.9)
self.hull.color2 = (0.3, 0.3, 0.5)
self.hull.ApplyForceToCenter(
(self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), 0), True
)
self.legs = []
self.joints = []
for i in [-1,+1]:
for i in [-1, +1]:
leg = self.world.CreateDynamicBody(
position = (init_x, init_y - LEG_H/2 - LEG_DOWN),
angle = (i*0.05),
fixtures = LEG_FD
)
leg.color1 = (0.6-i/10., 0.3-i/10., 0.5-i/10.)
leg.color2 = (0.4-i/10., 0.2-i/10., 0.3-i/10.)
position=(init_x, init_y - LEG_H / 2 - LEG_DOWN),
angle=(i * 0.05),
fixtures=LEG_FD,
)
leg.color1 = (0.6 - i / 10.0, 0.3 - i / 10.0, 0.5 - i / 10.0)
leg.color2 = (0.4 - i / 10.0, 0.2 - i / 10.0, 0.3 - i / 10.0)
rjd = revoluteJointDef(
bodyA=self.hull,
bodyB=leg,
localAnchorA=(0, LEG_DOWN),
localAnchorB=(0, LEG_H/2),
localAnchorB=(0, LEG_H / 2),
enableMotor=True,
enableLimit=True,
maxMotorTorque=MOTORS_TORQUE,
motorSpeed = i,
lowerAngle = -0.8,
upperAngle = 1.1,
)
motorSpeed=i,
lowerAngle=-0.8,
upperAngle=1.1,
)
self.legs.append(leg)
self.joints.append(self.world.CreateJoint(rjd))
lower = self.world.CreateDynamicBody(
position = (init_x, init_y - LEG_H*3/2 - LEG_DOWN),
angle = (i*0.05),
fixtures = LOWER_FD
)
lower.color1 = (0.6-i/10., 0.3-i/10., 0.5-i/10.)
lower.color2 = (0.4-i/10., 0.2-i/10., 0.3-i/10.)
position=(init_x, init_y - LEG_H * 3 / 2 - LEG_DOWN),
angle=(i * 0.05),
fixtures=LOWER_FD,
)
lower.color1 = (0.6 - i / 10.0, 0.3 - i / 10.0, 0.5 - i / 10.0)
lower.color2 = (0.4 - i / 10.0, 0.2 - i / 10.0, 0.3 - i / 10.0)
rjd = revoluteJointDef(
bodyA=leg,
bodyB=lower,
localAnchorA=(0, -LEG_H/2),
localAnchorB=(0, LEG_H/2),
localAnchorA=(0, -LEG_H / 2),
localAnchorB=(0, LEG_H / 2),
enableMotor=True,
enableLimit=True,
maxMotorTorque=MOTORS_TORQUE,
motorSpeed = 1,
lowerAngle = -1.6,
upperAngle = -0.1,
)
motorSpeed=1,
lowerAngle=-1.6,
upperAngle=-0.1,
)
lower.ground_contact = False
self.legs.append(lower)
self.joints.append(self.world.CreateJoint(rjd))
@@ -369,29 +397,38 @@ class BipedalWalker(gym.Env, EzPickle):
self.p2 = point
self.fraction = fraction
return fraction
self.lidar = [LidarCallback() for _ in range(10)]
return self.step(np.array([0,0,0,0]))[0]
return self.step(np.array([0, 0, 0, 0]))[0]
def step(self, action):
#self.hull.ApplyForceToCenter((0, 20), True) -- Uncomment this to receive a bit of stability help
# self.hull.ApplyForceToCenter((0, 20), True) -- Uncomment this to receive a bit of stability help
control_speed = False # Should be easier as well
if control_speed:
self.joints[0].motorSpeed = float(SPEED_HIP * np.clip(action[0], -1, 1))
self.joints[0].motorSpeed = float(SPEED_HIP * np.clip(action[0], -1, 1))
self.joints[1].motorSpeed = float(SPEED_KNEE * np.clip(action[1], -1, 1))
self.joints[2].motorSpeed = float(SPEED_HIP * np.clip(action[2], -1, 1))
self.joints[2].motorSpeed = float(SPEED_HIP * np.clip(action[2], -1, 1))
self.joints[3].motorSpeed = float(SPEED_KNEE * np.clip(action[3], -1, 1))
else:
self.joints[0].motorSpeed = float(SPEED_HIP * np.sign(action[0]))
self.joints[0].maxMotorTorque = float(MOTORS_TORQUE * np.clip(np.abs(action[0]), 0, 1))
self.joints[1].motorSpeed = float(SPEED_KNEE * np.sign(action[1]))
self.joints[1].maxMotorTorque = float(MOTORS_TORQUE * np.clip(np.abs(action[1]), 0, 1))
self.joints[2].motorSpeed = float(SPEED_HIP * np.sign(action[2]))
self.joints[2].maxMotorTorque = float(MOTORS_TORQUE * np.clip(np.abs(action[2]), 0, 1))
self.joints[3].motorSpeed = float(SPEED_KNEE * np.sign(action[3]))
self.joints[3].maxMotorTorque = float(MOTORS_TORQUE * np.clip(np.abs(action[3]), 0, 1))
self.joints[0].motorSpeed = float(SPEED_HIP * np.sign(action[0]))
self.joints[0].maxMotorTorque = float(
MOTORS_TORQUE * np.clip(np.abs(action[0]), 0, 1)
)
self.joints[1].motorSpeed = float(SPEED_KNEE * np.sign(action[1]))
self.joints[1].maxMotorTorque = float(
MOTORS_TORQUE * np.clip(np.abs(action[1]), 0, 1)
)
self.joints[2].motorSpeed = float(SPEED_HIP * np.sign(action[2]))
self.joints[2].maxMotorTorque = float(
MOTORS_TORQUE * np.clip(np.abs(action[2]), 0, 1)
)
self.joints[3].motorSpeed = float(SPEED_KNEE * np.sign(action[3]))
self.joints[3].maxMotorTorque = float(
MOTORS_TORQUE * np.clip(np.abs(action[3]), 0, 1)
)
self.world.Step(1.0/FPS, 6*30, 2*30)
self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
pos = self.hull.position
vel = self.hull.linearVelocity
@@ -400,16 +437,19 @@ class BipedalWalker(gym.Env, EzPickle):
self.lidar[i].fraction = 1.0
self.lidar[i].p1 = pos
self.lidar[i].p2 = (
pos[0] + math.sin(1.5*i/10.0)*LIDAR_RANGE,
pos[1] - math.cos(1.5*i/10.0)*LIDAR_RANGE)
pos[0] + math.sin(1.5 * i / 10.0) * LIDAR_RANGE,
pos[1] - math.cos(1.5 * i / 10.0) * LIDAR_RANGE,
)
self.world.RayCast(self.lidar[i], self.lidar[i].p1, self.lidar[i].p2)
state = [
self.hull.angle, # Normal angles up to 0.5 here, but sure more is possible.
2.0*self.hull.angularVelocity/FPS,
0.3*vel.x*(VIEWPORT_W/SCALE)/FPS, # Normalized to get -1..1 range
0.3*vel.y*(VIEWPORT_H/SCALE)/FPS,
self.joints[0].angle, # This will give 1.1 on high up, but it's still OK (and there should be spikes on hiting the ground, that's normal too)
self.hull.angle, # Normal angles up to 0.5 here, but sure more is possible.
2.0 * self.hull.angularVelocity / FPS,
0.3 * vel.x * (VIEWPORT_W / SCALE) / FPS, # Normalized to get -1..1 range
0.3 * vel.y * (VIEWPORT_H / SCALE) / FPS,
self.joints[
0
].angle, # This will give 1.1 on high up, but it's still OK (and there should be spikes on hiting the ground, that's normal too)
self.joints[0].speed / SPEED_HIP,
self.joints[1].angle + 1.0,
self.joints[1].speed / SPEED_KNEE,
@@ -418,15 +458,19 @@ class BipedalWalker(gym.Env, EzPickle):
self.joints[2].speed / SPEED_HIP,
self.joints[3].angle + 1.0,
self.joints[3].speed / SPEED_KNEE,
1.0 if self.legs[3].ground_contact else 0.0
]
1.0 if self.legs[3].ground_contact else 0.0,
]
state += [l.fraction for l in self.lidar]
assert len(state)==24
assert len(state) == 24
self.scroll = pos.x - VIEWPORT_W/SCALE/5
self.scroll = pos.x - VIEWPORT_W / SCALE / 5
shaping = 130*pos[0]/SCALE # moving forward is a way to receive reward (normalized to get 300 on completion)
shaping -= 5.0*abs(state[0]) # keep head straight, other than that and falling, any behavior is unpunished
shaping = (
130 * pos[0] / SCALE
) # moving forward is a way to receive reward (normalized to get 300 on completion)
shaping -= 5.0 * abs(
state[0]
) # keep head straight, other than that and falling, any behavior is unpunished
reward = 0
if self.prev_shaping is not None:
@@ -440,77 +484,105 @@ class BipedalWalker(gym.Env, EzPickle):
done = False
if self.game_over or pos[0] < 0:
reward = -100
done = True
if pos[0] > (TERRAIN_LENGTH-TERRAIN_GRASS)*TERRAIN_STEP:
done = True
done = True
if pos[0] > (TERRAIN_LENGTH - TERRAIN_GRASS) * TERRAIN_STEP:
done = True
return np.array(state), reward, done, {}
def render(self, mode='human'):
def render(self, mode="human"):
from gym.envs.classic_control import rendering
if self.viewer is None:
self.viewer = rendering.Viewer(VIEWPORT_W, VIEWPORT_H)
self.viewer.set_bounds(self.scroll, VIEWPORT_W/SCALE + self.scroll, 0, VIEWPORT_H/SCALE)
self.viewer.set_bounds(
self.scroll, VIEWPORT_W / SCALE + self.scroll, 0, VIEWPORT_H / SCALE
)
self.viewer.draw_polygon( [
(self.scroll, 0),
(self.scroll+VIEWPORT_W/SCALE, 0),
(self.scroll+VIEWPORT_W/SCALE, VIEWPORT_H/SCALE),
(self.scroll, VIEWPORT_H/SCALE),
], color=(0.9, 0.9, 1.0) )
for poly,x1,x2 in self.cloud_poly:
if x2 < self.scroll/2: continue
if x1 > self.scroll/2 + VIEWPORT_W/SCALE: continue
self.viewer.draw_polygon( [(p[0]+self.scroll/2, p[1]) for p in poly], color=(1,1,1))
self.viewer.draw_polygon(
[
(self.scroll, 0),
(self.scroll + VIEWPORT_W / SCALE, 0),
(self.scroll + VIEWPORT_W / SCALE, VIEWPORT_H / SCALE),
(self.scroll, VIEWPORT_H / SCALE),
],
color=(0.9, 0.9, 1.0),
)
for poly, x1, x2 in self.cloud_poly:
if x2 < self.scroll / 2:
continue
if x1 > self.scroll / 2 + VIEWPORT_W / SCALE:
continue
self.viewer.draw_polygon(
[(p[0] + self.scroll / 2, p[1]) for p in poly], color=(1, 1, 1)
)
for poly, color in self.terrain_poly:
if poly[1][0] < self.scroll: continue
if poly[0][0] > self.scroll + VIEWPORT_W/SCALE: continue
if poly[1][0] < self.scroll:
continue
if poly[0][0] > self.scroll + VIEWPORT_W / SCALE:
continue
self.viewer.draw_polygon(poly, color=color)
self.lidar_render = (self.lidar_render+1) % 100
self.lidar_render = (self.lidar_render + 1) % 100
i = self.lidar_render
if i < 2*len(self.lidar):
l = self.lidar[i] if i < len(self.lidar) else self.lidar[len(self.lidar)-i-1]
self.viewer.draw_polyline( [l.p1, l.p2], color=(1,0,0), linewidth=1 )
if i < 2 * len(self.lidar):
l = (
self.lidar[i]
if i < len(self.lidar)
else self.lidar[len(self.lidar) - i - 1]
)
self.viewer.draw_polyline([l.p1, l.p2], color=(1, 0, 0), linewidth=1)
for obj in self.drawlist:
for f in obj.fixtures:
trans = f.body.transform
if type(f.shape) is circleShape:
t = rendering.Transform(translation=trans*f.shape.pos)
self.viewer.draw_circle(f.shape.radius, 30, color=obj.color1).add_attr(t)
self.viewer.draw_circle(f.shape.radius, 30, color=obj.color2, filled=False, linewidth=2).add_attr(t)
t = rendering.Transform(translation=trans * f.shape.pos)
self.viewer.draw_circle(
f.shape.radius, 30, color=obj.color1
).add_attr(t)
self.viewer.draw_circle(
f.shape.radius, 30, color=obj.color2, filled=False, linewidth=2
).add_attr(t)
else:
path = [trans*v for v in f.shape.vertices]
path = [trans * v for v in f.shape.vertices]
self.viewer.draw_polygon(path, color=obj.color1)
path.append(path[0])
self.viewer.draw_polyline(path, color=obj.color2, linewidth=2)
flagy1 = TERRAIN_HEIGHT
flagy2 = flagy1 + 50/SCALE
x = TERRAIN_STEP*3
self.viewer.draw_polyline( [(x, flagy1), (x, flagy2)], color=(0,0,0), linewidth=2 )
f = [(x, flagy2), (x, flagy2-10/SCALE), (x+25/SCALE, flagy2-5/SCALE)]
self.viewer.draw_polygon(f, color=(0.9,0.2,0) )
self.viewer.draw_polyline(f + [f[0]], color=(0,0,0), linewidth=2 )
flagy2 = flagy1 + 50 / SCALE
x = TERRAIN_STEP * 3
self.viewer.draw_polyline(
[(x, flagy1), (x, flagy2)], color=(0, 0, 0), linewidth=2
)
f = [
(x, flagy2),
(x, flagy2 - 10 / SCALE),
(x + 25 / SCALE, flagy2 - 5 / SCALE),
]
self.viewer.draw_polygon(f, color=(0.9, 0.2, 0))
self.viewer.draw_polyline(f + [f[0]], color=(0, 0, 0), linewidth=2)
return self.viewer.render(return_rgb_array = mode=='rgb_array')
return self.viewer.render(return_rgb_array=mode == "rgb_array")
def close(self):
if self.viewer is not None:
self.viewer.close()
self.viewer = None
class BipedalWalkerHardcore(BipedalWalker):
hardcore = True
if __name__=="__main__":
if __name__ == "__main__":
# Heurisic: suboptimal, have no notion of balance.
env = BipedalWalker()
env.reset()
steps = 0
total_reward = 0
a = np.array([0.0, 0.0, 0.0, 0.0])
STAY_ON_ONE_LEG, PUT_OTHER_DOWN, PUSH_OFF = 1,2,3
STAY_ON_ONE_LEG, PUT_OTHER_DOWN, PUSH_OFF = 1, 2, 3
SPEED = 0.29 # Will fall forward on higher speed
state = STAY_ON_ONE_LEG
moving_leg = 0
@@ -523,60 +595,66 @@ if __name__=="__main__":
if steps % 20 == 0 or done:
print("\naction " + str(["{:+0.2f}".format(x) for x in a]))
print("step {} total_reward {:+0.2f}".format(steps, total_reward))
print("hull " + str(["{:+0.2f}".format(x) for x in s[0:4] ]))
print("leg0 " + str(["{:+0.2f}".format(x) for x in s[4:9] ]))
print("hull " + str(["{:+0.2f}".format(x) for x in s[0:4]]))
print("leg0 " + str(["{:+0.2f}".format(x) for x in s[4:9]]))
print("leg1 " + str(["{:+0.2f}".format(x) for x in s[9:14]]))
steps += 1
contact0 = s[8]
contact1 = s[13]
moving_s_base = 4 + 5*moving_leg
supporting_s_base = 4 + 5*supporting_leg
moving_s_base = 4 + 5 * moving_leg
supporting_s_base = 4 + 5 * supporting_leg
hip_targ = [None,None] # -0.8 .. +1.1
knee_targ = [None,None] # -0.6 .. +0.9
hip_todo = [0.0, 0.0]
hip_targ = [None, None] # -0.8 .. +1.1
knee_targ = [None, None] # -0.6 .. +0.9
hip_todo = [0.0, 0.0]
knee_todo = [0.0, 0.0]
if state==STAY_ON_ONE_LEG:
hip_targ[moving_leg] = 1.1
if state == STAY_ON_ONE_LEG:
hip_targ[moving_leg] = 1.1
knee_targ[moving_leg] = -0.6
supporting_knee_angle += 0.03
if s[2] > SPEED: supporting_knee_angle += 0.03
supporting_knee_angle = min( supporting_knee_angle, SUPPORT_KNEE_ANGLE )
if s[2] > SPEED:
supporting_knee_angle += 0.03
supporting_knee_angle = min(supporting_knee_angle, SUPPORT_KNEE_ANGLE)
knee_targ[supporting_leg] = supporting_knee_angle
if s[supporting_s_base+0] < 0.10: # supporting leg is behind
if s[supporting_s_base + 0] < 0.10: # supporting leg is behind
state = PUT_OTHER_DOWN
if state==PUT_OTHER_DOWN:
hip_targ[moving_leg] = +0.1
if state == PUT_OTHER_DOWN:
hip_targ[moving_leg] = +0.1
knee_targ[moving_leg] = SUPPORT_KNEE_ANGLE
knee_targ[supporting_leg] = supporting_knee_angle
if s[moving_s_base+4]:
if s[moving_s_base + 4]:
state = PUSH_OFF
supporting_knee_angle = min( s[moving_s_base+2], SUPPORT_KNEE_ANGLE )
if state==PUSH_OFF:
supporting_knee_angle = min(s[moving_s_base + 2], SUPPORT_KNEE_ANGLE)
if state == PUSH_OFF:
knee_targ[moving_leg] = supporting_knee_angle
knee_targ[supporting_leg] = +1.0
if s[supporting_s_base+2] > 0.88 or s[2] > 1.2*SPEED:
if s[supporting_s_base + 2] > 0.88 or s[2] > 1.2 * SPEED:
state = STAY_ON_ONE_LEG
moving_leg = 1 - moving_leg
supporting_leg = 1 - moving_leg
if hip_targ[0]: hip_todo[0] = 0.9*(hip_targ[0] - s[4]) - 0.25*s[5]
if hip_targ[1]: hip_todo[1] = 0.9*(hip_targ[1] - s[9]) - 0.25*s[10]
if knee_targ[0]: knee_todo[0] = 4.0*(knee_targ[0] - s[6]) - 0.25*s[7]
if knee_targ[1]: knee_todo[1] = 4.0*(knee_targ[1] - s[11]) - 0.25*s[12]
if hip_targ[0]:
hip_todo[0] = 0.9 * (hip_targ[0] - s[4]) - 0.25 * s[5]
if hip_targ[1]:
hip_todo[1] = 0.9 * (hip_targ[1] - s[9]) - 0.25 * s[10]
if knee_targ[0]:
knee_todo[0] = 4.0 * (knee_targ[0] - s[6]) - 0.25 * s[7]
if knee_targ[1]:
knee_todo[1] = 4.0 * (knee_targ[1] - s[11]) - 0.25 * s[12]
        hip_todo[0] -= 0.9*(0-s[0]) - 1.5*s[1] # PID to keep head straight
hip_todo[1] -= 0.9*(0-s[0]) - 1.5*s[1]
knee_todo[0] -= 15.0*s[3] # vertical speed, to damp oscillations
knee_todo[1] -= 15.0*s[3]
        hip_todo[0] -= 0.9 * (0 - s[0]) - 1.5 * s[1]  # PID to keep head straight
hip_todo[1] -= 0.9 * (0 - s[0]) - 1.5 * s[1]
knee_todo[0] -= 15.0 * s[3] # vertical speed, to damp oscillations
knee_todo[1] -= 15.0 * s[3]
a[0] = hip_todo[0]
a[1] = knee_todo[0]
a[2] = hip_todo[1]
a[3] = knee_todo[1]
a = np.clip(0.5*a, -1.0, 1.0)
a = np.clip(0.5 * a, -1.0, 1.0)
env.render()
if done: break
if done:
break

View File

@@ -10,26 +10,27 @@ Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
import numpy as np
import math
import Box2D
from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener, shape)
from Box2D.b2 import (
edgeShape,
circleShape,
fixtureDef,
polygonShape,
revoluteJointDef,
contactListener,
shape,
)
SIZE = 0.02
ENGINE_POWER = 100000000*SIZE*SIZE
WHEEL_MOMENT_OF_INERTIA = 4000*SIZE*SIZE
FRICTION_LIMIT = 1000000*SIZE*SIZE # friction ~= mass ~= size^2 (calculated implicitly using density)
ENGINE_POWER = 100000000 * SIZE * SIZE
WHEEL_MOMENT_OF_INERTIA = 4000 * SIZE * SIZE
FRICTION_LIMIT = (
1000000 * SIZE * SIZE
) # friction ~= mass ~= size^2 (calculated implicitly using density)
WHEEL_R = 27
WHEEL_W = 14
WHEELPOS = [
(-55, +80), (+55, +80),
(-55, -82), (+55, -82)
]
HULL_POLY1 = [
(-60, +130), (+60, +130),
(+60, +110), (-60, +110)
]
HULL_POLY2 = [
(-15, +120), (+15, +120),
(+20, +20), (-20, 20)
]
WHEELPOS = [(-55, +80), (+55, +80), (-55, -82), (+55, -82)]
HULL_POLY1 = [(-60, +130), (+60, +130), (+60, +110), (-60, +110)]
HULL_POLY2 = [(-15, +120), (+15, +120), (+20, +20), (-20, 20)]
HULL_POLY3 = [
(+25, +20),
(+50, -10),
@@ -38,13 +39,10 @@ HULL_POLY3 = [
(-20, -90),
(-50, -40),
(-50, -10),
(-25, +20)
]
HULL_POLY4 = [
(-50, -120), (+50, -120),
(+50, -90), (-50, -90)
]
WHEEL_COLOR = (0.0, 0.0, 0.0)
(-25, +20),
]
HULL_POLY4 = [(-50, -120), (+50, -120), (+50, -90), (-50, -90)]
WHEEL_COLOR = (0.0, 0.0, 0.0)
WHEEL_WHITE = (0.3, 0.3, 0.3)
MUD_COLOR = (0.4, 0.4, 0.0)
@@ -56,32 +54,60 @@ class Car:
position=(init_x, init_y),
angle=init_angle,
fixtures=[
fixtureDef(shape=polygonShape(vertices=[(x*SIZE, y*SIZE) for x, y in HULL_POLY1]), density=1.0),
fixtureDef(shape=polygonShape(vertices=[(x*SIZE, y*SIZE) for x, y in HULL_POLY2]), density=1.0),
fixtureDef(shape=polygonShape(vertices=[(x*SIZE, y*SIZE) for x, y in HULL_POLY3]), density=1.0),
fixtureDef(shape=polygonShape(vertices=[(x*SIZE, y*SIZE) for x, y in HULL_POLY4]), density=1.0)
]
)
fixtureDef(
shape=polygonShape(
vertices=[(x * SIZE, y * SIZE) for x, y in HULL_POLY1]
),
density=1.0,
),
fixtureDef(
shape=polygonShape(
vertices=[(x * SIZE, y * SIZE) for x, y in HULL_POLY2]
),
density=1.0,
),
fixtureDef(
shape=polygonShape(
vertices=[(x * SIZE, y * SIZE) for x, y in HULL_POLY3]
),
density=1.0,
),
fixtureDef(
shape=polygonShape(
vertices=[(x * SIZE, y * SIZE) for x, y in HULL_POLY4]
),
density=1.0,
),
],
)
self.hull.color = (0.8, 0.0, 0.0)
self.wheels = []
self.fuel_spent = 0.0
WHEEL_POLY = [
(-WHEEL_W, +WHEEL_R), (+WHEEL_W, +WHEEL_R),
(+WHEEL_W, -WHEEL_R), (-WHEEL_W, -WHEEL_R)
]
(-WHEEL_W, +WHEEL_R),
(+WHEEL_W, +WHEEL_R),
(+WHEEL_W, -WHEEL_R),
(-WHEEL_W, -WHEEL_R),
]
for wx, wy in WHEELPOS:
front_k = 1.0 if wy > 0 else 1.0
w = self.world.CreateDynamicBody(
position=(init_x+wx*SIZE, init_y+wy*SIZE),
position=(init_x + wx * SIZE, init_y + wy * SIZE),
angle=init_angle,
fixtures=fixtureDef(
shape=polygonShape(vertices=[(x*front_k*SIZE,y*front_k*SIZE) for x, y in WHEEL_POLY]),
shape=polygonShape(
vertices=[
(x * front_k * SIZE, y * front_k * SIZE)
for x, y in WHEEL_POLY
]
),
density=0.1,
categoryBits=0x0020,
maskBits=0x001,
restitution=0.0)
)
w.wheel_rad = front_k*WHEEL_R*SIZE
restitution=0.0,
),
)
w.wheel_rad = front_k * WHEEL_R * SIZE
w.color = WHEEL_COLOR
w.gas = 0.0
w.brake = 0.0
@@ -93,15 +119,15 @@ class Car:
rjd = revoluteJointDef(
bodyA=self.hull,
bodyB=w,
localAnchorA=(wx*SIZE, wy*SIZE),
localAnchorB=(0,0),
localAnchorA=(wx * SIZE, wy * SIZE),
localAnchorB=(0, 0),
enableMotor=True,
enableLimit=True,
maxMotorTorque=180*900*SIZE*SIZE,
maxMotorTorque=180 * 900 * SIZE * SIZE,
motorSpeed=0,
lowerAngle=-0.4,
upperAngle=+0.4,
)
)
w.joint = self.world.CreateJoint(rjd)
w.tiles = set()
w.userData = w
@@ -118,7 +144,8 @@ class Car:
gas = np.clip(gas, 0, 1)
for w in self.wheels[2:4]:
diff = gas - w.gas
if diff > 0.1: diff = 0.1 # gradually increase, but stop immediately
if diff > 0.1:
diff = 0.1 # gradually increase, but stop immediately
w.gas += diff
def brake(self, b):
@@ -142,60 +169,75 @@ class Car:
# Steer each wheel
dir = np.sign(w.steer - w.joint.angle)
val = abs(w.steer - w.joint.angle)
w.joint.motorSpeed = dir*min(50.0*val, 3.0)
w.joint.motorSpeed = dir * min(50.0 * val, 3.0)
# Position => friction_limit
grass = True
friction_limit = FRICTION_LIMIT*0.6 # Grass friction if no tile
friction_limit = FRICTION_LIMIT * 0.6 # Grass friction if no tile
for tile in w.tiles:
friction_limit = max(friction_limit, FRICTION_LIMIT*tile.road_friction)
friction_limit = max(
friction_limit, FRICTION_LIMIT * tile.road_friction
)
grass = False
# Force
forw = w.GetWorldVector( (0,1) )
side = w.GetWorldVector( (1,0) )
forw = w.GetWorldVector((0, 1))
side = w.GetWorldVector((1, 0))
v = w.linearVelocity
vf = forw[0]*v[0] + forw[1]*v[1] # forward speed
vs = side[0]*v[0] + side[1]*v[1] # side speed
vf = forw[0] * v[0] + forw[1] * v[1] # forward speed
vs = side[0] * v[0] + side[1] * v[1] # side speed
# WHEEL_MOMENT_OF_INERTIA*np.square(w.omega)/2 = E -- energy
# WHEEL_MOMENT_OF_INERTIA*w.omega * domega/dt = dE/dt = W -- power
# domega = dt*W/WHEEL_MOMENT_OF_INERTIA/w.omega
# add small coef not to divide by zero
w.omega += dt*ENGINE_POWER*w.gas/WHEEL_MOMENT_OF_INERTIA/(abs(w.omega)+5.0)
self.fuel_spent += dt*ENGINE_POWER*w.gas
w.omega += (
dt
* ENGINE_POWER
* w.gas
/ WHEEL_MOMENT_OF_INERTIA
/ (abs(w.omega) + 5.0)
)
self.fuel_spent += dt * ENGINE_POWER * w.gas
if w.brake >= 0.9:
w.omega = 0
elif w.brake > 0:
BRAKE_FORCE = 15 # radians per second
BRAKE_FORCE = 15 # radians per second
dir = -np.sign(w.omega)
val = BRAKE_FORCE*w.brake
if abs(val) > abs(w.omega): val = abs(w.omega) # low speed => same as = 0
w.omega += dir*val
w.phase += w.omega*dt
val = BRAKE_FORCE * w.brake
if abs(val) > abs(w.omega):
val = abs(w.omega) # low speed => same as = 0
w.omega += dir * val
w.phase += w.omega * dt
vr = w.omega*w.wheel_rad # rotating wheel speed
f_force = -vf + vr # force direction is direction of speed difference
vr = w.omega * w.wheel_rad # rotating wheel speed
f_force = -vf + vr # force direction is direction of speed difference
p_force = -vs
# Physically correct is to always apply friction_limit until speed is equal.
# But dt is finite, that will lead to oscillations if difference is already near zero.
# Random coefficient to cut oscillations in few steps (have no effect on friction_limit)
f_force *= 205000*SIZE*SIZE
p_force *= 205000*SIZE*SIZE
f_force *= 205000 * SIZE * SIZE
p_force *= 205000 * SIZE * SIZE
force = np.sqrt(np.square(f_force) + np.square(p_force))
# Skid trace
if abs(force) > 2.0*friction_limit:
if w.skid_particle and w.skid_particle.grass == grass and len(w.skid_particle.poly) < 30:
w.skid_particle.poly.append( (w.position[0], w.position[1]) )
if abs(force) > 2.0 * friction_limit:
if (
w.skid_particle
and w.skid_particle.grass == grass
and len(w.skid_particle.poly) < 30
):
w.skid_particle.poly.append((w.position[0], w.position[1]))
elif w.skid_start is None:
w.skid_start = w.position
else:
w.skid_particle = self._create_particle( w.skid_start, w.position, grass )
w.skid_particle = self._create_particle(
w.skid_start, w.position, grass
)
w.skid_start = None
else:
w.skid_start = None
@@ -208,11 +250,15 @@ class Car:
f_force *= force
p_force *= force
w.omega -= dt*f_force*w.wheel_rad/WHEEL_MOMENT_OF_INERTIA
w.omega -= dt * f_force * w.wheel_rad / WHEEL_MOMENT_OF_INERTIA
w.ApplyForceToCenter( (
p_force*side[0] + f_force*forw[0],
p_force*side[1] + f_force*forw[1]), True )
w.ApplyForceToCenter(
(
p_force * side[0] + f_force * forw[0],
p_force * side[1] + f_force * forw[1],
),
True,
)
def draw(self, viewer, draw_particles=True):
if draw_particles:
@@ -221,27 +267,34 @@ class Car:
for obj in self.drawlist:
for f in obj.fixtures:
trans = f.body.transform
path = [trans*v for v in f.shape.vertices]
path = [trans * v for v in f.shape.vertices]
viewer.draw_polygon(path, color=obj.color)
if "phase" not in obj.__dict__: continue
if "phase" not in obj.__dict__:
continue
a1 = obj.phase
a2 = obj.phase + 1.2 # radians
s1 = math.sin(a1)
s2 = math.sin(a2)
c1 = math.cos(a1)
c2 = math.cos(a2)
if s1 > 0 and s2 > 0: continue
if s1 > 0: c1 = np.sign(c1)
if s2 > 0: c2 = np.sign(c2)
if s1 > 0 and s2 > 0:
continue
if s1 > 0:
c1 = np.sign(c1)
if s2 > 0:
c2 = np.sign(c2)
white_poly = [
(-WHEEL_W*SIZE, +WHEEL_R*c1*SIZE), (+WHEEL_W*SIZE, +WHEEL_R*c1*SIZE),
(+WHEEL_W*SIZE, +WHEEL_R*c2*SIZE), (-WHEEL_W*SIZE, +WHEEL_R*c2*SIZE)
]
viewer.draw_polygon([trans*v for v in white_poly], color=WHEEL_WHITE)
(-WHEEL_W * SIZE, +WHEEL_R * c1 * SIZE),
(+WHEEL_W * SIZE, +WHEEL_R * c1 * SIZE),
(+WHEEL_W * SIZE, +WHEEL_R * c2 * SIZE),
(-WHEEL_W * SIZE, +WHEEL_R * c2 * SIZE),
]
viewer.draw_polygon([trans * v for v in white_poly], color=WHEEL_WHITE)
def _create_particle(self, point1, point2, grass):
class Particle:
pass
p = Particle()
p.color = WHEEL_COLOR if not grass else MUD_COLOR
p.ttl = 1
@@ -258,4 +311,3 @@ class Car:
for w in self.wheels:
self.world.DestroyBody(w)
self.wheels = []
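
A minimal sketch of the engine update that the comment block in Car.step derives (power W = dE/dt with E = I*omega^2/2); the function and argument names are illustrative, the constants mirror the source:

def integrate_engine(omega, gas, dt, engine_power, wheel_moi):
    # W = ENGINE_POWER * gas; I * omega * domega/dt = W
    # => domega = dt * W / (I * omega); the +5.0 keeps the division finite near omega = 0
    return omega + dt * engine_power * gas / wheel_moi / (abs(omega) + 5.0)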

View File

@@ -137,7 +137,8 @@ class CarRacing(gym.Env, EzPickle):
)
self.action_space = spaces.Box(
np.array([-1, 0, 0]).astype(np.float32), np.array([+1, +1, +1]).astype(np.float32)
np.array([-1, 0, 0]).astype(np.float32),
np.array([+1, +1, +1]).astype(np.float32),
) # steer, gas, brake
self.observation_space = spaces.Box(

View File

@@ -26,28 +26,33 @@ Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
"""
import sys, math
import math
import sys
import numpy as np
import Box2D
from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener)
from Box2D.b2 import (
edgeShape,
circleShape,
fixtureDef,
polygonShape,
revoluteJointDef,
contactListener,
)
import gym
from gym import spaces
from gym.utils import seeding, EzPickle
FPS = 50
SCALE = 30.0 # affects how fast-paced the game is, forces should be adjusted as well
SCALE = 30.0 # affects how fast-paced the game is, forces should be adjusted as well
MAIN_ENGINE_POWER = 13.0
SIDE_ENGINE_POWER = 0.6
INITIAL_RANDOM = 1000.0 # Set 1500 to make game harder
INITIAL_RANDOM = 1000.0 # Set 1500 to make game harder
LANDER_POLY =[
(-14, +17), (-17, 0), (-17 ,-10),
(+17, -10), (+17, 0), (+14, +17)
]
LANDER_POLY = [(-14, +17), (-17, 0), (-17, -10), (+17, -10), (+17, 0), (+14, +17)]
LEG_AWAY = 20
LEG_DOWN = 18
LEG_W, LEG_H = 2, 8
@@ -66,7 +71,10 @@ class ContactDetector(contactListener):
self.env = env
def BeginContact(self, contact):
if self.env.lander == contact.fixtureA.body or self.env.lander == contact.fixtureB.body:
if (
self.env.lander == contact.fixtureA.body
or self.env.lander == contact.fixtureB.body
):
self.env.game_over = True
for i in range(2):
if self.env.legs[i] in [contact.fixtureA.body, contact.fixtureB.body]:
@@ -79,10 +87,7 @@ class ContactDetector(contactListener):
class LunarLander(gym.Env, EzPickle):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second' : FPS
}
metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": FPS}
continuous = False
@@ -99,7 +104,9 @@ class LunarLander(gym.Env, EzPickle):
self.prev_reward = None
# useful range is -1 .. +1, but spikes can be higher
self.observation_space = spaces.Box(-np.inf, np.inf, shape=(8,), dtype=np.float32)
self.observation_space = spaces.Box(
-np.inf, np.inf, shape=(8,), dtype=np.float32
)
if self.continuous:
# Action is two floats [main engine, left-right engines].
@@ -117,7 +124,8 @@ class LunarLander(gym.Env, EzPickle):
return [seed]
def _destroy(self):
if not self.moon: return
if not self.moon:
return
self.world.contactListener = None
self._clean_particles(True)
self.world.DestroyBody(self.moon)
@@ -134,68 +142,77 @@ class LunarLander(gym.Env, EzPickle):
self.game_over = False
self.prev_shaping = None
W = VIEWPORT_W/SCALE
H = VIEWPORT_H/SCALE
W = VIEWPORT_W / SCALE
H = VIEWPORT_H / SCALE
# terrain
CHUNKS = 11
height = self.np_random.uniform(0, H/2, size=(CHUNKS+1,))
chunk_x = [W/(CHUNKS-1)*i for i in range(CHUNKS)]
self.helipad_x1 = chunk_x[CHUNKS//2-1]
self.helipad_x2 = chunk_x[CHUNKS//2+1]
self.helipad_y = H/4
height[CHUNKS//2-2] = self.helipad_y
height[CHUNKS//2-1] = self.helipad_y
height[CHUNKS//2+0] = self.helipad_y
height[CHUNKS//2+1] = self.helipad_y
height[CHUNKS//2+2] = self.helipad_y
smooth_y = [0.33*(height[i-1] + height[i+0] + height[i+1]) for i in range(CHUNKS)]
height = self.np_random.uniform(0, H / 2, size=(CHUNKS + 1,))
chunk_x = [W / (CHUNKS - 1) * i for i in range(CHUNKS)]
self.helipad_x1 = chunk_x[CHUNKS // 2 - 1]
self.helipad_x2 = chunk_x[CHUNKS // 2 + 1]
self.helipad_y = H / 4
height[CHUNKS // 2 - 2] = self.helipad_y
height[CHUNKS // 2 - 1] = self.helipad_y
height[CHUNKS // 2 + 0] = self.helipad_y
height[CHUNKS // 2 + 1] = self.helipad_y
height[CHUNKS // 2 + 2] = self.helipad_y
smooth_y = [
0.33 * (height[i - 1] + height[i + 0] + height[i + 1])
for i in range(CHUNKS)
]
self.moon = self.world.CreateStaticBody(shapes=edgeShape(vertices=[(0, 0), (W, 0)]))
self.moon = self.world.CreateStaticBody(
shapes=edgeShape(vertices=[(0, 0), (W, 0)])
)
self.sky_polys = []
for i in range(CHUNKS-1):
for i in range(CHUNKS - 1):
p1 = (chunk_x[i], smooth_y[i])
p2 = (chunk_x[i+1], smooth_y[i+1])
self.moon.CreateEdgeFixture(
vertices=[p1,p2],
density=0,
friction=0.1)
p2 = (chunk_x[i + 1], smooth_y[i + 1])
self.moon.CreateEdgeFixture(vertices=[p1, p2], density=0, friction=0.1)
self.sky_polys.append([p1, p2, (p2[0], H), (p1[0], H)])
self.moon.color1 = (0.0, 0.0, 0.0)
self.moon.color2 = (0.0, 0.0, 0.0)
initial_y = VIEWPORT_H/SCALE
initial_y = VIEWPORT_H / SCALE
self.lander = self.world.CreateDynamicBody(
position=(VIEWPORT_W/SCALE/2, initial_y),
position=(VIEWPORT_W / SCALE / 2, initial_y),
angle=0.0,
fixtures = fixtureDef(
shape=polygonShape(vertices=[(x/SCALE, y/SCALE) for x, y in LANDER_POLY]),
fixtures=fixtureDef(
shape=polygonShape(
vertices=[(x / SCALE, y / SCALE) for x, y in LANDER_POLY]
),
density=5.0,
friction=0.1,
categoryBits=0x0010,
maskBits=0x001, # collide only with ground
restitution=0.0) # 0.99 bouncy
)
maskBits=0x001, # collide only with ground
restitution=0.0,
), # 0.99 bouncy
)
self.lander.color1 = (0.5, 0.4, 0.9)
self.lander.color2 = (0.3, 0.3, 0.5)
self.lander.ApplyForceToCenter( (
self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM),
self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM)
), True)
self.lander.ApplyForceToCenter(
(
self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM),
self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM),
),
True,
)
self.legs = []
for i in [-1, +1]:
leg = self.world.CreateDynamicBody(
position=(VIEWPORT_W/SCALE/2 - i*LEG_AWAY/SCALE, initial_y),
position=(VIEWPORT_W / SCALE / 2 - i * LEG_AWAY / SCALE, initial_y),
angle=(i * 0.05),
fixtures=fixtureDef(
shape=polygonShape(box=(LEG_W/SCALE, LEG_H/SCALE)),
shape=polygonShape(box=(LEG_W / SCALE, LEG_H / SCALE)),
density=1.0,
restitution=0.0,
categoryBits=0x0020,
maskBits=0x001)
)
maskBits=0x001,
),
)
leg.ground_contact = False
leg.color1 = (0.5, 0.4, 0.9)
leg.color2 = (0.3, 0.3, 0.5)
@@ -203,14 +220,16 @@ class LunarLander(gym.Env, EzPickle):
bodyA=self.lander,
bodyB=leg,
localAnchorA=(0, 0),
localAnchorB=(i * LEG_AWAY/SCALE, LEG_DOWN/SCALE),
localAnchorB=(i * LEG_AWAY / SCALE, LEG_DOWN / SCALE),
enableMotor=True,
enableLimit=True,
maxMotorTorque=LEG_SPRING_TORQUE,
motorSpeed=+0.3 * i # low enough not to jump back into the sky
)
motorSpeed=+0.3 * i, # low enough not to jump back into the sky
)
if i == -1:
rjd.lowerAngle = +0.9 - 0.5 # The most esoteric numbers here, angled legs have freedom to travel within
rjd.lowerAngle = (
+0.9 - 0.5
) # The most esoteric numbers here, angled legs have freedom to travel within
rjd.upperAngle = +0.9
else:
rjd.lowerAngle = -0.9
@@ -224,16 +243,17 @@ class LunarLander(gym.Env, EzPickle):
def _create_particle(self, mass, x, y, ttl):
p = self.world.CreateDynamicBody(
position = (x, y),
position=(x, y),
angle=0.0,
fixtures = fixtureDef(
shape=circleShape(radius=2/SCALE, pos=(0, 0)),
fixtures=fixtureDef(
shape=circleShape(radius=2 / SCALE, pos=(0, 0)),
density=mass,
friction=0.1,
categoryBits=0x0100,
maskBits=0x001, # collide only with ground
restitution=0.3)
)
restitution=0.3,
),
)
p.ttl = ttl
self.particles.append(p)
self._clean_particles(False)
@@ -247,86 +267,115 @@ class LunarLander(gym.Env, EzPickle):
if self.continuous:
action = np.clip(action, -1, +1).astype(np.float32)
else:
assert self.action_space.contains(action), "%r (%s) invalid " % (action, type(action))
assert self.action_space.contains(action), "%r (%s) invalid " % (
action,
type(action),
)
# Engines
tip = (math.sin(self.lander.angle), math.cos(self.lander.angle))
tip = (math.sin(self.lander.angle), math.cos(self.lander.angle))
side = (-tip[1], tip[0])
dispersion = [self.np_random.uniform(-1.0, +1.0) / SCALE for _ in range(2)]
m_power = 0.0
if (self.continuous and action[0] > 0.0) or (not self.continuous and action == 2):
if (self.continuous and action[0] > 0.0) or (
not self.continuous and action == 2
):
# Main engine
if self.continuous:
m_power = (np.clip(action[0], 0.0,1.0) + 1.0)*0.5 # 0.5..1.0
m_power = (np.clip(action[0], 0.0, 1.0) + 1.0) * 0.5 # 0.5..1.0
assert m_power >= 0.5 and m_power <= 1.0
else:
m_power = 1.0
ox = (tip[0] * (4/SCALE + 2 * dispersion[0]) +
side[0] * dispersion[1]) # 4 is move a bit downwards, +-2 for randomness
oy = -tip[1] * (4/SCALE + 2 * dispersion[0]) - side[1] * dispersion[1]
ox = (
tip[0] * (4 / SCALE + 2 * dispersion[0]) + side[0] * dispersion[1]
) # 4 is move a bit downwards, +-2 for randomness
oy = -tip[1] * (4 / SCALE + 2 * dispersion[0]) - side[1] * dispersion[1]
impulse_pos = (self.lander.position[0] + ox, self.lander.position[1] + oy)
p = self._create_particle(3.5, # 3.5 is here to make particle speed adequate
impulse_pos[0],
impulse_pos[1],
m_power) # particles are just a decoration
p.ApplyLinearImpulse((ox * MAIN_ENGINE_POWER * m_power, oy * MAIN_ENGINE_POWER * m_power),
impulse_pos,
True)
self.lander.ApplyLinearImpulse((-ox * MAIN_ENGINE_POWER * m_power, -oy * MAIN_ENGINE_POWER * m_power),
impulse_pos,
True)
p = self._create_particle(
3.5, # 3.5 is here to make particle speed adequate
impulse_pos[0],
impulse_pos[1],
m_power,
) # particles are just a decoration
p.ApplyLinearImpulse(
(ox * MAIN_ENGINE_POWER * m_power, oy * MAIN_ENGINE_POWER * m_power),
impulse_pos,
True,
)
self.lander.ApplyLinearImpulse(
(-ox * MAIN_ENGINE_POWER * m_power, -oy * MAIN_ENGINE_POWER * m_power),
impulse_pos,
True,
)
s_power = 0.0
if (self.continuous and np.abs(action[1]) > 0.5) or (not self.continuous and action in [1, 3]):
if (self.continuous and np.abs(action[1]) > 0.5) or (
not self.continuous and action in [1, 3]
):
# Orientation engines
if self.continuous:
direction = np.sign(action[1])
s_power = np.clip(np.abs(action[1]), 0.5, 1.0)
assert s_power >= 0.5 and s_power <= 1.0
else:
direction = action-2
direction = action - 2
s_power = 1.0
ox = tip[0] * dispersion[0] + side[0] * (3 * dispersion[1] + direction * SIDE_ENGINE_AWAY/SCALE)
oy = -tip[1] * dispersion[0] - side[1] * (3 * dispersion[1] + direction * SIDE_ENGINE_AWAY/SCALE)
impulse_pos = (self.lander.position[0] + ox - tip[0] * 17/SCALE,
self.lander.position[1] + oy + tip[1] * SIDE_ENGINE_HEIGHT/SCALE)
ox = tip[0] * dispersion[0] + side[0] * (
3 * dispersion[1] + direction * SIDE_ENGINE_AWAY / SCALE
)
oy = -tip[1] * dispersion[0] - side[1] * (
3 * dispersion[1] + direction * SIDE_ENGINE_AWAY / SCALE
)
impulse_pos = (
self.lander.position[0] + ox - tip[0] * 17 / SCALE,
self.lander.position[1] + oy + tip[1] * SIDE_ENGINE_HEIGHT / SCALE,
)
p = self._create_particle(0.7, impulse_pos[0], impulse_pos[1], s_power)
p.ApplyLinearImpulse((ox * SIDE_ENGINE_POWER * s_power, oy * SIDE_ENGINE_POWER * s_power),
impulse_pos
, True)
self.lander.ApplyLinearImpulse((-ox * SIDE_ENGINE_POWER * s_power, -oy * SIDE_ENGINE_POWER * s_power),
impulse_pos,
True)
p.ApplyLinearImpulse(
(ox * SIDE_ENGINE_POWER * s_power, oy * SIDE_ENGINE_POWER * s_power),
impulse_pos,
True,
)
self.lander.ApplyLinearImpulse(
(-ox * SIDE_ENGINE_POWER * s_power, -oy * SIDE_ENGINE_POWER * s_power),
impulse_pos,
True,
)
self.world.Step(1.0/FPS, 6*30, 2*30)
self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
pos = self.lander.position
vel = self.lander.linearVelocity
state = [
(pos.x - VIEWPORT_W/SCALE/2) / (VIEWPORT_W/SCALE/2),
(pos.y - (self.helipad_y+LEG_DOWN/SCALE)) / (VIEWPORT_H/SCALE/2),
vel.x*(VIEWPORT_W/SCALE/2)/FPS,
vel.y*(VIEWPORT_H/SCALE/2)/FPS,
(pos.x - VIEWPORT_W / SCALE / 2) / (VIEWPORT_W / SCALE / 2),
(pos.y - (self.helipad_y + LEG_DOWN / SCALE)) / (VIEWPORT_H / SCALE / 2),
vel.x * (VIEWPORT_W / SCALE / 2) / FPS,
vel.y * (VIEWPORT_H / SCALE / 2) / FPS,
self.lander.angle,
20.0*self.lander.angularVelocity/FPS,
20.0 * self.lander.angularVelocity / FPS,
1.0 if self.legs[0].ground_contact else 0.0,
1.0 if self.legs[1].ground_contact else 0.0
]
1.0 if self.legs[1].ground_contact else 0.0,
]
assert len(state) == 8
reward = 0
shaping = \
- 100*np.sqrt(state[0]*state[0] + state[1]*state[1]) \
- 100*np.sqrt(state[2]*state[2] + state[3]*state[3]) \
- 100*abs(state[4]) + 10*state[6] + 10*state[7] # And ten points for legs contact, the idea is if you
# lose contact again after landing, you get negative reward
shaping = (
-100 * np.sqrt(state[0] * state[0] + state[1] * state[1])
- 100 * np.sqrt(state[2] * state[2] + state[3] * state[3])
- 100 * abs(state[4])
+ 10 * state[6]
+ 10 * state[7]
) # And ten points for legs contact, the idea is if you
# lose contact again after landing, you get negative reward
if self.prev_shaping is not None:
reward = shaping - self.prev_shaping
self.prev_shaping = shaping
reward -= m_power*0.30 # less fuel spent is better, about -30 for heuristic landing
reward -= s_power*0.03
reward -= (
m_power * 0.30
) # less fuel spent is better, about -30 for heuristic landing
reward -= s_power * 0.03
done = False
if self.game_over or abs(state[0]) >= 1.0:
@@ -337,16 +386,25 @@ class LunarLander(gym.Env, EzPickle):
reward = +100
return np.array(state, dtype=np.float32), reward, done, {}
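
A minimal sketch of the shaping term above as a standalone potential function (the indices follow the 8-dimensional state built in step(); the function name is illustrative):

import numpy as np

def shaping_potential(state):
    # closer to the pad, slower, more upright, and with both legs down is better
    return (
        -100 * np.sqrt(state[0] ** 2 + state[1] ** 2)
        - 100 * np.sqrt(state[2] ** 2 + state[3] ** 2)
        - 100 * abs(state[4])
        + 10 * state[6]
        + 10 * state[7]
    )

# per-step reward contribution: shaping_potential(state) - shaping_potential(prev_state)
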
def render(self, mode='human'):
def render(self, mode="human"):
from gym.envs.classic_control import rendering
if self.viewer is None:
self.viewer = rendering.Viewer(VIEWPORT_W, VIEWPORT_H)
self.viewer.set_bounds(0, VIEWPORT_W/SCALE, 0, VIEWPORT_H/SCALE)
self.viewer.set_bounds(0, VIEWPORT_W / SCALE, 0, VIEWPORT_H / SCALE)
for obj in self.particles:
obj.ttl -= 0.15
obj.color1 = (max(0.2, 0.2+obj.ttl), max(0.2, 0.5*obj.ttl), max(0.2, 0.5*obj.ttl))
obj.color2 = (max(0.2, 0.2+obj.ttl), max(0.2, 0.5*obj.ttl), max(0.2, 0.5*obj.ttl))
obj.color1 = (
max(0.2, 0.2 + obj.ttl),
max(0.2, 0.5 * obj.ttl),
max(0.2, 0.5 * obj.ttl),
)
obj.color2 = (
max(0.2, 0.2 + obj.ttl),
max(0.2, 0.5 * obj.ttl),
max(0.2, 0.5 * obj.ttl),
)
self._clean_particles(False)
@@ -357,23 +415,33 @@ class LunarLander(gym.Env, EzPickle):
for f in obj.fixtures:
trans = f.body.transform
if type(f.shape) is circleShape:
t = rendering.Transform(translation=trans*f.shape.pos)
self.viewer.draw_circle(f.shape.radius, 20, color=obj.color1).add_attr(t)
self.viewer.draw_circle(f.shape.radius, 20, color=obj.color2, filled=False, linewidth=2).add_attr(t)
t = rendering.Transform(translation=trans * f.shape.pos)
self.viewer.draw_circle(
f.shape.radius, 20, color=obj.color1
).add_attr(t)
self.viewer.draw_circle(
f.shape.radius, 20, color=obj.color2, filled=False, linewidth=2
).add_attr(t)
else:
path = [trans*v for v in f.shape.vertices]
path = [trans * v for v in f.shape.vertices]
self.viewer.draw_polygon(path, color=obj.color1)
path.append(path[0])
self.viewer.draw_polyline(path, color=obj.color2, linewidth=2)
for x in [self.helipad_x1, self.helipad_x2]:
flagy1 = self.helipad_y
flagy2 = flagy1 + 50/SCALE
flagy2 = flagy1 + 50 / SCALE
self.viewer.draw_polyline([(x, flagy1), (x, flagy2)], color=(1, 1, 1))
self.viewer.draw_polygon([(x, flagy2), (x, flagy2-10/SCALE), (x + 25/SCALE, flagy2 - 5/SCALE)],
color=(0.8, 0.8, 0))
self.viewer.draw_polygon(
[
(x, flagy2),
(x, flagy2 - 10 / SCALE),
(x + 25 / SCALE, flagy2 - 5 / SCALE),
],
color=(0.8, 0.8, 0),
)
return self.viewer.render(return_rgb_array=mode == 'rgb_array')
return self.viewer.render(return_rgb_array=mode == "rgb_array")
def close(self):
if self.viewer is not None:
@@ -384,6 +452,7 @@ class LunarLander(gym.Env, EzPickle):
class LunarLanderContinuous(LunarLander):
continuous = True
def heuristic(env, s):
"""
The heuristic for
@@ -405,28 +474,38 @@ def heuristic(env, s):
a: The heuristic to be fed into the step function defined above to determine the next step and reward.
"""
angle_targ = s[0]*0.5 + s[2]*1.0 # angle should point towards center
if angle_targ > 0.4: angle_targ = 0.4 # more than 0.4 radians (22 degrees) is bad
if angle_targ < -0.4: angle_targ = -0.4
hover_targ = 0.55*np.abs(s[0]) # target y should be proportional to horizontal offset
angle_targ = s[0] * 0.5 + s[2] * 1.0 # angle should point towards center
if angle_targ > 0.4:
angle_targ = 0.4 # more than 0.4 radians (22 degrees) is bad
if angle_targ < -0.4:
angle_targ = -0.4
hover_targ = 0.55 * np.abs(
s[0]
) # target y should be proportional to horizontal offset
angle_todo = (angle_targ - s[4]) * 0.5 - (s[5])*1.0
hover_todo = (hover_targ - s[1])*0.5 - (s[3])*0.5
angle_todo = (angle_targ - s[4]) * 0.5 - (s[5]) * 1.0
hover_todo = (hover_targ - s[1]) * 0.5 - (s[3]) * 0.5
if s[6] or s[7]: # legs have contact
angle_todo = 0
hover_todo = -(s[3])*0.5 # override to reduce fall speed, that's all we need after contact
hover_todo = (
-(s[3]) * 0.5
) # override to reduce fall speed, that's all we need after contact
if env.continuous:
a = np.array([hover_todo*20 - 1, -angle_todo*20])
a = np.array([hover_todo * 20 - 1, -angle_todo * 20])
a = np.clip(a, -1, +1)
else:
a = 0
if hover_todo > np.abs(angle_todo) and hover_todo > 0.05: a = 2
elif angle_todo < -0.05: a = 3
elif angle_todo > +0.05: a = 1
if hover_todo > np.abs(angle_todo) and hover_todo > 0.05:
a = 2
elif angle_todo < -0.05:
a = 3
elif angle_todo > +0.05:
a = 1
return a
def demo_heuristic_lander(env, seed=None, render=False):
env.seed(seed)
total_reward = 0
@@ -439,17 +518,19 @@ def demo_heuristic_lander(env, seed=None, render=False):
if render:
still_open = env.render()
if still_open == False: break
if still_open == False:
break
if steps % 20 == 0 or done:
print("observations:", " ".join(["{:+0.2f}".format(x) for x in s]))
print("step {} total_reward {:+0.2f}".format(steps, total_reward))
steps += 1
if done: break
if done:
break
if render:
env.close()
return total_reward
if __name__ == '__main__':
if __name__ == "__main__":
demo_heuristic_lander(LunarLander(), render=True)
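
A short usage sketch of the scripted controller defined in this file, assuming Box2D is installed (the seeds and episode count are arbitrary):

from gym.envs.box2d.lunar_lander import LunarLanderContinuous, demo_heuristic_lander

for seed in range(3):
    # run the heuristic for one seeded episode and report its return
    total = demo_heuristic_lander(LunarLanderContinuous(), seed=seed, render=False)
    print("seed", seed, "total_reward", total)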

View File

@@ -1,4 +1,5 @@
import pytest
try:
import Box2D
from .lunar_lander import LunarLander, LunarLanderContinuous, demo_heuristic_lander
@@ -6,17 +7,17 @@ except ImportError:
Box2D = None
@pytest.mark.skipif(Box2D is None, reason='Box2D not installed')
@pytest.mark.skipif(Box2D is None, reason="Box2D not installed")
def test_lunar_lander():
_test_lander(LunarLander(), seed=0)
@pytest.mark.skipif(Box2D is None, reason='Box2D not installed')
@pytest.mark.skipif(Box2D is None, reason="Box2D not installed")
def test_lunar_lander_continuous():
_test_lander(LunarLanderContinuous(), seed=0)
@pytest.mark.skipif(Box2D is None, reason='Box2D not installed')
@pytest.mark.skipif(Box2D is None, reason="Box2D not installed")
def _test_lander(env, seed=None, render=False):
total_reward = demo_heuristic_lander(env, seed=seed, render=render)
assert total_reward > 100

View File

@@ -3,4 +3,3 @@ from gym.envs.classic_control.mountain_car import MountainCarEnv
from gym.envs.classic_control.continuous_mountain_car import Continuous_MountainCarEnv
from gym.envs.classic_control.pendulum import PendulumEnv
from gym.envs.classic_control.acrobot import AcrobotEnv

View File

@@ -6,14 +6,20 @@ from gym import core, spaces
from gym.utils import seeding
__copyright__ = "Copyright 2013, RLPy http://acl.mit.edu/RLPy"
__credits__ = ["Alborz Geramifard", "Robert H. Klein", "Christoph Dann",
"William Dabney", "Jonathan P. How"]
__credits__ = [
"Alborz Geramifard",
"Robert H. Klein",
"Christoph Dann",
"William Dabney",
"Jonathan P. How",
]
__license__ = "BSD 3-Clause"
__author__ = "Christoph Dann <cdann@cdann.de>"
# SOURCE:
# https://github.com/rlpy/rlpy/blob/master/rlpy/Domains/Acrobot.py
class AcrobotEnv(core.Env):
"""
@@ -55,27 +61,24 @@ class AcrobotEnv(core.Env):
see the AcrobotLegacy class.
"""
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second' : 15
}
metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 15}
dt = .2
dt = 0.2
LINK_LENGTH_1 = 1. # [m]
LINK_LENGTH_2 = 1. # [m]
LINK_MASS_1 = 1. #: [kg] mass of link 1
LINK_MASS_2 = 1. #: [kg] mass of link 2
LINK_LENGTH_1 = 1.0 # [m]
LINK_LENGTH_2 = 1.0 # [m]
LINK_MASS_1 = 1.0 #: [kg] mass of link 1
LINK_MASS_2 = 1.0 #: [kg] mass of link 2
LINK_COM_POS_1 = 0.5 #: [m] position of the center of mass of link 1
LINK_COM_POS_2 = 0.5 #: [m] position of the center of mass of link 2
LINK_MOI = 1. #: moments of inertia for both links
LINK_MOI = 1.0 #: moments of inertia for both links
MAX_VEL_1 = 4 * pi
MAX_VEL_2 = 9 * pi
AVAIL_TORQUE = [-1., 0., +1]
AVAIL_TORQUE = [-1.0, 0.0, +1]
torque_noise_max = 0.
torque_noise_max = 0.0
#: use dynamics equations from the nips paper or the book
book_or_nips = "book"
@@ -85,7 +88,9 @@ class AcrobotEnv(core.Env):
def __init__(self):
self.viewer = None
high = np.array([1.0, 1.0, 1.0, 1.0, self.MAX_VEL_1, self.MAX_VEL_2], dtype=np.float32)
high = np.array(
[1.0, 1.0, 1.0, 1.0, self.MAX_VEL_1, self.MAX_VEL_2], dtype=np.float32
)
low = -high
self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32)
self.action_space = spaces.Discrete(3)
@@ -106,7 +111,9 @@ class AcrobotEnv(core.Env):
# Add noise to the force action
if self.torque_noise_max > 0:
torque += self.np_random.uniform(-self.torque_noise_max, self.torque_noise_max)
torque += self.np_random.uniform(
-self.torque_noise_max, self.torque_noise_max
)
# Now, augment the state with our force action so it can be passed to
# _dsdt
@@ -127,7 +134,7 @@ class AcrobotEnv(core.Env):
ns[3] = bound(ns[3], -self.MAX_VEL_2, self.MAX_VEL_2)
self.state = ns
terminal = self._terminal()
reward = -1. if not terminal else 0.
reward = -1.0 if not terminal else 0.0
return (self._get_ob(), reward, terminal, {})
def _get_ob(self):
@@ -136,7 +143,7 @@ class AcrobotEnv(core.Env):
def _terminal(self):
s = self.state
return bool(-cos(s[0]) - cos(s[1] + s[0]) > 1.)
return bool(-cos(s[0]) - cos(s[1] + s[0]) > 1.0)
def _dsdt(self, s_augmented, t):
m1 = self.LINK_MASS_1
@@ -153,66 +160,76 @@ class AcrobotEnv(core.Env):
theta2 = s[1]
dtheta1 = s[2]
dtheta2 = s[3]
d1 = m1 * lc1 ** 2 + m2 * \
(l1 ** 2 + lc2 ** 2 + 2 * l1 * lc2 * cos(theta2)) + I1 + I2
d1 = (
m1 * lc1 ** 2
+ m2 * (l1 ** 2 + lc2 ** 2 + 2 * l1 * lc2 * cos(theta2))
+ I1
+ I2
)
d2 = m2 * (lc2 ** 2 + l1 * lc2 * cos(theta2)) + I2
phi2 = m2 * lc2 * g * cos(theta1 + theta2 - pi / 2.)
phi1 = - m2 * l1 * lc2 * dtheta2 ** 2 * sin(theta2) \
- 2 * m2 * l1 * lc2 * dtheta2 * dtheta1 * sin(theta2) \
+ (m1 * lc1 + m2 * l1) * g * cos(theta1 - pi / 2) + phi2
phi2 = m2 * lc2 * g * cos(theta1 + theta2 - pi / 2.0)
phi1 = (
-m2 * l1 * lc2 * dtheta2 ** 2 * sin(theta2)
- 2 * m2 * l1 * lc2 * dtheta2 * dtheta1 * sin(theta2)
+ (m1 * lc1 + m2 * l1) * g * cos(theta1 - pi / 2)
+ phi2
)
if self.book_or_nips == "nips":
# the following line is consistent with the description in the
# paper
ddtheta2 = (a + d2 / d1 * phi1 - phi2) / \
(m2 * lc2 ** 2 + I2 - d2 ** 2 / d1)
ddtheta2 = (a + d2 / d1 * phi1 - phi2) / (m2 * lc2 ** 2 + I2 - d2 ** 2 / d1)
else:
# the following line is consistent with the java implementation and the
# book
ddtheta2 = (a + d2 / d1 * phi1 - m2 * l1 * lc2 * dtheta1 ** 2 * sin(theta2) - phi2) \
/ (m2 * lc2 ** 2 + I2 - d2 ** 2 / d1)
ddtheta2 = (
a + d2 / d1 * phi1 - m2 * l1 * lc2 * dtheta1 ** 2 * sin(theta2) - phi2
) / (m2 * lc2 ** 2 + I2 - d2 ** 2 / d1)
ddtheta1 = -(d2 * ddtheta2 + phi1) / d1
return (dtheta1, dtheta2, ddtheta1, ddtheta2, 0.)
return (dtheta1, dtheta2, ddtheta1, ddtheta2, 0.0)
def render(self, mode='human'):
def render(self, mode="human"):
from gym.envs.classic_control import rendering
s = self.state
if self.viewer is None:
self.viewer = rendering.Viewer(500,500)
self.viewer = rendering.Viewer(500, 500)
bound = self.LINK_LENGTH_1 + self.LINK_LENGTH_2 + 0.2 # 2.2 for default
self.viewer.set_bounds(-bound,bound,-bound,bound)
self.viewer.set_bounds(-bound, bound, -bound, bound)
if s is None: return None
if s is None:
return None
p1 = [-self.LINK_LENGTH_1 *
cos(s[0]), self.LINK_LENGTH_1 * sin(s[0])]
p1 = [-self.LINK_LENGTH_1 * cos(s[0]), self.LINK_LENGTH_1 * sin(s[0])]
p2 = [p1[0] - self.LINK_LENGTH_2 * cos(s[0] + s[1]),
p1[1] + self.LINK_LENGTH_2 * sin(s[0] + s[1])]
p2 = [
p1[0] - self.LINK_LENGTH_2 * cos(s[0] + s[1]),
p1[1] + self.LINK_LENGTH_2 * sin(s[0] + s[1]),
]
xys = np.array([[0,0], p1, p2])[:,::-1]
thetas = [s[0]- pi/2, s[0]+s[1]-pi/2]
xys = np.array([[0, 0], p1, p2])[:, ::-1]
thetas = [s[0] - pi / 2, s[0] + s[1] - pi / 2]
link_lengths = [self.LINK_LENGTH_1, self.LINK_LENGTH_2]
self.viewer.draw_line((-2.2, 1), (2.2, 1))
for ((x,y),th,llen) in zip(xys, thetas, link_lengths):
l,r,t,b = 0, llen, .1, -.1
jtransform = rendering.Transform(rotation=th, translation=(x,y))
link = self.viewer.draw_polygon([(l,b), (l,t), (r,t), (r,b)])
for ((x, y), th, llen) in zip(xys, thetas, link_lengths):
l, r, t, b = 0, llen, 0.1, -0.1
jtransform = rendering.Transform(rotation=th, translation=(x, y))
link = self.viewer.draw_polygon([(l, b), (l, t), (r, t), (r, b)])
link.add_attr(jtransform)
link.set_color(0,.8, .8)
circ = self.viewer.draw_circle(.1)
circ.set_color(.8, .8, 0)
link.set_color(0, 0.8, 0.8)
circ = self.viewer.draw_circle(0.1)
circ.set_color(0.8, 0.8, 0)
circ.add_attr(jtransform)
return self.viewer.render(return_rgb_array = mode=='rgb_array')
return self.viewer.render(return_rgb_array=mode == "rgb_array")
def close(self):
if self.viewer:
self.viewer.close()
self.viewer = None
def wrap(x, m, M):
"""Wraps ``x`` so m <= x <= M; but unlike ``bound()`` which
truncates, ``wrap()`` wraps x around the coordinate system defined by m,M.\n
@@ -233,6 +250,7 @@ def wrap(x, m, M):
x = x + diff
return x
def bound(x, m, M=None):
"""Either have m as scalar, so bound(x,m,M) which returns m <= x <= M *OR*
have m as length 2 vector, bound(x,m, <IGNORED>) returns m[0] <= x <= m[1].
@@ -297,7 +315,6 @@ def rk4(derivs, y0, t, *args, **kwargs):
yout[0] = y0
for i in np.arange(len(t) - 1):
thist = t[i]

View File

@@ -58,21 +58,18 @@ class CartPoleEnv(gym.Env):
195.0 over 100 consecutive trials.
"""
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second': 50
}
metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 50}
def __init__(self):
self.gravity = 9.8
self.masscart = 1.0
self.masspole = 0.1
self.total_mass = (self.masspole + self.masscart)
self.total_mass = self.masspole + self.masscart
self.length = 0.5 # actually half the pole's length
self.polemass_length = (self.masspole * self.length)
self.polemass_length = self.masspole * self.length
self.force_mag = 10.0
self.tau = 0.02 # seconds between state updates
self.kinematics_integrator = 'euler'
self.kinematics_integrator = "euler"
# Angle at which to fail the episode
self.theta_threshold_radians = 12 * 2 * math.pi / 360
@@ -80,11 +77,15 @@ class CartPoleEnv(gym.Env):
# Angle limit set to 2 * theta_threshold_radians so failing observation
# is still within bounds.
high = np.array([self.x_threshold * 2,
np.finfo(np.float32).max,
self.theta_threshold_radians * 2,
np.finfo(np.float32).max],
dtype=np.float32)
high = np.array(
[
self.x_threshold * 2,
np.finfo(np.float32).max,
self.theta_threshold_radians * 2,
np.finfo(np.float32).max,
],
dtype=np.float32,
)
self.action_space = spaces.Discrete(2)
self.observation_space = spaces.Box(-high, high, dtype=np.float32)
@@ -110,11 +111,15 @@ class CartPoleEnv(gym.Env):
# For the interested reader:
# https://coneural.org/florian/papers/05_cart_pole.pdf
temp = (force + self.polemass_length * theta_dot ** 2 * sintheta) / self.total_mass
thetaacc = (self.gravity * sintheta - costheta * temp) / (self.length * (4.0 / 3.0 - self.masspole * costheta ** 2 / self.total_mass))
temp = (
force + self.polemass_length * theta_dot ** 2 * sintheta
) / self.total_mass
thetaacc = (self.gravity * sintheta - costheta * temp) / (
self.length * (4.0 / 3.0 - self.masspole * costheta ** 2 / self.total_mass)
)
xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass
if self.kinematics_integrator == 'euler':
if self.kinematics_integrator == "euler":
x = x + self.tau * x_dot
x_dot = x_dot + self.tau * xacc
theta = theta + self.tau * theta_dot
@@ -158,12 +163,12 @@ class CartPoleEnv(gym.Env):
self.steps_beyond_done = None
return np.array(self.state)
def render(self, mode='human'):
def render(self, mode="human"):
screen_width = 600
screen_height = 400
world_width = self.x_threshold * 2
scale = screen_width/world_width
scale = screen_width / world_width
carty = 100 # TOP OF CART
polewidth = 10.0
polelen = scale * (2 * self.length)
@@ -172,6 +177,7 @@ class CartPoleEnv(gym.Env):
if self.viewer is None:
from gym.envs.classic_control import rendering
self.viewer = rendering.Viewer(screen_width, screen_height)
l, r, t, b = -cartwidth / 2, cartwidth / 2, cartheight / 2, -cartheight / 2
axleoffset = cartheight / 4.0
@@ -179,17 +185,22 @@ class CartPoleEnv(gym.Env):
self.carttrans = rendering.Transform()
cart.add_attr(self.carttrans)
self.viewer.add_geom(cart)
l, r, t, b = -polewidth / 2, polewidth / 2, polelen - polewidth / 2, -polewidth / 2
l, r, t, b = (
-polewidth / 2,
polewidth / 2,
polelen - polewidth / 2,
-polewidth / 2,
)
pole = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)])
pole.set_color(.8, .6, .4)
pole.set_color(0.8, 0.6, 0.4)
self.poletrans = rendering.Transform(translation=(0, axleoffset))
pole.add_attr(self.poletrans)
pole.add_attr(self.carttrans)
self.viewer.add_geom(pole)
self.axle = rendering.make_circle(polewidth/2)
self.axle = rendering.make_circle(polewidth / 2)
self.axle.add_attr(self.poletrans)
self.axle.add_attr(self.carttrans)
self.axle.set_color(.5, .5, .8)
self.axle.set_color(0.5, 0.5, 0.8)
self.viewer.add_geom(self.axle)
self.track = rendering.Line((0, carty), (screen_width, carty))
self.track.set_color(0, 0, 0)
@@ -202,7 +213,12 @@ class CartPoleEnv(gym.Env):
# Edit the pole polygon vertex
pole = self._pole_geom
l, r, t, b = -polewidth / 2, polewidth / 2, polelen - polewidth / 2, -polewidth / 2
l, r, t, b = (
-polewidth / 2,
polewidth / 2,
polelen - polewidth / 2,
-polewidth / 2,
)
pole.v = [(l, b), (l, t), (r, t), (r, b)]
x = self.state
@@ -210,7 +226,7 @@ class CartPoleEnv(gym.Env):
self.carttrans.set_translation(cartx, carty)
self.poletrans.set_rotation(-x[2])
return self.viewer.render(return_rgb_array=mode == 'rgb_array')
return self.viewer.render(return_rgb_array=mode == "rgb_array")
def close(self):
if self.viewer:
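
A minimal sketch of the explicit Euler update used when kinematics_integrator == "euler", with the accelerations xacc and thetaacc taken as computed above (tau defaults to the 0.02 s set in __init__):

def euler_step(x, x_dot, theta, theta_dot, xacc, thetaacc, tau=0.02):
    # advance positions with the old velocities, then the velocities with the
    # accelerations from the cart-pole dynamics
    x = x + tau * x_dot
    x_dot = x_dot + tau * xacc
    theta = theta + tau * theta_dot
    theta_dot = theta_dot + tau * thetaacc
    return x, x_dot, theta, theta_dot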

View File

@@ -53,10 +53,8 @@ class Continuous_MountainCarEnv(gym.Env):
The car position is more than 0.45
Episode length is greater than 200
"""
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second': 30
}
metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 30}
def __init__(self, goal_velocity=0):
self.min_action = -1.0
@@ -64,7 +62,9 @@ class Continuous_MountainCarEnv(gym.Env):
self.min_position = -1.2
self.max_position = 0.6
self.max_speed = 0.07
self.goal_position = 0.45 # was 0.5 in gym, 0.45 in Arnaud de Broissia's version
self.goal_position = (
0.45 # was 0.5 in gym, 0.45 in Arnaud de Broissia's version
)
self.goal_velocity = goal_velocity
self.power = 0.0015
@@ -78,15 +78,10 @@ class Continuous_MountainCarEnv(gym.Env):
self.viewer = None
self.action_space = spaces.Box(
low=self.min_action,
high=self.max_action,
shape=(1,),
dtype=np.float32
low=self.min_action, high=self.max_action, shape=(1,), dtype=np.float32
)
self.observation_space = spaces.Box(
low=self.low_state,
high=self.high_state,
dtype=np.float32
low=self.low_state, high=self.high_state, dtype=np.float32
)
self.seed()
@@ -103,17 +98,20 @@ class Continuous_MountainCarEnv(gym.Env):
force = min(max(action[0], self.min_action), self.max_action)
velocity += force * self.power - 0.0025 * math.cos(3 * position)
if (velocity > self.max_speed): velocity = self.max_speed
if (velocity < -self.max_speed): velocity = -self.max_speed
if velocity > self.max_speed:
velocity = self.max_speed
if velocity < -self.max_speed:
velocity = -self.max_speed
position += velocity
if (position > self.max_position): position = self.max_position
if (position < self.min_position): position = self.min_position
if (position == self.min_position and velocity < 0): velocity = 0
if position > self.max_position:
position = self.max_position
if position < self.min_position:
position = self.min_position
if position == self.min_position and velocity < 0:
velocity = 0
# Convert a possible numpy bool to a Python bool.
done = bool(
position >= self.goal_position and velocity >= self.goal_velocity
)
done = bool(position >= self.goal_position and velocity >= self.goal_velocity)
reward = 0
if done:
@@ -128,23 +126,24 @@ class Continuous_MountainCarEnv(gym.Env):
return np.array(self.state)
def _height(self, xs):
return np.sin(3 * xs)*.45+.55
return np.sin(3 * xs) * 0.45 + 0.55
def render(self, mode='human'):
def render(self, mode="human"):
screen_width = 600
screen_height = 400
world_width = self.max_position - self.min_position
scale = screen_width/world_width
scale = screen_width / world_width
carwidth = 40
carheight = 20
if self.viewer is None:
from gym.envs.classic_control import rendering
self.viewer = rendering.Viewer(screen_width, screen_height)
xs = np.linspace(self.min_position, self.max_position, 100)
ys = self._height(xs)
xys = list(zip((xs-self.min_position)*scale, ys*scale))
xys = list(zip((xs - self.min_position) * scale, ys * scale))
self.track = rendering.make_polyline(xys)
self.track.set_linewidth(4)
@@ -159,7 +158,7 @@ class Continuous_MountainCarEnv(gym.Env):
car.add_attr(self.cartrans)
self.viewer.add_geom(car)
frontwheel = rendering.make_circle(carheight / 2.5)
frontwheel.set_color(.5, .5, .5)
frontwheel.set_color(0.5, 0.5, 0.5)
frontwheel.add_attr(
rendering.Transform(translation=(carwidth / 4, clearance))
)
@@ -170,26 +169,26 @@ class Continuous_MountainCarEnv(gym.Env):
rendering.Transform(translation=(-carwidth / 4, clearance))
)
backwheel.add_attr(self.cartrans)
backwheel.set_color(.5, .5, .5)
backwheel.set_color(0.5, 0.5, 0.5)
self.viewer.add_geom(backwheel)
flagx = (self.goal_position-self.min_position)*scale
flagy1 = self._height(self.goal_position)*scale
flagx = (self.goal_position - self.min_position) * scale
flagy1 = self._height(self.goal_position) * scale
flagy2 = flagy1 + 50
flagpole = rendering.Line((flagx, flagy1), (flagx, flagy2))
self.viewer.add_geom(flagpole)
flag = rendering.FilledPolygon(
[(flagx, flagy2), (flagx, flagy2 - 10), (flagx + 25, flagy2 - 5)]
)
flag.set_color(.8, .8, 0)
flag.set_color(0.8, 0.8, 0)
self.viewer.add_geom(flag)
pos = self.state[0]
self.cartrans.set_translation(
(pos-self.min_position) * scale, self._height(pos) * scale
(pos - self.min_position) * scale, self._height(pos) * scale
)
self.cartrans.set_rotation(math.cos(3 * pos))
return self.viewer.render(return_rgb_array=mode == 'rgb_array')
return self.viewer.render(return_rgb_array=mode == "rgb_array")
def close(self):
if self.viewer:

View File

@@ -52,10 +52,7 @@ class MountainCarEnv(gym.Env):
Episode length is greater than 200
"""
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second': 30
}
metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 30}
def __init__(self, goal_velocity=0):
self.min_position = -1.2
@@ -67,19 +64,13 @@ class MountainCarEnv(gym.Env):
self.force = 0.001
self.gravity = 0.0025
self.low = np.array(
[self.min_position, -self.max_speed], dtype=np.float32
)
self.high = np.array(
[self.max_position, self.max_speed], dtype=np.float32
)
self.low = np.array([self.min_position, -self.max_speed], dtype=np.float32)
self.high = np.array([self.max_position, self.max_speed], dtype=np.float32)
self.viewer = None
self.action_space = spaces.Discrete(3)
self.observation_space = spaces.Box(
self.low, self.high, dtype=np.float32
)
self.observation_space = spaces.Box(self.low, self.high, dtype=np.float32)
self.seed()
@@ -88,19 +79,20 @@ class MountainCarEnv(gym.Env):
return [seed]
def step(self, action):
assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action))
assert self.action_space.contains(action), "%r (%s) invalid" % (
action,
type(action),
)
position, velocity = self.state
velocity += (action - 1) * self.force + math.cos(3 * position) * (-self.gravity)
velocity = np.clip(velocity, -self.max_speed, self.max_speed)
position += velocity
position = np.clip(position, self.min_position, self.max_position)
if (position == self.min_position and velocity < 0):
if position == self.min_position and velocity < 0:
velocity = 0
done = bool(
position >= self.goal_position and velocity >= self.goal_velocity
)
done = bool(position >= self.goal_position and velocity >= self.goal_velocity)
reward = -1.0
self.state = (position, velocity)
@@ -111,9 +103,9 @@ class MountainCarEnv(gym.Env):
return np.array(self.state)
def _height(self, xs):
return np.sin(3 * xs) * .45 + .55
return np.sin(3 * xs) * 0.45 + 0.55
def render(self, mode='human'):
def render(self, mode="human"):
screen_width = 600
screen_height = 400
@@ -124,6 +116,7 @@ class MountainCarEnv(gym.Env):
if self.viewer is None:
from gym.envs.classic_control import rendering
self.viewer = rendering.Viewer(screen_width, screen_height)
xs = np.linspace(self.min_position, self.max_position, 100)
ys = self._height(xs)
@@ -142,7 +135,7 @@ class MountainCarEnv(gym.Env):
car.add_attr(self.cartrans)
self.viewer.add_geom(car)
frontwheel = rendering.make_circle(carheight / 2.5)
frontwheel.set_color(.5, .5, .5)
frontwheel.set_color(0.5, 0.5, 0.5)
frontwheel.add_attr(
rendering.Transform(translation=(carwidth / 4, clearance))
)
@@ -153,9 +146,9 @@ class MountainCarEnv(gym.Env):
rendering.Transform(translation=(-carwidth / 4, clearance))
)
backwheel.add_attr(self.cartrans)
backwheel.set_color(.5, .5, .5)
backwheel.set_color(0.5, 0.5, 0.5)
self.viewer.add_geom(backwheel)
flagx = (self.goal_position-self.min_position) * scale
flagx = (self.goal_position - self.min_position) * scale
flagy1 = self._height(self.goal_position) * scale
flagy2 = flagy1 + 50
flagpole = rendering.Line((flagx, flagy1), (flagx, flagy2))
@@ -163,16 +156,16 @@ class MountainCarEnv(gym.Env):
flag = rendering.FilledPolygon(
[(flagx, flagy2), (flagx, flagy2 - 10), (flagx + 25, flagy2 - 5)]
)
flag.set_color(.8, .8, 0)
flag.set_color(0.8, 0.8, 0)
self.viewer.add_geom(flag)
pos = self.state[0]
self.cartrans.set_translation(
(pos-self.min_position) * scale, self._height(pos) * scale
(pos - self.min_position) * scale, self._height(pos) * scale
)
self.cartrans.set_rotation(math.cos(3 * pos))
return self.viewer.render(return_rgb_array=mode == 'rgb_array')
return self.viewer.render(return_rgb_array=mode == "rgb_array")
def get_keys_to_action(self):
# Control with left and right arrow keys.
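
A minimal sketch of the hill physics in step(): the throttle nudges the velocity by ±force while gravity pulls along the slope of the cosine-shaped track (constants copied from __init__; the function name is illustrative):

import math
import numpy as np

def mountain_car_step(position, velocity, action, force=0.001, gravity=0.0025,
                      max_speed=0.07, min_position=-1.2, max_position=0.6):
    # action in {0, 1, 2} means push left / coast / push right
    velocity += (action - 1) * force + math.cos(3 * position) * (-gravity)
    velocity = np.clip(velocity, -max_speed, max_speed)
    position = np.clip(position + velocity, min_position, max_position)
    if position == min_position and velocity < 0:
        velocity = 0  # the car stops dead against the left wall
    return position, velocity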

View File

@@ -6,31 +6,22 @@ from os import path
class PendulumEnv(gym.Env):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second': 30
}
metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 30}
def __init__(self, g=10.0):
self.max_speed = 8
self.max_torque = 2.
self.dt = .05
self.max_torque = 2.0
self.dt = 0.05
self.g = g
self.m = 1.
self.l = 1.
self.m = 1.0
self.l = 1.0
self.viewer = None
high = np.array([1., 1., self.max_speed], dtype=np.float32)
high = np.array([1.0, 1.0, self.max_speed], dtype=np.float32)
self.action_space = spaces.Box(
low=-self.max_torque,
high=self.max_torque, shape=(1,),
dtype=np.float32
)
self.observation_space = spaces.Box(
low=-high,
high=high,
dtype=np.float32
low=-self.max_torque, high=self.max_torque, shape=(1,), dtype=np.float32
)
self.observation_space = spaces.Box(low=-high, high=high, dtype=np.float32)
self.seed()
@@ -48,9 +39,12 @@ class PendulumEnv(gym.Env):
u = np.clip(u, -self.max_torque, self.max_torque)[0]
self.last_u = u # for rendering
costs = angle_normalize(th) ** 2 + .1 * thdot ** 2 + .001 * (u ** 2)
costs = angle_normalize(th) ** 2 + 0.1 * thdot ** 2 + 0.001 * (u ** 2)
newthdot = thdot + (-3 * g / (2 * l) * np.sin(th + np.pi) + 3. / (m * l ** 2) * u) * dt
newthdot = (
thdot
+ (-3 * g / (2 * l) * np.sin(th + np.pi) + 3.0 / (m * l ** 2) * u) * dt
)
newth = th + newthdot * dt
newthdot = np.clip(newthdot, -self.max_speed, self.max_speed)
@@ -67,21 +61,22 @@ class PendulumEnv(gym.Env):
theta, thetadot = self.state
return np.array([np.cos(theta), np.sin(theta), thetadot])
def render(self, mode='human'):
def render(self, mode="human"):
if self.viewer is None:
from gym.envs.classic_control import rendering
self.viewer = rendering.Viewer(500, 500)
self.viewer.set_bounds(-2.2, 2.2, -2.2, 2.2)
rod = rendering.make_capsule(1, .2)
rod.set_color(.8, .3, .3)
rod = rendering.make_capsule(1, 0.2)
rod.set_color(0.8, 0.3, 0.3)
self.pole_transform = rendering.Transform()
rod.add_attr(self.pole_transform)
self.viewer.add_geom(rod)
axle = rendering.make_circle(.05)
axle = rendering.make_circle(0.05)
axle.set_color(0, 0, 0)
self.viewer.add_geom(axle)
fname = path.join(path.dirname(__file__), "assets/clockwise.png")
self.img = rendering.Image(fname, 1., 1.)
self.img = rendering.Image(fname, 1.0, 1.0)
self.imgtrans = rendering.Transform()
self.img.add_attr(self.imgtrans)
@@ -90,7 +85,7 @@ class PendulumEnv(gym.Env):
if self.last_u:
self.imgtrans.scale = (-self.last_u / 2, np.abs(self.last_u) / 2)
return self.viewer.render(return_rgb_array=mode == 'rgb_array')
return self.viewer.render(return_rgb_array=mode == "rgb_array")
def close(self):
if self.viewer:
@@ -99,4 +94,4 @@ class PendulumEnv(gym.Env):
def angle_normalize(x):
return (((x+np.pi) % (2*np.pi)) - np.pi)
return ((x + np.pi) % (2 * np.pi)) - np.pi
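
A minimal sketch of the torque-driven update in step(): the cost penalises angle, speed and applied torque, and the new angular velocity comes from the pendulum dynamics before being clipped to max_speed (constants match __init__; the function name is illustrative):

import numpy as np

def angle_normalize(x):
    # map any angle into [-pi, pi), as in the helper above
    return ((x + np.pi) % (2 * np.pi)) - np.pi

def pendulum_step(th, thdot, u, g=10.0, m=1.0, l=1.0, dt=0.05, max_speed=8):
    cost = angle_normalize(th) ** 2 + 0.1 * thdot ** 2 + 0.001 * (u ** 2)
    newthdot = thdot + (-3 * g / (2 * l) * np.sin(th + np.pi) + 3.0 / (m * l ** 2) * u) * dt
    newth = th + newthdot * dt
    newthdot = np.clip(newthdot, -max_speed, max_speed)
    return newth, newthdot, -cost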

View File

@@ -5,8 +5,8 @@ import os
import sys
if "Apple" in sys.version:
if 'DYLD_FALLBACK_LIBRARY_PATH' in os.environ:
os.environ['DYLD_FALLBACK_LIBRARY_PATH'] += ':/usr/lib'
if "DYLD_FALLBACK_LIBRARY_PATH" in os.environ:
os.environ["DYLD_FALLBACK_LIBRARY_PATH"] += ":/usr/lib"
# (JDS 2016/04/15): avoid bug on Anaconda 2.3.0 / Yosemite
from gym import error
@@ -14,28 +14,33 @@ from gym import error
try:
import pyglet
except ImportError as e:
raise ImportError('''
raise ImportError(
"""
Cannot import pyglet.
HINT: you can install pyglet directly via 'pip install pyglet'.
But if you really just want to install all Gym dependencies and not have to think about it,
'pip install -e .[all]' or 'pip install gym[all]' will do it.
''')
"""
)
try:
from pyglet.gl import *
except ImportError as e:
raise ImportError('''
raise ImportError(
"""
Error occurred while running `from pyglet.gl import *`
HINT: make sure you have OpenGL installed. On Ubuntu, you can run 'apt-get install python-opengl'.
If you're running on a server, you may need a virtual frame buffer; something like this should work:
'xvfb-run -s \"-screen 0 1400x900x24\" python <your_script.py>'
''')
"""
)
import math
import numpy as np
RAD2DEG = 57.29577951308232
def get_display(spec):
"""Convert a display specification (such as :0) into an actual Display
object.
@@ -49,17 +54,30 @@ def get_display(spec):
elif isinstance(spec, str):
return pyglet.canvas.Display(spec)
else:
raise error.Error('Invalid display specification: {}. (Must be a string like :0 or None.)'.format(spec))
raise error.Error(
"Invalid display specification: {}. (Must be a string like :0 or None.)".format(
spec
)
)
def get_window(width, height, display, **kwargs):
"""
Will create a pyglet window from the display specification provided.
"""
screen = display.get_screens() #available screens
config = screen[0].get_best_config() #selecting the first screen
context = config.create_context(None) #create GL context
screen = display.get_screens() # available screens
config = screen[0].get_best_config() # selecting the first screen
context = config.create_context(None) # create GL context
return pyglet.window.Window(
width=width,
height=height,
display=display,
config=config,
context=context,
**kwargs
)
return pyglet.window.Window(width=width, height=height, display=display, config=config, context=context, **kwargs)
class Viewer(object):
def __init__(self, width, height, display=None):
@@ -88,11 +106,11 @@ class Viewer(object):
def set_bounds(self, left, right, bottom, top):
assert right > left and top > bottom
scalex = self.width/(right-left)
scaley = self.height/(top-bottom)
scalex = self.width / (right - left)
scaley = self.height / (top - bottom)
self.transform = Transform(
translation=(-left*scalex, -bottom*scaley),
scale=(scalex, scaley))
translation=(-left * scalex, -bottom * scaley), scale=(scalex, scaley)
)
def add_geom(self, geom):
self.geoms.append(geom)
@@ -101,7 +119,7 @@ class Viewer(object):
self.onetime_geoms.append(geom)
def render(self, return_rgb_array=False):
glClearColor(1,1,1,1)
glClearColor(1, 1, 1, 1)
self.window.clear()
self.window.switch_to()
self.window.dispatch_events()
@@ -123,7 +141,7 @@ class Viewer(object):
# the boundary.) So we use the buffer height/width rather
# than the requested one.
arr = arr.reshape(buffer.height, buffer.width, 4)
arr = arr[::-1,:,0:3]
arr = arr[::-1, :, 0:3]
self.window.flip()
self.onetime_geoms = []
return arr if return_rgb_array else self.isopen
@@ -155,140 +173,181 @@ class Viewer(object):
def get_array(self):
self.window.flip()
image_data = pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
image_data = (
pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
)
self.window.flip()
arr = np.fromstring(image_data.get_data(), dtype=np.uint8, sep='')
arr = np.fromstring(image_data.get_data(), dtype=np.uint8, sep="")
arr = arr.reshape(self.height, self.width, 4)
return arr[::-1,:,0:3]
return arr[::-1, :, 0:3]
def __del__(self):
self.close()
def _add_attrs(geom, attrs):
if "color" in attrs:
geom.set_color(*attrs["color"])
if "linewidth" in attrs:
geom.set_linewidth(attrs["linewidth"])
class Geom(object):
def __init__(self):
self._color=Color((0, 0, 0, 1.0))
self._color = Color((0, 0, 0, 1.0))
self.attrs = [self._color]
def render(self):
for attr in reversed(self.attrs):
attr.enable()
self.render1()
for attr in self.attrs:
attr.disable()
def render1(self):
raise NotImplementedError
def add_attr(self, attr):
self.attrs.append(attr)
def set_color(self, r, g, b):
self._color.vec4 = (r, g, b, 1)
class Attr(object):
def enable(self):
raise NotImplementedError
def disable(self):
pass
class Transform(Attr):
def __init__(self, translation=(0.0, 0.0), rotation=0.0, scale=(1,1)):
def __init__(self, translation=(0.0, 0.0), rotation=0.0, scale=(1, 1)):
self.set_translation(*translation)
self.set_rotation(rotation)
self.set_scale(*scale)
def enable(self):
glPushMatrix()
        glTranslatef(self.translation[0], self.translation[1], 0) # translate to GL loc point
glTranslatef(
self.translation[0], self.translation[1], 0
        )  # translate to GL loc point
glRotatef(RAD2DEG * self.rotation, 0, 0, 1.0)
glScalef(self.scale[0], self.scale[1], 1)
def disable(self):
glPopMatrix()
def set_translation(self, newx, newy):
self.translation = (float(newx), float(newy))
def set_rotation(self, new):
self.rotation = float(new)
def set_scale(self, newx, newy):
self.scale = (float(newx), float(newy))
class Color(Attr):
def __init__(self, vec4):
self.vec4 = vec4
def enable(self):
glColor4f(*self.vec4)
class LineStyle(Attr):
def __init__(self, style):
self.style = style
def enable(self):
glEnable(GL_LINE_STIPPLE)
glLineStipple(1, self.style)
def disable(self):
glDisable(GL_LINE_STIPPLE)
class LineWidth(Attr):
def __init__(self, stroke):
self.stroke = stroke
def enable(self):
glLineWidth(self.stroke)
class Point(Geom):
def __init__(self):
Geom.__init__(self)
def render1(self):
glBegin(GL_POINTS) # draw point
glBegin(GL_POINTS) # draw point
glVertex3f(0.0, 0.0, 0.0)
glEnd()
class FilledPolygon(Geom):
def __init__(self, v):
Geom.__init__(self)
self.v = v
def render1(self):
if len(self.v) == 4 : glBegin(GL_QUADS)
elif len(self.v) > 4 : glBegin(GL_POLYGON)
else: glBegin(GL_TRIANGLES)
if len(self.v) == 4:
glBegin(GL_QUADS)
elif len(self.v) > 4:
glBegin(GL_POLYGON)
else:
glBegin(GL_TRIANGLES)
for p in self.v:
glVertex3f(p[0], p[1],0) # draw each vertex
glVertex3f(p[0], p[1], 0) # draw each vertex
glEnd()
def make_circle(radius=10, res=30, filled=True):
points = []
for i in range(res):
ang = 2*math.pi*i / res
points.append((math.cos(ang)*radius, math.sin(ang)*radius))
ang = 2 * math.pi * i / res
points.append((math.cos(ang) * radius, math.sin(ang) * radius))
if filled:
return FilledPolygon(points)
else:
return PolyLine(points, True)
def make_polygon(v, filled=True):
if filled: return FilledPolygon(v)
else: return PolyLine(v, True)
if filled:
return FilledPolygon(v)
else:
return PolyLine(v, True)
def make_polyline(v):
return PolyLine(v, False)
def make_capsule(length, width):
l, r, t, b = 0, length, width/2, -width/2
box = make_polygon([(l,b), (l,t), (r,t), (r,b)])
circ0 = make_circle(width/2)
circ1 = make_circle(width/2)
l, r, t, b = 0, length, width / 2, -width / 2
box = make_polygon([(l, b), (l, t), (r, t), (r, b)])
circ0 = make_circle(width / 2)
circ1 = make_circle(width / 2)
circ1.add_attr(Transform(translation=(length, 0)))
geom = Compound([box, circ0, circ1])
return geom
class Compound(Geom):
def __init__(self, gs):
Geom.__init__(self)
self.gs = gs
for g in self.gs:
g.attrs = [a for a in g.attrs if not isinstance(a, Color)]
def render1(self):
for g in self.gs:
g.render()
class PolyLine(Geom):
def __init__(self, v, close):
Geom.__init__(self)
@@ -296,14 +355,17 @@ class PolyLine(Geom):
self.close = close
self.linewidth = LineWidth(1)
self.add_attr(self.linewidth)
def render1(self):
glBegin(GL_LINE_LOOP if self.close else GL_LINE_STRIP)
for p in self.v:
glVertex3f(p[0], p[1],0) # draw each vertex
glVertex3f(p[0], p[1], 0) # draw each vertex
glEnd()
def set_linewidth(self, x):
self.linewidth.stroke = x
class Line(Geom):
def __init__(self, start=(0.0, 0.0), end=(0.0, 0.0)):
Geom.__init__(self)
@@ -318,6 +380,7 @@ class Line(Geom):
glVertex2f(*self.end)
glEnd()
class Image(Geom):
def __init__(self, fname, width, height):
Geom.__init__(self)
@@ -327,17 +390,23 @@ class Image(Geom):
img = pyglet.image.load(fname)
self.img = img
self.flip = False
def render1(self):
self.img.blit(-self.width/2, -self.height/2, width=self.width, height=self.height)
self.img.blit(
-self.width / 2, -self.height / 2, width=self.width, height=self.height
)
# ================================================================
class SimpleImageViewer(object):
def __init__(self, display=None, maxwidth=500):
self.window = None
self.isopen = False
self.display = get_display(display)
self.maxwidth = maxwidth
def imshow(self, arr):
if self.window is None:
height, width, _channels = arr.shape
@@ -345,7 +414,13 @@ class SimpleImageViewer(object):
scale = self.maxwidth / width
width = int(scale * width)
height = int(scale * height)
self.window = get_window(width=width, height=height, display=self.display, vsync=False, resizable=True)
self.window = get_window(
width=width,
height=height,
display=self.display,
vsync=False,
resizable=True,
)
self.width = width
self.height = height
self.isopen = True
@@ -360,18 +435,19 @@ class SimpleImageViewer(object):
self.isopen = False
assert len(arr.shape) == 3, "You passed in an image with the wrong number of dimensions"
image = pyglet.image.ImageData(arr.shape[1], arr.shape[0],
'RGB', arr.tobytes(), pitch=arr.shape[1]*-3)
gl.glTexParameteri(gl.GL_TEXTURE_2D,
gl.GL_TEXTURE_MAG_FILTER, gl.GL_NEAREST)
image = pyglet.image.ImageData(
arr.shape[1], arr.shape[0], "RGB", arr.tobytes(), pitch=arr.shape[1] * -3
)
gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MAG_FILTER, gl.GL_NEAREST)
texture = image.get_texture()
texture.width = self.width
texture.height = self.height
self.window.clear()
self.window.switch_to()
self.window.dispatch_events()
texture.blit(0, 0) # draw
texture.blit(0, 0) # draw
self.window.flip()
def close(self):
if self.isopen and sys.meta_path:
# ^^^ check sys.meta_path to avoid 'ImportError: sys.meta_path is None, Python is likely shutting down'
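A note on the `sys.meta_path` guard in `SimpleImageViewer.close()` above: when `close()` is reached from `__del__` during interpreter shutdown, the import machinery is already torn down and `sys.meta_path` is `None`, so touching pyglet at that point raises the quoted ImportError. A minimal sketch of the same guard pattern with a placeholder resource instead of a pyglet window (the class name is illustrative, not from this commit):

import sys


class GuardedViewer:
    def __init__(self):
        self.isopen = True

    def close(self):
        # Skip cleanup if Python is shutting down; sys.meta_path is cleared
        # late in interpreter teardown, which is what this check detects.
        if self.isopen and sys.meta_path:
            # ... release the window / GL context here ...
            self.isopen = False

    def __del__(self):
        # __del__ can run during shutdown, so close() must tolerate that case.
        self.close()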
View File
@@ -1,4 +1,5 @@
from gym.envs.mujoco.mujoco_env import MujocoEnv
# ^^^^^ so that the user gets the correct error
# message if mujoco is not installed correctly
from gym.envs.mujoco.ant import AntEnv
View File
@@ -2,42 +2,53 @@ import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
mujoco_env.MujocoEnv.__init__(self, 'ant.xml', 5)
mujoco_env.MujocoEnv.__init__(self, "ant.xml", 5)
utils.EzPickle.__init__(self)
def step(self, a):
xposbefore = self.get_body_com("torso")[0]
self.do_simulation(a, self.frame_skip)
xposafter = self.get_body_com("torso")[0]
forward_reward = (xposafter - xposbefore)/self.dt
ctrl_cost = .5 * np.square(a).sum()
contact_cost = 0.5 * 1e-3 * np.sum(
np.square(np.clip(self.sim.data.cfrc_ext, -1, 1)))
forward_reward = (xposafter - xposbefore) / self.dt
ctrl_cost = 0.5 * np.square(a).sum()
contact_cost = (
0.5 * 1e-3 * np.sum(np.square(np.clip(self.sim.data.cfrc_ext, -1, 1)))
)
survive_reward = 1.0
reward = forward_reward - ctrl_cost - contact_cost + survive_reward
state = self.state_vector()
notdone = np.isfinite(state).all() \
and state[2] >= 0.2 and state[2] <= 1.0
notdone = np.isfinite(state).all() and state[2] >= 0.2 and state[2] <= 1.0
done = not notdone
ob = self._get_obs()
return ob, reward, done, dict(
reward_forward=forward_reward,
reward_ctrl=-ctrl_cost,
reward_contact=-contact_cost,
reward_survive=survive_reward)
return (
ob,
reward,
done,
dict(
reward_forward=forward_reward,
reward_ctrl=-ctrl_cost,
reward_contact=-contact_cost,
reward_survive=survive_reward,
),
)
def _get_obs(self):
return np.concatenate([
self.sim.data.qpos.flat[2:],
self.sim.data.qvel.flat,
np.clip(self.sim.data.cfrc_ext, -1, 1).flat,
])
return np.concatenate(
[
self.sim.data.qpos.flat[2:],
self.sim.data.qvel.flat,
np.clip(self.sim.data.cfrc_ext, -1, 1).flat,
]
)
def reset_model(self):
qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq, low=-.1, high=.1)
qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1
qpos = self.init_qpos + self.np_random.uniform(
size=self.model.nq, low=-0.1, high=0.1
)
qvel = self.init_qvel + self.np_random.randn(self.model.nv) * 0.1
self.set_state(qpos, qvel)
return self._get_obs()
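The blackened `AntEnv.step()` above still returns the same four reward terms in its info dict. A quick usage sketch, assuming mujoco_py is installed and the environment is registered under its usual id `Ant-v2` (the id is not part of this file):

import gym

env = gym.make("Ant-v2")
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
# reward == forward_reward - ctrl_cost - contact_cost + survive_reward,
# and the individual terms are exposed in info:
print(info["reward_forward"], info["reward_ctrl"],
      info["reward_contact"], info["reward_survive"])
env.close()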
View File
@@ -4,21 +4,23 @@ from gym.envs.mujoco import mujoco_env
DEFAULT_CAMERA_CONFIG = {
'distance': 4.0,
"distance": 4.0,
}
class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self,
xml_file='ant.xml',
ctrl_cost_weight=0.5,
contact_cost_weight=5e-4,
healthy_reward=1.0,
terminate_when_unhealthy=True,
healthy_z_range=(0.2, 1.0),
contact_force_range=(-1.0, 1.0),
reset_noise_scale=0.1,
exclude_current_positions_from_observation=True):
def __init__(
self,
xml_file="ant.xml",
ctrl_cost_weight=0.5,
contact_cost_weight=5e-4,
healthy_reward=1.0,
terminate_when_unhealthy=True,
healthy_z_range=(0.2, 1.0),
contact_force_range=(-1.0, 1.0),
reset_noise_scale=0.1,
exclude_current_positions_from_observation=True,
):
utils.EzPickle.__init__(**locals())
self._ctrl_cost_weight = ctrl_cost_weight
@@ -33,16 +35,17 @@ class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
self._reset_noise_scale = reset_noise_scale
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation)
exclude_current_positions_from_observation
)
mujoco_env.MujocoEnv.__init__(self, xml_file, 5)
@property
def healthy_reward(self):
return float(
self.is_healthy
or self._terminate_when_unhealthy
) * self._healthy_reward
return (
float(self.is_healthy or self._terminate_when_unhealthy)
* self._healthy_reward
)
def control_cost(self, action):
control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
@@ -58,21 +61,20 @@ class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
@property
def contact_cost(self):
contact_cost = self._contact_cost_weight * np.sum(
np.square(self.contact_forces))
np.square(self.contact_forces)
)
return contact_cost
@property
def is_healthy(self):
state = self.state_vector()
min_z, max_z = self._healthy_z_range
is_healthy = (np.isfinite(state).all() and min_z <= state[2] <= max_z)
is_healthy = np.isfinite(state).all() and min_z <= state[2] <= max_z
return is_healthy
@property
def done(self):
done = (not self.is_healthy
if self._terminate_when_unhealthy
else False)
done = not self.is_healthy if self._terminate_when_unhealthy else False
return done
def step(self, action):
@@ -96,18 +98,16 @@ class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
done = self.done
observation = self._get_obs()
info = {
'reward_forward': forward_reward,
'reward_ctrl': -ctrl_cost,
'reward_contact': -contact_cost,
'reward_survive': healthy_reward,
'x_position': xy_position_after[0],
'y_position': xy_position_after[1],
'distance_from_origin': np.linalg.norm(xy_position_after, ord=2),
'x_velocity': x_velocity,
'y_velocity': y_velocity,
'forward_reward': forward_reward,
"reward_forward": forward_reward,
"reward_ctrl": -ctrl_cost,
"reward_contact": -contact_cost,
"reward_survive": healthy_reward,
"x_position": xy_position_after[0],
"y_position": xy_position_after[1],
"distance_from_origin": np.linalg.norm(xy_position_after, ord=2),
"x_velocity": x_velocity,
"y_velocity": y_velocity,
"forward_reward": forward_reward,
}
return observation, reward, done, info
@@ -129,9 +129,11 @@ class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq)
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = self.init_qvel + self._reset_noise_scale * self.np_random.randn(
self.model.nv)
self.model.nv
)
self.set_state(qpos, qvel)
observation = self._get_obs()
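The `utils.EzPickle.__init__(**locals())` call in the new-style constructor above is the pattern all of the reformatted v3 envs use: as I read it, EzPickle records the constructor arguments so that pickling the env simply re-runs `__init__` instead of trying to serialize MuJoCo state. A minimal sketch of that pattern with a made-up class (the description of EzPickle internals is an assumption, not something this commit changes):

import pickle

from gym import utils


class Configurable(utils.EzPickle):
    def __init__(self, xml_file="model.xml", ctrl_cost_weight=0.5):
        # locals() here is {"self": ..., "xml_file": ..., "ctrl_cost_weight": ...};
        # EzPickle stashes the keyword arguments and replays them on unpickling.
        utils.EzPickle.__init__(**locals())
        self.xml_file = xml_file
        self.ctrl_cost_weight = ctrl_cost_weight


env = Configurable(ctrl_cost_weight=1.0)
clone = pickle.loads(pickle.dumps(env))
print(clone.ctrl_cost_weight)  # 1.0, rebuilt by calling __init__ again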
View File
@@ -2,9 +2,10 @@ import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
mujoco_env.MujocoEnv.__init__(self, 'half_cheetah.xml', 5)
mujoco_env.MujocoEnv.__init__(self, "half_cheetah.xml", 5)
utils.EzPickle.__init__(self)
def step(self, action):
@@ -12,21 +13,25 @@ class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle):
self.do_simulation(action, self.frame_skip)
xposafter = self.sim.data.qpos[0]
ob = self._get_obs()
reward_ctrl = - 0.1 * np.square(action).sum()
reward_run = (xposafter - xposbefore)/self.dt
reward_ctrl = -0.1 * np.square(action).sum()
reward_run = (xposafter - xposbefore) / self.dt
reward = reward_ctrl + reward_run
done = False
return ob, reward, done, dict(reward_run=reward_run, reward_ctrl=reward_ctrl)
def _get_obs(self):
return np.concatenate([
self.sim.data.qpos.flat[1:],
self.sim.data.qvel.flat,
])
return np.concatenate(
[
self.sim.data.qpos.flat[1:],
self.sim.data.qvel.flat,
]
)
def reset_model(self):
qpos = self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq)
qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1
qpos = self.init_qpos + self.np_random.uniform(
low=-0.1, high=0.1, size=self.model.nq
)
qvel = self.init_qvel + self.np_random.randn(self.model.nv) * 0.1
self.set_state(qpos, qvel)
return self._get_obs()
View File
@@ -4,17 +4,19 @@ from gym.envs.mujoco import mujoco_env
DEFAULT_CAMERA_CONFIG = {
'distance': 4.0,
"distance": 4.0,
}
class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self,
xml_file='half_cheetah.xml',
forward_reward_weight=1.0,
ctrl_cost_weight=0.1,
reset_noise_scale=0.1,
exclude_current_positions_from_observation=True):
def __init__(
self,
xml_file="half_cheetah.xml",
forward_reward_weight=1.0,
ctrl_cost_weight=0.1,
reset_noise_scale=0.1,
exclude_current_positions_from_observation=True,
):
utils.EzPickle.__init__(**locals())
self._forward_reward_weight = forward_reward_weight
@@ -24,7 +26,8 @@ class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle):
self._reset_noise_scale = reset_noise_scale
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation)
exclude_current_positions_from_observation
)
mujoco_env.MujocoEnv.__init__(self, xml_file, 5)
@@ -36,8 +39,7 @@ class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle):
x_position_before = self.sim.data.qpos[0]
self.do_simulation(action, self.frame_skip)
x_position_after = self.sim.data.qpos[0]
x_velocity = ((x_position_after - x_position_before)
/ self.dt)
x_velocity = (x_position_after - x_position_before) / self.dt
ctrl_cost = self.control_cost(action)
@@ -47,11 +49,10 @@ class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle):
reward = forward_reward - ctrl_cost
done = False
info = {
'x_position': x_position_after,
'x_velocity': x_velocity,
'reward_run': forward_reward,
'reward_ctrl': -ctrl_cost
"x_position": x_position_after,
"x_velocity": x_velocity,
"reward_run": forward_reward,
"reward_ctrl": -ctrl_cost,
}
return observation, reward, done, info
@@ -71,9 +72,11 @@ class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle):
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq)
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = self.init_qvel + self._reset_noise_scale * self.np_random.randn(
self.model.nv)
self.model.nv
)
self.set_state(qpos, qvel)
View File
@@ -2,9 +2,10 @@ import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
mujoco_env.MujocoEnv.__init__(self, 'hopper.xml', 4)
mujoco_env.MujocoEnv.__init__(self, "hopper.xml", 4)
utils.EzPickle.__init__(self)
def step(self, a):
@@ -16,20 +17,27 @@ class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
reward += alive_bonus
reward -= 1e-3 * np.square(a).sum()
s = self.state_vector()
done = not (np.isfinite(s).all() and (np.abs(s[2:]) < 100).all() and
(height > .7) and (abs(ang) < .2))
done = not (
np.isfinite(s).all()
and (np.abs(s[2:]) < 100).all()
and (height > 0.7)
and (abs(ang) < 0.2)
)
ob = self._get_obs()
return ob, reward, done, {}
def _get_obs(self):
return np.concatenate([
self.sim.data.qpos.flat[1:],
np.clip(self.sim.data.qvel.flat, -10, 10)
])
return np.concatenate(
[self.sim.data.qpos.flat[1:], np.clip(self.sim.data.qvel.flat, -10, 10)]
)
def reset_model(self):
qpos = self.init_qpos + self.np_random.uniform(low=-.005, high=.005, size=self.model.nq)
qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
qpos = self.init_qpos + self.np_random.uniform(
low=-0.005, high=0.005, size=self.model.nq
)
qvel = self.init_qvel + self.np_random.uniform(
low=-0.005, high=0.005, size=self.model.nv
)
self.set_state(qpos, qvel)
return self._get_obs()
View File
@@ -4,25 +4,27 @@ from gym import utils
DEFAULT_CAMERA_CONFIG = {
'trackbodyid': 2,
'distance': 3.0,
'lookat': np.array((0.0, 0.0, 1.15)),
'elevation': -20.0,
"trackbodyid": 2,
"distance": 3.0,
"lookat": np.array((0.0, 0.0, 1.15)),
"elevation": -20.0,
}
class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self,
xml_file='hopper.xml',
forward_reward_weight=1.0,
ctrl_cost_weight=1e-3,
healthy_reward=1.0,
terminate_when_unhealthy=True,
healthy_state_range=(-100.0, 100.0),
healthy_z_range=(0.7, float('inf')),
healthy_angle_range=(-0.2, 0.2),
reset_noise_scale=5e-3,
exclude_current_positions_from_observation=True):
def __init__(
self,
xml_file="hopper.xml",
forward_reward_weight=1.0,
ctrl_cost_weight=1e-3,
healthy_reward=1.0,
terminate_when_unhealthy=True,
healthy_state_range=(-100.0, 100.0),
healthy_z_range=(0.7, float("inf")),
healthy_angle_range=(-0.2, 0.2),
reset_noise_scale=5e-3,
exclude_current_positions_from_observation=True,
):
utils.EzPickle.__init__(**locals())
self._forward_reward_weight = forward_reward_weight
@@ -39,16 +41,17 @@ class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
self._reset_noise_scale = reset_noise_scale
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation)
exclude_current_positions_from_observation
)
mujoco_env.MujocoEnv.__init__(self, xml_file, 4)
@property
def healthy_reward(self):
return float(
self.is_healthy
or self._terminate_when_unhealthy
) * self._healthy_reward
return (
float(self.is_healthy or self._terminate_when_unhealthy)
* self._healthy_reward
)
def control_cost(self, action):
control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
@@ -63,8 +66,7 @@ class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
min_z, max_z = self._healthy_z_range
min_angle, max_angle = self._healthy_angle_range
healthy_state = np.all(
np.logical_and(min_state < state, state < max_state))
healthy_state = np.all(np.logical_and(min_state < state, state < max_state))
healthy_z = min_z < z < max_z
healthy_angle = min_angle < angle < max_angle
@@ -74,15 +76,12 @@ class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
@property
def done(self):
done = (not self.is_healthy
if self._terminate_when_unhealthy
else False)
done = not self.is_healthy if self._terminate_when_unhealthy else False
return done
def _get_obs(self):
position = self.sim.data.qpos.flat.copy()
velocity = np.clip(
self.sim.data.qvel.flat.copy(), -10, 10)
velocity = np.clip(self.sim.data.qvel.flat.copy(), -10, 10)
if self._exclude_current_positions_from_observation:
position = position[1:]
@@ -94,8 +93,7 @@ class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
x_position_before = self.sim.data.qpos[0]
self.do_simulation(action, self.frame_skip)
x_position_after = self.sim.data.qpos[0]
x_velocity = ((x_position_after - x_position_before)
/ self.dt)
x_velocity = (x_position_after - x_position_before) / self.dt
ctrl_cost = self.control_cost(action)
@@ -109,8 +107,8 @@ class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
reward = rewards - costs
done = self.done
info = {
'x_position': x_position_after,
'x_velocity': x_velocity,
"x_position": x_position_after,
"x_velocity": x_velocity,
}
return observation, reward, done, info
@@ -120,9 +118,11 @@ class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq)
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = self.init_qvel + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nv)
low=noise_low, high=noise_high, size=self.model.nv
)
self.set_state(qpos, qvel)
View File
@@ -2,24 +2,30 @@ import numpy as np
from gym.envs.mujoco import mujoco_env
from gym import utils
def mass_center(model, sim):
mass = np.expand_dims(model.body_mass, 1)
xpos = sim.data.xipos
return (np.sum(mass * xpos, 0) / np.sum(mass))[0]
class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
mujoco_env.MujocoEnv.__init__(self, 'humanoid.xml', 5)
mujoco_env.MujocoEnv.__init__(self, "humanoid.xml", 5)
utils.EzPickle.__init__(self)
def _get_obs(self):
data = self.sim.data
return np.concatenate([data.qpos.flat[2:],
data.qvel.flat,
data.cinert.flat,
data.cvel.flat,
data.qfrc_actuator.flat,
data.cfrc_ext.flat])
return np.concatenate(
[
data.qpos.flat[2:],
data.qvel.flat,
data.cinert.flat,
data.cvel.flat,
data.qfrc_actuator.flat,
data.cfrc_ext.flat,
]
)
def step(self, a):
pos_before = mass_center(self.model, self.sim)
@@ -29,18 +35,33 @@ class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
data = self.sim.data
lin_vel_cost = 1.25 * (pos_after - pos_before) / self.dt
quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum()
quad_impact_cost = 0.5e-6 * np.square(data.cfrc_ext).sum()
quad_impact_cost = min(quad_impact_cost, 10)
reward = lin_vel_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus
qpos = self.sim.data.qpos
done = bool((qpos[2] < 1.0) or (qpos[2] > 2.0))
return self._get_obs(), reward, done, dict(reward_linvel=lin_vel_cost, reward_quadctrl=-quad_ctrl_cost, reward_alive=alive_bonus, reward_impact=-quad_impact_cost)
return (
self._get_obs(),
reward,
done,
dict(
reward_linvel=lin_vel_cost,
reward_quadctrl=-quad_ctrl_cost,
reward_alive=alive_bonus,
reward_impact=-quad_impact_cost,
),
)
def reset_model(self):
c = 0.01
self.set_state(
self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv,)
self.init_qvel
+ self.np_random.uniform(
low=-c,
high=c,
size=self.model.nv,
),
)
return self._get_obs()
View File
@@ -4,10 +4,10 @@ from gym import utils
DEFAULT_CAMERA_CONFIG = {
'trackbodyid': 1,
'distance': 4.0,
'lookat': np.array((0.0, 0.0, 2.0)),
'elevation': -20.0,
"trackbodyid": 1,
"distance": 4.0,
"lookat": np.array((0.0, 0.0, 2.0)),
"elevation": -20.0,
}
@@ -18,17 +18,19 @@ def mass_center(model, sim):
class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self,
xml_file='humanoid.xml',
forward_reward_weight=1.25,
ctrl_cost_weight=0.1,
contact_cost_weight=5e-7,
contact_cost_range=(-np.inf, 10.0),
healthy_reward=5.0,
terminate_when_unhealthy=True,
healthy_z_range=(1.0, 2.0),
reset_noise_scale=1e-2,
exclude_current_positions_from_observation=True):
def __init__(
self,
xml_file="humanoid.xml",
forward_reward_weight=1.25,
ctrl_cost_weight=0.1,
contact_cost_weight=5e-7,
contact_cost_range=(-np.inf, 10.0),
healthy_reward=5.0,
terminate_when_unhealthy=True,
healthy_z_range=(1.0, 2.0),
reset_noise_scale=1e-2,
exclude_current_positions_from_observation=True,
):
utils.EzPickle.__init__(**locals())
self._forward_reward_weight = forward_reward_weight
@@ -42,27 +44,26 @@ class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
self._reset_noise_scale = reset_noise_scale
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation)
exclude_current_positions_from_observation
)
mujoco_env.MujocoEnv.__init__(self, xml_file, 5)
@property
def healthy_reward(self):
return float(
self.is_healthy
or self._terminate_when_unhealthy
) * self._healthy_reward
return (
float(self.is_healthy or self._terminate_when_unhealthy)
* self._healthy_reward
)
def control_cost(self, action):
control_cost = self._ctrl_cost_weight * np.sum(
np.square(self.sim.data.ctrl))
control_cost = self._ctrl_cost_weight * np.sum(np.square(self.sim.data.ctrl))
return control_cost
@property
def contact_cost(self):
contact_forces = self.sim.data.cfrc_ext
contact_cost = self._contact_cost_weight * np.sum(
np.square(contact_forces))
contact_cost = self._contact_cost_weight * np.sum(np.square(contact_forces))
min_cost, max_cost = self._contact_cost_range
contact_cost = np.clip(contact_cost, min_cost, max_cost)
return contact_cost
@@ -76,9 +77,7 @@ class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
@property
def done(self):
done = ((not self.is_healthy)
if self._terminate_when_unhealthy
else False)
done = (not self.is_healthy) if self._terminate_when_unhealthy else False
return done
def _get_obs(self):
@@ -94,14 +93,16 @@ class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
if self._exclude_current_positions_from_observation:
position = position[2:]
return np.concatenate((
position,
velocity,
com_inertia,
com_velocity,
actuator_forces,
external_contact_forces,
))
return np.concatenate(
(
position,
velocity,
com_inertia,
com_velocity,
actuator_forces,
external_contact_forces,
)
)
def step(self, action):
xy_position_before = mass_center(self.model, self.sim)
@@ -124,18 +125,16 @@ class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
reward = rewards - costs
done = self.done
info = {
'reward_linvel': forward_reward,
'reward_quadctrl': -ctrl_cost,
'reward_alive': healthy_reward,
'reward_impact': -contact_cost,
'x_position': xy_position_after[0],
'y_position': xy_position_after[1],
'distance_from_origin': np.linalg.norm(xy_position_after, ord=2),
'x_velocity': x_velocity,
'y_velocity': y_velocity,
'forward_reward': forward_reward,
"reward_linvel": forward_reward,
"reward_quadctrl": -ctrl_cost,
"reward_alive": healthy_reward,
"reward_impact": -contact_cost,
"x_position": xy_position_after[0],
"y_position": xy_position_after[1],
"distance_from_origin": np.linalg.norm(xy_position_after, ord=2),
"x_velocity": x_velocity,
"y_velocity": y_velocity,
"forward_reward": forward_reward,
}
return observation, reward, done, info
@@ -145,9 +144,11 @@ class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq)
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = self.init_qvel + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nv)
low=noise_low, high=noise_high, size=self.model.nv
)
self.set_state(qpos, qvel)
observation = self._get_obs()
View File
@@ -2,19 +2,24 @@ from gym.envs.mujoco import mujoco_env
from gym import utils
import numpy as np
class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
mujoco_env.MujocoEnv.__init__(self, 'humanoidstandup.xml', 5)
mujoco_env.MujocoEnv.__init__(self, "humanoidstandup.xml", 5)
utils.EzPickle.__init__(self)
def _get_obs(self):
data = self.sim.data
return np.concatenate([data.qpos.flat[2:],
data.qvel.flat,
data.cinert.flat,
data.cvel.flat,
data.qfrc_actuator.flat,
data.cfrc_ext.flat])
return np.concatenate(
[
data.qpos.flat[2:],
data.qvel.flat,
data.cinert.flat,
data.cvel.flat,
data.qfrc_actuator.flat,
data.cfrc_ext.flat,
]
)
def step(self, a):
self.do_simulation(a, self.frame_skip)
@@ -23,18 +28,32 @@ class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
uph_cost = (pos_after - 0) / self.model.opt.timestep
quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum()
quad_impact_cost = 0.5e-6 * np.square(data.cfrc_ext).sum()
quad_impact_cost = min(quad_impact_cost, 10)
reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1
done = bool(False)
return self._get_obs(), reward, done, dict(reward_linup=uph_cost, reward_quadctrl=-quad_ctrl_cost, reward_impact=-quad_impact_cost)
return (
self._get_obs(),
reward,
done,
dict(
reward_linup=uph_cost,
reward_quadctrl=-quad_ctrl_cost,
reward_impact=-quad_impact_cost,
),
)
def reset_model(self):
c = 0.01
self.set_state(
self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv,)
self.init_qvel
+ self.np_random.uniform(
low=-c,
high=c,
size=self.model.nv,
),
)
return self._get_obs()
View File
@@ -2,10 +2,10 @@ import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
class InvertedDoublePendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
class InvertedDoublePendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
mujoco_env.MujocoEnv.__init__(self, 'inverted_double_pendulum.xml', 5)
mujoco_env.MujocoEnv.__init__(self, "inverted_double_pendulum.xml", 5)
utils.EzPickle.__init__(self)
def step(self, action):
@@ -14,25 +14,28 @@ class InvertedDoublePendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
x, _, y = self.sim.data.site_xpos[0]
dist_penalty = 0.01 * x ** 2 + (y - 2) ** 2
v1, v2 = self.sim.data.qvel[1:3]
vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2
vel_penalty = 1e-3 * v1 ** 2 + 5e-3 * v2 ** 2
alive_bonus = 10
r = alive_bonus - dist_penalty - vel_penalty
done = bool(y <= 1)
return ob, r, done, {}
def _get_obs(self):
return np.concatenate([
self.sim.data.qpos[:1], # cart x pos
np.sin(self.sim.data.qpos[1:]), # link angles
np.cos(self.sim.data.qpos[1:]),
np.clip(self.sim.data.qvel, -10, 10),
np.clip(self.sim.data.qfrc_constraint, -10, 10)
]).ravel()
return np.concatenate(
[
self.sim.data.qpos[:1], # cart x pos
np.sin(self.sim.data.qpos[1:]), # link angles
np.cos(self.sim.data.qpos[1:]),
np.clip(self.sim.data.qvel, -10, 10),
np.clip(self.sim.data.qfrc_constraint, -10, 10),
]
).ravel()
def reset_model(self):
self.set_state(
self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq),
self.init_qvel + self.np_random.randn(self.model.nv) * .1
self.init_qpos
+ self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq),
self.init_qvel + self.np_random.randn(self.model.nv) * 0.1,
)
return self._get_obs()
View File
@@ -2,22 +2,27 @@ import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
class InvertedPendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
utils.EzPickle.__init__(self)
mujoco_env.MujocoEnv.__init__(self, 'inverted_pendulum.xml', 2)
mujoco_env.MujocoEnv.__init__(self, "inverted_pendulum.xml", 2)
def step(self, a):
reward = 1.0
self.do_simulation(a, self.frame_skip)
ob = self._get_obs()
notdone = np.isfinite(ob).all() and (np.abs(ob[1]) <= .2)
notdone = np.isfinite(ob).all() and (np.abs(ob[1]) <= 0.2)
done = not notdone
return ob, reward, done, {}
def reset_model(self):
qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq, low=-0.01, high=0.01)
qvel = self.init_qvel + self.np_random.uniform(size=self.model.nv, low=-0.01, high=0.01)
qpos = self.init_qpos + self.np_random.uniform(
size=self.model.nq, low=-0.01, high=0.01
)
qvel = self.init_qvel + self.np_random.uniform(
size=self.model.nv, low=-0.01, high=0.01
)
self.set_state(qpos, qvel)
return self._get_obs()
View File
@@ -11,20 +11,28 @@ import gym
try:
import mujoco_py
except ImportError as e:
raise error.DependencyNotInstalled("{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(e))
raise error.DependencyNotInstalled(
"{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(
e
)
)
DEFAULT_SIZE = 500
def convert_observation_to_space(observation):
if isinstance(observation, dict):
space = spaces.Dict(OrderedDict([
(key, convert_observation_to_space(value))
for key, value in observation.items()
]))
space = spaces.Dict(
OrderedDict(
[
(key, convert_observation_to_space(value))
for key, value in observation.items()
]
)
)
elif isinstance(observation, np.ndarray):
low = np.full(observation.shape, -float('inf'), dtype=np.float32)
high = np.full(observation.shape, float('inf'), dtype=np.float32)
low = np.full(observation.shape, -float("inf"), dtype=np.float32)
high = np.full(observation.shape, float("inf"), dtype=np.float32)
space = spaces.Box(low, high, dtype=observation.dtype)
else:
raise NotImplementedError(type(observation), observation)
@@ -33,8 +41,7 @@ def convert_observation_to_space(observation):
class MujocoEnv(gym.Env):
"""Superclass for all MuJoCo environments.
"""
"""Superclass for all MuJoCo environments."""
def __init__(self, model_path, frame_skip):
if model_path.startswith("/"):
@@ -51,8 +58,8 @@ class MujocoEnv(gym.Env):
self._viewers = {}
self.metadata = {
'render.modes': ['human', 'rgb_array', 'depth_array'],
'video.frames_per_second': int(np.round(1.0 / self.dt))
"render.modes": ["human", "rgb_array", "depth_array"],
"video.frames_per_second": int(np.round(1.0 / self.dt)),
}
self.init_qpos = self.sim.data.qpos.ravel().copy()
@@ -110,8 +117,9 @@ class MujocoEnv(gym.Env):
def set_state(self, qpos, qvel):
assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,)
old_state = self.sim.get_state()
new_state = mujoco_py.MjSimState(old_state.time, qpos, qvel,
old_state.act, old_state.udd_state)
new_state = mujoco_py.MjSimState(
old_state.time, qpos, qvel, old_state.act, old_state.udd_state
)
self.sim.set_state(new_state)
self.sim.forward()
@@ -124,39 +132,43 @@ class MujocoEnv(gym.Env):
for _ in range(n_frames):
self.sim.step()
def render(self,
mode='human',
width=DEFAULT_SIZE,
height=DEFAULT_SIZE,
camera_id=None,
camera_name=None):
if mode == 'rgb_array' or mode == 'depth_array':
def render(
self,
mode="human",
width=DEFAULT_SIZE,
height=DEFAULT_SIZE,
camera_id=None,
camera_name=None,
):
if mode == "rgb_array" or mode == "depth_array":
if camera_id is not None and camera_name is not None:
raise ValueError("Both `camera_id` and `camera_name` cannot be"
" specified at the same time.")
raise ValueError(
"Both `camera_id` and `camera_name` cannot be"
" specified at the same time."
)
no_camera_specified = camera_name is None and camera_id is None
if no_camera_specified:
camera_name = 'track'
camera_name = "track"
if camera_id is None and camera_name in self.model._camera_name2id:
camera_id = self.model.camera_name2id(camera_name)
self._get_viewer(mode).render(width, height, camera_id=camera_id)
if mode == 'rgb_array':
if mode == "rgb_array":
# window size used for old mujoco-py:
data = self._get_viewer(mode).read_pixels(width, height, depth=False)
# original image is upside-down, so flip it
return data[::-1, :, :]
elif mode == 'depth_array':
elif mode == "depth_array":
self._get_viewer(mode).render(width, height)
# window size used for old mujoco-py:
# Extract depth part of the read_pixels() tuple
data = self._get_viewer(mode).read_pixels(width, height, depth=True)[1]
# original image is upside-down, so flip it
return data[::-1, :]
elif mode == 'human':
elif mode == "human":
self._get_viewer(mode).render()
def close(self):
@@ -168,9 +180,9 @@ class MujocoEnv(gym.Env):
def _get_viewer(self, mode):
self.viewer = self._viewers.get(mode)
if self.viewer is None:
if mode == 'human':
if mode == "human":
self.viewer = mujoco_py.MjViewer(self.sim)
elif mode == 'rgb_array' or mode == 'depth_array':
elif mode == "rgb_array" or mode == "depth_array":
self.viewer = mujoco_py.MjRenderContextOffscreen(self.sim, -1)
self.viewer_setup()
@@ -181,7 +193,4 @@ class MujocoEnv(gym.Env):
return self.data.get_body_xpos(body_name)
def state_vector(self):
return np.concatenate([
self.sim.data.qpos.flat,
self.sim.data.qvel.flat
])
return np.concatenate([self.sim.data.qpos.flat, self.sim.data.qvel.flat])
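For the reformatted `render()` above, the camera selection rules are easiest to see in use: `rgb_array` and `depth_array` accept either `camera_id` or `camera_name` (never both) and fall back to the `"track"` camera when neither is given. A short sketch, assuming a working mujoco_py install; the `HalfCheetah-v2` id is just an example:

import gym

env = gym.make("HalfCheetah-v2")
env.reset()

# Off-screen rendering with an explicit camera name:
frame = env.render(mode="rgb_array", width=256, height=256, camera_name="track")

# Depth rendering with the default ("track") camera:
depth = env.render(mode="depth_array", width=256, height=256)

# Supplying both selectors raises the ValueError shown above:
# env.render(mode="rgb_array", camera_id=0, camera_name="track")

env.close()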
View File
@@ -4,25 +4,25 @@ from gym.envs.mujoco import mujoco_env
import mujoco_py
class PusherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
utils.EzPickle.__init__(self)
mujoco_env.MujocoEnv.__init__(self, 'pusher.xml', 5)
mujoco_env.MujocoEnv.__init__(self, "pusher.xml", 5)
def step(self, a):
vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm")
vec_2 = self.get_body_com("object") - self.get_body_com("goal")
reward_near = - np.linalg.norm(vec_1)
reward_dist = - np.linalg.norm(vec_2)
reward_ctrl = - np.square(a).sum()
reward_near = -np.linalg.norm(vec_1)
reward_dist = -np.linalg.norm(vec_2)
reward_ctrl = -np.square(a).sum()
reward = reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near
self.do_simulation(a, self.frame_skip)
ob = self._get_obs()
done = False
return ob, reward, done, dict(reward_dist=reward_dist,
reward_ctrl=reward_ctrl)
return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl)
def viewer_setup(self):
self.viewer.cam.trackbodyid = -1
@@ -33,25 +33,31 @@ class PusherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
self.goal_pos = np.asarray([0, 0])
while True:
self.cylinder_pos = np.concatenate([
self.cylinder_pos = np.concatenate(
[
self.np_random.uniform(low=-0.3, high=0, size=1),
self.np_random.uniform(low=-0.2, high=0.2, size=1)])
self.np_random.uniform(low=-0.2, high=0.2, size=1),
]
)
if np.linalg.norm(self.cylinder_pos - self.goal_pos) > 0.17:
break
qpos[-4:-2] = self.cylinder_pos
qpos[-2:] = self.goal_pos
qvel = self.init_qvel + self.np_random.uniform(low=-0.005,
high=0.005, size=self.model.nv)
qvel = self.init_qvel + self.np_random.uniform(
low=-0.005, high=0.005, size=self.model.nv
)
qvel[-4:] = 0
self.set_state(qpos, qvel)
return self._get_obs()
def _get_obs(self):
return np.concatenate([
self.sim.data.qpos.flat[:7],
self.sim.data.qvel.flat[:7],
self.get_body_com("tips_arm"),
self.get_body_com("object"),
self.get_body_com("goal"),
])
return np.concatenate(
[
self.sim.data.qpos.flat[:7],
self.sim.data.qvel.flat[:7],
self.get_body_com("tips_arm"),
self.get_body_com("object"),
self.get_body_com("goal"),
]
)
View File
@@ -2,15 +2,16 @@ import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
class ReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
utils.EzPickle.__init__(self)
mujoco_env.MujocoEnv.__init__(self, 'reacher.xml', 2)
mujoco_env.MujocoEnv.__init__(self, "reacher.xml", 2)
def step(self, a):
vec = self.get_body_com("fingertip")-self.get_body_com("target")
reward_dist = - np.linalg.norm(vec)
reward_ctrl = - np.square(a).sum()
vec = self.get_body_com("fingertip") - self.get_body_com("target")
reward_dist = -np.linalg.norm(vec)
reward_ctrl = -np.square(a).sum()
reward = reward_dist + reward_ctrl
self.do_simulation(a, self.frame_skip)
ob = self._get_obs()
@@ -21,23 +22,30 @@ class ReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
self.viewer.cam.trackbodyid = 0
def reset_model(self):
qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
qpos = (
self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq)
+ self.init_qpos
)
while True:
self.goal = self.np_random.uniform(low=-.2, high=.2, size=2)
self.goal = self.np_random.uniform(low=-0.2, high=0.2, size=2)
if np.linalg.norm(self.goal) < 0.2:
break
qpos[-2:] = self.goal
qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
qvel = self.init_qvel + self.np_random.uniform(
low=-0.005, high=0.005, size=self.model.nv
)
qvel[-2:] = 0
self.set_state(qpos, qvel)
return self._get_obs()
def _get_obs(self):
theta = self.sim.data.qpos.flat[:2]
return np.concatenate([
np.cos(theta),
np.sin(theta),
self.sim.data.qpos.flat[2:],
self.sim.data.qvel.flat[:2],
self.get_body_com("fingertip") - self.get_body_com("target")
])
return np.concatenate(
[
np.cos(theta),
np.sin(theta),
self.sim.data.qpos.flat[2:],
self.sim.data.qvel.flat[:2],
self.get_body_com("fingertip") - self.get_body_com("target"),
]
)
View File
@@ -2,13 +2,14 @@ import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
class StrikerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
utils.EzPickle.__init__(self)
self._striked = False
self._min_strike_dist = np.inf
self.strike_threshold = 0.1
mujoco_env.MujocoEnv.__init__(self, 'striker.xml', 5)
mujoco_env.MujocoEnv.__init__(self, "striker.xml", 5)
def step(self, a):
vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm")
@@ -21,19 +22,18 @@ class StrikerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
if self._striked:
vec_3 = self.get_body_com("object") - self._strike_pos
reward_near = - np.linalg.norm(vec_3)
reward_near = -np.linalg.norm(vec_3)
else:
reward_near = - np.linalg.norm(vec_1)
reward_near = -np.linalg.norm(vec_1)
reward_dist = - np.linalg.norm(self._min_strike_dist)
reward_ctrl = - np.square(a).sum()
reward_dist = -np.linalg.norm(self._min_strike_dist)
reward_ctrl = -np.square(a).sum()
reward = 3 * reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near
self.do_simulation(a, self.frame_skip)
ob = self._get_obs()
done = False
return ob, reward, done, dict(reward_dist=reward_dist,
reward_ctrl=reward_ctrl)
return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl)
def viewer_setup(self):
self.viewer.cam.trackbodyid = 0
@@ -48,9 +48,12 @@ class StrikerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
self.ball = np.array([0.5, -0.175])
while True:
self.goal = np.concatenate([
self.goal = np.concatenate(
[
self.np_random.uniform(low=0.15, high=0.7, size=1),
self.np_random.uniform(low=0.1, high=1.0, size=1)])
self.np_random.uniform(low=0.1, high=1.0, size=1),
]
)
if np.linalg.norm(self.ball - self.goal) > 0.17:
break
@@ -59,17 +62,20 @@ class StrikerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
diff = self.ball - self.goal
angle = -np.arctan(diff[0] / (diff[1] + 1e-8))
qpos[-1] = angle / 3.14
qvel = self.init_qvel + self.np_random.uniform(low=-.1, high=.1,
size=self.model.nv)
qvel = self.init_qvel + self.np_random.uniform(
low=-0.1, high=0.1, size=self.model.nv
)
qvel[7:] = 0
self.set_state(qpos, qvel)
return self._get_obs()
def _get_obs(self):
return np.concatenate([
self.sim.data.qpos.flat[:7],
self.sim.data.qvel.flat[:7],
self.get_body_com("tips_arm"),
self.get_body_com("object"),
self.get_body_com("goal"),
])
return np.concatenate(
[
self.sim.data.qpos.flat[:7],
self.sim.data.qvel.flat[:7],
self.get_body_com("tips_arm"),
self.get_body_com("object"),
self.get_body_com("goal"),
]
)
View File
@@ -2,9 +2,10 @@ import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
mujoco_env.MujocoEnv.__init__(self, 'swimmer.xml', 4)
mujoco_env.MujocoEnv.__init__(self, "swimmer.xml", 4)
utils.EzPickle.__init__(self)
def step(self, a):
@@ -13,7 +14,7 @@ class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
self.do_simulation(a, self.frame_skip)
xposafter = self.sim.data.qpos[0]
reward_fwd = (xposafter - xposbefore) / self.dt
reward_ctrl = - ctrl_cost_coeff * np.square(a).sum()
reward_ctrl = -ctrl_cost_coeff * np.square(a).sum()
reward = reward_fwd + reward_ctrl
ob = self._get_obs()
return ob, reward, False, dict(reward_fwd=reward_fwd, reward_ctrl=reward_ctrl)
@@ -25,7 +26,9 @@ class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def reset_model(self):
self.set_state(
self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq),
self.init_qvel + self.np_random.uniform(low=-.1, high=.1, size=self.model.nv)
self.init_qpos
+ self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq),
self.init_qvel
+ self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nv),
)
return self._get_obs()
View File
@@ -7,12 +7,14 @@ DEFAULT_CAMERA_CONFIG = {}
class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self,
xml_file='swimmer.xml',
forward_reward_weight=1.0,
ctrl_cost_weight=1e-4,
reset_noise_scale=0.1,
exclude_current_positions_from_observation=True):
def __init__(
self,
xml_file="swimmer.xml",
forward_reward_weight=1.0,
ctrl_cost_weight=1e-4,
reset_noise_scale=0.1,
exclude_current_positions_from_observation=True,
):
utils.EzPickle.__init__(**locals())
self._forward_reward_weight = forward_reward_weight
@@ -21,7 +23,8 @@ class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
self._reset_noise_scale = reset_noise_scale
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation)
exclude_current_positions_from_observation
)
mujoco_env.MujocoEnv.__init__(self, xml_file, 4)
@@ -45,16 +48,14 @@ class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
reward = forward_reward - ctrl_cost
done = False
info = {
'reward_fwd': forward_reward,
'reward_ctrl': -ctrl_cost,
'x_position': xy_position_after[0],
'y_position': xy_position_after[1],
'distance_from_origin': np.linalg.norm(xy_position_after, ord=2),
'x_velocity': x_velocity,
'y_velocity': y_velocity,
'forward_reward': forward_reward,
"reward_fwd": forward_reward,
"reward_ctrl": -ctrl_cost,
"x_position": xy_position_after[0],
"y_position": xy_position_after[1],
"distance_from_origin": np.linalg.norm(xy_position_after, ord=2),
"x_velocity": x_velocity,
"y_velocity": y_velocity,
"forward_reward": forward_reward,
}
return observation, reward, done, info
@@ -74,9 +75,11 @@ class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq)
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = self.init_qvel + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nv)
low=noise_low, high=noise_high, size=self.model.nv
)
self.set_state(qpos, qvel)
View File
@@ -2,12 +2,13 @@ import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
class ThrowerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
utils.EzPickle.__init__(self)
self._ball_hit_ground = False
self._ball_hit_location = None
mujoco_env.MujocoEnv.__init__(self, 'thrower.xml', 5)
mujoco_env.MujocoEnv.__init__(self, "thrower.xml", 5)
def step(self, a):
ball_xy = self.get_body_com("ball")[:2]
@@ -22,14 +23,13 @@ class ThrowerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
reward_dist = -np.linalg.norm(ball_hit_xy - goal_xy)
else:
reward_dist = -np.linalg.norm(ball_xy - goal_xy)
reward_ctrl = - np.square(a).sum()
reward_ctrl = -np.square(a).sum()
reward = reward_dist + 0.002 * reward_ctrl
self.do_simulation(a, self.frame_skip)
ob = self._get_obs()
done = False
return ob, reward, done, dict(reward_dist=reward_dist,
reward_ctrl=reward_ctrl)
return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl)
def viewer_setup(self):
self.viewer.cam.trackbodyid = 0
@@ -40,21 +40,28 @@ class ThrowerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
self._ball_hit_location = None
qpos = self.init_qpos
self.goal = np.array([self.np_random.uniform(low=-0.3, high=0.3),
self.np_random.uniform(low=-0.3, high=0.3)])
self.goal = np.array(
[
self.np_random.uniform(low=-0.3, high=0.3),
self.np_random.uniform(low=-0.3, high=0.3),
]
)
qpos[-9:-7] = self.goal
qvel = self.init_qvel + self.np_random.uniform(low=-0.005,
high=0.005, size=self.model.nv)
qvel = self.init_qvel + self.np_random.uniform(
low=-0.005, high=0.005, size=self.model.nv
)
qvel[7:] = 0
self.set_state(qpos, qvel)
return self._get_obs()
def _get_obs(self):
return np.concatenate([
self.sim.data.qpos.flat[:7],
self.sim.data.qvel.flat[:7],
self.get_body_com("r_wrist_roll_link"),
self.get_body_com("ball"),
self.get_body_com("goal"),
])
return np.concatenate(
[
self.sim.data.qpos.flat[:7],
self.sim.data.qvel.flat[:7],
self.get_body_com("r_wrist_roll_link"),
self.get_body_com("ball"),
self.get_body_com("goal"),
]
)
View File
@@ -2,8 +2,8 @@ import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
mujoco_env.MujocoEnv.__init__(self, "walker2d.xml", 4)
utils.EzPickle.__init__(self)
@@ -13,11 +13,10 @@ class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
self.do_simulation(a, self.frame_skip)
posafter, height, ang = self.sim.data.qpos[0:3]
alive_bonus = 1.0
reward = ((posafter - posbefore) / self.dt)
reward = (posafter - posbefore) / self.dt
reward += alive_bonus
reward -= 1e-3 * np.square(a).sum()
done = not (height > 0.8 and height < 2.0 and
ang > -1.0 and ang < 1.0)
done = not (height > 0.8 and height < 2.0 and ang > -1.0 and ang < 1.0)
ob = self._get_obs()
return ob, reward, done, {}
@@ -28,8 +27,10 @@ class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def reset_model(self):
self.set_state(
self.init_qpos + self.np_random.uniform(low=-.005, high=.005, size=self.model.nq),
self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
self.init_qpos
+ self.np_random.uniform(low=-0.005, high=0.005, size=self.model.nq),
self.init_qvel
+ self.np_random.uniform(low=-0.005, high=0.005, size=self.model.nv),
)
return self._get_obs()
View File
@@ -4,24 +4,26 @@ from gym import utils
DEFAULT_CAMERA_CONFIG = {
'trackbodyid': 2,
'distance': 4.0,
'lookat': np.array((0.0, 0.0, 1.15)),
'elevation': -20.0,
"trackbodyid": 2,
"distance": 4.0,
"lookat": np.array((0.0, 0.0, 1.15)),
"elevation": -20.0,
}
class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self,
xml_file='walker2d.xml',
forward_reward_weight=1.0,
ctrl_cost_weight=1e-3,
healthy_reward=1.0,
terminate_when_unhealthy=True,
healthy_z_range=(0.8, 2.0),
healthy_angle_range=(-1.0, 1.0),
reset_noise_scale=5e-3,
exclude_current_positions_from_observation=True):
def __init__(
self,
xml_file="walker2d.xml",
forward_reward_weight=1.0,
ctrl_cost_weight=1e-3,
healthy_reward=1.0,
terminate_when_unhealthy=True,
healthy_z_range=(0.8, 2.0),
healthy_angle_range=(-1.0, 1.0),
reset_noise_scale=5e-3,
exclude_current_positions_from_observation=True,
):
utils.EzPickle.__init__(**locals())
self._forward_reward_weight = forward_reward_weight
@@ -36,16 +38,17 @@ class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
self._reset_noise_scale = reset_noise_scale
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation)
exclude_current_positions_from_observation
)
mujoco_env.MujocoEnv.__init__(self, xml_file, 4)
@property
def healthy_reward(self):
return float(
self.is_healthy
or self._terminate_when_unhealthy
) * self._healthy_reward
return (
float(self.is_healthy or self._terminate_when_unhealthy)
* self._healthy_reward
)
def control_cost(self, action):
control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
@@ -66,15 +69,12 @@ class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
@property
def done(self):
done = (not self.is_healthy
if self._terminate_when_unhealthy
else False)
done = not self.is_healthy if self._terminate_when_unhealthy else False
return done
def _get_obs(self):
position = self.sim.data.qpos.flat.copy()
velocity = np.clip(
self.sim.data.qvel.flat.copy(), -10, 10)
velocity = np.clip(self.sim.data.qvel.flat.copy(), -10, 10)
if self._exclude_current_positions_from_observation:
position = position[1:]
@@ -86,8 +86,7 @@ class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
x_position_before = self.sim.data.qpos[0]
self.do_simulation(action, self.frame_skip)
x_position_after = self.sim.data.qpos[0]
x_velocity = ((x_position_after - x_position_before)
/ self.dt)
x_velocity = (x_position_after - x_position_before) / self.dt
ctrl_cost = self.control_cost(action)
@@ -101,8 +100,8 @@ class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
reward = rewards - costs
done = self.done
info = {
'x_position': x_position_after,
'x_velocity': x_velocity,
"x_position": x_position_after,
"x_velocity": x_velocity,
}
return observation, reward, done, info
@@ -112,9 +111,11 @@ class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq)
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = self.init_qvel + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nv)
low=noise_low, high=noise_high, size=self.model.nv
)
self.set_state(qpos, qvel)
View File
@@ -10,7 +10,7 @@ from gym import error, logger
#
# 2016-10-31: We're experimentally expanding the environment ID format
# to include an optional username.
env_id_re = re.compile(r'^(?:[\w:-]+\/)?([\w:.-]+)-v(\d+)$')
env_id_re = re.compile(r"^(?:[\w:-]+\/)?([\w:.-]+)-v(\d+)$")
def load(name):
@@ -34,7 +34,15 @@ class EnvSpec(object):
"""
def __init__(self, id, entry_point=None, reward_threshold=None, nondeterministic=False, max_episode_steps=None, kwargs=None):
def __init__(
self,
id,
entry_point=None,
reward_threshold=None,
nondeterministic=False,
max_episode_steps=None,
kwargs=None,
):
self.id = id
self.entry_point = entry_point
self.reward_threshold = reward_threshold
@@ -44,13 +52,21 @@ class EnvSpec(object):
match = env_id_re.search(id)
if not match:
raise error.Error('Attempted to register malformed environment ID: {}. (Currently all IDs must be of the form {}.)'.format(id, env_id_re.pattern))
self._env_name = match.group(1)
raise error.Error(
"Attempted to register malformed environment ID: {}. (Currently all IDs must be of the form {}.)".format(
id, env_id_re.pattern
)
)
self._env_name = match.group(1)
def make(self, **kwargs):
"""Instantiates an instance of the environment with appropriate kwargs"""
if self.entry_point is None:
raise error.Error('Attempting to make deprecated env {}. (HINT: is there a newer registered version of this env?)'.format(self.id))
raise error.Error(
"Attempting to make deprecated env {}. (HINT: is there a newer registered version of this env?)".format(
self.id
)
)
_kwargs = self._kwargs.copy()
_kwargs.update(kwargs)
if callable(self.entry_point):
@@ -83,19 +99,24 @@ class EnvRegistry(object):
def make(self, path, **kwargs):
if len(kwargs) > 0:
logger.info('Making new env: %s (%s)', path, kwargs)
logger.info("Making new env: %s (%s)", path, kwargs)
else:
logger.info('Making new env: %s', path)
logger.info("Making new env: %s", path)
spec = self.spec(path)
env = spec.make(**kwargs)
# We used to have people override _reset/_step rather than
# reset/step. Set _gym_disable_underscore_compat = True on
# your environment if you use these methods and don't want
# compatibility code to be invoked.
if hasattr(env, "_reset") and hasattr(env, "_step") and not getattr(env, "_gym_disable_underscore_compat", False):
if (
hasattr(env, "_reset")
and hasattr(env, "_step")
and not getattr(env, "_gym_disable_underscore_compat", False)
):
patch_deprecated_methods(env)
if env.spec.max_episode_steps is not None:
from gym.wrappers.time_limit import TimeLimit
env = TimeLimit(env, max_episode_steps=env.spec.max_episode_steps)
return env
@@ -103,19 +124,27 @@ class EnvRegistry(object):
return self.env_specs.values()
def spec(self, path):
if ':' in path:
mod_name, _sep, id = path.partition(':')
if ":" in path:
mod_name, _sep, id = path.partition(":")
try:
importlib.import_module(mod_name)
# catch ImportError for python2.7 compatibility
except ImportError:
raise error.Error('A module ({}) was specified for the environment but was not found, make sure the package is installed with `pip install` before calling `gym.make()`'.format(mod_name))
raise error.Error(
"A module ({}) was specified for the environment but was not found, make sure the package is installed with `pip install` before calling `gym.make()`".format(
mod_name
)
)
else:
id = path
match = env_id_re.search(id)
if not match:
raise error.Error('Attempted to look up malformed environment ID: {}. (Currently all IDs must be of the form {}.)'.format(id.encode('utf-8'), env_id_re.pattern))
raise error.Error(
"Attempted to look up malformed environment ID: {}. (Currently all IDs must be of the form {}.)".format(
id.encode("utf-8"), env_id_re.pattern
)
)
try:
return self.env_specs[id]
@@ -123,32 +152,45 @@ class EnvRegistry(object):
# Parse the env name and check to see if it matches the non-version
# part of a valid env (could also check the exact number here)
env_name = match.group(1)
matching_envs = [valid_env_name for valid_env_name, valid_env_spec in self.env_specs.items()
if env_name == valid_env_spec._env_name]
matching_envs = [
valid_env_name
for valid_env_name, valid_env_spec in self.env_specs.items()
if env_name == valid_env_spec._env_name
]
if matching_envs:
raise error.DeprecatedEnv('Env {} not found (valid versions include {})'.format(id, matching_envs))
raise error.DeprecatedEnv(
"Env {} not found (valid versions include {})".format(
id, matching_envs
)
)
else:
raise error.UnregisteredEnv('No registered env with id: {}'.format(id))
raise error.UnregisteredEnv("No registered env with id: {}".format(id))
def register(self, id, **kwargs):
if id in self.env_specs:
raise error.Error('Cannot re-register id: {}'.format(id))
raise error.Error("Cannot re-register id: {}".format(id))
self.env_specs[id] = EnvSpec(id, **kwargs)
# Have a global registry
registry = EnvRegistry()
def register(id, **kwargs):
return registry.register(id, **kwargs)
def make(id, **kwargs):
return registry.make(id, **kwargs)
def spec(id):
return registry.spec(id)
warn_once = True
def patch_deprecated_methods(env):
"""
Methods renamed from '_method' to 'method', render() no longer has 'close' parameter, close is a separate method.
@@ -156,14 +198,20 @@ def patch_deprecated_methods(env):
"""
global warn_once
if warn_once:
logger.warn("Environment '%s' has deprecated methods '_step' and '_reset' rather than 'step' and 'reset'. Compatibility code invoked. Set _gym_disable_underscore_compat = True to disable this behavior." % str(type(env)))
logger.warn(
"Environment '%s' has deprecated methods '_step' and '_reset' rather than 'step' and 'reset'. Compatibility code invoked. Set _gym_disable_underscore_compat = True to disable this behavior."
% str(type(env))
)
warn_once = False
env.reset = env._reset
env.step = env._step
env.seed = env._seed
env.step = env._step
env.seed = env._seed
def render(mode):
return env._render(mode, close=False)
def close():
env._render("human", close=True)
env.render = render
env.close = close
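The registry code above is easiest to read alongside a usage sketch: environment ids must match `env_id_re` (a name plus `-v<number>`, with an optional username prefix), `register()` stores an `EnvSpec`, `make()` wraps the env in `TimeLimit` when `max_episode_steps` is set, and a `module:EnvId` path tells `spec()` to import that module first. A minimal sketch with a throwaway env (the `MyEnv` class and its id are hypothetical, used only for illustration):

import gym
from gym.envs.registration import make, register, spec


class MyEnv(gym.Env):
    # Deliberately tiny; just enough to exercise the registry calls.
    def reset(self):
        return 0

    def step(self, action):
        return 0, 0.0, True, {}


register(id="MyEnv-v0", entry_point=MyEnv, max_episode_steps=200)

env = make("MyEnv-v0")  # wrapped in TimeLimit because max_episode_steps is set
print(spec("MyEnv-v0").max_episode_steps)  # 200

# A "module:EnvId-v0" path makes spec() import the module first, so envs that
# register themselves in a third-party package resolve without a manual import:
# env = make("my_package:MyEnv-v0")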
View File
@@ -4,20 +4,30 @@ from gym.envs.robotics import fetch_env
# Ensure we get the path separator correct on windows
MODEL_XML_PATH = os.path.join('fetch', 'pick_and_place.xml')
MODEL_XML_PATH = os.path.join("fetch", "pick_and_place.xml")
class FetchPickAndPlaceEnv(fetch_env.FetchEnv, utils.EzPickle):
def __init__(self, reward_type='sparse'):
def __init__(self, reward_type="sparse"):
initial_qpos = {
'robot0:slide0': 0.405,
'robot0:slide1': 0.48,
'robot0:slide2': 0.0,
'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.],
"robot0:slide0": 0.405,
"robot0:slide1": 0.48,
"robot0:slide2": 0.0,
"object0:joint": [1.25, 0.53, 0.4, 1.0, 0.0, 0.0, 0.0],
}
fetch_env.FetchEnv.__init__(
self, MODEL_XML_PATH, has_object=True, block_gripper=False, n_substeps=20,
gripper_extra_height=0.2, target_in_the_air=True, target_offset=0.0,
obj_range=0.15, target_range=0.15, distance_threshold=0.05,
initial_qpos=initial_qpos, reward_type=reward_type)
self,
MODEL_XML_PATH,
has_object=True,
block_gripper=False,
n_substeps=20,
gripper_extra_height=0.2,
target_in_the_air=True,
target_offset=0.0,
obj_range=0.15,
target_range=0.15,
distance_threshold=0.05,
initial_qpos=initial_qpos,
reward_type=reward_type,
)
utils.EzPickle.__init__(self, reward_type=reward_type)


@@ -4,20 +4,30 @@ from gym.envs.robotics import fetch_env
# Ensure we get the path separator correct on windows
MODEL_XML_PATH = os.path.join('fetch', 'push.xml')
MODEL_XML_PATH = os.path.join("fetch", "push.xml")
class FetchPushEnv(fetch_env.FetchEnv, utils.EzPickle):
def __init__(self, reward_type='sparse'):
def __init__(self, reward_type="sparse"):
initial_qpos = {
'robot0:slide0': 0.405,
'robot0:slide1': 0.48,
'robot0:slide2': 0.0,
'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.],
"robot0:slide0": 0.405,
"robot0:slide1": 0.48,
"robot0:slide2": 0.0,
"object0:joint": [1.25, 0.53, 0.4, 1.0, 0.0, 0.0, 0.0],
}
fetch_env.FetchEnv.__init__(
self, MODEL_XML_PATH, has_object=True, block_gripper=True, n_substeps=20,
gripper_extra_height=0.0, target_in_the_air=False, target_offset=0.0,
obj_range=0.15, target_range=0.15, distance_threshold=0.05,
initial_qpos=initial_qpos, reward_type=reward_type)
self,
MODEL_XML_PATH,
has_object=True,
block_gripper=True,
n_substeps=20,
gripper_extra_height=0.0,
target_in_the_air=False,
target_offset=0.0,
obj_range=0.15,
target_range=0.15,
distance_threshold=0.05,
initial_qpos=initial_qpos,
reward_type=reward_type,
)
utils.EzPickle.__init__(self, reward_type=reward_type)


@@ -4,19 +4,29 @@ from gym.envs.robotics import fetch_env
# Ensure we get the path separator correct on windows
MODEL_XML_PATH = os.path.join('fetch', 'reach.xml')
MODEL_XML_PATH = os.path.join("fetch", "reach.xml")
class FetchReachEnv(fetch_env.FetchEnv, utils.EzPickle):
def __init__(self, reward_type='sparse'):
def __init__(self, reward_type="sparse"):
initial_qpos = {
'robot0:slide0': 0.4049,
'robot0:slide1': 0.48,
'robot0:slide2': 0.0,
"robot0:slide0": 0.4049,
"robot0:slide1": 0.48,
"robot0:slide2": 0.0,
}
fetch_env.FetchEnv.__init__(
self, MODEL_XML_PATH, has_object=False, block_gripper=True, n_substeps=20,
gripper_extra_height=0.2, target_in_the_air=True, target_offset=0.0,
obj_range=0.15, target_range=0.15, distance_threshold=0.05,
initial_qpos=initial_qpos, reward_type=reward_type)
self,
MODEL_XML_PATH,
has_object=False,
block_gripper=True,
n_substeps=20,
gripper_extra_height=0.2,
target_in_the_air=True,
target_offset=0.0,
obj_range=0.15,
target_range=0.15,
distance_threshold=0.05,
initial_qpos=initial_qpos,
reward_type=reward_type,
)
utils.EzPickle.__init__(self, reward_type=reward_type)


@@ -6,20 +6,30 @@ from gym.envs.robotics import fetch_env
# Ensure we get the path separator correct on windows
MODEL_XML_PATH = os.path.join('fetch', 'slide.xml')
MODEL_XML_PATH = os.path.join("fetch", "slide.xml")
class FetchSlideEnv(fetch_env.FetchEnv, utils.EzPickle):
def __init__(self, reward_type='sparse'):
def __init__(self, reward_type="sparse"):
initial_qpos = {
'robot0:slide0': 0.05,
'robot0:slide1': 0.48,
'robot0:slide2': 0.0,
'object0:joint': [1.7, 1.1, 0.41, 1., 0., 0., 0.],
"robot0:slide0": 0.05,
"robot0:slide1": 0.48,
"robot0:slide2": 0.0,
"object0:joint": [1.7, 1.1, 0.41, 1.0, 0.0, 0.0, 0.0],
}
fetch_env.FetchEnv.__init__(
self, MODEL_XML_PATH, has_object=True, block_gripper=True, n_substeps=20,
gripper_extra_height=-0.02, target_in_the_air=False, target_offset=np.array([0.4, 0.0, 0.0]),
obj_range=0.1, target_range=0.3, distance_threshold=0.05,
initial_qpos=initial_qpos, reward_type=reward_type)
self,
MODEL_XML_PATH,
has_object=True,
block_gripper=True,
n_substeps=20,
gripper_extra_height=-0.02,
target_in_the_air=False,
target_offset=np.array([0.4, 0.0, 0.0]),
obj_range=0.1,
target_range=0.3,
distance_threshold=0.05,
initial_qpos=initial_qpos,
reward_type=reward_type,
)
utils.EzPickle.__init__(self, reward_type=reward_type)


@@ -9,13 +9,22 @@ def goal_distance(goal_a, goal_b):
class FetchEnv(robot_env.RobotEnv):
"""Superclass for all Fetch environments.
"""
"""Superclass for all Fetch environments."""
def __init__(
self, model_path, n_substeps, gripper_extra_height, block_gripper,
has_object, target_in_the_air, target_offset, obj_range, target_range,
distance_threshold, initial_qpos, reward_type,
self,
model_path,
n_substeps,
gripper_extra_height,
block_gripper,
has_object,
target_in_the_air,
target_offset,
obj_range,
target_range,
distance_threshold,
initial_qpos,
reward_type,
):
"""Initializes a new Fetch environment.
@@ -44,8 +53,11 @@ class FetchEnv(robot_env.RobotEnv):
self.reward_type = reward_type
super(FetchEnv, self).__init__(
model_path=model_path, n_substeps=n_substeps, n_actions=4,
initial_qpos=initial_qpos)
model_path=model_path,
n_substeps=n_substeps,
n_actions=4,
initial_qpos=initial_qpos,
)
# GoalEnv methods
# ----------------------------
@@ -53,7 +65,7 @@ class FetchEnv(robot_env.RobotEnv):
def compute_reward(self, achieved_goal, goal, info):
# Compute distance between goal and the achieved goal.
d = goal_distance(achieved_goal, goal)
if self.reward_type == 'sparse':
if self.reward_type == "sparse":
return -(d > self.distance_threshold).astype(np.float32)
else:
return -d
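# A worked example of the branch above, with hypothetical numbers: for
# distance_threshold = 0.05, a goal distance of 0.12 yields -1.0 under the
# "sparse" reward and -0.12 under the dense one.
import numpy as np

d = np.array(0.12)  # hypothetical goal distance
distance_threshold = 0.05
sparse_reward = -(d > distance_threshold).astype(np.float32)  # -> -1.0
dense_reward = -d  # -> -0.12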
@@ -63,17 +75,24 @@ class FetchEnv(robot_env.RobotEnv):
def _step_callback(self):
if self.block_gripper:
self.sim.data.set_joint_qpos('robot0:l_gripper_finger_joint', 0.)
self.sim.data.set_joint_qpos('robot0:r_gripper_finger_joint', 0.)
self.sim.data.set_joint_qpos("robot0:l_gripper_finger_joint", 0.0)
self.sim.data.set_joint_qpos("robot0:r_gripper_finger_joint", 0.0)
self.sim.forward()
def _set_action(self, action):
assert action.shape == (4,)
action = action.copy() # ensure that we don't change the action outside of this scope
action = (
action.copy()
) # ensure that we don't change the action outside of this scope
pos_ctrl, gripper_ctrl = action[:3], action[3]
pos_ctrl *= 0.05 # limit maximum change in position
rot_ctrl = [1., 0., 1., 0.] # fixed rotation of the end effector, expressed as a quaternion
rot_ctrl = [
1.0,
0.0,
1.0,
0.0,
] # fixed rotation of the end effector, expressed as a quaternion
gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl])
assert gripper_ctrl.shape == (2,)
if self.block_gripper:
@@ -86,53 +105,66 @@ class FetchEnv(robot_env.RobotEnv):
def _get_obs(self):
# positions
grip_pos = self.sim.data.get_site_xpos('robot0:grip')
grip_pos = self.sim.data.get_site_xpos("robot0:grip")
dt = self.sim.nsubsteps * self.sim.model.opt.timestep
grip_velp = self.sim.data.get_site_xvelp('robot0:grip') * dt
grip_velp = self.sim.data.get_site_xvelp("robot0:grip") * dt
robot_qpos, robot_qvel = utils.robot_get_obs(self.sim)
if self.has_object:
object_pos = self.sim.data.get_site_xpos('object0')
object_pos = self.sim.data.get_site_xpos("object0")
# rotations
object_rot = rotations.mat2euler(self.sim.data.get_site_xmat('object0'))
object_rot = rotations.mat2euler(self.sim.data.get_site_xmat("object0"))
# velocities
object_velp = self.sim.data.get_site_xvelp('object0') * dt
object_velr = self.sim.data.get_site_xvelr('object0') * dt
object_velp = self.sim.data.get_site_xvelp("object0") * dt
object_velr = self.sim.data.get_site_xvelr("object0") * dt
# gripper state
object_rel_pos = object_pos - grip_pos
object_velp -= grip_velp
else:
object_pos = object_rot = object_velp = object_velr = object_rel_pos = np.zeros(0)
object_pos = (
object_rot
) = object_velp = object_velr = object_rel_pos = np.zeros(0)
gripper_state = robot_qpos[-2:]
gripper_vel = robot_qvel[-2:] * dt # change to a scalar if the gripper is made symmetric
gripper_vel = (
robot_qvel[-2:] * dt
) # change to a scalar if the gripper is made symmetric
if not self.has_object:
achieved_goal = grip_pos.copy()
else:
achieved_goal = np.squeeze(object_pos.copy())
obs = np.concatenate([
grip_pos, object_pos.ravel(), object_rel_pos.ravel(), gripper_state, object_rot.ravel(),
object_velp.ravel(), object_velr.ravel(), grip_velp, gripper_vel,
])
obs = np.concatenate(
[
grip_pos,
object_pos.ravel(),
object_rel_pos.ravel(),
gripper_state,
object_rot.ravel(),
object_velp.ravel(),
object_velr.ravel(),
grip_velp,
gripper_vel,
]
)
return {
'observation': obs.copy(),
'achieved_goal': achieved_goal.copy(),
'desired_goal': self.goal.copy(),
"observation": obs.copy(),
"achieved_goal": achieved_goal.copy(),
"desired_goal": self.goal.copy(),
}
def _viewer_setup(self):
body_id = self.sim.model.body_name2id('robot0:gripper_link')
body_id = self.sim.model.body_name2id("robot0:gripper_link")
lookat = self.sim.data.body_xpos[body_id]
for idx, value in enumerate(lookat):
self.viewer.cam.lookat[idx] = value
self.viewer.cam.distance = 2.5
self.viewer.cam.azimuth = 132.
self.viewer.cam.elevation = -14.
self.viewer.cam.azimuth = 132.0
self.viewer.cam.elevation = -14.0
def _render_callback(self):
# Visualize target.
sites_offset = (self.sim.data.site_xpos - self.sim.model.site_pos).copy()
site_id = self.sim.model.site_name2id('target0')
site_id = self.sim.model.site_name2id("target0")
self.sim.model.site_pos[site_id] = self.goal - sites_offset[0]
self.sim.forward()
@@ -143,24 +175,30 @@ class FetchEnv(robot_env.RobotEnv):
if self.has_object:
object_xpos = self.initial_gripper_xpos[:2]
while np.linalg.norm(object_xpos - self.initial_gripper_xpos[:2]) < 0.1:
object_xpos = self.initial_gripper_xpos[:2] + self.np_random.uniform(-self.obj_range, self.obj_range, size=2)
object_qpos = self.sim.data.get_joint_qpos('object0:joint')
object_xpos = self.initial_gripper_xpos[:2] + self.np_random.uniform(
-self.obj_range, self.obj_range, size=2
)
object_qpos = self.sim.data.get_joint_qpos("object0:joint")
assert object_qpos.shape == (7,)
object_qpos[:2] = object_xpos
self.sim.data.set_joint_qpos('object0:joint', object_qpos)
self.sim.data.set_joint_qpos("object0:joint", object_qpos)
self.sim.forward()
return True
def _sample_goal(self):
if self.has_object:
goal = self.initial_gripper_xpos[:3] + self.np_random.uniform(-self.target_range, self.target_range, size=3)
goal = self.initial_gripper_xpos[:3] + self.np_random.uniform(
-self.target_range, self.target_range, size=3
)
goal += self.target_offset
goal[2] = self.height_offset
if self.target_in_the_air and self.np_random.uniform() < 0.5:
goal[2] += self.np_random.uniform(0, 0.45)
else:
goal = self.initial_gripper_xpos[:3] + self.np_random.uniform(-self.target_range, self.target_range, size=3)
goal = self.initial_gripper_xpos[:3] + self.np_random.uniform(
-self.target_range, self.target_range, size=3
)
return goal.copy()
def _is_success(self, achieved_goal, desired_goal):
@@ -174,17 +212,19 @@ class FetchEnv(robot_env.RobotEnv):
self.sim.forward()
# Move end effector into position.
gripper_target = np.array([-0.498, 0.005, -0.431 + self.gripper_extra_height]) + self.sim.data.get_site_xpos('robot0:grip')
gripper_rotation = np.array([1., 0., 1., 0.])
self.sim.data.set_mocap_pos('robot0:mocap', gripper_target)
self.sim.data.set_mocap_quat('robot0:mocap', gripper_rotation)
gripper_target = np.array(
[-0.498, 0.005, -0.431 + self.gripper_extra_height]
) + self.sim.data.get_site_xpos("robot0:grip")
gripper_rotation = np.array([1.0, 0.0, 1.0, 0.0])
self.sim.data.set_mocap_pos("robot0:mocap", gripper_target)
self.sim.data.set_mocap_quat("robot0:mocap", gripper_rotation)
for _ in range(10):
self.sim.step()
# Extract information for sampling goals.
self.initial_gripper_xpos = self.sim.data.get_site_xpos('robot0:grip').copy()
self.initial_gripper_xpos = self.sim.data.get_site_xpos("robot0:grip").copy()
if self.has_object:
self.height_offset = self.sim.data.get_site_xpos('object0')[2]
self.height_offset = self.sim.data.get_site_xpos("object0")[2]
def render(self, mode='human', width=500, height=500):
def render(self, mode="human", width=500, height=500):
return super(FetchEnv, self).render(mode, width, height)


@@ -8,29 +8,42 @@ from gym.envs.robotics.utils import robot_get_obs
try:
import mujoco_py
except ImportError as e:
raise error.DependencyNotInstalled("{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(e))
raise error.DependencyNotInstalled(
"{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(
e
)
)
def quat_from_angle_and_axis(angle, axis):
assert axis.shape == (3,)
axis /= np.linalg.norm(axis)
quat = np.concatenate([[np.cos(angle / 2.)], np.sin(angle / 2.) * axis])
quat = np.concatenate([[np.cos(angle / 2.0)], np.sin(angle / 2.0) * axis])
quat /= np.linalg.norm(quat)
return quat
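# A quick numeric check of the helper above, using a 90-degree rotation about
# the z-axis (values rounded, purely illustrative).
import numpy as np

angle = np.pi / 2
axis = np.array([0.0, 0.0, 1.0])
quat = np.concatenate([[np.cos(angle / 2.0)], np.sin(angle / 2.0) * axis])
# quat ~= [0.7071, 0.0, 0.0, 0.7071]; already unit-norm because the axis is a unit vector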
# Ensure we get the path separator correct on windows
MANIPULATE_BLOCK_XML = os.path.join('hand', 'manipulate_block.xml')
MANIPULATE_EGG_XML = os.path.join('hand', 'manipulate_egg.xml')
MANIPULATE_PEN_XML = os.path.join('hand', 'manipulate_pen.xml')
MANIPULATE_BLOCK_XML = os.path.join("hand", "manipulate_block.xml")
MANIPULATE_EGG_XML = os.path.join("hand", "manipulate_egg.xml")
MANIPULATE_PEN_XML = os.path.join("hand", "manipulate_pen.xml")
class ManipulateEnv(hand_env.HandEnv):
def __init__(
self, model_path, target_position, target_rotation,
target_position_range, reward_type, initial_qpos=None,
randomize_initial_position=True, randomize_initial_rotation=True,
distance_threshold=0.01, rotation_threshold=0.1, n_substeps=20, relative_control=False,
self,
model_path,
target_position,
target_rotation,
target_position_range,
reward_type,
initial_qpos=None,
randomize_initial_position=True,
randomize_initial_rotation=True,
distance_threshold=0.01,
rotation_threshold=0.1,
n_substeps=20,
relative_control=False,
ignore_z_target_rotation=False,
):
"""Initializes a new Hand manipulation environment.
@@ -61,7 +74,9 @@ class ManipulateEnv(hand_env.HandEnv):
self.target_position = target_position
self.target_rotation = target_rotation
self.target_position_range = target_position_range
self.parallel_quats = [rotations.euler2quat(r) for r in rotations.get_parallel_rotations()]
self.parallel_quats = [
rotations.euler2quat(r) for r in rotations.get_parallel_rotations()
]
self.randomize_initial_rotation = randomize_initial_rotation
self.randomize_initial_position = randomize_initial_position
self.distance_threshold = distance_threshold
@@ -69,18 +84,21 @@ class ManipulateEnv(hand_env.HandEnv):
self.reward_type = reward_type
self.ignore_z_target_rotation = ignore_z_target_rotation
assert self.target_position in ['ignore', 'fixed', 'random']
assert self.target_rotation in ['ignore', 'fixed', 'xyz', 'z', 'parallel']
assert self.target_position in ["ignore", "fixed", "random"]
assert self.target_rotation in ["ignore", "fixed", "xyz", "z", "parallel"]
initial_qpos = initial_qpos or {}
hand_env.HandEnv.__init__(
self, model_path, n_substeps=n_substeps, initial_qpos=initial_qpos,
relative_control=relative_control)
self,
model_path,
n_substeps=n_substeps,
initial_qpos=initial_qpos,
relative_control=relative_control,
)
def _get_achieved_goal(self):
# Object position and rotation.
object_qpos = self.sim.data.get_joint_qpos('object:joint')
object_qpos = self.sim.data.get_joint_qpos("object:joint")
assert object_qpos.shape == (7,)
return object_qpos
@@ -90,11 +108,11 @@ class ManipulateEnv(hand_env.HandEnv):
d_pos = np.zeros_like(goal_a[..., 0])
d_rot = np.zeros_like(goal_b[..., 0])
if self.target_position != 'ignore':
if self.target_position != "ignore":
delta_pos = goal_a[..., :3] - goal_b[..., :3]
d_pos = np.linalg.norm(delta_pos, axis=-1)
if self.target_rotation != 'ignore':
if self.target_rotation != "ignore":
quat_a, quat_b = goal_a[..., 3:], goal_b[..., 3:]
if self.ignore_z_target_rotation:
@@ -109,7 +127,7 @@ class ManipulateEnv(hand_env.HandEnv):
# Subtract quaternions and extract angle between them.
quat_diff = rotations.quat_mul(quat_a, rotations.quat_conjugate(quat_b))
angle_diff = 2 * np.arccos(np.clip(quat_diff[..., 0], -1., 1.))
angle_diff = 2 * np.arccos(np.clip(quat_diff[..., 0], -1.0, 1.0))
d_rot = angle_diff
assert d_pos.shape == d_rot.shape
return d_pos, d_rot
@@ -118,14 +136,14 @@ class ManipulateEnv(hand_env.HandEnv):
# ----------------------------
def compute_reward(self, achieved_goal, goal, info):
if self.reward_type == 'sparse':
if self.reward_type == "sparse":
success = self._is_success(achieved_goal, goal).astype(np.float32)
return (success - 1.)
return success - 1.0
else:
d_pos, d_rot = self._goal_distance(achieved_goal, goal)
            # We weight the positional difference so that `d_pos` (in meters) is not
            # completely dominated by `d_rot` (in radians).
return -(10. * d_pos + d_rot)
return -(10.0 * d_pos + d_rot)
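# A small numeric illustration of the weighting above: a 2 cm position error
# and a 0.2 rad rotation error (hypothetical values) then contribute equally.
d_pos, d_rot = 0.02, 0.2
reward = -(10.0 * d_pos + d_rot)  # -> -0.4, with -0.2 from each term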
# RobotEnv methods
# ----------------------------
@@ -146,7 +164,7 @@ class ManipulateEnv(hand_env.HandEnv):
self.sim.set_state(self.initial_state)
self.sim.forward()
initial_qpos = self.sim.data.get_joint_qpos('object:joint').copy()
initial_qpos = self.sim.data.get_joint_qpos("object:joint").copy()
initial_pos, initial_quat = initial_qpos[:3], initial_qpos[3:]
assert initial_qpos.shape == (7,)
assert initial_pos.shape == (3,)
@@ -155,42 +173,46 @@ class ManipulateEnv(hand_env.HandEnv):
# Randomization initial rotation.
if self.randomize_initial_rotation:
if self.target_rotation == 'z':
if self.target_rotation == "z":
angle = self.np_random.uniform(-np.pi, np.pi)
axis = np.array([0., 0., 1.])
axis = np.array([0.0, 0.0, 1.0])
offset_quat = quat_from_angle_and_axis(angle, axis)
initial_quat = rotations.quat_mul(initial_quat, offset_quat)
elif self.target_rotation == 'parallel':
elif self.target_rotation == "parallel":
angle = self.np_random.uniform(-np.pi, np.pi)
axis = np.array([0., 0., 1.])
axis = np.array([0.0, 0.0, 1.0])
z_quat = quat_from_angle_and_axis(angle, axis)
parallel_quat = self.parallel_quats[self.np_random.randint(len(self.parallel_quats))]
parallel_quat = self.parallel_quats[
self.np_random.randint(len(self.parallel_quats))
]
offset_quat = rotations.quat_mul(z_quat, parallel_quat)
initial_quat = rotations.quat_mul(initial_quat, offset_quat)
elif self.target_rotation in ['xyz', 'ignore']:
elif self.target_rotation in ["xyz", "ignore"]:
angle = self.np_random.uniform(-np.pi, np.pi)
axis = self.np_random.uniform(-1., 1., size=3)
axis = self.np_random.uniform(-1.0, 1.0, size=3)
offset_quat = quat_from_angle_and_axis(angle, axis)
initial_quat = rotations.quat_mul(initial_quat, offset_quat)
elif self.target_rotation == 'fixed':
elif self.target_rotation == "fixed":
pass
else:
raise error.Error('Unknown target_rotation option "{}".'.format(self.target_rotation))
raise error.Error(
'Unknown target_rotation option "{}".'.format(self.target_rotation)
)
# Randomize initial position.
if self.randomize_initial_position:
if self.target_position != 'fixed':
if self.target_position != "fixed":
initial_pos += self.np_random.normal(size=3, scale=0.005)
initial_quat /= np.linalg.norm(initial_quat)
initial_qpos = np.concatenate([initial_pos, initial_quat])
self.sim.data.set_joint_qpos('object:joint', initial_qpos)
self.sim.data.set_joint_qpos("object:joint", initial_qpos)
def is_on_palm():
self.sim.forward()
cube_middle_idx = self.sim.model.site_name2id('object:center')
cube_middle_idx = self.sim.model.site_name2id("object:center")
cube_middle_pos = self.sim.data.site_xpos[cube_middle_idx]
is_on_palm = (cube_middle_pos[2] > 0.04)
is_on_palm = cube_middle_pos[2] > 0.04
return is_on_palm
# Run the simulation for a bunch of timesteps to let everything settle in.
@@ -205,38 +227,46 @@ class ManipulateEnv(hand_env.HandEnv):
def _sample_goal(self):
# Select a goal for the object position.
target_pos = None
if self.target_position == 'random':
if self.target_position == "random":
assert self.target_position_range.shape == (3, 2)
offset = self.np_random.uniform(self.target_position_range[:, 0], self.target_position_range[:, 1])
offset = self.np_random.uniform(
self.target_position_range[:, 0], self.target_position_range[:, 1]
)
assert offset.shape == (3,)
target_pos = self.sim.data.get_joint_qpos('object:joint')[:3] + offset
elif self.target_position in ['ignore', 'fixed']:
target_pos = self.sim.data.get_joint_qpos('object:joint')[:3]
target_pos = self.sim.data.get_joint_qpos("object:joint")[:3] + offset
elif self.target_position in ["ignore", "fixed"]:
target_pos = self.sim.data.get_joint_qpos("object:joint")[:3]
else:
raise error.Error('Unknown target_position option "{}".'.format(self.target_position))
raise error.Error(
'Unknown target_position option "{}".'.format(self.target_position)
)
assert target_pos is not None
assert target_pos.shape == (3,)
# Select a goal for the object rotation.
target_quat = None
if self.target_rotation == 'z':
if self.target_rotation == "z":
angle = self.np_random.uniform(-np.pi, np.pi)
axis = np.array([0., 0., 1.])
axis = np.array([0.0, 0.0, 1.0])
target_quat = quat_from_angle_and_axis(angle, axis)
elif self.target_rotation == 'parallel':
elif self.target_rotation == "parallel":
angle = self.np_random.uniform(-np.pi, np.pi)
axis = np.array([0., 0., 1.])
axis = np.array([0.0, 0.0, 1.0])
target_quat = quat_from_angle_and_axis(angle, axis)
parallel_quat = self.parallel_quats[self.np_random.randint(len(self.parallel_quats))]
parallel_quat = self.parallel_quats[
self.np_random.randint(len(self.parallel_quats))
]
target_quat = rotations.quat_mul(target_quat, parallel_quat)
elif self.target_rotation == 'xyz':
elif self.target_rotation == "xyz":
angle = self.np_random.uniform(-np.pi, np.pi)
axis = self.np_random.uniform(-1., 1., size=3)
axis = self.np_random.uniform(-1.0, 1.0, size=3)
target_quat = quat_from_angle_and_axis(angle, axis)
elif self.target_rotation in ['ignore', 'fixed']:
target_quat = self.sim.data.get_joint_qpos('object:joint')
elif self.target_rotation in ["ignore", "fixed"]:
target_quat = self.sim.data.get_joint_qpos("object:joint")
else:
raise error.Error('Unknown target_rotation option "{}".'.format(self.target_rotation))
raise error.Error(
'Unknown target_rotation option "{}".'.format(self.target_rotation)
)
assert target_quat is not None
assert target_quat.shape == (4,)
@@ -249,55 +279,76 @@ class ManipulateEnv(hand_env.HandEnv):
# is not obscured.
goal = self.goal.copy()
assert goal.shape == (7,)
if self.target_position == 'ignore':
if self.target_position == "ignore":
            # Move the object to the side since we do not care about its position.
goal[0] += 0.15
self.sim.data.set_joint_qpos('target:joint', goal)
self.sim.data.set_joint_qvel('target:joint', np.zeros(6))
self.sim.data.set_joint_qpos("target:joint", goal)
self.sim.data.set_joint_qvel("target:joint", np.zeros(6))
if 'object_hidden' in self.sim.model.geom_names:
hidden_id = self.sim.model.geom_name2id('object_hidden')
self.sim.model.geom_rgba[hidden_id, 3] = 1.
if "object_hidden" in self.sim.model.geom_names:
hidden_id = self.sim.model.geom_name2id("object_hidden")
self.sim.model.geom_rgba[hidden_id, 3] = 1.0
self.sim.forward()
def _get_obs(self):
robot_qpos, robot_qvel = robot_get_obs(self.sim)
object_qvel = self.sim.data.get_joint_qvel('object:joint')
achieved_goal = self._get_achieved_goal().ravel() # this contains the object position + rotation
observation = np.concatenate([robot_qpos, robot_qvel, object_qvel, achieved_goal])
object_qvel = self.sim.data.get_joint_qvel("object:joint")
achieved_goal = (
self._get_achieved_goal().ravel()
) # this contains the object position + rotation
observation = np.concatenate(
[robot_qpos, robot_qvel, object_qvel, achieved_goal]
)
return {
'observation': observation.copy(),
'achieved_goal': achieved_goal.copy(),
'desired_goal': self.goal.ravel().copy(),
"observation": observation.copy(),
"achieved_goal": achieved_goal.copy(),
"desired_goal": self.goal.ravel().copy(),
}
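# Note that the achieved goal above is the full 7-dimensional object pose
# (3 position components followed by a 4-component quaternion), i.e. the same
# layout as get_joint_qpos("object:joint").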
class HandBlockEnv(ManipulateEnv, utils.EzPickle):
def __init__(self, target_position='random', target_rotation='xyz', reward_type='sparse'):
def __init__(
self, target_position="random", target_rotation="xyz", reward_type="sparse"
):
utils.EzPickle.__init__(self, target_position, target_rotation, reward_type)
ManipulateEnv.__init__(self,
model_path=MANIPULATE_BLOCK_XML, target_position=target_position,
ManipulateEnv.__init__(
self,
model_path=MANIPULATE_BLOCK_XML,
target_position=target_position,
target_rotation=target_rotation,
target_position_range=np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)]),
reward_type=reward_type)
reward_type=reward_type,
)
class HandEggEnv(ManipulateEnv, utils.EzPickle):
def __init__(self, target_position='random', target_rotation='xyz', reward_type='sparse'):
def __init__(
self, target_position="random", target_rotation="xyz", reward_type="sparse"
):
utils.EzPickle.__init__(self, target_position, target_rotation, reward_type)
ManipulateEnv.__init__(self,
model_path=MANIPULATE_EGG_XML, target_position=target_position,
ManipulateEnv.__init__(
self,
model_path=MANIPULATE_EGG_XML,
target_position=target_position,
target_rotation=target_rotation,
target_position_range=np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)]),
reward_type=reward_type)
reward_type=reward_type,
)
class HandPenEnv(ManipulateEnv, utils.EzPickle):
def __init__(self, target_position='random', target_rotation='xyz', reward_type='sparse'):
def __init__(
self, target_position="random", target_rotation="xyz", reward_type="sparse"
):
utils.EzPickle.__init__(self, target_position, target_rotation, reward_type)
ManipulateEnv.__init__(self,
model_path=MANIPULATE_PEN_XML, target_position=target_position,
ManipulateEnv.__init__(
self,
model_path=MANIPULATE_PEN_XML,
target_position=target_position,
target_rotation=target_rotation,
target_position_range=np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)]),
randomize_initial_rotation=False, reward_type=reward_type,
ignore_z_target_rotation=True, distance_threshold=0.05)
randomize_initial_rotation=False,
reward_type=reward_type,
ignore_z_target_rotation=True,
distance_threshold=0.05,
)


@@ -5,18 +5,29 @@ from gym import utils, error, spaces
from gym.envs.robotics.hand import manipulate
# Ensure we get the path separator correct on windows
MANIPULATE_BLOCK_XML = os.path.join('hand', 'manipulate_block_touch_sensors.xml')
MANIPULATE_EGG_XML = os.path.join('hand', 'manipulate_egg_touch_sensors.xml')
MANIPULATE_PEN_XML = os.path.join('hand', 'manipulate_pen_touch_sensors.xml')
MANIPULATE_BLOCK_XML = os.path.join("hand", "manipulate_block_touch_sensors.xml")
MANIPULATE_EGG_XML = os.path.join("hand", "manipulate_egg_touch_sensors.xml")
MANIPULATE_PEN_XML = os.path.join("hand", "manipulate_pen_touch_sensors.xml")
class ManipulateTouchSensorsEnv(manipulate.ManipulateEnv):
def __init__(
self, model_path, target_position, target_rotation,
target_position_range, reward_type, initial_qpos={},
randomize_initial_position=True, randomize_initial_rotation=True,
distance_threshold=0.01, rotation_threshold=0.1, n_substeps=20, relative_control=False,
ignore_z_target_rotation=False, touch_visualisation="on_touch", touch_get_obs="sensordata",
self,
model_path,
target_position,
target_rotation,
target_position_range,
reward_type,
initial_qpos={},
randomize_initial_position=True,
randomize_initial_rotation=True,
distance_threshold=0.01,
rotation_threshold=0.1,
n_substeps=20,
relative_control=False,
ignore_z_target_rotation=False,
touch_visualisation="on_touch",
touch_get_obs="sensordata",
):
"""Initializes a new Hand manipulation environment with touch sensors.
@@ -40,34 +51,63 @@ class ManipulateTouchSensorsEnv(manipulate.ManipulateEnv):
self.notouch_color = [0, 0.5, 0, 0.2]
manipulate.ManipulateEnv.__init__(
self, model_path, target_position, target_rotation,
target_position_range, reward_type, initial_qpos=initial_qpos,
randomize_initial_position=randomize_initial_position, randomize_initial_rotation=randomize_initial_rotation,
distance_threshold=distance_threshold, rotation_threshold=rotation_threshold, n_substeps=n_substeps, relative_control=relative_control,
self,
model_path,
target_position,
target_rotation,
target_position_range,
reward_type,
initial_qpos=initial_qpos,
randomize_initial_position=randomize_initial_position,
randomize_initial_rotation=randomize_initial_rotation,
distance_threshold=distance_threshold,
rotation_threshold=rotation_threshold,
n_substeps=n_substeps,
relative_control=relative_control,
ignore_z_target_rotation=ignore_z_target_rotation,
)
for k, v in self.sim.model._sensor_name2id.items(): # get touch sensor site names and their ids
if 'robot0:TS_' in k:
self._touch_sensor_id_site_id.append((v, self.sim.model._site_name2id[k.replace('robot0:TS_', 'robot0:T_')]))
for (
k,
v,
) in (
self.sim.model._sensor_name2id.items()
): # get touch sensor site names and their ids
if "robot0:TS_" in k:
self._touch_sensor_id_site_id.append(
(
v,
self.sim.model._site_name2id[
k.replace("robot0:TS_", "robot0:T_")
],
)
)
self._touch_sensor_id.append(v)
if self.touch_visualisation == 'off': # set touch sensors rgba values
if self.touch_visualisation == "off": # set touch sensors rgba values
for _, site_id in self._touch_sensor_id_site_id:
self.sim.model.site_rgba[site_id][3] = 0.0
elif self.touch_visualisation == 'always':
elif self.touch_visualisation == "always":
pass
obs = self._get_obs()
self.observation_space = spaces.Dict(dict(
desired_goal=spaces.Box(-np.inf, np.inf, shape=obs['achieved_goal'].shape, dtype='float32'),
achieved_goal=spaces.Box(-np.inf, np.inf, shape=obs['achieved_goal'].shape, dtype='float32'),
observation=spaces.Box(-np.inf, np.inf, shape=obs['observation'].shape, dtype='float32'),
))
self.observation_space = spaces.Dict(
dict(
desired_goal=spaces.Box(
-np.inf, np.inf, shape=obs["achieved_goal"].shape, dtype="float32"
),
achieved_goal=spaces.Box(
-np.inf, np.inf, shape=obs["achieved_goal"].shape, dtype="float32"
),
observation=spaces.Box(
-np.inf, np.inf, shape=obs["observation"].shape, dtype="float32"
),
)
)
def _render_callback(self):
super(ManipulateTouchSensorsEnv, self)._render_callback()
if self.touch_visualisation == 'on_touch':
if self.touch_visualisation == "on_touch":
for touch_sensor_id, site_id in self._touch_sensor_id_site_id:
if self.sim.data.sensordata[touch_sensor_id] != 0.0:
self.sim.model.site_rgba[site_id] = self.touch_color
@@ -76,56 +116,92 @@ class ManipulateTouchSensorsEnv(manipulate.ManipulateEnv):
def _get_obs(self):
robot_qpos, robot_qvel = manipulate.robot_get_obs(self.sim)
object_qvel = self.sim.data.get_joint_qvel('object:joint')
achieved_goal = self._get_achieved_goal().ravel() # this contains the object position + rotation
object_qvel = self.sim.data.get_joint_qvel("object:joint")
achieved_goal = (
self._get_achieved_goal().ravel()
) # this contains the object position + rotation
touch_values = [] # get touch sensor readings. if there is one, set value to 1
if self.touch_get_obs == 'sensordata':
if self.touch_get_obs == "sensordata":
touch_values = self.sim.data.sensordata[self._touch_sensor_id]
elif self.touch_get_obs == 'boolean':
elif self.touch_get_obs == "boolean":
touch_values = self.sim.data.sensordata[self._touch_sensor_id] > 0.0
elif self.touch_get_obs == 'log':
elif self.touch_get_obs == "log":
touch_values = np.log(self.sim.data.sensordata[self._touch_sensor_id] + 1.0)
observation = np.concatenate([robot_qpos, robot_qvel, object_qvel, touch_values, achieved_goal])
observation = np.concatenate(
[robot_qpos, robot_qvel, object_qvel, touch_values, achieved_goal]
)
return {
'observation': observation.copy(),
'achieved_goal': achieved_goal.copy(),
'desired_goal': self.goal.ravel().copy(),
"observation": observation.copy(),
"achieved_goal": achieved_goal.copy(),
"desired_goal": self.goal.ravel().copy(),
}
class HandBlockTouchSensorsEnv(ManipulateTouchSensorsEnv, utils.EzPickle):
def __init__(self, target_position='random', target_rotation='xyz', touch_get_obs='sensordata', reward_type='sparse'):
utils.EzPickle.__init__(self, target_position, target_rotation, touch_get_obs, reward_type)
ManipulateTouchSensorsEnv.__init__(self,
def __init__(
self,
target_position="random",
target_rotation="xyz",
touch_get_obs="sensordata",
reward_type="sparse",
):
utils.EzPickle.__init__(
self, target_position, target_rotation, touch_get_obs, reward_type
)
ManipulateTouchSensorsEnv.__init__(
self,
model_path=MANIPULATE_BLOCK_XML,
touch_get_obs=touch_get_obs,
target_rotation=target_rotation,
target_position=target_position,
target_position_range=np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)]),
reward_type=reward_type)
reward_type=reward_type,
)
class HandEggTouchSensorsEnv(ManipulateTouchSensorsEnv, utils.EzPickle):
def __init__(self, target_position='random', target_rotation='xyz', touch_get_obs='sensordata', reward_type='sparse'):
utils.EzPickle.__init__(self, target_position, target_rotation, touch_get_obs, reward_type)
ManipulateTouchSensorsEnv.__init__(self,
def __init__(
self,
target_position="random",
target_rotation="xyz",
touch_get_obs="sensordata",
reward_type="sparse",
):
utils.EzPickle.__init__(
self, target_position, target_rotation, touch_get_obs, reward_type
)
ManipulateTouchSensorsEnv.__init__(
self,
model_path=MANIPULATE_EGG_XML,
touch_get_obs=touch_get_obs,
target_rotation=target_rotation,
target_position=target_position,
target_position_range=np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)]),
reward_type=reward_type)
reward_type=reward_type,
)
class HandPenTouchSensorsEnv(ManipulateTouchSensorsEnv, utils.EzPickle):
def __init__(self, target_position='random', target_rotation='xyz', touch_get_obs='sensordata', reward_type='sparse'):
utils.EzPickle.__init__(self, target_position, target_rotation, touch_get_obs, reward_type)
ManipulateTouchSensorsEnv.__init__(self,
def __init__(
self,
target_position="random",
target_rotation="xyz",
touch_get_obs="sensordata",
reward_type="sparse",
):
utils.EzPickle.__init__(
self, target_position, target_rotation, touch_get_obs, reward_type
)
ManipulateTouchSensorsEnv.__init__(
self,
model_path=MANIPULATE_PEN_XML,
touch_get_obs=touch_get_obs,
target_rotation=target_rotation,
target_position=target_position,
target_position_range=np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)]),
randomize_initial_rotation=False, reward_type=reward_type,
ignore_z_target_rotation=True, distance_threshold=0.05)
randomize_initial_rotation=False,
reward_type=reward_type,
ignore_z_target_rotation=True,
distance_threshold=0.05,
)


@@ -7,44 +7,44 @@ from gym.envs.robotics.utils import robot_get_obs
FINGERTIP_SITE_NAMES = [
'robot0:S_fftip',
'robot0:S_mftip',
'robot0:S_rftip',
'robot0:S_lftip',
'robot0:S_thtip',
"robot0:S_fftip",
"robot0:S_mftip",
"robot0:S_rftip",
"robot0:S_lftip",
"robot0:S_thtip",
]
DEFAULT_INITIAL_QPOS = {
'robot0:WRJ1': -0.16514339750464327,
'robot0:WRJ0': -0.31973286565062153,
'robot0:FFJ3': 0.14340512546557435,
'robot0:FFJ2': 0.32028208333591573,
'robot0:FFJ1': 0.7126053607727917,
'robot0:FFJ0': 0.6705281001412586,
'robot0:MFJ3': 0.000246444303701037,
'robot0:MFJ2': 0.3152655251085491,
'robot0:MFJ1': 0.7659800313729842,
'robot0:MFJ0': 0.7323156897425923,
'robot0:RFJ3': 0.00038520700007378114,
'robot0:RFJ2': 0.36743546201985233,
'robot0:RFJ1': 0.7119514095008576,
'robot0:RFJ0': 0.6699446327514138,
'robot0:LFJ4': 0.0525442258033891,
'robot0:LFJ3': -0.13615534724474673,
'robot0:LFJ2': 0.39872030433433003,
'robot0:LFJ1': 0.7415570009679252,
'robot0:LFJ0': 0.704096378652974,
'robot0:THJ4': 0.003673823825070126,
'robot0:THJ3': 0.5506291436028695,
'robot0:THJ2': -0.014515151997119306,
'robot0:THJ1': -0.0015229223564485414,
'robot0:THJ0': -0.7894883021600622,
"robot0:WRJ1": -0.16514339750464327,
"robot0:WRJ0": -0.31973286565062153,
"robot0:FFJ3": 0.14340512546557435,
"robot0:FFJ2": 0.32028208333591573,
"robot0:FFJ1": 0.7126053607727917,
"robot0:FFJ0": 0.6705281001412586,
"robot0:MFJ3": 0.000246444303701037,
"robot0:MFJ2": 0.3152655251085491,
"robot0:MFJ1": 0.7659800313729842,
"robot0:MFJ0": 0.7323156897425923,
"robot0:RFJ3": 0.00038520700007378114,
"robot0:RFJ2": 0.36743546201985233,
"robot0:RFJ1": 0.7119514095008576,
"robot0:RFJ0": 0.6699446327514138,
"robot0:LFJ4": 0.0525442258033891,
"robot0:LFJ3": -0.13615534724474673,
"robot0:LFJ2": 0.39872030433433003,
"robot0:LFJ1": 0.7415570009679252,
"robot0:LFJ0": 0.704096378652974,
"robot0:THJ4": 0.003673823825070126,
"robot0:THJ3": 0.5506291436028695,
"robot0:THJ2": -0.014515151997119306,
"robot0:THJ1": -0.0015229223564485414,
"robot0:THJ0": -0.7894883021600622,
}
# Ensure we get the path separator correct on windows
MODEL_XML_PATH = os.path.join('hand', 'reach.xml')
MODEL_XML_PATH = os.path.join("hand", "reach.xml")
def goal_distance(goal_a, goal_b):
@@ -54,16 +54,24 @@ def goal_distance(goal_a, goal_b):
class HandReachEnv(hand_env.HandEnv, utils.EzPickle):
def __init__(
self, distance_threshold=0.01, n_substeps=20, relative_control=False,
initial_qpos=DEFAULT_INITIAL_QPOS, reward_type='sparse',
self,
distance_threshold=0.01,
n_substeps=20,
relative_control=False,
initial_qpos=DEFAULT_INITIAL_QPOS,
reward_type="sparse",
):
utils.EzPickle.__init__(**locals())
self.distance_threshold = distance_threshold
self.reward_type = reward_type
hand_env.HandEnv.__init__(
self, MODEL_XML_PATH, n_substeps=n_substeps, initial_qpos=initial_qpos,
relative_control=relative_control)
self,
MODEL_XML_PATH,
n_substeps=n_substeps,
initial_qpos=initial_qpos,
relative_control=relative_control,
)
def _get_achieved_goal(self):
goal = [self.sim.data.get_site_xpos(name) for name in FINGERTIP_SITE_NAMES]
@@ -74,7 +82,7 @@ class HandReachEnv(hand_env.HandEnv, utils.EzPickle):
def compute_reward(self, achieved_goal, goal, info):
d = goal_distance(achieved_goal, goal)
if self.reward_type == 'sparse':
if self.reward_type == "sparse":
return -(d > self.distance_threshold).astype(np.float32)
else:
return -d
@@ -88,20 +96,22 @@ class HandReachEnv(hand_env.HandEnv, utils.EzPickle):
self.sim.forward()
self.initial_goal = self._get_achieved_goal().copy()
self.palm_xpos = self.sim.data.body_xpos[self.sim.model.body_name2id('robot0:palm')].copy()
self.palm_xpos = self.sim.data.body_xpos[
self.sim.model.body_name2id("robot0:palm")
].copy()
def _get_obs(self):
robot_qpos, robot_qvel = robot_get_obs(self.sim)
achieved_goal = self._get_achieved_goal().ravel()
observation = np.concatenate([robot_qpos, robot_qvel, achieved_goal])
return {
'observation': observation.copy(),
'achieved_goal': achieved_goal.copy(),
'desired_goal': self.goal.copy(),
"observation": observation.copy(),
"achieved_goal": achieved_goal.copy(),
"desired_goal": self.goal.copy(),
}
def _sample_goal(self):
thumb_name = 'robot0:S_thtip'
thumb_name = "robot0:S_thtip"
finger_names = [name for name in FINGERTIP_SITE_NAMES if name != thumb_name]
finger_name = self.np_random.choice(finger_names)
@@ -117,7 +127,7 @@ class HandReachEnv(hand_env.HandEnv, utils.EzPickle):
# overlap.
goal = self.initial_goal.copy().reshape(-1, 3)
for idx in [thumb_idx, finger_idx]:
offset_direction = (meeting_pos - goal[idx])
offset_direction = meeting_pos - goal[idx]
offset_direction /= np.linalg.norm(offset_direction)
goal[idx] = meeting_pos - 0.005 * offset_direction
@@ -136,14 +146,16 @@ class HandReachEnv(hand_env.HandEnv, utils.EzPickle):
sites_offset = (self.sim.data.site_xpos - self.sim.model.site_pos).copy()
goal = self.goal.reshape(5, 3)
for finger_idx in range(5):
site_name = 'target{}'.format(finger_idx)
site_name = "target{}".format(finger_idx)
site_id = self.sim.model.site_name2id(site_name)
self.sim.model.site_pos[site_id] = goal[finger_idx] - sites_offset[site_id]
# Visualize finger positions.
achieved_goal = self._get_achieved_goal().reshape(5, 3)
for finger_idx in range(5):
site_name = 'finger{}'.format(finger_idx)
site_name = "finger{}".format(finger_idx)
site_id = self.sim.model.site_name2id(site_name)
self.sim.model.site_pos[site_id] = achieved_goal[finger_idx] - sites_offset[site_id]
self.sim.model.site_pos[site_id] = (
achieved_goal[finger_idx] - sites_offset[site_id]
)
self.sim.forward()


@@ -13,8 +13,11 @@ class HandEnv(robot_env.RobotEnv):
self.relative_control = relative_control
super(HandEnv, self).__init__(
model_path=model_path, n_substeps=n_substeps, n_actions=20,
initial_qpos=initial_qpos)
model_path=model_path,
n_substeps=n_substeps,
n_actions=20,
initial_qpos=initial_qpos,
)
# RobotEnv methods
# ----------------------------
@@ -23,30 +26,35 @@ class HandEnv(robot_env.RobotEnv):
assert action.shape == (20,)
ctrlrange = self.sim.model.actuator_ctrlrange
actuation_range = (ctrlrange[:, 1] - ctrlrange[:, 0]) / 2.
actuation_range = (ctrlrange[:, 1] - ctrlrange[:, 0]) / 2.0
if self.relative_control:
actuation_center = np.zeros_like(action)
for i in range(self.sim.data.ctrl.shape[0]):
actuation_center[i] = self.sim.data.get_joint_qpos(
self.sim.model.actuator_names[i].replace(':A_', ':'))
for joint_name in ['FF', 'MF', 'RF', 'LF']:
self.sim.model.actuator_names[i].replace(":A_", ":")
)
for joint_name in ["FF", "MF", "RF", "LF"]:
act_idx = self.sim.model.actuator_name2id(
'robot0:A_{}J1'.format(joint_name))
"robot0:A_{}J1".format(joint_name)
)
actuation_center[act_idx] += self.sim.data.get_joint_qpos(
'robot0:{}J0'.format(joint_name))
"robot0:{}J0".format(joint_name)
)
else:
actuation_center = (ctrlrange[:, 1] + ctrlrange[:, 0]) / 2.
actuation_center = (ctrlrange[:, 1] + ctrlrange[:, 0]) / 2.0
self.sim.data.ctrl[:] = actuation_center + action * actuation_range
self.sim.data.ctrl[:] = np.clip(self.sim.data.ctrl, ctrlrange[:, 0], ctrlrange[:, 1])
self.sim.data.ctrl[:] = np.clip(
self.sim.data.ctrl, ctrlrange[:, 0], ctrlrange[:, 1]
)
def _viewer_setup(self):
body_id = self.sim.model.body_name2id('robot0:palm')
body_id = self.sim.model.body_name2id("robot0:palm")
lookat = self.sim.data.body_xpos[body_id]
for idx, value in enumerate(lookat):
self.viewer.cam.lookat[idx] = value
self.viewer.cam.distance = 0.5
self.viewer.cam.azimuth = 55.
self.viewer.cam.elevation = -25.
self.viewer.cam.azimuth = 55.0
self.viewer.cam.elevation = -25.0
def render(self, mode='human', width=500, height=500):
def render(self, mode="human", width=500, height=500):
return super(HandEnv, self).render(mode, width, height)


@@ -9,18 +9,23 @@ from gym.utils import seeding
try:
import mujoco_py
except ImportError as e:
raise error.DependencyNotInstalled("{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(e))
raise error.DependencyNotInstalled(
"{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(
e
)
)
DEFAULT_SIZE = 500
class RobotEnv(gym.GoalEnv):
def __init__(self, model_path, initial_qpos, n_actions, n_substeps):
if model_path.startswith('/'):
if model_path.startswith("/"):
fullpath = model_path
else:
fullpath = os.path.join(os.path.dirname(__file__), 'assets', model_path)
fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path)
if not os.path.exists(fullpath):
raise IOError('File {} does not exist'.format(fullpath))
raise IOError("File {} does not exist".format(fullpath))
model = mujoco_py.load_model_from_path(fullpath)
self.sim = mujoco_py.MjSim(model, nsubsteps=n_substeps)
@@ -28,8 +33,8 @@ class RobotEnv(gym.GoalEnv):
self._viewers = {}
self.metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second': int(np.round(1.0 / self.dt))
"render.modes": ["human", "rgb_array"],
"video.frames_per_second": int(np.round(1.0 / self.dt)),
}
self.seed()
@@ -38,12 +43,20 @@ class RobotEnv(gym.GoalEnv):
self.goal = self._sample_goal()
obs = self._get_obs()
self.action_space = spaces.Box(-1., 1., shape=(n_actions,), dtype='float32')
self.observation_space = spaces.Dict(dict(
desired_goal=spaces.Box(-np.inf, np.inf, shape=obs['achieved_goal'].shape, dtype='float32'),
achieved_goal=spaces.Box(-np.inf, np.inf, shape=obs['achieved_goal'].shape, dtype='float32'),
observation=spaces.Box(-np.inf, np.inf, shape=obs['observation'].shape, dtype='float32'),
))
self.action_space = spaces.Box(-1.0, 1.0, shape=(n_actions,), dtype="float32")
self.observation_space = spaces.Dict(
dict(
desired_goal=spaces.Box(
-np.inf, np.inf, shape=obs["achieved_goal"].shape, dtype="float32"
),
achieved_goal=spaces.Box(
-np.inf, np.inf, shape=obs["achieved_goal"].shape, dtype="float32"
),
observation=spaces.Box(
-np.inf, np.inf, shape=obs["observation"].shape, dtype="float32"
),
)
)
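# A minimal sketch of the dictionary observation these goal-based envs return,
# matching the Dict space constructed above (the shapes here are hypothetical).
import numpy as np

obs = {
    "observation": np.zeros(10, dtype=np.float32),
    "achieved_goal": np.zeros(3, dtype=np.float32),
    "desired_goal": np.zeros(3, dtype=np.float32),
}
# step() then computes the reward as compute_reward(obs["achieved_goal"],
# self.goal, info), as shown further below.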
@property
def dt(self):
@@ -65,9 +78,9 @@ class RobotEnv(gym.GoalEnv):
done = False
info = {
'is_success': self._is_success(obs['achieved_goal'], self.goal),
"is_success": self._is_success(obs["achieved_goal"], self.goal),
}
reward = self.compute_reward(obs['achieved_goal'], self.goal, info)
reward = self.compute_reward(obs["achieved_goal"], self.goal, info)
return obs, reward, done, info
def reset(self):
@@ -90,23 +103,23 @@ class RobotEnv(gym.GoalEnv):
self.viewer = None
self._viewers = {}
def render(self, mode='human', width=DEFAULT_SIZE, height=DEFAULT_SIZE):
def render(self, mode="human", width=DEFAULT_SIZE, height=DEFAULT_SIZE):
self._render_callback()
if mode == 'rgb_array':
if mode == "rgb_array":
self._get_viewer(mode).render(width, height)
# window size used for old mujoco-py:
data = self._get_viewer(mode).read_pixels(width, height, depth=False)
# original image is upside-down, so flip it
return data[::-1, :, :]
elif mode == 'human':
elif mode == "human":
self._get_viewer(mode).render()
def _get_viewer(self, mode):
self.viewer = self._viewers.get(mode)
if self.viewer is None:
if mode == 'human':
if mode == "human":
self.viewer = mujoco_py.MjViewer(self.sim)
elif mode == 'rgb_array':
elif mode == "rgb_array":
self.viewer = mujoco_py.MjRenderContextOffscreen(self.sim, device_id=-1)
self._viewer_setup()
self._viewers[mode] = self.viewer
@@ -126,23 +139,19 @@ class RobotEnv(gym.GoalEnv):
return True
def _get_obs(self):
"""Returns the observation.
"""
"""Returns the observation."""
raise NotImplementedError()
def _set_action(self, action):
"""Applies the given action to the simulation.
"""
"""Applies the given action to the simulation."""
raise NotImplementedError()
def _is_success(self, achieved_goal, desired_goal):
"""Indicates whether or not the achieved goal successfully achieved the desired goal.
"""
"""Indicates whether or not the achieved goal successfully achieved the desired goal."""
raise NotImplementedError()
def _sample_goal(self):
"""Samples a new goal and returns it.
"""
"""Samples a new goal and returns it."""
raise NotImplementedError()
def _env_setup(self, initial_qpos):


@@ -31,7 +31,7 @@
import numpy as np
import itertools
'''
"""
Rotations
=========
@@ -101,7 +101,7 @@ TODO / Missing
- Random sampling (e.g. sample uniform random rotation)
- Performance benchmarks/measurements
- (Maybe) define everything as to/from matricies, for simplicity
'''
"""
# For testing whether a number is close to zero
_FLOAT_EPS = np.finfo(np.float64).eps
@@ -109,7 +109,7 @@ _EPS4 = _FLOAT_EPS * 4.0
def euler2mat(euler):
""" Convert Euler Angles to Rotation Matrix. See rotation.py for notes """
"""Convert Euler Angles to Rotation Matrix. See rotation.py for notes"""
euler = np.asarray(euler, dtype=np.float64)
assert euler.shape[-1] == 3, "Invalid shaped euler {}".format(euler)
@@ -133,7 +133,7 @@ def euler2mat(euler):
def euler2quat(euler):
""" Convert Euler Angles to Quaternions. See rotation.py for notes """
"""Convert Euler Angles to Quaternions. See rotation.py for notes"""
euler = np.asarray(euler, dtype=np.float64)
assert euler.shape[-1] == 3, "Invalid shape euler {}".format(euler)
@@ -152,27 +152,29 @@ def euler2quat(euler):
def mat2euler(mat):
""" Convert Rotation Matrix to Euler Angles. See rotation.py for notes """
"""Convert Rotation Matrix to Euler Angles. See rotation.py for notes"""
mat = np.asarray(mat, dtype=np.float64)
assert mat.shape[-2:] == (3, 3), "Invalid shape matrix {}".format(mat)
cy = np.sqrt(mat[..., 2, 2] * mat[..., 2, 2] + mat[..., 1, 2] * mat[..., 1, 2])
condition = cy > _EPS4
euler = np.empty(mat.shape[:-1], dtype=np.float64)
euler[..., 2] = np.where(condition,
-np.arctan2(mat[..., 0, 1], mat[..., 0, 0]),
-np.arctan2(-mat[..., 1, 0], mat[..., 1, 1]))
euler[..., 1] = np.where(condition,
-np.arctan2(-mat[..., 0, 2], cy),
-np.arctan2(-mat[..., 0, 2], cy))
euler[..., 0] = np.where(condition,
-np.arctan2(mat[..., 1, 2], mat[..., 2, 2]),
0.0)
euler[..., 2] = np.where(
condition,
-np.arctan2(mat[..., 0, 1], mat[..., 0, 0]),
-np.arctan2(-mat[..., 1, 0], mat[..., 1, 1]),
)
euler[..., 1] = np.where(
condition, -np.arctan2(-mat[..., 0, 2], cy), -np.arctan2(-mat[..., 0, 2], cy)
)
euler[..., 0] = np.where(
condition, -np.arctan2(mat[..., 1, 2], mat[..., 2, 2]), 0.0
)
return euler
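# A tiny round-trip sketch for the two conversions above, assuming this module
# is importable as gym.envs.robotics.rotations and that the angles stay in the
# principal range, so the round trip should recover them.
import numpy as np
from gym.envs.robotics import rotations

euler = np.array([0.1, -0.2, 0.3])
recovered = rotations.mat2euler(rotations.euler2mat(euler))  # ~= [0.1, -0.2, 0.3]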
def mat2quat(mat):
""" Convert Rotation Matrix to Quaternion. See rotation.py for notes """
"""Convert Rotation Matrix to Quaternion. See rotation.py for notes"""
mat = np.asarray(mat, dtype=np.float64)
assert mat.shape[-2:] == (3, 3), "Invalid shape matrix {}".format(mat)
@@ -194,7 +196,7 @@ def mat2quat(mat):
K /= 3.0
# TODO: vectorize this -- probably could be made faster
q = np.empty(K.shape[:-2] + (4,))
it = np.nditer(q[..., 0], flags=['multi_index'])
it = np.nditer(q[..., 0], flags=["multi_index"])
while not it.finished:
# Use Hermitian eigenvectors, values for speed
vals, vecs = np.linalg.eigh(K[it.multi_index])
@@ -209,7 +211,7 @@ def mat2quat(mat):
def quat2euler(quat):
""" Convert Quaternion to Euler Angles. See rotation.py for notes """
"""Convert Quaternion to Euler Angles. See rotation.py for notes"""
return mat2euler(quat2mat(quat))
@@ -223,7 +225,7 @@ def subtract_euler(e1, e2):
def quat2mat(quat):
""" Convert Quaternion to Euler Angles. See rotation.py for notes """
"""Convert Quaternion to Euler Angles. See rotation.py for notes"""
quat = np.asarray(quat, dtype=np.float64)
assert quat.shape[-1] == 4, "Invalid shape quat {}".format(quat)
@@ -247,11 +249,13 @@ def quat2mat(quat):
mat[..., 2, 2] = 1.0 - (xX + yY)
return np.where((Nq > _FLOAT_EPS)[..., np.newaxis, np.newaxis], mat, np.eye(3))
def quat_conjugate(q):
inv_q = -q
inv_q[..., 0] *= -1
return inv_q
def quat_mul(q0, q1):
assert q0.shape == q1.shape
assert q0.shape[-1] == 4
@@ -277,83 +281,97 @@ def quat_mul(q0, q1):
assert q.shape == q0.shape
return q
def quat_rot_vec(q, v0):
q_v0 = np.array([0, v0[0], v0[1], v0[2]])
q_v = quat_mul(q, quat_mul(q_v0, quat_conjugate(q)))
v = q_v[1:]
return v
def quat_identity():
return np.array([1, 0, 0, 0])
def quat2axisangle(quat):
theta = 0;
axis = np.array([0, 0, 1]);
theta = 0
axis = np.array([0, 0, 1])
sin_theta = np.linalg.norm(quat[1:])
if (sin_theta > 0.0001):
if sin_theta > 0.0001:
theta = 2 * np.arcsin(sin_theta)
theta *= 1 if quat[0] >= 0 else -1
axis = quat[1:] / sin_theta
return axis, theta
def euler2point_euler(euler):
_euler = euler.copy()
if len(_euler.shape) < 2:
_euler = np.expand_dims(_euler,0)
assert(_euler.shape[1] == 3)
_euler = np.expand_dims(_euler, 0)
assert _euler.shape[1] == 3
_euler_sin = np.sin(_euler)
_euler_cos = np.cos(_euler)
return np.concatenate([_euler_sin, _euler_cos], axis=-1)
def point_euler2euler(euler):
_euler = euler.copy()
if len(_euler.shape) < 2:
_euler = np.expand_dims(_euler,0)
assert(_euler.shape[1] == 6)
_euler = np.expand_dims(_euler, 0)
assert _euler.shape[1] == 6
angle = np.arctan(_euler[..., :3] / _euler[..., 3:])
angle[_euler[..., 3:] < 0] += np.pi
return angle
def quat2point_quat(quat):
# Should be in qw, qx, qy, qz
_quat = quat.copy()
if len(_quat.shape) < 2:
_quat = np.expand_dims(_quat, 0)
assert(_quat.shape[1] == 4)
angle = np.arccos(_quat[:,[0]]) * 2
assert _quat.shape[1] == 4
angle = np.arccos(_quat[:, [0]]) * 2
xyz = _quat[:, 1:]
xyz[np.squeeze(np.abs(np.sin(angle/2))) >= 1e-5] = (xyz / np.sin(angle / 2))[np.squeeze(np.abs(np.sin(angle/2))) >= 1e-5]
return np.concatenate([np.sin(angle),np.cos(angle), xyz], axis=-1)
xyz[np.squeeze(np.abs(np.sin(angle / 2))) >= 1e-5] = (xyz / np.sin(angle / 2))[
np.squeeze(np.abs(np.sin(angle / 2))) >= 1e-5
]
return np.concatenate([np.sin(angle), np.cos(angle), xyz], axis=-1)
def point_quat2quat(quat):
_quat = quat.copy()
if len(_quat.shape) < 2:
_quat = np.expand_dims(_quat, 0)
assert(_quat.shape[1] == 5)
angle = np.arctan(_quat[:,[0]] / _quat[:,[1]])
assert _quat.shape[1] == 5
angle = np.arctan(_quat[:, [0]] / _quat[:, [1]])
qw = np.cos(angle / 2)
qxyz = _quat[:, 2:]
qxyz[np.squeeze(np.abs(np.sin(angle/2))) >= 1e-5] = (qxyz * np.sin(angle/2))[np.squeeze(np.abs(np.sin(angle/2))) >= 1e-5]
qxyz[np.squeeze(np.abs(np.sin(angle / 2))) >= 1e-5] = (qxyz * np.sin(angle / 2))[
np.squeeze(np.abs(np.sin(angle / 2))) >= 1e-5
]
return np.concatenate([qw, qxyz], axis=-1)
def normalize_angles(angles):
'''Puts angles in [-pi, pi] range.'''
"""Puts angles in [-pi, pi] range."""
angles = angles.copy()
if angles.size > 0:
angles = (angles + np.pi) % (2 * np.pi) - np.pi
assert -np.pi-1e-6 <= angles.min() and angles.max() <= np.pi+1e-6
assert -np.pi - 1e-6 <= angles.min() and angles.max() <= np.pi + 1e-6
return angles
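# A quick check of normalize_angles above (illustrative values): inputs outside
# [-pi, pi] wrap around into the range.
import numpy as np

angles = np.array([3.5, -4.0, 0.1])
wrapped = (angles + np.pi) % (2 * np.pi) - np.pi  # ~= [-2.78, 2.28, 0.1]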
def round_to_straight_angles(angles):
'''Returns closest angle modulo 90 degrees '''
"""Returns closest angle modulo 90 degrees"""
angles = np.round(angles / (np.pi / 2)) * (np.pi / 2)
return normalize_angles(angles)
def get_parallel_rotations():
mult90 = [0, np.pi/2, -np.pi/2, np.pi]
mult90 = [0, np.pi / 2, -np.pi / 2, np.pi]
parallel_rotations = []
for euler in itertools.product(mult90, repeat=3):
canonical = mat2euler(euler2mat(euler))


@@ -1,10 +1,15 @@
import numpy as np
from gym import error
try:
import mujoco_py
except ImportError as e:
raise error.DependencyNotInstalled("{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(e))
raise error.DependencyNotInstalled(
"{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(
e
)
)
def robot_get_obs(sim):
@@ -12,7 +17,7 @@ def robot_get_obs(sim):
a robot.
"""
if sim.data.qpos is not None and sim.model.joint_names:
names = [n for n in sim.model.joint_names if n.startswith('robot')]
names = [n for n in sim.model.joint_names if n.startswith("robot")]
return (
np.array([sim.data.get_joint_qpos(name) for name in names]),
np.array([sim.data.get_joint_qvel(name) for name in names]),
@@ -25,7 +30,7 @@ def ctrl_set_action(sim, action):
For position actuators it sets the target relative to the current qpos.
"""
if sim.model.nmocap > 0:
_, action = np.split(action, (sim.model.nmocap * 7, ))
_, action = np.split(action, (sim.model.nmocap * 7,))
if sim.data.ctrl is not None:
for i in range(action.shape[0]):
if sim.model.actuator_biastype[i] == 0:
@@ -45,7 +50,7 @@ def mocap_set_action(sim, action):
constraint optimizer tries to center the welded body on the mocap.
"""
if sim.model.nmocap > 0:
action, _ = np.split(action, (sim.model.nmocap * 7, ))
action, _ = np.split(action, (sim.model.nmocap * 7,))
action = action.reshape(sim.model.nmocap, 7)
pos_delta = action[:, :3]
@@ -57,13 +62,11 @@ def mocap_set_action(sim, action):
def reset_mocap_welds(sim):
"""Resets the mocap welds that we use for actuation.
"""
"""Resets the mocap welds that we use for actuation."""
if sim.model.nmocap > 0 and sim.model.eq_data is not None:
for i in range(sim.model.eq_data.shape[0]):
if sim.model.eq_type[i] == mujoco_py.const.EQ_WELD:
sim.model.eq_data[i, :] = np.array(
[0., 0., 0., 1., 0., 0., 0.])
sim.model.eq_data[i, :] = np.array([0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0])
sim.forward()
@@ -72,13 +75,15 @@ def reset_mocap2body_xpos(sim):
values as the bodies they're welded to.
"""
if (sim.model.eq_type is None or
sim.model.eq_obj1id is None or
sim.model.eq_obj2id is None):
if (
sim.model.eq_type is None
or sim.model.eq_obj1id is None
or sim.model.eq_obj2id is None
):
return
for eq_type, obj1_id, obj2_id in zip(sim.model.eq_type,
sim.model.eq_obj1id,
sim.model.eq_obj2id):
for eq_type, obj1_id, obj2_id in zip(
sim.model.eq_type, sim.model.eq_obj1id, sim.model.eq_obj2id
):
if eq_type != mujoco_py.const.EQ_WELD:
continue
@@ -91,6 +96,6 @@ def reset_mocap2body_xpos(sim):
mocap_id = sim.model.body_mocapid[obj2_id]
body_idx = obj1_id
assert (mocap_id != -1)
assert mocap_id != -1
sim.data.mocap_pos[mocap_id][:] = sim.data.body_xpos[body_idx]
sim.data.mocap_quat[mocap_id][:] = sim.data.body_xquat[body_idx]


@@ -4,40 +4,54 @@ import os
SKIP_MUJOCO_WARNING_MESSAGE = (
"Cannot run mujoco test (either license key not found or mujoco not"
"installed properly).")
"installed properly)."
)
skip_mujoco = not (os.environ.get('MUJOCO_KEY'))
skip_mujoco = not (os.environ.get("MUJOCO_KEY"))
if not skip_mujoco:
try:
import mujoco_py
except ImportError:
skip_mujoco = True
def should_skip_env_spec_for_tests(spec):
# We skip tests for envs that require dependencies or are otherwise
# troublesome to run frequently
ep = spec.entry_point
# Skip mujoco tests for pull request CI
if skip_mujoco and (ep.startswith('gym.envs.mujoco') or ep.startswith('gym.envs.robotics:')):
if skip_mujoco and (
ep.startswith("gym.envs.mujoco") or ep.startswith("gym.envs.robotics:")
):
return True
try:
import atari_py
except ImportError:
if ep.startswith('gym.envs.atari'):
if ep.startswith("gym.envs.atari"):
return True
try:
import Box2D
except ImportError:
if ep.startswith('gym.envs.box2d'):
if ep.startswith("gym.envs.box2d"):
return True
if ( 'GoEnv' in ep or
'HexEnv' in ep or
(ep.startswith("gym.envs.atari") and not spec.id.startswith("Pong") and not spec.id.startswith("Seaquest"))
if (
"GoEnv" in ep
or "HexEnv" in ep
or (
ep.startswith("gym.envs.atari")
and not spec.id.startswith("Pong")
and not spec.id.startswith("Seaquest")
)
):
logger.warn("Skipping tests for env {}".format(ep))
return True
return False
spec_list = [spec for spec in sorted(envs.registry.all(), key=lambda x: x.id) if spec.entry_point is not None and not should_skip_env_spec_for_tests(spec)]
spec_list = [
spec
for spec in sorted(envs.registry.all(), key=lambda x: x.id)
if spec.entry_point is not None and not should_skip_env_spec_for_tests(spec)
]
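For context, the `spec_list` built above is just a filtered, sorted view of the registry. A minimal sketch of enumerating the registered specs the same way (assuming a gym version from around this commit, where `envs.registry.all()` is available):

```python
from gym import envs

# Enumerate registered environment specs, sorted by id, as spec_list does
all_specs = sorted(envs.registry.all(), key=lambda s: s.id)
print(len(all_specs), "registered envs")
print([s.id for s in all_specs[:5]])  # a few ids, before any skipping logic
```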

View File

@@ -3,6 +3,7 @@ import pytest
from gym.envs.tests.spec_list import spec_list
@pytest.mark.parametrize("spec", spec_list)
def test_env(spec):
# Note that this precludes running this test in multiple
@@ -24,15 +25,21 @@ def test_env(spec):
step_responses2 = [env2.step(action) for action in action_samples2]
env2.close()
for i, (action_sample1, action_sample2) in enumerate(zip(action_samples1, action_samples2)):
for i, (action_sample1, action_sample2) in enumerate(
zip(action_samples1, action_samples2)
):
try:
assert_equals(action_sample1, action_sample2)
except AssertionError:
print('env1.action_space=', env1.action_space)
print('env2.action_space=', env2.action_space)
print('action_samples1=', action_samples1)
print('action_samples2=', action_samples2)
print('[{}] action_sample1: {}, action_sample2: {}'.format(i, action_sample1, action_sample2))
print("env1.action_space=", env1.action_space)
print("env2.action_space=", env2.action_space)
print("action_samples1=", action_samples1)
print("action_samples2=", action_samples2)
print(
"[{}] action_sample1: {}, action_sample2: {}".format(
i, action_sample1, action_sample2
)
)
raise
    # Don't check rollout equality if it's a nondeterministic
@@ -42,21 +49,26 @@ def test_env(spec):
assert_equals(initial_observation1, initial_observation2)
for i, ((o1, r1, d1, i1), (o2, r2, d2, i2)) in enumerate(zip(step_responses1, step_responses2)):
assert_equals(o1, o2, '[{}] '.format(i))
assert r1 == r2, '[{}] r1: {}, r2: {}'.format(i, r1, r2)
assert d1 == d2, '[{}] d1: {}, d2: {}'.format(i, d1, d2)
for i, ((o1, r1, d1, i1), (o2, r2, d2, i2)) in enumerate(
zip(step_responses1, step_responses2)
):
assert_equals(o1, o2, "[{}] ".format(i))
assert r1 == r2, "[{}] r1: {}, r2: {}".format(i, r1, r2)
assert d1 == d2, "[{}] d1: {}, d2: {}".format(i, d1, d2)
# Go returns a Pachi game board in info, which doesn't
# properly check equality. For now, we hack around this by
# just skipping Go.
if spec.id not in ['Go9x9-v0', 'Go19x19-v0']:
assert_equals(i1, i2, '[{}] '.format(i))
if spec.id not in ["Go9x9-v0", "Go19x19-v0"]:
assert_equals(i1, i2, "[{}] ".format(i))
def assert_equals(a, b, prefix=None):
assert type(a) == type(b), "{}Differing types: {} and {}".format(prefix, a, b)
if isinstance(a, dict):
assert list(a.keys()) == list(b.keys()), "{}Key sets differ: {} and {}".format(prefix, a, b)
assert list(a.keys()) == list(b.keys()), "{}Key sets differ: {} and {}".format(
prefix, a, b
)
for k in a.keys():
v_a = a[k]

View File

@@ -4,6 +4,7 @@ import numpy as np
from gym import envs
from gym.envs.tests.spec_list import spec_list
# This runs a smoketest on each official registered env. We may want
# to try also running environments which are not officially registered
# envs.
@@ -15,30 +16,33 @@ def test_env(spec):
# Check that dtype is explicitly declared for gym.Box spaces
for warning_msg in warnings:
assert not 'autodetected dtype' in str(warning_msg.message)
assert "autodetected dtype" not in str(warning_msg.message)
ob_space = env.observation_space
act_space = env.action_space
ob = env.reset()
assert ob_space.contains(ob), 'Reset observation: {!r} not in space'.format(ob)
assert ob_space.contains(ob), "Reset observation: {!r} not in space".format(ob)
a = act_space.sample()
observation, reward, done, _info = env.step(a)
assert ob_space.contains(observation), 'Step observation: {!r} not in space'.format(observation)
assert ob_space.contains(observation), "Step observation: {!r} not in space".format(
observation
)
assert np.isscalar(reward), "{} is not a scalar for {}".format(reward, env)
assert isinstance(done, bool), "Expected {} to be a boolean".format(done)
for mode in env.metadata.get('render.modes', []):
for mode in env.metadata.get("render.modes", []):
env.render(mode=mode)
# Make sure we can render the environment after close.
for mode in env.metadata.get('render.modes', []):
for mode in env.metadata.get("render.modes", []):
env.render(mode=mode)
env.close()
# Run a longer rollout on some environments
def test_random_rollout():
for env in [envs.make('CartPole-v0'), envs.make('FrozenLake-v0')]:
for env in [envs.make("CartPole-v0"), envs.make("FrozenLake-v0")]:
agent = lambda ob: env.action_space.sample()
ob = env.reset()
for _ in range(10):
@@ -46,19 +50,20 @@ def test_random_rollout():
a = agent(ob)
assert env.action_space.contains(a)
(ob, _reward, done, _info) = env.step(a)
if done: break
if done:
break
env.close()
def test_env_render_result_is_immutable():
environs = [
envs.make('Taxi-v3'),
envs.make('FrozenLake-v0'),
envs.make('Reverse-v0'),
envs.make("Taxi-v3"),
envs.make("FrozenLake-v0"),
envs.make("Reverse-v0"),
]
for env in environs:
env.reset()
output = env.render(mode='ansi')
output = env.render(mode="ansi")
assert isinstance(output, str)
env.close()
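The rollout pattern exercised by `test_random_rollout` is the standard reset/step loop; a minimal stand-alone sketch (using a classic-control env that ships with gym):

```python
import gym

env = gym.make("CartPole-v0")
ob = env.reset()
for _ in range(10):
    action = env.action_space.sample()          # random agent, as in the test
    ob, reward, done, info = env.step(action)
    if done:
        break
env.close()
```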

View File

@@ -17,14 +17,18 @@ ROLLOUT_STEPS = 100
episodes = ROLLOUT_STEPS
steps = ROLLOUT_STEPS
ROLLOUT_FILE = os.path.join(DATA_DIR, 'rollout.json')
ROLLOUT_FILE = os.path.join(DATA_DIR, "rollout.json")
if not os.path.isfile(ROLLOUT_FILE):
with open(ROLLOUT_FILE, "w") as outfile:
json.dump({}, outfile, indent=2)
def hash_object(unhashed):
return hashlib.sha256(str(unhashed).encode('utf-16')).hexdigest() # This is really bad, str could be same while values change
return hashlib.sha256(
str(unhashed).encode("utf-16")
).hexdigest() # This is really bad, str could be same while values change
def generate_rollout_hash(spec):
spaces.seed(0)
@@ -38,7 +42,8 @@ def generate_rollout_hash(spec):
total_steps = 0
for episode in range(episodes):
if total_steps >= ROLLOUT_STEPS: break
if total_steps >= ROLLOUT_STEPS:
break
observation = env.reset()
for step in range(steps):
@@ -51,9 +56,11 @@ def generate_rollout_hash(spec):
done_list.append(done)
total_steps += 1
if total_steps >= ROLLOUT_STEPS: break
if total_steps >= ROLLOUT_STEPS:
break
if done: break
if done:
break
observations_hash = hash_object(observation_list)
actions_hash = hash_object(action_list)
@@ -63,6 +70,7 @@ def generate_rollout_hash(spec):
env.close()
return observations_hash, actions_hash, rewards_hash, dones_hash
@pytest.mark.parametrize("spec", spec_list)
def test_env_semantics(spec):
logger.warn("Skipping this test. Existing hashes were generated in a bad way")
@@ -72,7 +80,11 @@ def test_env_semantics(spec):
if spec.id not in rollout_dict:
if not spec.nondeterministic:
logger.warn("Rollout does not exist for {}, run generate_json.py to generate rollouts for new envs".format(spec.id))
logger.warn(
"Rollout does not exist for {}, run generate_json.py to generate rollouts for new envs".format(
spec.id
)
)
return
logger.info("Testing rollout for {} environment...".format(spec.id))
@@ -80,14 +92,30 @@ def test_env_semantics(spec):
observations_now, actions_now, rewards_now, dones_now = generate_rollout_hash(spec)
errors = []
if rollout_dict[spec.id]['observations'] != observations_now:
errors.append('Observations not equal for {} -- expected {} but got {}'.format(spec.id, rollout_dict[spec.id]['observations'], observations_now))
if rollout_dict[spec.id]['actions'] != actions_now:
errors.append('Actions not equal for {} -- expected {} but got {}'.format(spec.id, rollout_dict[spec.id]['actions'], actions_now))
if rollout_dict[spec.id]['rewards'] != rewards_now:
errors.append('Rewards not equal for {} -- expected {} but got {}'.format(spec.id, rollout_dict[spec.id]['rewards'], rewards_now))
if rollout_dict[spec.id]['dones'] != dones_now:
errors.append('Dones not equal for {} -- expected {} but got {}'.format(spec.id, rollout_dict[spec.id]['dones'], dones_now))
if rollout_dict[spec.id]["observations"] != observations_now:
errors.append(
"Observations not equal for {} -- expected {} but got {}".format(
spec.id, rollout_dict[spec.id]["observations"], observations_now
)
)
if rollout_dict[spec.id]["actions"] != actions_now:
errors.append(
"Actions not equal for {} -- expected {} but got {}".format(
spec.id, rollout_dict[spec.id]["actions"], actions_now
)
)
if rollout_dict[spec.id]["rewards"] != rewards_now:
errors.append(
"Rewards not equal for {} -- expected {} but got {}".format(
spec.id, rollout_dict[spec.id]["rewards"], rewards_now
)
)
if rollout_dict[spec.id]["dones"] != dones_now:
errors.append(
"Dones not equal for {} -- expected {} but got {}".format(
spec.id, rollout_dict[spec.id]["dones"], dones_now
)
)
if len(errors):
for error in errors:
logger.warn(error)
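The `hash_object` helper above hashes `str(...)` of the rollout lists; a small sketch of why its inline comment calls this "really bad" (numpy truncates long array reprs, so arrays with different values can produce identical strings and therefore identical hashes):

```python
import hashlib

import numpy as np


def hash_object(unhashed):
    # Same idea as the helper above: hash the str() of an arbitrary object
    return hashlib.sha256(str(unhashed).encode("utf-16")).hexdigest()


a = np.zeros(10000)
b = np.zeros(10000)
b[5000] = 1.0                                # differs only in the elided middle
print(str(a) == str(b))                      # True: repr shows "[0. 0. 0. ... 0. 0. 0.]"
print(hash_object(a) == hash_object(b))      # True, even though a != b
```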

View File

@@ -3,25 +3,25 @@ import numpy as np
from gym.envs.toy_text.frozen_lake import generate_random_map
# Test that FrozenLake map generation creates valid maps of various sizes.
def test_frozenlake_dfs_map_generation():
def frozenlake_dfs_path_exists(res):
frontier, discovered = [], set()
frontier.append((0,0))
frontier.append((0, 0))
while frontier:
r, c = frontier.pop()
if not (r,c) in discovered:
discovered.add((r,c))
if not (r, c) in discovered:
discovered.add((r, c))
directions = [(1, 0), (0, 1), (-1, 0), (0, -1)]
for x, y in directions:
r_new = r + x
c_new = c + y
if r_new < 0 or r_new >= size or c_new < 0 or c_new >= size:
continue
if res[r_new][c_new] == 'G':
if res[r_new][c_new] == "G":
return True
if (res[r_new][c_new] not in '#H'):
if res[r_new][c_new] not in "#H":
frontier.append((r_new, c_new))
return False
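`generate_random_map` can also be used directly outside the test; a small sketch (size and hole probability are illustrative values):

```python
from gym.envs.toy_text.frozen_lake import FrozenLakeEnv, generate_random_map

random_map = generate_random_map(size=8, p=0.8)   # list of 8 strings over S/F/H/G
print("\n".join(random_map))
env = FrozenLakeEnv(desc=random_map, is_slippery=False)
env.reset()
env.render()
```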

View File

@@ -4,10 +4,9 @@ from gym import envs
from gym.envs.tests.spec_list import skip_mujoco, SKIP_MUJOCO_WARNING_MESSAGE
def verify_environments_match(old_environment_id,
new_environment_id,
seed=1,
num_actions=1000):
def verify_environments_match(
old_environment_id, new_environment_id, seed=1, num_actions=1000
):
old_environment = envs.make(old_environment_id)
new_environment = envs.make(new_environment_id)
@@ -21,10 +20,8 @@ def verify_environments_match(old_environment_id,
for i in range(num_actions):
action = old_environment.action_space.sample()
old_observation, old_reward, old_done, old_info = old_environment.step(
action)
new_observation, new_reward, new_done, new_info = new_environment.step(
action)
old_observation, old_reward, old_done, old_info = old_environment.step(action)
new_observation, new_reward, new_done, new_info = new_environment.step(action)
eps = 1e-6
np.testing.assert_allclose(old_observation, new_observation, atol=eps)
@@ -39,47 +36,29 @@ def verify_environments_match(old_environment_id,
class Mujocov2Tov3ConversionTest(unittest.TestCase):
def test_environments_match(self):
test_cases = (
{
'old_id': 'Swimmer-v2',
'new_id': 'Swimmer-v3'
},
{
'old_id': 'Hopper-v2',
'new_id': 'Hopper-v3'
},
{
'old_id': 'Walker2d-v2',
'new_id': 'Walker2d-v3'
},
{
'old_id': 'HalfCheetah-v2',
'new_id': 'HalfCheetah-v3'
},
{
'old_id': 'Ant-v2',
'new_id': 'Ant-v3'
},
{
'old_id': 'Humanoid-v2',
'new_id': 'Humanoid-v3'
},
{"old_id": "Swimmer-v2", "new_id": "Swimmer-v3"},
{"old_id": "Hopper-v2", "new_id": "Hopper-v3"},
{"old_id": "Walker2d-v2", "new_id": "Walker2d-v3"},
{"old_id": "HalfCheetah-v2", "new_id": "HalfCheetah-v3"},
{"old_id": "Ant-v2", "new_id": "Ant-v3"},
{"old_id": "Humanoid-v2", "new_id": "Humanoid-v3"},
)
for test_case in test_cases:
verify_environments_match(test_case['old_id'], test_case['new_id'])
verify_environments_match(test_case["old_id"], test_case["new_id"])
# Raises KeyError because the new envs have extra info
with self.assertRaises(KeyError):
verify_environments_match('Swimmer-v3', 'Swimmer-v2')
verify_environments_match("Swimmer-v3", "Swimmer-v2")
# Raises KeyError because the new envs have extra info
with self.assertRaises(KeyError):
verify_environments_match('Humanoid-v3', 'Humanoid-v2')
verify_environments_match("Humanoid-v3", "Humanoid-v2")
# Raises KeyError because the new envs have extra info
with self.assertRaises(KeyError):
verify_environments_match('Swimmer-v3', 'Swimmer-v2')
verify_environments_match("Swimmer-v3", "Swimmer-v2")
if __name__ == '__main__':
if __name__ == "__main__":
unittest.main()

View File

@@ -4,76 +4,87 @@ from gym import error, envs
from gym.envs import registration
from gym.envs.classic_control import cartpole
class ArgumentEnv(gym.Env):
def __init__(self, arg1, arg2, arg3):
self.arg1 = arg1
self.arg2 = arg2
self.arg3 = arg3
gym.register(
id='test.ArgumentEnv-v0',
entry_point='gym.envs.tests.test_registration:ArgumentEnv',
id="test.ArgumentEnv-v0",
entry_point="gym.envs.tests.test_registration:ArgumentEnv",
kwargs={
'arg1': 'arg1',
'arg2': 'arg2',
}
"arg1": "arg1",
"arg2": "arg2",
},
)
def test_make():
env = envs.make('CartPole-v0')
assert env.spec.id == 'CartPole-v0'
env = envs.make("CartPole-v0")
assert env.spec.id == "CartPole-v0"
assert isinstance(env.unwrapped, cartpole.CartPoleEnv)
def test_make_with_kwargs():
env = envs.make('test.ArgumentEnv-v0', arg2='override_arg2', arg3='override_arg3')
assert env.spec.id == 'test.ArgumentEnv-v0'
env = envs.make("test.ArgumentEnv-v0", arg2="override_arg2", arg3="override_arg3")
assert env.spec.id == "test.ArgumentEnv-v0"
assert isinstance(env.unwrapped, ArgumentEnv)
assert env.arg1 == 'arg1'
assert env.arg2 == 'override_arg2'
assert env.arg3 == 'override_arg3'
assert env.arg1 == "arg1"
assert env.arg2 == "override_arg2"
assert env.arg3 == "override_arg3"
def test_make_deprecated():
try:
envs.make('Humanoid-v0')
envs.make("Humanoid-v0")
except error.Error:
pass
else:
assert False
def test_spec():
spec = envs.spec('CartPole-v0')
assert spec.id == 'CartPole-v0'
spec = envs.spec("CartPole-v0")
assert spec.id == "CartPole-v0"
def test_spec_with_kwargs():
map_name_value = '8x8'
env = gym.make('FrozenLake-v0', map_name=map_name_value)
assert env.spec._kwargs['map_name'] == map_name_value
map_name_value = "8x8"
env = gym.make("FrozenLake-v0", map_name=map_name_value)
assert env.spec._kwargs["map_name"] == map_name_value
def test_missing_lookup():
registry = registration.EnvRegistry()
registry.register(id='Test-v0', entry_point=None)
registry.register(id='Test-v15', entry_point=None)
registry.register(id='Test-v9', entry_point=None)
registry.register(id='Other-v100', entry_point=None)
registry.register(id="Test-v0", entry_point=None)
registry.register(id="Test-v15", entry_point=None)
registry.register(id="Test-v9", entry_point=None)
registry.register(id="Other-v100", entry_point=None)
try:
registry.spec('Test-v1') # must match an env name but not the version above
registry.spec("Test-v1") # must match an env name but not the version above
except error.DeprecatedEnv:
pass
else:
assert False
try:
registry.spec('Unknown-v1')
registry.spec("Unknown-v1")
except error.UnregisteredEnv:
pass
else:
assert False
def test_malformed_lookup():
registry = registration.EnvRegistry()
try:
registry.spec(u'“Breakout-v0”')
registry.spec(u"“Breakout-v0”")
except error.Error as e:
assert 'malformed environment ID' in '{}'.format(e), 'Unexpected message: {}'.format(e)
assert "malformed environment ID" in "{}".format(
e
), "Unexpected message: {}".format(e)
else:
assert False

View File

@@ -8,5 +8,3 @@ from gym.envs.toy_text.kellycoinflip import KellyCoinflipEnv
from gym.envs.toy_text.kellycoinflip import KellyCoinflipGeneralizedEnv
from gym.envs.toy_text.cliffwalking import CliffWalkingEnv
from gym.envs.toy_text.taxi import TaxiEnv
from gym.envs.toy_text.guessing_game import GuessingGame
from gym.envs.toy_text.hotter_colder import HotterColder

View File

@@ -72,12 +72,12 @@ class BlackjackEnv(gym.Env):
by Sutton and Barto.
http://incompleteideas.net/book/the-book-2nd.html
"""
def __init__(self, natural=False):
self.action_space = spaces.Discrete(2)
self.observation_space = spaces.Tuple((
spaces.Discrete(32),
spaces.Discrete(11),
spaces.Discrete(2)))
self.observation_space = spaces.Tuple(
(spaces.Discrete(32), spaces.Discrete(11), spaces.Discrete(2))
)
self.seed()
# Flag to payout 1.5 on a "natural" blackjack win, like casino rules
@@ -96,16 +96,16 @@ class BlackjackEnv(gym.Env):
self.player.append(draw_card(self.np_random))
if is_bust(self.player):
done = True
reward = -1.
reward = -1.0
else:
done = False
reward = 0.
reward = 0.0
else: # stick: play out the dealers hand, and score
done = True
while sum_hand(self.dealer) < 17:
self.dealer.append(draw_card(self.np_random))
reward = cmp(score(self.player), score(self.dealer))
if self.natural and is_natural(self.player) and reward == 1.:
if self.natural and is_natural(self.player) and reward == 1.0:
reward = 1.5
return self._get_obs(), reward, done, {}
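A minimal sketch of playing the environment above with a fixed stick-on-17 policy ("Blackjack-v0" is the id this class is registered under):

```python
import gym

env = gym.make("Blackjack-v0")
obs = env.reset()                        # (player_sum, dealer_showing, usable_ace)
done = False
while not done:
    action = 0 if obs[0] >= 17 else 1    # 0 = stick, 1 = hit
    obs, reward, done, info = env.step(action)
print("final reward:", reward)           # +1 win, 0 draw, -1 loss (1.5 for a natural if enabled)
env.close()
```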

View File

@@ -30,7 +30,8 @@ class CliffWalkingEnv(discrete.DiscreteEnv):
Each time step incurs -1 reward, and stepping into the cliff incurs -100 reward
and a reset to the start. An episode terminates when the agent reaches the goal.
"""
metadata = {'render.modes': ['human', 'ansi']}
metadata = {"render.modes": ["human", "ansi"]}
def __init__(self):
self.shape = (4, 12)
@@ -89,8 +90,8 @@ class CliffWalkingEnv(discrete.DiscreteEnv):
is_done = tuple(new_position) == terminal_state
return [(1.0, new_state, -1, is_done)]
def render(self, mode='human'):
outfile = StringIO() if mode == 'ansi' else sys.stdout
def render(self, mode="human"):
outfile = StringIO() if mode == "ansi" else sys.stdout
for s in range(self.nS):
position = np.unravel_index(s, self.shape)
@@ -108,12 +109,12 @@ class CliffWalkingEnv(discrete.DiscreteEnv):
output = output.lstrip()
if position[1] == self.shape[1] - 1:
output = output.rstrip()
output += '\n'
output += "\n"
outfile.write(output)
outfile.write('\n')
outfile.write("\n")
# No need to return anything for human
if mode != 'human':
if mode != "human":
with closing(outfile):
return outfile.getvalue()
return outfile.getvalue()

View File

@@ -29,6 +29,7 @@ class DiscreteEnv(Env):
"""
def __init__(self, nS, nA, P, isd):
self.P = P
self.isd = isd

View File

@@ -13,12 +13,7 @@ RIGHT = 2
UP = 3
MAPS = {
"4x4": [
"SFFF",
"FHFH",
"FFFH",
"HFFG"
],
"4x4": ["SFFF", "FHFH", "FFFH", "HFFG"],
"8x8": [
"SFFFFFFF",
"FFFFFFFF",
@@ -27,7 +22,7 @@ MAPS = {
"FFFHFFFF",
"FHHFFFHF",
"FHFFHFHF",
"FFFHFFFG"
"FFFHFFFG",
],
}
@@ -53,17 +48,17 @@ def generate_random_map(size=8, p=0.8):
c_new = c + y
if r_new < 0 or r_new >= size or c_new < 0 or c_new >= size:
continue
if res[r_new][c_new] == 'G':
if res[r_new][c_new] == "G":
return True
if (res[r_new][c_new] != 'H'):
if res[r_new][c_new] != "H":
frontier.append((r_new, c_new))
return False
while not valid:
p = min(1, p)
res = np.random.choice(['F', 'H'], (size, size), p=[p, 1-p])
res[0][0] = 'S'
res[-1][-1] = 'G'
res = np.random.choice(["F", "H"], (size, size), p=[p, 1 - p])
res[0][0] = "S"
res[-1][-1] = "G"
valid = is_valid(res)
return ["".join(x) for x in res]
@@ -94,27 +89,27 @@ class FrozenLakeEnv(discrete.DiscreteEnv):
You receive a reward of 1 if you reach the goal, and zero otherwise.
"""
metadata = {'render.modes': ['human', 'ansi']}
metadata = {"render.modes": ["human", "ansi"]}
def __init__(self, desc=None, map_name="4x4", is_slippery=True):
if desc is None and map_name is None:
desc = generate_random_map()
elif desc is None:
desc = MAPS[map_name]
self.desc = desc = np.asarray(desc, dtype='c')
self.desc = desc = np.asarray(desc, dtype="c")
self.nrow, self.ncol = nrow, ncol = desc.shape
self.reward_range = (0, 1)
nA = 4
nS = nrow * ncol
isd = np.array(desc == b'S').astype('float64').ravel()
isd = np.array(desc == b"S").astype("float64").ravel()
isd /= isd.sum()
P = {s: {a: [] for a in range(nA)} for s in range(nS)}
def to_s(row, col):
return row*ncol + col
return row * ncol + col
def inc(row, col, a):
if a == LEFT:
@@ -131,8 +126,8 @@ class FrozenLakeEnv(discrete.DiscreteEnv):
newrow, newcol = inc(row, col, action)
newstate = to_s(newrow, newcol)
newletter = desc[newrow, newcol]
done = bytes(newletter) in b'GH'
reward = float(newletter == b'G')
done = bytes(newletter) in b"GH"
reward = float(newletter == b"G")
return newstate, reward, done
for row in range(nrow):
@@ -141,36 +136,34 @@ class FrozenLakeEnv(discrete.DiscreteEnv):
for a in range(4):
li = P[s][a]
letter = desc[row, col]
if letter in b'GH':
if letter in b"GH":
li.append((1.0, s, 0, True))
else:
if is_slippery:
for b in [(a - 1) % 4, a, (a + 1) % 4]:
li.append((
1. / 3.,
*update_probability_matrix(row, col, b)
))
li.append(
(1.0 / 3.0, *update_probability_matrix(row, col, b))
)
else:
li.append((
1., *update_probability_matrix(row, col, a)
))
li.append((1.0, *update_probability_matrix(row, col, a)))
super(FrozenLakeEnv, self).__init__(nS, nA, P, isd)
def render(self, mode='human'):
outfile = StringIO() if mode == 'ansi' else sys.stdout
def render(self, mode="human"):
outfile = StringIO() if mode == "ansi" else sys.stdout
row, col = self.s // self.ncol, self.s % self.ncol
desc = self.desc.tolist()
desc = [[c.decode('utf-8') for c in line] for line in desc]
desc = [[c.decode("utf-8") for c in line] for line in desc]
desc[row][col] = utils.colorize(desc[row][col], "red", highlight=True)
if self.lastaction is not None:
outfile.write(" ({})\n".format(
["Left", "Down", "Right", "Up"][self.lastaction]))
outfile.write(
" ({})\n".format(["Left", "Down", "Right", "Up"][self.lastaction])
)
else:
outfile.write("\n")
outfile.write("\n".join(''.join(line) for line in desc)+"\n")
outfile.write("\n".join("".join(line) for line in desc) + "\n")
if mode != 'human':
if mode != "human":
with closing(outfile):
return outfile.getvalue()

View File

@@ -37,12 +37,15 @@ class GuessingGame(gym.Env):
The perfect agent would likely learn the bounds of the action space (without referring
    to them explicitly) and then follow binary tree style exploration towards the goal number
"""
def __init__(self):
self.range = 1000 # Randomly selected number is within +/- this value
self.bounds = 10000
self.action_space = spaces.Box(low=np.array([-self.bounds]).astype(np.float32) ,
high=np.array([self.bounds]).astype(np.float32) )
self.action_space = spaces.Box(
low=np.array([-self.bounds]).astype(np.float32),
high=np.array([self.bounds]).astype(np.float32),
)
self.observation_space = spaces.Discrete(4)
self.number = 0
@@ -77,7 +80,11 @@ class GuessingGame(gym.Env):
reward = 0
done = False
if (self.number - self.range * 0.01) < action < (self.number + self.range * 0.01):
if (
(self.number - self.range * 0.01)
< action
< (self.number + self.range * 0.01)
):
reward = 1
done = True
@@ -85,7 +92,12 @@ class GuessingGame(gym.Env):
if self.guess_count >= self.guess_max:
done = True
return self.observation, reward, done, {"number": self.number, "guesses": self.guess_count}
return (
self.observation,
reward,
done,
{"number": self.number, "guesses": self.guess_count},
)
def reset(self):
self.number = self.np_random.uniform(-self.range, self.range)

View File

@@ -22,11 +22,15 @@ class HotterColder(gym.Env):
    increase the rate at which it guesses in that direction until the reward reaches
its maximum
"""
def __init__(self):
self.range = 1000 # +/- the value number can be between
self.bounds = 2000 # Action space bounds
self.action_space = spaces.Box(low=np.array([-self.bounds]).astype(np.float32), high=np.array([self.bounds]).astype(np.float32))
self.action_space = spaces.Box(
low=np.array([-self.bounds]).astype(np.float32),
high=np.array([self.bounds]).astype(np.float32),
)
self.observation_space = spaces.Discrete(4)
self.number = 0
@@ -58,12 +62,20 @@ class HotterColder(gym.Env):
elif action > self.number:
self.observation = 3
reward = ((min(action, self.number) + self.bounds) / (max(action, self.number) + self.bounds)) ** 2
reward = (
(min(action, self.number) + self.bounds)
/ (max(action, self.number) + self.bounds)
) ** 2
self.guess_count += 1
done = self.guess_count >= self.guess_max
return self.observation, reward[0], done, {"number": self.number, "guesses": self.guess_count}
return (
self.observation,
reward[0],
done,
{"number": self.number, "guesses": self.guess_count},
)
def reset(self):
self.number = self.np_random.uniform(-self.range, self.range)

View File

@@ -36,15 +36,19 @@ class KellyCoinflipEnv(gym.Env):
mistaken bet could end the episode immediately; it's not clear to me which version
would be better.) For a harder version which randomizes the 3 key parameters, see the
Generalized Kelly coinflip game."""
metadata = {'render.modes': ['human']}
metadata = {"render.modes": ["human"]}
def __init__(self, initial_wealth=25.0, edge=0.6, max_wealth=250.0, max_rounds=300):
self.action_space = spaces.Discrete(int(max_wealth * 100)) # betting in penny
# increments
self.observation_space = spaces.Tuple((
spaces.Box(0, max_wealth, [1], dtype=np.float32), # (w,b)
spaces.Discrete(max_rounds + 1)))
self.observation_space = spaces.Tuple(
(
spaces.Box(0, max_wealth, [1], dtype=np.float32), # (w,b)
spaces.Discrete(max_rounds + 1),
)
)
self.reward_range = (0, max_wealth)
self.edge = edge
self.wealth = initial_wealth
@@ -61,7 +65,9 @@ class KellyCoinflipEnv(gym.Env):
return [seed]
def step(self, action):
bet_in_dollars = min(action/100.0, self.wealth) # action = desired bet in pennies
bet_in_dollars = min(
action / 100.0, self.wealth
) # action = desired bet in pennies
self.rounds -= 1
coinflip = flip(self.edge, self.np_random)
@@ -80,7 +86,7 @@ class KellyCoinflipEnv(gym.Env):
self.wealth = self.initial_wealth
return self._get_obs()
def render(self, mode='human'):
def render(self, mode="human"):
print("Current wealth: ", self.wealth, "; Rounds left: ", self.rounds)
@@ -106,11 +112,21 @@ class KellyCoinflipGeneralizedEnv(gym.Env):
for the edge case alone suggests that the Bayes-optimal value may be very close to
what one would calculate using a decision tree for any specific case), and
represents a good challenge for RL agents."""
metadata = {'render.modes': ['human']}
def __init__(self, initial_wealth=25.0, edge_prior_alpha=7, edge_prior_beta=3,
max_wealth_alpha=5.0, max_wealth_m=200.0, max_rounds_mean=300.0,
max_rounds_sd=25.0, reseed=True, clip_distributions=False):
metadata = {"render.modes": ["human"]}
def __init__(
self,
initial_wealth=25.0,
edge_prior_alpha=7,
edge_prior_beta=3,
max_wealth_alpha=5.0,
max_wealth_m=200.0,
max_rounds_mean=300.0,
max_rounds_sd=25.0,
reseed=True,
clip_distributions=False,
):
# clip_distributions=True asserts that state and action space are not modified at reset()
# store the hyper-parameters for passing back into __init__() during resets so
@@ -126,28 +142,42 @@ class KellyCoinflipGeneralizedEnv(gym.Env):
self.max_rounds_sd = max_rounds_sd
self.clip_distributions = clip_distributions
if reseed or not hasattr(self, 'np_random'):
if reseed or not hasattr(self, "np_random"):
self.seed()
# draw this game's set of parameters:
edge = self.np_random.beta(edge_prior_alpha, edge_prior_beta)
if self.clip_distributions:
# (clip/resample some parameters to be able to fix obs/action space sizes/bounds)
max_wealth_bound = round(genpareto.ppf(0.85, max_wealth_alpha, max_wealth_m))
max_wealth_bound = round(
genpareto.ppf(0.85, max_wealth_alpha, max_wealth_m)
)
max_wealth = max_wealth_bound + 1.0
while max_wealth > max_wealth_bound:
max_wealth = round(genpareto.rvs(max_wealth_alpha, max_wealth_m,
random_state=self.np_random))
max_rounds_bound = int(round(norm.ppf(0.99, max_rounds_mean, max_rounds_sd)))
max_wealth = round(
genpareto.rvs(
max_wealth_alpha, max_wealth_m, random_state=self.np_random
)
)
max_rounds_bound = int(
round(norm.ppf(0.99, max_rounds_mean, max_rounds_sd))
)
max_rounds = max_rounds_bound + 1
while max_rounds > max_rounds_bound:
max_rounds = int(round(self.np_random.normal(max_rounds_mean, max_rounds_sd)))
max_rounds = int(
round(self.np_random.normal(max_rounds_mean, max_rounds_sd))
)
else:
max_wealth = round(genpareto.rvs(max_wealth_alpha, max_wealth_m,
random_state=self.np_random))
max_wealth = round(
genpareto.rvs(
max_wealth_alpha, max_wealth_m, random_state=self.np_random
)
)
max_wealth_bound = max_wealth
max_rounds = int(round(self.np_random.normal(max_rounds_mean, max_rounds_sd)))
max_rounds = int(
round(self.np_random.normal(max_rounds_mean, max_rounds_sd))
)
max_rounds_bound = max_rounds
# add an additional global variable which is the sufficient statistic for the
@@ -161,13 +191,18 @@ class KellyCoinflipGeneralizedEnv(gym.Env):
self.rounds_elapsed = 0
# the rest proceeds as before:
self.action_space = spaces.Discrete(int(max_wealth_bound*100))
self.observation_space = spaces.Tuple((
spaces.Box(0, max_wealth_bound, shape=[1], dtype=np.float32), # current wealth
spaces.Discrete(max_rounds_bound+1), # rounds elapsed
spaces.Discrete(max_rounds_bound+1), # wins
spaces.Discrete(max_rounds_bound+1), # losses
spaces.Box(0, max_wealth_bound, [1], dtype=np.float32))) # maximum observed wealth
self.action_space = spaces.Discrete(int(max_wealth_bound * 100))
self.observation_space = spaces.Tuple(
(
spaces.Box(
0, max_wealth_bound, shape=[1], dtype=np.float32
), # current wealth
spaces.Discrete(max_rounds_bound + 1), # rounds elapsed
spaces.Discrete(max_rounds_bound + 1), # wins
spaces.Discrete(max_rounds_bound + 1), # losses
spaces.Box(0, max_wealth_bound, [1], dtype=np.float32),
)
) # maximum observed wealth
self.reward_range = (0, max_wealth)
self.edge = edge
self.wealth = self.initial_wealth
@@ -180,7 +215,7 @@ class KellyCoinflipGeneralizedEnv(gym.Env):
return [seed]
def step(self, action):
bet_in_dollars = min(action/100.0, self.wealth)
bet_in_dollars = min(action / 100.0, self.wealth)
self.rounds -= 1
@@ -200,25 +235,42 @@ class KellyCoinflipGeneralizedEnv(gym.Env):
return self._get_obs(), reward, done, {}
def _get_obs(self):
return (np.array([float(self.wealth)]), self.rounds_elapsed, self.wins,
self.losses, np.array([float(self.max_ever_wealth)]))
return (
np.array([float(self.wealth)]),
self.rounds_elapsed,
self.wins,
self.losses,
np.array([float(self.max_ever_wealth)]),
)
def reset(self):
# re-init everything to draw new parameters etc, but preserve the RNG for
# reproducibility and pass in the same hyper-parameters as originally specified:
self.__init__(initial_wealth=self.initial_wealth,
edge_prior_alpha=self.edge_prior_alpha,
edge_prior_beta=self.edge_prior_beta,
max_wealth_alpha=self.max_wealth_alpha,
max_wealth_m=self.max_wealth_m,
max_rounds_mean=self.max_rounds_mean,
max_rounds_sd=self.max_rounds_sd,
reseed=False,
clip_distributions=self.clip_distributions)
self.__init__(
initial_wealth=self.initial_wealth,
edge_prior_alpha=self.edge_prior_alpha,
edge_prior_beta=self.edge_prior_beta,
max_wealth_alpha=self.max_wealth_alpha,
max_wealth_m=self.max_wealth_m,
max_rounds_mean=self.max_rounds_mean,
max_rounds_sd=self.max_rounds_sd,
reseed=False,
clip_distributions=self.clip_distributions,
)
return self._get_obs()
def render(self, mode='human'):
print("Current wealth: ", self.wealth, "; Rounds left: ", self.rounds,
"; True edge: ", self.edge, "; True max wealth: ", self.max_wealth,
"; True stopping time: ", self.max_rounds, "; Rounds left: ",
self.max_rounds - self.rounds_elapsed)
def render(self, mode="human"):
print(
"Current wealth: ",
self.wealth,
"; Rounds left: ",
self.rounds,
"; True edge: ",
self.edge,
"; True max wealth: ",
self.max_wealth,
"; True stopping time: ",
self.max_rounds,
"; Rounds left: ",
self.max_rounds - self.rounds_elapsed,
)
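A minimal interaction sketch for the simple (non-generalized) game above, using its registered id; as in `step`, the action is a bet in pennies and is capped at the current wealth:

```python
import gym

env = gym.make("KellyCoinflip-v0")
obs = env.reset()                            # ((wealth,), rounds_left)
done = False
while not done:
    obs, reward, done, info = env.step(500)  # bet 500 pennies ($5) every round
env.render()                                 # prints final wealth and rounds left
```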

View File

@@ -22,6 +22,7 @@ class NChainEnv(gym.Env):
A Bayesian Framework for Reinforcement Learning by Malcolm Strens (2000)
http://ceit.aut.ac.ir/~shiry/lecture/machine-learning/papers/BRL-2000.pdf
"""
def __init__(self, n=5, slip=0.2, small=2, large=10):
self.n = n
self.slip = slip # probability of 'slipping' an action

View File

@@ -14,6 +14,7 @@ class RouletteEnv(gym.Env):
The last action (38) stops the rollout for a return of 0 (walking away)
"""
def __init__(self, spots=37):
self.n = spots + 1
self.action_space = spaces.Discrete(self.n)

View File

@@ -26,7 +26,7 @@ class TaxiEnv(discrete.DiscreteEnv):
There are four designated locations in the grid world indicated by R(ed), G(reen), Y(ellow), and B(lue). When the episode starts, the taxi starts off at a random square and the passenger is at a random location. The taxi drives to the passenger's location, picks up the passenger, drives to the passenger's destination (another one of the four specified locations), and then drops off the passenger. Once the passenger is dropped off, the episode ends.
Observations:
There are 500 discrete states since there are 25 taxi positions, 5 possible locations of the passenger (including the case when the passenger is in the taxi), and 4 destination locations.
There are 500 discrete states since there are 25 taxi positions, 5 possible locations of the passenger (including the case when the passenger is in the taxi), and 4 destination locations.
Passenger locations:
- 0: R(ed)
@@ -65,10 +65,11 @@ class TaxiEnv(discrete.DiscreteEnv):
state space is represented by:
(taxi_row, taxi_col, passenger_location, destination)
"""
metadata = {'render.modes': ['human', 'ansi']}
metadata = {"render.modes": ["human", "ansi"]}
def __init__(self):
self.desc = np.asarray(MAP, dtype='c')
self.desc = np.asarray(MAP, dtype="c")
self.locs = locs = [(0, 0), (0, 4), (4, 0), (4, 3)]
@@ -79,8 +80,10 @@ class TaxiEnv(discrete.DiscreteEnv):
max_col = num_columns - 1
initial_state_distrib = np.zeros(num_states)
num_actions = 6
P = {state: {action: []
for action in range(num_actions)} for state in range(num_states)}
P = {
state: {action: [] for action in range(num_actions)}
for state in range(num_states)
}
for row in range(num_rows):
for col in range(num_columns):
for pass_idx in range(len(locs) + 1): # +1 for being inside taxi
@@ -91,7 +94,9 @@ class TaxiEnv(discrete.DiscreteEnv):
for action in range(num_actions):
# defaults
new_row, new_col, new_pass_idx = row, col, pass_idx
reward = -1 # default reward when there is no pickup/dropoff
reward = (
-1
) # default reward when there is no pickup/dropoff
done = False
taxi_loc = (row, col)
@@ -104,7 +109,7 @@ class TaxiEnv(discrete.DiscreteEnv):
elif action == 3 and self.desc[1 + row, 2 * col] == b":":
new_col = max(col - 1, 0)
elif action == 4: # pickup
if (pass_idx < 4 and taxi_loc == locs[pass_idx]):
if pass_idx < 4 and taxi_loc == locs[pass_idx]:
new_pass_idx = 4
else: # passenger not at location
reward = -10
@@ -118,12 +123,13 @@ class TaxiEnv(discrete.DiscreteEnv):
else: # dropoff at wrong location
reward = -10
new_state = self.encode(
new_row, new_col, new_pass_idx, dest_idx)
P[state][action].append(
(1.0, new_state, reward, done))
new_row, new_col, new_pass_idx, dest_idx
)
P[state][action].append((1.0, new_state, reward, done))
initial_state_distrib /= initial_state_distrib.sum()
discrete.DiscreteEnv.__init__(
self, num_states, num_actions, P, initial_state_distrib)
self, num_states, num_actions, P, initial_state_distrib
)
def encode(self, taxi_row, taxi_col, pass_loc, dest_idx):
# (5) 5, 5, 4
@@ -148,32 +154,44 @@ class TaxiEnv(discrete.DiscreteEnv):
assert 0 <= i < 5
return reversed(out)
def render(self, mode='human'):
outfile = StringIO() if mode == 'ansi' else sys.stdout
def render(self, mode="human"):
outfile = StringIO() if mode == "ansi" else sys.stdout
out = self.desc.copy().tolist()
out = [[c.decode('utf-8') for c in line] for line in out]
out = [[c.decode("utf-8") for c in line] for line in out]
taxi_row, taxi_col, pass_idx, dest_idx = self.decode(self.s)
def ul(x): return "_" if x == " " else x
def ul(x):
return "_" if x == " " else x
if pass_idx < 4:
out[1 + taxi_row][2 * taxi_col + 1] = utils.colorize(
out[1 + taxi_row][2 * taxi_col + 1], 'yellow', highlight=True)
out[1 + taxi_row][2 * taxi_col + 1], "yellow", highlight=True
)
pi, pj = self.locs[pass_idx]
out[1 + pi][2 * pj + 1] = utils.colorize(out[1 + pi][2 * pj + 1], 'blue', bold=True)
out[1 + pi][2 * pj + 1] = utils.colorize(
out[1 + pi][2 * pj + 1], "blue", bold=True
)
else: # passenger in taxi
out[1 + taxi_row][2 * taxi_col + 1] = utils.colorize(
ul(out[1 + taxi_row][2 * taxi_col + 1]), 'green', highlight=True)
ul(out[1 + taxi_row][2 * taxi_col + 1]), "green", highlight=True
)
di, dj = self.locs[dest_idx]
out[1 + di][2 * dj + 1] = utils.colorize(out[1 + di][2 * dj + 1], 'magenta')
out[1 + di][2 * dj + 1] = utils.colorize(out[1 + di][2 * dj + 1], "magenta")
outfile.write("\n".join(["".join(row) for row in out]) + "\n")
if self.lastaction is not None:
outfile.write(" ({})\n".format(["South", "North", "East", "West", "Pickup", "Dropoff"][self.lastaction]))
outfile.write(
" ({})\n".format(
["South", "North", "East", "West", "Pickup", "Dropoff"][
self.lastaction
]
)
)
else:
outfile.write("\n")
# No need to return anything for human
if mode != 'human':
if mode != "human":
with closing(outfile):
return outfile.getvalue()
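The `encode`/`decode` pair above packs (taxi_row, taxi_col, passenger_location, destination) into a single discrete state; a quick round-trip sketch using the location codes from the docstring:

```python
from gym.envs.toy_text.taxi import TaxiEnv

env = TaxiEnv()
state = env.encode(3, 1, 2, 0)   # taxi at (3, 1), passenger at Y(ellow), destination R(ed)
print(state)                     # 328 == ((3 * 5 + 1) * 5 + 2) * 4 + 0
assert list(env.decode(state)) == [3, 1, 2, 0]
```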

View File

@@ -2,4 +2,3 @@ from gym.envs.unittest.cube_crash import CubeCrash
from gym.envs.unittest.cube_crash import CubeCrashSparse
from gym.envs.unittest.cube_crash import CubeCrashScreenBecomesBlack
from gym.envs.unittest.memorize_digits import MemorizeDigits

View File

@@ -34,27 +34,30 @@ FIELD_W = 32
FIELD_H = 40
HOLE_WIDTH = 8
color_black = np.array((0,0,0)).astype('float32')
color_white = np.array((255,255,255)).astype('float32')
color_green = np.array((0,255,0)).astype('float32')
color_black = np.array((0, 0, 0)).astype("float32")
color_white = np.array((255, 255, 255)).astype("float32")
color_green = np.array((0, 255, 0)).astype("float32")
class CubeCrash(gym.Env):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second' : 60,
'video.res_w' : FIELD_W,
'video.res_h' : FIELD_H,
"render.modes": ["human", "rgb_array"],
"video.frames_per_second": 60,
"video.res_w": FIELD_W,
"video.res_h": FIELD_H,
}
use_shaped_reward = True
use_black_screen = False
use_random_colors = False # Makes env too hard
use_black_screen = False
use_random_colors = False # Makes env too hard
def __init__(self):
self.seed()
self.viewer = None
self.observation_space = spaces.Box(0, 255, (FIELD_H,FIELD_W,3), dtype=np.uint8)
self.observation_space = spaces.Box(
0, 255, (FIELD_H, FIELD_W, 3), dtype=np.uint8
)
self.action_space = spaces.Discrete(3)
self.reset()
@@ -64,39 +67,59 @@ class CubeCrash(gym.Env):
return [seed]
def random_color(self):
return np.array([
self.np_random.randint(low=0, high=255),
self.np_random.randint(low=0, high=255),
self.np_random.randint(low=0, high=255),
]).astype('uint8')
return np.array(
[
self.np_random.randint(low=0, high=255),
self.np_random.randint(low=0, high=255),
self.np_random.randint(low=0, high=255),
]
).astype("uint8")
def reset(self):
self.cube_x = self.np_random.randint(low=3, high=FIELD_W-3)
self.cube_y = self.np_random.randint(low=3, high=FIELD_H//6)
self.hole_x = self.np_random.randint(low=HOLE_WIDTH, high=FIELD_W-HOLE_WIDTH)
self.cube_x = self.np_random.randint(low=3, high=FIELD_W - 3)
self.cube_y = self.np_random.randint(low=3, high=FIELD_H // 6)
self.hole_x = self.np_random.randint(low=HOLE_WIDTH, high=FIELD_W - HOLE_WIDTH)
self.bg_color = self.random_color() if self.use_random_colors else color_black
self.potential = None
self.potential = None
self.step_n = 0
while 1:
self.wall_color = self.random_color() if self.use_random_colors else color_white
self.cube_color = self.random_color() if self.use_random_colors else color_green
if np.linalg.norm(self.wall_color - self.bg_color) < 50 or np.linalg.norm(self.cube_color - self.bg_color) < 50: continue
self.wall_color = (
self.random_color() if self.use_random_colors else color_white
)
self.cube_color = (
self.random_color() if self.use_random_colors else color_green
)
if (
np.linalg.norm(self.wall_color - self.bg_color) < 50
or np.linalg.norm(self.cube_color - self.bg_color) < 50
):
continue
break
return self.step(0)[0]
def step(self, action):
if action==0: pass
elif action==1: self.cube_x -= 1
elif action==2: self.cube_x += 1
else: assert 0, "Action %i is out of range" % action
if action == 0:
pass
elif action == 1:
self.cube_x -= 1
elif action == 2:
self.cube_x += 1
else:
assert 0, "Action %i is out of range" % action
self.cube_y += 1
self.step_n += 1
obs = np.zeros( (FIELD_H,FIELD_W,3), dtype=np.uint8 )
obs[:,:,:] = self.bg_color
obs[FIELD_H-5:FIELD_H,:,:] = self.wall_color
obs[FIELD_H-5:FIELD_H, self.hole_x-HOLE_WIDTH//2:self.hole_x+HOLE_WIDTH//2+1, :] = self.bg_color
obs[self.cube_y-1:self.cube_y+2, self.cube_x-1:self.cube_x+2, :] = self.cube_color
obs = np.zeros((FIELD_H, FIELD_W, 3), dtype=np.uint8)
obs[:, :, :] = self.bg_color
obs[FIELD_H - 5 : FIELD_H, :, :] = self.wall_color
obs[
FIELD_H - 5 : FIELD_H,
self.hole_x - HOLE_WIDTH // 2 : self.hole_x + HOLE_WIDTH // 2 + 1,
:,
] = self.bg_color
obs[
self.cube_y - 1 : self.cube_y + 2, self.cube_x - 1 : self.cube_x + 2, :
] = self.cube_color
if self.use_black_screen and self.step_n > 4:
obs[:] = np.zeros((3,), dtype=np.uint8)
@@ -107,11 +130,11 @@ class CubeCrash(gym.Env):
reward = (self.potential - dist) * 0.01
self.potential = dist
if self.cube_x-1 < 0 or self.cube_x+1 >= FIELD_W:
if self.cube_x - 1 < 0 or self.cube_x + 1 >= FIELD_W:
done = True
reward = -1
elif self.cube_y+1 >= FIELD_H-5:
if dist >= HOLE_WIDTH//2:
elif self.cube_y + 1 >= FIELD_H - 5:
if dist >= HOLE_WIDTH // 2:
done = True
reward = -1
elif self.cube_y == FIELD_H:
@@ -120,12 +143,13 @@ class CubeCrash(gym.Env):
self.last_obs = obs
return obs, reward, done, {}
def render(self, mode='human'):
if mode == 'rgb_array':
def render(self, mode="human"):
if mode == "rgb_array":
return self.last_obs
elif mode == 'human':
elif mode == "human":
from gym.envs.classic_control import rendering
if self.viewer is None:
self.viewer = rendering.SimpleImageViewer()
self.viewer.imshow(self.last_obs)
@@ -139,10 +163,11 @@ class CubeCrash(gym.Env):
self.viewer.close()
self.viewer = None
class CubeCrashSparse(CubeCrash):
use_shaped_reward = False
class CubeCrashScreenBecomesBlack(CubeCrash):
use_shaped_reward = False
use_black_screen = True

View File

@@ -32,88 +32,29 @@ from gym.utils import seeding
FIELD_W = 32
FIELD_H = 24
bogus_mnist = \
[[
" **** ",
"* *",
"* *",
"* *",
"* *",
" **** "
], [
" ** ",
" * * ",
" * ",
" * ",
" * ",
" *** "
], [
" **** ",
"* *",
" *",
" *** ",
"** ",
"******"
], [
" **** ",
"* *",
" ** ",
" *",
"* *",
" **** "
], [
" * * ",
" * * ",
" * * ",
" **** ",
" * ",
" * "
], [
" **** ",
" * ",
" **** ",
" * ",
" * ",
" **** "
], [
" *** ",
" * ",
" **** ",
" * * ",
" * * ",
" **** "
], [
" **** ",
" * ",
" * ",
" * ",
" * ",
" * "
], [
" **** ",
"* *",
" **** ",
"* *",
"* *",
" **** "
], [
" **** ",
"* *",
"* *",
" *****",
" *",
" **** "
]]
bogus_mnist = [
[" **** ", "* *", "* *", "* *", "* *", " **** "],
[" ** ", " * * ", " * ", " * ", " * ", " *** "],
[" **** ", "* *", " *", " *** ", "** ", "******"],
[" **** ", "* *", " ** ", " *", "* *", " **** "],
[" * * ", " * * ", " * * ", " **** ", " * ", " * "],
[" **** ", " * ", " **** ", " * ", " * ", " **** "],
[" *** ", " * ", " **** ", " * * ", " * * ", " **** "],
[" **** ", " * ", " * ", " * ", " * ", " * "],
[" **** ", "* *", " **** ", "* *", "* *", " **** "],
[" **** ", "* *", "* *", " *****", " *", " **** "],
]
color_black = np.array((0, 0, 0)).astype("float32")
color_white = np.array((255, 255, 255)).astype("float32")
color_black = np.array((0,0,0)).astype('float32')
color_white = np.array((255,255,255)).astype('float32')
class MemorizeDigits(gym.Env):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second' : 60,
'video.res_w' : FIELD_W,
'video.res_h' : FIELD_H,
"render.modes": ["human", "rgb_array"],
"video.frames_per_second": 60,
"video.res_w": FIELD_W,
"video.res_h": FIELD_H,
}
use_random_colors = False
@@ -121,12 +62,16 @@ class MemorizeDigits(gym.Env):
def __init__(self):
self.seed()
self.viewer = None
self.observation_space = spaces.Box(0, 255, (FIELD_H,FIELD_W,3), dtype=np.uint8)
self.observation_space = spaces.Box(
0, 255, (FIELD_H, FIELD_W, 3), dtype=np.uint8
)
self.action_space = spaces.Discrete(10)
self.bogus_mnist = np.zeros( (10,6,6), dtype=np.uint8 )
self.bogus_mnist = np.zeros((10, 6, 6), dtype=np.uint8)
for digit in range(10):
for y in range(6):
self.bogus_mnist[digit,y,:] = [ord(char) for char in bogus_mnist[digit][y]]
self.bogus_mnist[digit, y, :] = [
ord(char) for char in bogus_mnist[digit][y]
]
self.reset()
def seed(self, seed=None):
@@ -134,20 +79,25 @@ class MemorizeDigits(gym.Env):
return [seed]
def random_color(self):
return np.array([
self.np_random.randint(low=0, high=255),
self.np_random.randint(low=0, high=255),
self.np_random.randint(low=0, high=255),
]).astype('uint8')
return np.array(
[
self.np_random.randint(low=0, high=255),
self.np_random.randint(low=0, high=255),
self.np_random.randint(low=0, high=255),
]
).astype("uint8")
def reset(self):
self.digit_x = self.np_random.randint(low=FIELD_W//5, high=FIELD_W//5*4)
self.digit_y = self.np_random.randint(low=FIELD_H//5, high=FIELD_H//5*4)
self.digit_x = self.np_random.randint(low=FIELD_W // 5, high=FIELD_W // 5 * 4)
self.digit_y = self.np_random.randint(low=FIELD_H // 5, high=FIELD_H // 5 * 4)
self.color_bg = self.random_color() if self.use_random_colors else color_black
self.step_n = 0
while 1:
self.color_digit = self.random_color() if self.use_random_colors else color_white
if np.linalg.norm(self.color_digit - self.color_bg) < 50: continue
self.color_digit = (
self.random_color() if self.use_random_colors else color_white
)
if np.linalg.norm(self.color_digit - self.color_bg) < 50:
continue
break
self.digit = -1
return self.step(0)[0]
@@ -156,29 +106,32 @@ class MemorizeDigits(gym.Env):
reward = -1
done = False
self.step_n += 1
if self.digit==-1:
if self.digit == -1:
pass
else:
if self.digit==action:
if self.digit == action:
reward = +1
done = self.step_n > 20 and 0==self.np_random.randint(low=0, high=5)
done = self.step_n > 20 and 0 == self.np_random.randint(low=0, high=5)
self.digit = self.np_random.randint(low=0, high=10)
obs = np.zeros( (FIELD_H,FIELD_W,3), dtype=np.uint8 )
obs[:,:,:] = self.color_bg
digit_img = np.zeros( (6,6,3), dtype=np.uint8 )
obs = np.zeros((FIELD_H, FIELD_W, 3), dtype=np.uint8)
obs[:, :, :] = self.color_bg
digit_img = np.zeros((6, 6, 3), dtype=np.uint8)
digit_img[:] = self.color_bg
xxx = self.bogus_mnist[self.digit]==42
xxx = self.bogus_mnist[self.digit] == 42
digit_img[xxx] = self.color_digit
obs[self.digit_y-3:self.digit_y+3, self.digit_x-3:self.digit_x+3] = digit_img
obs[
self.digit_y - 3 : self.digit_y + 3, self.digit_x - 3 : self.digit_x + 3
] = digit_img
self.last_obs = obs
return obs, reward, done, {}
def render(self, mode='human'):
if mode == 'rgb_array':
def render(self, mode="human"):
if mode == "rgb_array":
return self.last_obs
elif mode == 'human':
elif mode == "human":
from gym.envs.classic_control import rendering
if self.viewer is None:
self.viewer = rendering.SimpleImageViewer()
self.viewer.imshow(self.last_obs)
@@ -191,4 +144,3 @@ class MemorizeDigits(gym.Env):
if self.viewer is not None:
self.viewer.close()
self.viewer = None

View File

@@ -1,87 +1,118 @@
import sys
class Error(Exception):
pass
# Local errors
class Unregistered(Error):
"""Raised when the user requests an item from the registry that does
not actually exist.
"""
pass
class UnregisteredEnv(Unregistered):
"""Raised when the user requests an env from the registry that does
not actually exist.
"""
pass
class UnregisteredBenchmark(Unregistered):
"""Raised when the user requests an env from the registry that does
not actually exist.
"""
pass
class DeprecatedEnv(Error):
"""Raised when the user requests an env from the registry with an
older version number than the latest env with the same name.
"""
pass
class UnseedableEnv(Error):
"""Raised when the user tries to seed an env that does not support
seeding.
"""
pass
class DependencyNotInstalled(Error):
pass
class UnsupportedMode(Exception):
"""Raised when the user requests a rendering mode not supported by the
environment.
"""
pass
class ResetNeeded(Exception):
"""When the monitor is active, raised when the user tries to step an
environment that's already done.
"""
pass
class ResetNotAllowed(Exception):
"""When the monitor is active, raised when the user tries to step an
environment that's not yet done.
"""
pass
class InvalidAction(Exception):
"""Raised when the user performs an action not contained within the
action space
"""
pass
# API errors
class APIError(Error):
def __init__(self, message=None, http_body=None, http_status=None,
json_body=None, headers=None):
def __init__(
self,
message=None,
http_body=None,
http_status=None,
json_body=None,
headers=None,
):
super(APIError, self).__init__(message)
if http_body and hasattr(http_body, 'decode'):
if http_body and hasattr(http_body, "decode"):
try:
http_body = http_body.decode('utf-8')
http_body = http_body.decode("utf-8")
except:
http_body = ('<Could not decode body as utf-8. '
'Please report to gym@openai.com>')
http_body = (
"<Could not decode body as utf-8. "
"Please report to gym@openai.com>"
)
self._message = message
self.http_body = http_body
self.http_status = http_status
self.json_body = json_body
self.headers = headers or {}
self.request_id = self.headers.get('request-id', None)
self.request_id = self.headers.get("request-id", None)
def __unicode__(self):
if self.request_id is not None:
@@ -91,8 +122,8 @@ class APIError(Error):
return self._message
def __str__(self):
try: # Python 2
return unicode(self).encode('utf-8')
try: # Python 2
return unicode(self).encode("utf-8")
except NameError: # Python 3
return self.__unicode__()
@@ -102,31 +133,43 @@ class APIConnectionError(APIError):
class InvalidRequestError(APIError):
def __init__(self, message, param, http_body=None,
http_status=None, json_body=None, headers=None):
def __init__(
self,
message,
param,
http_body=None,
http_status=None,
json_body=None,
headers=None,
):
super(InvalidRequestError, self).__init__(
message, http_body, http_status, json_body,
headers)
message, http_body, http_status, json_body, headers
)
self.param = param
class AuthenticationError(APIError):
pass
class RateLimitError(APIError):
pass
# Video errors
class VideoRecorderError(Error):
pass
class InvalidFrame(Error):
pass
# Wrapper errors
class DoubleWrapperError(Error):
pass
@@ -138,8 +181,10 @@ class WrapAfterConfigureError(Error):
class RetriesExceededError(Error):
pass
# Vectorized environments errors
class AlreadyPendingCallError(Exception):
"""
Raised when `reset`, or `step` is called asynchronously (e.g. with
@@ -147,28 +192,35 @@ class AlreadyPendingCallError(Exception):
`step_async` (respectively) is called again (without a complete call to
`reset_wait`, or `step_wait` respectively).
"""
def __init__(self, message, name):
super(AlreadyPendingCallError, self).__init__(message)
self.name = name
class NoAsyncCallError(Exception):
"""
Raised when an asynchronous `reset`, or `step` is not running, but
`reset_wait`, or `step_wait` (respectively) is called.
"""
def __init__(self, message, name):
super(NoAsyncCallError, self).__init__(message)
self.name = name
class ClosedEnvironmentError(Exception):
"""
Trying to call `reset`, or `step`, while the environment is closed.
"""
pass
class CustomSpaceError(Exception):
"""
The space is a custom gym.Space instance, and is not supported by
`AsyncVectorEnv` with `shared_memory=True`.
"""
pass
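Most of these exceptions surface through `gym.make`; a small sketch of catching the registry errors defined above (the env id is deliberately bogus):

```python
import gym
from gym import error

try:
    gym.make("DefinitelyNotARealEnv-v0")
except error.UnregisteredEnv:
    print("no env registered under that id")
except error.Error as e:
    print("other gym error:", e)
```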

View File

@@ -10,6 +10,7 @@ DISABLED = 50
MIN_LEVEL = 30
def set_level(level):
"""
Set logging threshold on current logger.
@@ -17,21 +18,26 @@ def set_level(level):
global MIN_LEVEL
MIN_LEVEL = level
def debug(msg, *args):
if MIN_LEVEL <= DEBUG:
print('%s: %s'%('DEBUG', msg % args))
print("%s: %s" % ("DEBUG", msg % args))
def info(msg, *args):
if MIN_LEVEL <= INFO:
print('%s: %s'%('INFO', msg % args))
print("%s: %s" % ("INFO", msg % args))
def warn(msg, *args):
if MIN_LEVEL <= WARN:
warnings.warn(colorize('%s: %s'%('WARN', msg % args), 'yellow'))
warnings.warn(colorize("%s: %s" % ("WARN", msg % args), "yellow"))
def error(msg, *args):
if MIN_LEVEL <= ERROR:
print(colorize('%s: %s'%('ERROR', msg % args), 'red'))
print(colorize("%s: %s" % ("ERROR", msg % args), "red"))
# DEPRECATED:
setLevel = set_level
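A usage sketch for the module above; messages below the current threshold are silently dropped:

```python
from gym import logger

logger.set_level(logger.INFO)                # lower the threshold from the default WARN
logger.debug("not shown at this level")
logger.info("episode %d finished", 3)
logger.warn("printed in yellow via warnings.warn")
```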

View File

@@ -11,4 +11,16 @@ from gym.spaces.utils import flatten_space
from gym.spaces.utils import flatten
from gym.spaces.utils import unflatten
__all__ = ["Space", "Box", "Discrete", "MultiDiscrete", "MultiBinary", "Tuple", "Dict", "flatdim", "flatten_space", "flatten", "unflatten"]
__all__ = [
"Space",
"Box",
"Discrete",
"MultiDiscrete",
"MultiBinary",
"Tuple",
"Dict",
"flatdim",
"flatten_space",
"flatten",
"unflatten",
]

View File

@@ -21,23 +21,34 @@ class Box(Space):
Box(2,)
"""
def __init__(self, low, high, shape=None, dtype=np.float32):
assert dtype is not None, 'dtype must be explicitly provided. '
assert dtype is not None, "dtype must be explicitly provided. "
self.dtype = np.dtype(dtype)
# determine shape if it isn't provided directly
if shape is not None:
shape = tuple(shape)
assert np.isscalar(low) or low.shape == shape, "low.shape doesn't match provided shape"
assert np.isscalar(high) or high.shape == shape, "high.shape doesn't match provided shape"
assert (
np.isscalar(low) or low.shape == shape
), "low.shape doesn't match provided shape"
assert (
np.isscalar(high) or high.shape == shape
), "high.shape doesn't match provided shape"
elif not np.isscalar(low):
shape = low.shape
assert np.isscalar(high) or high.shape == shape, "high.shape doesn't match low.shape"
assert (
np.isscalar(high) or high.shape == shape
), "high.shape doesn't match low.shape"
elif not np.isscalar(high):
shape = high.shape
assert np.isscalar(low) or low.shape == shape, "low.shape doesn't match high.shape"
assert (
np.isscalar(low) or low.shape == shape
), "low.shape doesn't match high.shape"
else:
raise ValueError("shape must be provided or inferred from the shapes of low or high")
raise ValueError(
"shape must be provided or inferred from the shapes of low or high"
)
if np.isscalar(low):
low = np.full(shape, low, dtype=dtype)
@@ -54,11 +65,14 @@ class Box(Space):
return np.finfo(dtype).precision
else:
return np.inf
low_precision = _get_precision(self.low.dtype)
high_precision = _get_precision(self.high.dtype)
dtype_precision = _get_precision(self.dtype)
if min(low_precision, high_precision) > dtype_precision:
logger.warn("Box bound precision lowered by casting to {}".format(self.dtype))
logger.warn(
"Box bound precision lowered by casting to {}".format(self.dtype)
)
self.low = self.low.astype(self.dtype)
self.high = self.high.astype(self.dtype)
@@ -92,32 +106,33 @@ class Box(Space):
* (-oo, b] : shifted negative exponential distribution
* (-oo, oo) : normal distribution
"""
high = self.high if self.dtype.kind == 'f' \
else self.high.astype('int64') + 1
high = self.high if self.dtype.kind == "f" else self.high.astype("int64") + 1
sample = np.empty(self.shape)
# Masking arrays which classify the coordinates according to interval
# type
unbounded = ~self.bounded_below & ~self.bounded_above
upp_bounded = ~self.bounded_below & self.bounded_above
low_bounded = self.bounded_below & ~self.bounded_above
bounded = self.bounded_below & self.bounded_above
unbounded = ~self.bounded_below & ~self.bounded_above
upp_bounded = ~self.bounded_below & self.bounded_above
low_bounded = self.bounded_below & ~self.bounded_above
bounded = self.bounded_below & self.bounded_above
# Vectorized sampling by interval type
sample[unbounded] = self.np_random.normal(
size=unbounded[unbounded].shape)
sample[unbounded] = self.np_random.normal(size=unbounded[unbounded].shape)
sample[low_bounded] = self.np_random.exponential(
size=low_bounded[low_bounded].shape) + self.low[low_bounded]
sample[low_bounded] = (
self.np_random.exponential(size=low_bounded[low_bounded].shape)
+ self.low[low_bounded]
)
sample[upp_bounded] = -self.np_random.exponential(
size=upp_bounded[upp_bounded].shape) + self.high[upp_bounded]
sample[upp_bounded] = (
-self.np_random.exponential(size=upp_bounded[upp_bounded].shape)
+ self.high[upp_bounded]
)
sample[bounded] = self.np_random.uniform(low=self.low[bounded],
high=high[bounded],
size=bounded[bounded].shape)
if self.dtype.kind == 'i':
sample[bounded] = self.np_random.uniform(
low=self.low[bounded], high=high[bounded], size=bounded[bounded].shape
)
if self.dtype.kind == "i":
sample = np.floor(sample)
return sample.astype(self.dtype)
@@ -125,7 +140,9 @@ class Box(Space):
def contains(self, x):
if isinstance(x, list):
x = np.array(x) # Promote list to array for contains check
return x.shape == self.shape and np.all(x >= self.low) and np.all(x <= self.high)
return (
x.shape == self.shape and np.all(x >= self.low) and np.all(x <= self.high)
)
def to_jsonable(self, sample_n):
return np.array(sample_n).tolist()
@@ -134,7 +151,14 @@ class Box(Space):
return [np.asarray(sample) for sample in sample_n]
def __repr__(self):
return "Box({}, {}, {}, {})".format(self.low.min(), self.high.max(), self.shape, self.dtype)
return "Box({}, {}, {}, {})".format(
self.low.min(), self.high.max(), self.shape, self.dtype
)
def __eq__(self, other):
return isinstance(other, Box) and (self.shape == other.shape) and np.allclose(self.low, other.low) and np.allclose(self.high, other.high)
return (
isinstance(other, Box)
and (self.shape == other.shape)
and np.allclose(self.low, other.low)
and np.allclose(self.high, other.high)
)
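As a quick illustration of the sampling rules documented above (normal for unbounded coordinates, shifted exponential for half-bounded ones, uniform for bounded ones), here is a minimal sketch using the public gym.spaces API; the concrete bounds and seed are illustrative only.

import numpy as np
from gym.spaces import Box

# Bounded below, unbounded above: samples come from a shifted exponential distribution.
space = Box(low=1.0, high=np.inf, shape=(3, 3), dtype=np.float32)
space.seed(0)
x = space.sample()
assert x.shape == (3, 3) and x.dtype == np.float32
assert (x >= space.low).all()    # every coordinate respects the lower bound
assert space.contains(x)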

View File

@@ -31,8 +31,11 @@ class Dict(Space):
})
})
"""
def __init__(self, spaces=None, **spaces_kwargs):
assert (spaces is None) or (not spaces_kwargs), 'Use either Dict(spaces=dict(...)) or Dict(foo=x, bar=z)'
assert (spaces is None) or (
not spaces_kwargs
), "Use either Dict(spaces=dict(...)) or Dict(foo=x, bar=z)"
if spaces is None:
spaces = spaces_kwargs
if isinstance(spaces, dict) and not isinstance(spaces, OrderedDict):
@@ -41,8 +44,12 @@ class Dict(Space):
spaces = OrderedDict(spaces)
self.spaces = spaces
for space in spaces.values():
assert isinstance(space, Space), 'Values of the dict should be instances of gym.Space'
super(Dict, self).__init__(None, None) # None for shape and dtype, since it'll require special handling
assert isinstance(
space, Space
), "Values of the dict should be instances of gym.Space"
super(Dict, self).__init__(
None, None
) # None for shape and dtype, since it'll require special handling
def seed(self, seed=None):
[space.seed(seed) for space in self.spaces.values()]
@@ -62,18 +69,24 @@ class Dict(Space):
def __getitem__(self, key):
return self.spaces[key]
def __iter__(self):
for key in self.spaces:
yield key
def __repr__(self):
return "Dict(" + ", ". join([str(k) + ":" + str(s) for k, s in self.spaces.items()]) + ")"
return (
"Dict("
+ ", ".join([str(k) + ":" + str(s) for k, s in self.spaces.items()])
+ ")"
)
def to_jsonable(self, sample_n):
# serialize as dict-repr of vectors
return {key: space.to_jsonable([sample[key] for sample in sample_n]) \
for key, space in self.spaces.items()}
return {
key: space.to_jsonable([sample[key] for sample in sample_n])
for key, space in self.spaces.items()
}
def from_jsonable(self, sample_n):
dict_of_list = {}
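To make the Dict space behaviour shown above concrete, a small usage sketch (the sub-spaces chosen here are illustrative):

import numpy as np
from gym.spaces import Box, Dict, Discrete

space = Dict(
    {
        "position": Discrete(5),
        "velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32),
    }
)
space.seed(0)
sample = space.sample()          # an OrderedDict with one entry per sub-space
assert set(sample.keys()) == {"position", "velocity"}
assert space.contains(sample)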

View File

@@ -3,13 +3,14 @@ from .space import Space
class Discrete(Space):
r"""A discrete space in :math:`\{ 0, 1, \\dots, n-1 \}`.
r"""A discrete space in :math:`\{ 0, 1, \\dots, n-1 \}`.
Example::
>>> Discrete(2)
"""
def __init__(self, n):
assert n >= 0
self.n = n
@@ -21,7 +22,9 @@ class Discrete(Space):
def contains(self, x):
if isinstance(x, int):
as_int = x
elif isinstance(x, (np.generic, np.ndarray)) and (x.dtype.char in np.typecodes['AllInteger'] and x.shape == ()):
elif isinstance(x, (np.generic, np.ndarray)) and (
x.dtype.char in np.typecodes["AllInteger"] and x.shape == ()
):
as_int = int(x)
else:
return False
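A short sketch of the contains logic above: plain Python ints and zero-dimensional integer scalars are accepted, everything else is rejected.

import numpy as np
from gym.spaces import Discrete

space = Discrete(3)
space.seed(0)
assert space.contains(space.sample())
assert space.contains(np.int64(2))            # zero-dimensional integer scalar
assert not space.contains(1.5)                # floats are rejected
assert not space.contains(np.array([1, 2]))   # non-scalar arrays are rejected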

View File

@@ -3,13 +3,13 @@ from .space import Space
class MultiBinary(Space):
'''
An n-shape binary space.
"""
An n-shape binary space.
The argument to MultiBinary defines n, which could be a number or a `list` of numbers.
Example Usage:
>> self.observation_space = spaces.MultiBinary(5)
>> self.observation_space.sample()
@@ -21,16 +21,17 @@ class MultiBinary(Space):
>> self.observation_space.sample()
array([[0, 0],
[0, 1],
[0, 1],
[1, 1]], dtype=int8)
'''
"""
def __init__(self, n):
self.n = n
if type(n) in [tuple, list, np.ndarray]:
input_n = n
else:
input_n = (n, )
input_n = (n,)
super(MultiBinary, self).__init__(input_n, np.int8)
def sample(self):
@@ -41,7 +42,7 @@ class MultiBinary(Space):
x = np.array(x) # Promote list to array for contains check
if self.shape != x.shape:
return False
return ((x==0) | (x==1)).all()
return ((x == 0) | (x == 1)).all()
def to_jsonable(self, sample_n):
return np.array(sample_n).tolist()
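A quick sketch of the contains check above: samples are int8 arrays, and lists are promoted to arrays before the 0/1 test.

from gym.spaces import MultiBinary

space = MultiBinary(5)
space.seed(0)
x = space.sample()                           # int8 array of shape (5,) with entries in {0, 1}
assert x.shape == (5,)
assert space.contains(x)
assert space.contains([0, 1, 1, 0, 1])       # a list is promoted to an array for the check
assert not space.contains([0, 1, 2, 0, 1])   # 2 is not a valid binary entry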

View File

@@ -22,18 +22,21 @@ class MultiDiscrete(Space):
MultiDiscrete([ 5, 2, 2 ])
"""
def __init__(self, nvec, dtype=np.int64):
"""
nvec: vector of counts of each categorical variable
"""
assert (np.array(nvec) > 0).all(), 'nvec (counts) have to be positive'
assert (np.array(nvec) > 0).all(), "nvec (counts) have to be positive"
self.nvec = np.asarray(nvec, dtype=dtype)
super(MultiDiscrete, self).__init__(self.nvec.shape, dtype)
def sample(self):
return (self.np_random.random_sample(self.nvec.shape)*self.nvec).astype(self.dtype)
return (self.np_random.random_sample(self.nvec.shape) * self.nvec).astype(
self.dtype
)
def contains(self, x):
if isinstance(x, list):

View File

@@ -15,8 +15,10 @@ class Space(object):
Moreover, some implementations of Reinforcement Learning algorithms might
not handle custom spaces properly. Use custom spaces with care.
"""
def __init__(self, shape=None, dtype=None):
import numpy as np # takes about 300-400ms to import, so we load lazily
self.shape = None if shape is None else tuple(shape)
self.dtype = None if dtype is None else np.dtype(dtype)
self._np_random = None
@@ -32,12 +34,12 @@ class Space(object):
return self._np_random
def sample(self):
"""Randomly sample an element of this space. Can be
"""Randomly sample an element of this space. Can be
uniform or non-uniform sampling based on boundedness of space."""
raise NotImplementedError
def seed(self, seed=None):
"""Seed the PRNG of this space. """
"""Seed the PRNG of this space."""
self._np_random, seed = seeding.np_random(seed)
return [seed]
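To illustrate the base-class contract above, a minimal custom space (a toy example, not part of Gym itself); note the caveat in the docstring that not every algorithm handles custom spaces.

import numpy as np
from gym.spaces import Space


class UnitCircle(Space):
    """Toy custom space: points on the unit circle in R^2."""

    def __init__(self):
        super().__init__(shape=(2,), dtype=np.float32)

    def sample(self):
        theta = self.np_random.uniform(0.0, 2.0 * np.pi)
        return np.array([np.cos(theta), np.sin(theta)], dtype=self.dtype)

    def contains(self, x):
        x = np.asarray(x)
        return x.shape == (2,) and bool(np.isclose(np.linalg.norm(x), 1.0))


space = UnitCircle()
space.seed(0)
assert space.contains(space.sample())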

View File

@@ -7,17 +7,31 @@ import pytest
from gym.spaces import Tuple, Box, Discrete, MultiDiscrete, MultiBinary, Dict
@pytest.mark.parametrize("space", [
Discrete(3),
Box(low=0., high=np.inf, shape=(2,2)),
Tuple([Discrete(5), Discrete(10)]),
Tuple([Discrete(5), Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)]),
Tuple((Discrete(5), Discrete(2), Discrete(2))),
MultiDiscrete([2, 2, 100]),
MultiBinary(10),
Dict({"position": Discrete(5),
"velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)}),
])
@pytest.mark.parametrize(
"space",
[
Discrete(3),
Box(low=0.0, high=np.inf, shape=(2, 2)),
Tuple([Discrete(5), Discrete(10)]),
Tuple(
[
Discrete(5),
Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32),
]
),
Tuple((Discrete(5), Discrete(2), Discrete(2))),
MultiDiscrete([2, 2, 100]),
MultiBinary(10),
Dict(
{
"position": Discrete(5),
"velocity": Box(
low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32
),
}
),
],
)
def test_roundtripping(space):
sample_1 = space.sample()
sample_2 = space.sample()
@@ -38,53 +52,77 @@ def test_roundtripping(space):
assert s2 == s2p, "Expected {} to equal {}".format(s2, s2p)
@pytest.mark.parametrize("space", [
Discrete(3),
Box(low=np.array([-10, 0]), high=np.array([10, 10]), dtype=np.float32),
Box(low=-np.inf, high=np.inf, shape=(1,3)),
Tuple([Discrete(5), Discrete(10)]),
Tuple([Discrete(5), Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)]),
Tuple((Discrete(5), Discrete(2), Discrete(2))),
MultiDiscrete([2, 2, 100]),
MultiBinary(6),
Dict({"position": Discrete(5),
"velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)}),
])
@pytest.mark.parametrize(
"space",
[
Discrete(3),
Box(low=np.array([-10, 0]), high=np.array([10, 10]), dtype=np.float32),
Box(low=-np.inf, high=np.inf, shape=(1, 3)),
Tuple([Discrete(5), Discrete(10)]),
Tuple(
[
Discrete(5),
Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32),
]
),
Tuple((Discrete(5), Discrete(2), Discrete(2))),
MultiDiscrete([2, 2, 100]),
MultiBinary(6),
Dict(
{
"position": Discrete(5),
"velocity": Box(
low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32
),
}
),
],
)
def test_equality(space):
space1 = space
space2 = copy(space)
assert space1 == space2, "Expected {} to equal {}".format(space1, space2)
@pytest.mark.parametrize("spaces", [
(Discrete(3), Discrete(4)),
(MultiDiscrete([2, 2, 100]), MultiDiscrete([2, 2, 8])),
(MultiBinary(8), MultiBinary(7)),
(Box(low=np.array([-10, 0]), high=np.array([10, 10]), dtype=np.float32),
Box(low=np.array([-10, 0]), high=np.array([10, 9]), dtype=np.float32)),
(Box(low=-np.inf,high=0., shape=(2,1)),
Box(low=0., high=np.inf, shape=(2,1))),
(Tuple([Discrete(5), Discrete(10)]), Tuple([Discrete(1), Discrete(10)])),
(Dict({"position": Discrete(5)}), Dict({"position": Discrete(4)})),
(Dict({"position": Discrete(5)}), Dict({"speed": Discrete(5)})),
])
@pytest.mark.parametrize(
"spaces",
[
(Discrete(3), Discrete(4)),
(MultiDiscrete([2, 2, 100]), MultiDiscrete([2, 2, 8])),
(MultiBinary(8), MultiBinary(7)),
(
Box(low=np.array([-10, 0]), high=np.array([10, 10]), dtype=np.float32),
Box(low=np.array([-10, 0]), high=np.array([10, 9]), dtype=np.float32),
),
(
Box(low=-np.inf, high=0.0, shape=(2, 1)),
Box(low=0.0, high=np.inf, shape=(2, 1)),
),
(Tuple([Discrete(5), Discrete(10)]), Tuple([Discrete(1), Discrete(10)])),
(Dict({"position": Discrete(5)}), Dict({"position": Discrete(4)})),
(Dict({"position": Discrete(5)}), Dict({"speed": Discrete(5)})),
],
)
def test_inequality(spaces):
space1, space2 = spaces
assert space1 != space2, "Expected {} != {}".format(space1, space2)
@pytest.mark.parametrize("space", [
Discrete(5),
Box(low=0, high=255, shape=(2,), dtype='uint8'),
Box(low=-np.inf, high=np.inf, shape=(3,3)),
Box(low=1., high=np.inf, shape=(3,3)),
Box(low=-np.inf, high=2., shape=(3,3)),
])
@pytest.mark.parametrize(
"space",
[
Discrete(5),
Box(low=0, high=255, shape=(2,), dtype="uint8"),
Box(low=-np.inf, high=np.inf, shape=(3, 3)),
Box(low=1.0, high=np.inf, shape=(3, 3)),
Box(low=-np.inf, high=2.0, shape=(3, 3)),
],
)
def test_sample(space):
space.seed(0)
n_trials = 100
samples = np.array([space.sample() for _ in range(n_trials)])
expected_mean = 0.
expected_mean = 0.0
if isinstance(space, Box):
if space.is_bounded():
expected_mean = (space.high + space.low) / 2
@@ -93,23 +131,35 @@ def test_sample(space):
elif space.is_bounded("above"):
expected_mean = -1 + space.high
else:
expected_mean = 0.
expected_mean = 0.0
elif isinstance(space, Discrete):
expected_mean = space.n / 2
else:
raise NotImplementedError
np.testing.assert_allclose(expected_mean, samples.mean(), atol=3.0 * samples.std())
@pytest.mark.parametrize("spaces", [
(Discrete(5), MultiBinary(5)),
(Box(low=np.array([-10, 0]), high=np.array([10,10]), dtype=np.float32), MultiDiscrete([2, 2, 8])),
(Box(low=0, high=255, shape=(64, 64, 3), dtype=np.uint8), Box(low=0, high=255, shape=(32, 32, 3), dtype=np.uint8)),
(Dict({"position": Discrete(5)}), Tuple([Discrete(5)])),
(Dict({"position": Discrete(5)}), Discrete(5)),
(Tuple((Discrete(5),)), Discrete(5)),
(Box(low=np.array([-np.inf,0.]), high=np.array([0., np.inf])),
Box(low=np.array([-np.inf, 1.]), high=np.array([0., np.inf])))
])
@pytest.mark.parametrize(
"spaces",
[
(Discrete(5), MultiBinary(5)),
(
Box(low=np.array([-10, 0]), high=np.array([10, 10]), dtype=np.float32),
MultiDiscrete([2, 2, 8]),
),
(
Box(low=0, high=255, shape=(64, 64, 3), dtype=np.uint8),
Box(low=0, high=255, shape=(32, 32, 3), dtype=np.uint8),
),
(Dict({"position": Discrete(5)}), Tuple([Discrete(5)])),
(Dict({"position": Discrete(5)}), Discrete(5)),
(Tuple((Discrete(5),)), Discrete(5)),
(
Box(low=np.array([-np.inf, 0.0]), high=np.array([0.0, np.inf])),
Box(low=np.array([-np.inf, 1.0]), high=np.array([0.0, np.inf])),
),
],
)
def test_class_inequality(spaces):
assert spaces[0] == spaces[0]
assert spaces[1] == spaces[1]
@@ -117,11 +167,14 @@ def test_class_inequality(spaces):
assert spaces[1] != spaces[0]
@pytest.mark.parametrize("space_fn", [
lambda: Dict(space1='abc'),
lambda: Dict({'space1': 'abc'}),
lambda: Tuple(['abc'])
])
@pytest.mark.parametrize(
"space_fn",
[
lambda: Dict(space1="abc"),
lambda: Dict({"space1": "abc"}),
lambda: Tuple(["abc"]),
],
)
def test_bad_space_calls(space_fn):
with pytest.raises(AssertionError):
space_fn()

View File

@@ -6,97 +6,181 @@ import pytest
from gym.spaces import Box, Dict, Discrete, MultiBinary, MultiDiscrete, Tuple, utils
@pytest.mark.parametrize(["space", "flatdim"], [
(Discrete(3), 3),
(Box(low=0., high=np.inf, shape=(2, 2)), 4),
(Tuple([Discrete(5), Discrete(10)]), 15),
(Tuple([Discrete(5), Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)]), 7),
(Tuple((Discrete(5), Discrete(2), Discrete(2))), 9),
(MultiDiscrete([2, 2, 100]), 3),
(MultiBinary(10), 10),
(Dict({"position": Discrete(5),
"velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)}), 7),
])
@pytest.mark.parametrize(
["space", "flatdim"],
[
(Discrete(3), 3),
(Box(low=0.0, high=np.inf, shape=(2, 2)), 4),
(Tuple([Discrete(5), Discrete(10)]), 15),
(
Tuple(
[
Discrete(5),
Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32),
]
),
7,
),
(Tuple((Discrete(5), Discrete(2), Discrete(2))), 9),
(MultiDiscrete([2, 2, 100]), 3),
(MultiBinary(10), 10),
(
Dict(
{
"position": Discrete(5),
"velocity": Box(
low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32
),
}
),
7,
),
],
)
def test_flatdim(space, flatdim):
dim = utils.flatdim(space)
assert dim == flatdim, "Expected {} to equal {}".format(dim, flatdim)
@pytest.mark.parametrize("space", [
Discrete(3),
Box(low=0., high=np.inf, shape=(2, 2)),
Tuple([Discrete(5), Discrete(10)]),
Tuple([Discrete(5), Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)]),
Tuple((Discrete(5), Discrete(2), Discrete(2))),
MultiDiscrete([2, 2, 100]),
MultiBinary(10),
Dict({"position": Discrete(5),
"velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)}),
])
@pytest.mark.parametrize(
"space",
[
Discrete(3),
Box(low=0.0, high=np.inf, shape=(2, 2)),
Tuple([Discrete(5), Discrete(10)]),
Tuple(
[
Discrete(5),
Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32),
]
),
Tuple((Discrete(5), Discrete(2), Discrete(2))),
MultiDiscrete([2, 2, 100]),
MultiBinary(10),
Dict(
{
"position": Discrete(5),
"velocity": Box(
low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32
),
}
),
],
)
def test_flatten_space_boxes(space):
flat_space = utils.flatten_space(space)
assert isinstance(flat_space, Box), "Expected {} to equal {}".format(type(flat_space), Box)
assert isinstance(flat_space, Box), "Expected {} to equal {}".format(
type(flat_space), Box
)
flatdim = utils.flatdim(space)
(single_dim, ) = flat_space.shape
(single_dim,) = flat_space.shape
assert single_dim == flatdim, "Expected {} to equal {}".format(single_dim, flatdim)
@pytest.mark.parametrize("space", [
Discrete(3),
Box(low=0., high=np.inf, shape=(2, 2)),
Tuple([Discrete(5), Discrete(10)]),
Tuple([Discrete(5), Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)]),
Tuple((Discrete(5), Discrete(2), Discrete(2))),
MultiDiscrete([2, 2, 100]),
MultiBinary(10),
Dict({"position": Discrete(5),
"velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)}),
])
@pytest.mark.parametrize(
"space",
[
Discrete(3),
Box(low=0.0, high=np.inf, shape=(2, 2)),
Tuple([Discrete(5), Discrete(10)]),
Tuple(
[
Discrete(5),
Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32),
]
),
Tuple((Discrete(5), Discrete(2), Discrete(2))),
MultiDiscrete([2, 2, 100]),
MultiBinary(10),
Dict(
{
"position": Discrete(5),
"velocity": Box(
low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32
),
}
),
],
)
def test_flat_space_contains_flat_points(space):
some_samples = [space.sample() for _ in range(10)]
flattened_samples = [utils.flatten(space, sample) for sample in some_samples]
flat_space = utils.flatten_space(space)
for i, flat_sample in enumerate(flattened_samples):
assert flat_sample in flat_space,\
'Expected sample #{} {} to be in {}'.format(i, flat_sample, flat_space)
assert flat_sample in flat_space, "Expected sample #{} {} to be in {}".format(
i, flat_sample, flat_space
)
@pytest.mark.parametrize("space", [
Discrete(3),
Box(low=0., high=np.inf, shape=(2, 2)),
Tuple([Discrete(5), Discrete(10)]),
Tuple([Discrete(5), Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)]),
Tuple((Discrete(5), Discrete(2), Discrete(2))),
MultiDiscrete([2, 2, 100]),
MultiBinary(10),
Dict({"position": Discrete(5),
"velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)}),
])
@pytest.mark.parametrize(
"space",
[
Discrete(3),
Box(low=0.0, high=np.inf, shape=(2, 2)),
Tuple([Discrete(5), Discrete(10)]),
Tuple(
[
Discrete(5),
Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32),
]
),
Tuple((Discrete(5), Discrete(2), Discrete(2))),
MultiDiscrete([2, 2, 100]),
MultiBinary(10),
Dict(
{
"position": Discrete(5),
"velocity": Box(
low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32
),
}
),
],
)
def test_flatten_dim(space):
sample = utils.flatten(space, space.sample())
(single_dim, ) = sample.shape
(single_dim,) = sample.shape
flatdim = utils.flatdim(space)
assert single_dim == flatdim, "Expected {} to equal {}".format(single_dim, flatdim)
@pytest.mark.parametrize("space", [
Discrete(3),
Box(low=0., high=np.inf, shape=(2, 2)),
Tuple([Discrete(5), Discrete(10)]),
Tuple([Discrete(5), Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)]),
Tuple((Discrete(5), Discrete(2), Discrete(2))),
MultiDiscrete([2, 2, 100]),
MultiBinary(10),
Dict({"position": Discrete(5),
"velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)}),
])
@pytest.mark.parametrize(
"space",
[
Discrete(3),
Box(low=0.0, high=np.inf, shape=(2, 2)),
Tuple([Discrete(5), Discrete(10)]),
Tuple(
[
Discrete(5),
Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32),
]
),
Tuple((Discrete(5), Discrete(2), Discrete(2))),
MultiDiscrete([2, 2, 100]),
MultiBinary(10),
Dict(
{
"position": Discrete(5),
"velocity": Box(
low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32
),
}
),
],
)
def test_flatten_roundtripping(space):
some_samples = [space.sample() for _ in range(10)]
flattened_samples = [utils.flatten(space, sample) for sample in some_samples]
roundtripped_samples = [utils.unflatten(space, sample) for sample in flattened_samples]
for i, (original, roundtripped) in enumerate(zip(some_samples, roundtripped_samples)):
assert compare_nested(original, roundtripped), \
'Expected sample #{} {} to equal {}'.format(i, original, roundtripped)
roundtripped_samples = [
utils.unflatten(space, sample) for sample in flattened_samples
]
for i, (original, roundtripped) in enumerate(
zip(some_samples, roundtripped_samples)
):
assert compare_nested(
original, roundtripped
), "Expected sample #{} {} to equal {}".format(i, original, roundtripped)
def compare_nested(left, right):
@@ -104,7 +188,9 @@ def compare_nested(left, right):
return np.allclose(left, right)
elif isinstance(left, OrderedDict) and isinstance(right, OrderedDict):
res = len(left) == len(right)
for ((left_key, left_value), (right_key, right_value)) in zip(left.items(), right.items()):
for ((left_key, left_value), (right_key, right_value)) in zip(
left.items(), right.items()
):
if not res:
return False
res = left_key == right_key and compare_nested(left_value, right_value)
@@ -119,25 +205,48 @@ def compare_nested(left, right):
else:
return left == right
'''
"""
Expected flattened types are based on:
1. The type that the space is hardcoded as (i.e. multi_discrete=np.int64, discrete=np.int64, multi_binary=np.int8)
2. The type that the space is instantiated with (i.e. box=np.float32 by default unless instantiated with a different type)
3. The smallest type that the composite space(tuple, dict) can be represented as. In flatten, this is determined
internally by numpy when np.concatenate is called.
'''
@pytest.mark.parametrize(["original_space", "expected_flattened_dtype"], [
(Discrete(3), np.int64),
(Box(low=0., high=np.inf, shape=(2, 2)), np.float32),
(Box(low=0., high=np.inf, shape=(2, 2), dtype=np.float16), np.float16),
(Tuple([Discrete(5), Discrete(10)]), np.int64),
(Tuple([Discrete(5), Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)]), np.float64),
(Tuple((Discrete(5), Discrete(2), Discrete(2))), np.int64),
(MultiDiscrete([2, 2, 100]), np.int64),
(MultiBinary(10), np.int8),
(Dict({"position": Discrete(5),
"velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float16)}), np.float64),
])
3. The smallest type that the composite space(tuple, dict) can be represented as. In flatten, this is determined
internally by numpy when np.concatenate is called.
"""
@pytest.mark.parametrize(
["original_space", "expected_flattened_dtype"],
[
(Discrete(3), np.int64),
(Box(low=0.0, high=np.inf, shape=(2, 2)), np.float32),
(Box(low=0.0, high=np.inf, shape=(2, 2), dtype=np.float16), np.float16),
(Tuple([Discrete(5), Discrete(10)]), np.int64),
(
Tuple(
[
Discrete(5),
Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32),
]
),
np.float64,
),
(Tuple((Discrete(5), Discrete(2), Discrete(2))), np.int64),
(MultiDiscrete([2, 2, 100]), np.int64),
(MultiBinary(10), np.int8),
(
Dict(
{
"position": Discrete(5),
"velocity": Box(
low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float16
),
}
),
np.float64,
),
],
)
def test_dtypes(original_space, expected_flattened_dtype):
flattened_space = utils.flatten_space(original_space)
@@ -145,28 +254,41 @@ def test_dtypes(original_space, expected_flattened_dtype):
flattened_sample = utils.flatten(original_space, original_sample)
unflattened_sample = utils.unflatten(original_space, flattened_sample)
assert flattened_space.contains(flattened_sample), "Expected flattened_space to contain flattened_sample"
assert flattened_space.dtype == expected_flattened_dtype, "Expected flattened_space's dtype to equal " \
"{}".format(expected_flattened_dtype)
assert flattened_space.contains(
flattened_sample
), "Expected flattened_space to contain flattened_sample"
assert (
flattened_space.dtype == expected_flattened_dtype
), "Expected flattened_space's dtype to equal " "{}".format(
expected_flattened_dtype
)
assert flattened_sample.dtype == flattened_space.dtype, "Expected flattened_space's dtype to equal " \
"flattened_sample's dtype "
assert flattened_sample.dtype == flattened_space.dtype, (
"Expected flattened_space's dtype to equal " "flattened_sample's dtype "
)
compare_sample_types(original_space, original_sample, unflattened_sample)
def compare_sample_types(original_space, original_sample, unflattened_sample):
if isinstance(original_space, Discrete):
assert isinstance(unflattened_sample, int), "Expected unflattened_sample to be an int. unflattened_sample: " \
"{} original_sample: {}".format(unflattened_sample, original_sample)
assert isinstance(unflattened_sample, int), (
"Expected unflattened_sample to be an int. unflattened_sample: "
"{} original_sample: {}".format(unflattened_sample, original_sample)
)
elif isinstance(original_space, Tuple):
for index in range(len(original_space)):
compare_sample_types(original_space.spaces[index], original_sample[index], unflattened_sample[index])
compare_sample_types(
original_space.spaces[index],
original_sample[index],
unflattened_sample[index],
)
elif isinstance(original_space, Dict):
for key, space in original_space.spaces.items():
compare_sample_types(space, original_sample[key], unflattened_sample[key])
else:
assert unflattened_sample.dtype == original_sample.dtype, "Expected unflattened_sample's dtype to equal " \
"original_sample's dtype. unflattened_sample: " \
"{} original_sample: {}".format(unflattened_sample,
original_sample)
assert unflattened_sample.dtype == original_sample.dtype, (
"Expected unflattened_sample's dtype to equal "
"original_sample's dtype. unflattened_sample: "
"{} original_sample: {}".format(unflattened_sample, original_sample)
)

View File

@@ -9,10 +9,13 @@ class Tuple(Space):
Example usage:
self.observation_space = spaces.Tuple((spaces.Discrete(2), spaces.Discrete(3)))
"""
def __init__(self, spaces):
self.spaces = spaces
for space in spaces:
assert isinstance(space, Space), "Elements of the tuple must be instances of gym.Space"
assert isinstance(
space, Space
), "Elements of the tuple must be instances of gym.Space"
super(Tuple, self).__init__(None, None)
def seed(self, seed=None):
@@ -24,25 +27,38 @@ class Tuple(Space):
def contains(self, x):
if isinstance(x, list):
x = tuple(x) # Promote list to tuple for contains check
return isinstance(x, tuple) and len(x) == len(self.spaces) and all(
space.contains(part) for (space,part) in zip(self.spaces,x))
return (
isinstance(x, tuple)
and len(x) == len(self.spaces)
and all(space.contains(part) for (space, part) in zip(self.spaces, x))
)
def __repr__(self):
return "Tuple(" + ", ". join([str(s) for s in self.spaces]) + ")"
return "Tuple(" + ", ".join([str(s) for s in self.spaces]) + ")"
def to_jsonable(self, sample_n):
# serialize as list-repr of tuple of vectors
return [space.to_jsonable([sample[i] for sample in sample_n]) \
for i, space in enumerate(self.spaces)]
return [
space.to_jsonable([sample[i] for sample in sample_n])
for i, space in enumerate(self.spaces)
]
def from_jsonable(self, sample_n):
return [sample for sample in zip(*[space.from_jsonable(sample_n[i]) for i, space in enumerate(self.spaces)])]
return [
sample
for sample in zip(
*[
space.from_jsonable(sample_n[i])
for i, space in enumerate(self.spaces)
]
)
]
def __getitem__(self, index):
return self.spaces[index]
def __len__(self):
return len(self.spaces)
def __eq__(self, other):
return isinstance(other, Tuple) and self.spaces == other.spaces
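A brief sketch exercising the contains and JSON round-trip logic above (the sub-spaces are illustrative):

from gym.spaces import Discrete, Tuple

space = Tuple((Discrete(5), Discrete(2)))
space.seed(0)
x = space.sample()                        # a tuple with one entry per sub-space
assert space.contains(x)
assert space.contains(list(x))            # a list is promoted to a tuple for the check

batch = [space.sample() for _ in range(3)]
as_json = space.to_jsonable(batch)        # one inner list per sub-space
restored = space.from_jsonable(as_json)   # back to a list of per-sample tuples
assert all(space.contains(s) for s in restored)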

View File

@@ -50,10 +50,10 @@ def flatten(space, x):
return onehot
elif isinstance(space, Tuple):
return np.concatenate(
[flatten(s, x_part) for x_part, s in zip(x, space.spaces)])
[flatten(s, x_part) for x_part, s in zip(x, space.spaces)]
)
elif isinstance(space, Dict):
return np.concatenate(
[flatten(s, x[key]) for key, s in space.spaces.items()])
return np.concatenate([flatten(s, x[key]) for key, s in space.spaces.items()])
elif isinstance(space, MultiBinary):
return np.asarray(x, dtype=space.dtype).flatten()
elif isinstance(space, MultiDiscrete):
@@ -89,8 +89,7 @@ def unflatten(space, x):
list_flattened = np.split(x, np.cumsum(dims)[:-1])
list_unflattened = [
(key, unflatten(s, flattened))
for flattened, (key,
s) in zip(list_flattened, space.spaces.items())
for flattened, (key, s) in zip(list_flattened, space.spaces.items())
]
return OrderedDict(list_unflattened)
elif isinstance(space, MultiBinary):
@@ -142,31 +141,23 @@ def flatten_space(space):
if isinstance(space, Box):
return Box(space.low.flatten(), space.high.flatten(), dtype=space.dtype)
if isinstance(space, Discrete):
return Box(low=0, high=1, shape=(space.n, ), dtype=space.dtype)
return Box(low=0, high=1, shape=(space.n,), dtype=space.dtype)
if isinstance(space, Tuple):
space = [flatten_space(s) for s in space.spaces]
return Box(
low=np.concatenate([s.low for s in space]),
high=np.concatenate([s.high for s in space]),
dtype=np.result_type(*[s.dtype for s in space])
dtype=np.result_type(*[s.dtype for s in space]),
)
if isinstance(space, Dict):
space = [flatten_space(s) for s in space.spaces.values()]
return Box(
low=np.concatenate([s.low for s in space]),
high=np.concatenate([s.high for s in space]),
dtype=np.result_type(*[s.dtype for s in space])
dtype=np.result_type(*[s.dtype for s in space]),
)
if isinstance(space, MultiBinary):
return Box(low=0,
high=1,
shape=(space.n, ),
dtype=space.dtype
)
return Box(low=0, high=1, shape=(space.n,), dtype=space.dtype)
if isinstance(space, MultiDiscrete):
return Box(
low=np.zeros_like(space.nvec),
high=space.nvec,
dtype=space.dtype
)
return Box(low=np.zeros_like(space.nvec), high=space.nvec, dtype=space.dtype)
raise NotImplementedError
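Putting flatdim, flatten, unflatten and flatten_space together, a small sketch (mirroring a space used in the tests above):

import numpy as np
from gym.spaces import Box, Dict, Discrete, utils

space = Dict(
    {
        "position": Discrete(5),
        "velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32),
    }
)
space.seed(0)

sample = space.sample()
flat = utils.flatten(space, sample)            # 1-D array: 5 one-hot entries + 2 box entries
assert flat.shape == (utils.flatdim(space),)   # flatdim is 7 for this space
flat_space = utils.flatten_space(space)        # an equivalent Box, built as above
assert flat_space.contains(flat)
restored = utils.unflatten(space, flat)        # an OrderedDict again
assert space.contains(restored)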

View File

@@ -1,5 +1,6 @@
from gym import core
class ArgumentEnv(core.Env):
calls = 0
@@ -7,9 +8,10 @@ class ArgumentEnv(core.Env):
self.calls += 1
self.arg = arg
def test_env_instantiation():
# This looks like a pretty trivial test, but given our usage of
# __new__, it's worth having.
env = ArgumentEnv('arg')
assert env.arg == 'arg'
env = ArgumentEnv("arg")
assert env.arg == "arg"
assert env.calls == 1

View File

@@ -12,10 +12,12 @@ from contextlib import contextmanager
# replace method which could result in the file temporarily
# disappearing.
import sys
if sys.version_info >= (3, 3):
# Python 3.3 and up have a native `replace` method
from os import replace
elif sys.platform.startswith("win"):
def replace(src, dst):
# TODO: on Windows, this will raise if the file is in use,
# which is possible. We'll need to make this more robust over
@@ -25,24 +27,27 @@ elif sys.platform.startswith("win"):
except OSError:
pass
os.rename(src, dst)
else:
# POSIX rename() is always atomic
from os import rename as replace
@contextmanager
def atomic_write(filepath, binary=False, fsync=False):
""" Writeable file object that atomically updates a file (using a temporary file). In some cases (namely Python < 3.3 on Windows), this could result in an existing file being temporarily unlinked.
"""Writeable file object that atomically updates a file (using a temporary file). In some cases (namely Python < 3.3 on Windows), this could result in an existing file being temporarily unlinked.
:param filepath: the file path to be opened
:param binary: whether to open the file in a binary mode instead of textual
:param fsync: whether to force write the file to disk
"""
tmppath = filepath + '~'
tmppath = filepath + "~"
while os.path.isfile(tmppath):
tmppath += '~'
tmppath += "~"
try:
with open(tmppath, 'wb' if binary else 'w') as file:
with open(tmppath, "wb" if binary else "w") as file:
yield file
if fsync:
file.flush()
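A usage sketch for the context manager above; the import path gym.utils.atomic_write is assumed (the file path is not shown here) and the file name is illustrative.

import json

from gym.utils.atomic_write import atomic_write  # assumed module path

stats = {"episodes": 10, "mean_reward": 1.5}
# Writes to a temporary "stats.json~" file first, then atomically replaces stats.json.
with atomic_write("stats.json") as f:
    json.dump(stats, f)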

View File

@@ -2,6 +2,7 @@ import atexit
import threading
import weakref
class Closer(object):
"""A registry that ensures your objects get closed, whether manually,
upon garbage collection, or upon exit. To work properly, your
@@ -48,7 +49,7 @@ class Closer(object):
Returns:
int: The registration ID of this object. It is the caller's responsibility to save this ID if early closing is desired.
"""
assert hasattr(closeable, 'close'), 'No close method for {}'.format(closeable)
assert hasattr(closeable, "close"), "No close method for {}".format(closeable)
next_id = self.generate_next_id()
self.closeables[next_id] = closeable
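A minimal sketch of registering an object with a Closer; only register() is exercised because that is the part visible above, and the import path gym.utils.closer is assumed.

from gym.utils.closer import Closer  # assumed module path


class Connection:
    def close(self):
        print("connection closed")


registry = Closer()
conn = Connection()
conn_id = registry.register(conn)  # keep the ID if you may want to close the object early
# Registered objects are closed automatically on garbage collection or at interpreter exit.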

View File

@@ -11,11 +11,11 @@ color2num = dict(
magenta=35,
cyan=36,
white=37,
crimson=38
crimson=38,
)
def colorize(string, color, bold=False, highlight = False):
def colorize(string, color, bold=False, highlight=False):
"""Return string surrounded by appropriate terminal color codes to
print colorized text. Valid colors: gray, red, green, yellow,
blue, magenta, cyan, white, crimson
@@ -23,8 +23,10 @@ def colorize(string, color, bold=False, highlight = False):
attr = []
num = color2num[color]
if highlight: num += 10
if highlight:
num += 10
attr.append(str(num))
if bold: attr.append('1')
attrs = ';'.join(attr)
return '\x1b[%sm%s\x1b[0m' % (attrs, string)
if bold:
attr.append("1")
attrs = ";".join(attr)
return "\x1b[%sm%s\x1b[0m" % (attrs, string)
