[WIP] add support for seeding environments (#135)

* Make environments seedable

* Fix monitor bugs

- Set monitor_id before setting the infix. This was a bug that would yield incorrect results with multiple monitors.
- Remove extra pid from stats recorder filename. This should be purely cosmetic.

* Start uploading seeds in episode_batch

* Fix _bigint_from_bytes for python3

* Set seed explicitly in random_agent

* Pass through seed argument

* Also pass through random state to spaces

* Pass random state into the observation/action spaces

* Make all _seed methods return the list of used seeds

* Switch over to np.random where possible

* Start hashing seeds, and also seed doom engine

* Fixup seeding determinism in many cases

* Seed before loading the ROM

* Make seeding more Python3 friendly

* Make the MuJoCo skipping a bit more forgiving

* Remove debugging PDB calls

* Make setInt argument into raw bytes

* Validate and upload seeds

* Skip box2d

* Make seeds smaller, and change representation of seeds in upload

* Handle long seeds

* Fix RandomAgent example to be deterministic

* Handle integer types correctly in Python2 and Python3

* Try caching pip

* Try adding swap

* Add df and free calls

* Bump swap

* Bump swap size

* Try setting overcommit

* Try other sysctls

* Try fixing overcommit

* Try just setting overcommit_memory=1

* Add explanatory comment

* Add what's new section to readme

* BUG: Mark ElevatorAction-ram-v0 as non-deterministic for now

* Document seed

* Move nondeterministic check into spec
This commit is contained in:
Greg Brockman
2016-05-29 09:07:09 -07:00
parent 2e26518b4f
commit 58e6aa95e5
61 changed files with 711 additions and 285 deletions

.gitignore
View File

@@ -23,8 +23,6 @@ ghostdriver.log
junk
MUJOCO_LOG.txt
mujoco-bundle
rllab_mujoco
@@ -36,3 +34,4 @@ tutorial/*.html
# PyCharm project files
.idea
vizdoom.ini

View File

@@ -2,7 +2,7 @@ dist: trusty
sudo: required
cache:
apt: true
pip: false
pip: true
language: python
addons:
apt:
@@ -30,6 +30,11 @@ before_install:
# In a pull request, there are no secrets, and hence no MuJoCo:
# https://docs.travis-ci.com/user/pull-requests#Security-Restrictions-when-testing-Pull-Requests.
- '[ -z ${MUJOCO_KEY_BUNDLE+x} ] || ( curl https://openai-public.s3-us-west-2.amazonaws.com/mujoco/$MUJOCO_KEY_BUNDLE.tar.gz | tar xz -C ~/.mujoco )'
# Without this line, fork() calls on Travis fail with an out-of-memory
# error. (These fork()s spawn the subprocess used for video
# recording.) We should debug the memory usage at some stage, but
# simply setting overcommit is a good starting point.
- sudo sysctl -w vm.overcommit_memory=1
env:
- DISPLAY=:12
install: pip install tox-travis

View File

@@ -251,3 +251,10 @@ We are using `nose2 <https://github.com/nose-devs/nose2>`_ for tests. You can ru
nose2
You can also run tests in a specific directory by using the ``-s`` option, or by passing in the specific name of the test. See the `nose2 docs <http://nose2.readthedocs.org/en/latest/usage.html#naming-tests>`_ for more details.
What's new
----------
- 2016-05-28: For controlled reproducibility, envs now support seeding
(cf #91 and #135). The monitor records which seeds are used. We will
soon add seed information to the display on the scoreboard.
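A minimal usage sketch of the new API (illustrative, not part of this diff):

import gym

env = gym.make('CartPole-v0')
seeds = env.seed(0)        # list of seeds actually used by the env's generators
observation = env.reset()  # the rollout below this point is reproducible across runs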

View File

@@ -19,14 +19,18 @@ if __name__ == '__main__':
logger.setLevel(logging.INFO)
env = gym.make('CartPole-v0' if len(sys.argv)<2 else sys.argv[1])
agent = RandomAgent(env.action_space)
# You provide the directory to write to (can be an existing
# directory, including one with existing data -- all monitor files
# will be namespaced). You can also dump to a tempdir if you'd
# like: tempfile.mkdtemp().
outdir = '/tmp/random-agent-results'
env.monitor.start(outdir, force=True)
env.monitor.start(outdir, force=True, seed=0)
# This declaration must go *after* the monitor call, since the
# monitor's seeding creates a new action_space instance with the
# appropriate pseudorandom number generator.
agent = RandomAgent(env.action_space)
episode_count = 100
max_steps = 200

View File

@@ -1,9 +1,4 @@
import hashlib
import numpy as np
import logging
import os
import random
import struct
import sys
import gym
@@ -40,48 +35,3 @@ def undo_logger_setup():
root_logger.removeHandler(handler)
gym.logger.setLevel(logging.NOTSET)
requests_logger.setLevel(logging.NOTSET)
def seed(a=None):
"""Seeds the 'random' and 'numpy.random' generators. By default,
Python seeds these with the system time. Call this if you are
using multiple processes.
Notes:
SECURITY SENSITIVE: a bug here would allow people to generate fake results. Please let us know if you find one :).
Args:
a (Optional[int, str]): None or no argument seeds from an operating-system-specific randomness source. If an int or str is passed, then all of its bits are used.
"""
# Adapted from https://svn.python.org/projects/python/tags/r32/Lib/random.py
if a is None:
a = bigint_from_bytes(os.urandom(32))
if isinstance(a, str):
a = a.encode('utf8')
a += hashlib.sha512(a).digest()
a = bigint_from_bytes(a)
# Actually seed the generators
random.seed(a)
np.random.seed(int_list_from_bigint(a))
return a
# TODO: don't hardcode sizeof_int here
def bigint_from_bytes(bytes):
sizeof_int = 4
padding = sizeof_int - len(bytes) % sizeof_int
bytes += '\0' * padding
int_count = len(bytes) / sizeof_int
unpacked = struct.unpack("{}I".format(int_count), bytes)
accum = 0
for i, val in enumerate(unpacked):
accum += 2 ** (sizeof_int * 8 * i) * val
return accum
def int_list_from_bigint(bigint):
ints = []
while bigint > 0:
bigint, mod = divmod(bigint, 2 ** 32)
ints.append(mod)
return ints
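The replacement module, gym/utils/seeding.py, is not shown in this excerpt. As a reading aid, here is a minimal sketch of what it plausibly contains, reconstructed from the call sites below (seeding.np_random, seeding.hash_seed) and from the commit messages about Python 3 byte handling; everything beyond those two names is an assumption:

import hashlib
import os
import struct

import numpy as np

def np_random(seed=None):
    # Return a freshly seeded RandomState plus the seed that was used.
    seed = _seed(seed)
    rng = np.random.RandomState()
    rng.seed(_int_list_from_bigint(hash_seed(seed)))
    return rng, seed

def hash_seed(seed=None, max_bytes=8):
    # Hash the seed so that nearby values (0, 1, 2, ...) yield
    # uncorrelated generator states.
    if seed is None:
        seed = _seed(max_bytes=max_bytes)
    hashed = hashlib.sha512(str(seed).encode('utf8')).digest()
    return _bigint_from_bytes(hashed[:max_bytes])

def _seed(a=None, max_bytes=8):
    # None: draw entropy from the OS. str: use all of its bits via sha512.
    if a is None:
        return _bigint_from_bytes(os.urandom(max_bytes))
    if isinstance(a, str):
        a = a.encode('utf8')
        a += hashlib.sha512(a).digest()
        return _bigint_from_bytes(a[:max_bytes])
    return a

def _bigint_from_bytes(bytes_):
    # The Python 3 fixes: pad with b'\0' (not '\0') and use integer division.
    sizeof_int = 4
    padding = sizeof_int - len(bytes_) % sizeof_int
    bytes_ += b'\0' * padding
    int_count = len(bytes_) // sizeof_int
    unpacked = struct.unpack("{}I".format(int_count), bytes_)
    accum = 0
    for i, val in enumerate(unpacked):
        accum += 2 ** (sizeof_int * 8 * i) * val
    return accum

def _int_list_from_bigint(bigint):
    # np.random.seed accepts a list of uint32s; decompose in base 2**32.
    ints = []
    while bigint > 0:
        bigint, mod = divmod(bigint, 2 ** 32)
        ints.append(mod)
    return ints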

View File

@@ -17,10 +17,11 @@ class Env(object):
The main API methods that users of this class need to know are:
reset
step
reset
render
close
seed
When implementing an environment, override the following methods
in your subclass:
@@ -28,6 +29,8 @@ class Env(object):
_step
_reset
_render
_close
_seed
And set the following attributes:
@@ -70,6 +73,7 @@ class Env(object):
if close:
return
raise NotImplementedError
def _seed(self, seed=None): return []
@property
def monitor(self):
@@ -172,7 +176,9 @@ class Env(object):
Environments will automatically close() themselves when
garbage collected or when the program exits.
"""
if self._closed:
# _closed will be missing if this instance is still
# initializing.
if not hasattr(self, '_closed') or self._closed:
return
self._close()
@@ -181,6 +187,23 @@ class Env(object):
# end up with double close.
self._closed = True
def seed(self, seed=None):
"""Sets the seed for this env's random number generator(s).
Note:
Some environments use multiple pseudorandom number generators.
We want to capture all such seeds used in order to ensure that
there aren't accidental correlations between multiple generators.
Returns:
list<bigint>: Returns the list of seeds used in this env's random
number generators. The first value in the list should be the
"main" seed, or the value which a reproducer should pass to
'seed'. Often, the main seed equals the provided 'seed', but
this won't be true if seed=None, for example.
"""
return self._seed(seed)
def __del__(self):
self.close()
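A usage sketch of this contract (values illustrative):

import gym

env = gym.make('CartPole-v0')
used = env.seed()   # no argument: seeds from OS entropy, e.g. [8802657584989822724]
env.seed(used[0])   # reproduce a recorded run by passing back the "main" seed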

View File

@@ -228,11 +228,21 @@ for game in ['air_raid', 'alien', 'amidar', 'assault', 'asterix', 'asteroids', '
name = ''.join([g.capitalize() for g in game.split('_')])
if obs_type == 'ram':
name = '{}-ram'.format(name)
nondeterministic = False
if game == 'elevator_action' and obs_type == 'ram':
# ElevatorAction-ram-v0 seems to yield slightly
# non-deterministic observations about 10% of the time. We
# should track this down eventually, but for now we just
# mark it as nondeterministic.
nondeterministic = True
register(
id='{}-v0'.format(name),
entry_point='gym.envs.atari:AtariEnv',
kwargs={'game': game, 'obs_type': obs_type},
timestep_limit=10000,
nondeterministic=nondeterministic,
)
# Board games
@@ -248,6 +258,11 @@ register(
'illegal_move_mode': 'lose',
'board_size': 9,
},
# The pachi player seems not to be deterministic given a fixed seed.
# (Reproduce by running 'import gym; h = gym.make('Go9x9-v0'); h.seed(1); h.reset(); h.step(15); h.step(16); h.step(17)' a few times.)
#
# This is probably due to a computation time limit.
nondeterministic=True,
)
register(
@@ -260,6 +275,7 @@ register(
'illegal_move_mode': 'lose',
'board_size': 19,
},
nondeterministic=True,
)
register(

View File

@@ -1,8 +1,7 @@
from gym import Env
from gym.spaces import Discrete, Tuple
from gym.utils import colorize
from gym.utils import colorize, seeding
import numpy as np
import random
from six import StringIO
import sys
import math
@@ -17,6 +16,7 @@ class AlgorithmicEnv(Env):
def __init__(self, inp_dim=1, base=10, chars=False):
global hash_base
hash_base = 50 ** np.arange(inp_dim)
self.base = base
self.last = 10
@@ -27,10 +27,17 @@ class AlgorithmicEnv(Env):
self.inp_dim = inp_dim
AlgorithmicEnv.current_length = 2
tape_control = []
self.action_space = Tuple(([Discrete(2 * inp_dim), Discrete(2), Discrete(self.base)]))
self.observation_space = Discrete(self.base + 1)
self._seed()
self.reset()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
self.action_space = Tuple(([Discrete(2 * self.inp_dim, np_random=self.np_random), Discrete(2, np_random=self.np_random), Discrete(self.base, np_random=self.np_random)]))
self.observation_space = Discrete(self.base + 1, np_random=self.np_random)
return [seed]
def _get_obs(self, pos=None):
if pos is None:
pos = self.x
@@ -198,6 +205,6 @@ class AlgorithmicEnv(Env):
AlgorithmicEnv.sum_rewards = []
self.sum_reward = 0.0
self.time = 0
self.total_len = random.randrange(3) + AlgorithmicEnv.current_length
self.total_len = self.np_random.randint(3) + AlgorithmicEnv.current_length
self.set_data()
return self._get_obs()
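This file establishes the _seed pattern the rest of the commit repeats: create self.np_random via seeding.np_random, rebuild the spaces against that generator, and return the list of seeds used. A distilled example showing just the pattern (CoinFlipEnv and its dynamics are invented for illustration; the np_random keyword on the spaces is the one this commit adds):

import gym
from gym import spaces
from gym.utils import seeding

class CoinFlipEnv(gym.Env):
    def __init__(self):
        self._seed()  # build the seeded spaces before anything samples from them

    def _seed(self, seed=None):
        # seeding.np_random returns a RandomState plus the seed actually used.
        self.np_random, seed = seeding.np_random(seed)
        # Recreate the spaces so that action_space.sample() draws from the
        # same seeded generator as the environment dynamics below.
        self.action_space = spaces.Discrete(2, np_random=self.np_random)
        self.observation_space = spaces.Discrete(2, np_random=self.np_random)
        return [seed]

    def _reset(self):
        self.state = self.np_random.randint(2)
        return self.state

    def _step(self, action):
        reward = float(action == self.state)
        self.state = self.np_random.randint(2)
        return self.state, reward, False, {}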

View File

@@ -2,7 +2,6 @@
Task is to copy content from the input tape to
the output tape. http://arxiv.org/abs/1511.07275
"""
import random
import numpy as np
from gym.envs.algorithmic import algorithmic_env
from gym.envs.algorithmic.algorithmic_env import ha
@@ -17,8 +16,7 @@ class CopyEnv(algorithmic_env.AlgorithmicEnv):
self.content = {}
self.target = {}
for i in range(self.total_len):
val = random.randrange(self.base)
val = self.np_random.randint(self.base)
self.content[ha(np.array([i]))] = val
self.target[i] = val
self.total_reward = self.total_len

View File

@@ -3,7 +3,6 @@ Task is to return every second character from the input tape.
http://arxiv.org/abs/1511.07275
"""
import random
import numpy as np
from gym.envs.algorithmic import algorithmic_env
from gym.envs.algorithmic.algorithmic_env import ha
@@ -20,7 +19,7 @@ class DuplicatedInputEnv(algorithmic_env.AlgorithmicEnv):
self.target = {}
copies = int(self.total_len / self.duplication)
for i in range(copies):
val = random.randrange(self.base)
val = self.np_random.randint(self.base)
self.target[i] = val
for d in range(self.duplication):
self.content[ha(np.array([i * self.duplication + d]))] = val

View File

@@ -2,7 +2,6 @@
Task is to copy content multiple-times from the input tape to
the output tape. http://arxiv.org/abs/1511.07275
"""
import random
import numpy as np
from gym.envs.algorithmic import algorithmic_env
from gym.envs.algorithmic.algorithmic_env import ha
@@ -20,10 +19,9 @@ class RepeatCopyEnv(algorithmic_env.AlgorithmicEnv):
self.target = {}
unique = set()
for i in range(self.total_len):
val = random.randrange(self.base)
val = self.np_random.randint(self.base)
self.content[ha(np.array([i]))] = val
self.target[i] = val
self.target[2 * self.total_len - i - 1] = val
self.target[2 * self.total_len + i] = val
self.total_reward = 3.0 * self.total_len + 0.9

View File

@@ -3,7 +3,6 @@ Task is to reverse content over the input tape.
http://arxiv.org/abs/1511.07275
"""
import random
import numpy as np
from gym.envs.algorithmic import algorithmic_env
from gym.envs.algorithmic.algorithmic_env import ha
@@ -21,7 +20,7 @@ class ReverseEnv(algorithmic_env.AlgorithmicEnv):
self.content = {}
self.target = {}
for i in range(self.total_len):
val = random.randrange(self.base)
val = self.np_random.randint(self.base)
self.content[ha(np.array([i]))] = val
self.target[self.total_len - i - 1] = val
self.total_reward = self.total_len + 0.9

View File

@@ -1,4 +1,3 @@
import random
import numpy as np
from gym.envs.algorithmic import algorithmic_env
from gym.envs.algorithmic.algorithmic_env import ha
@@ -17,7 +16,7 @@ class ReversedAdditionEnv(algorithmic_env.AlgorithmicEnv):
for i in range(self.total_len):
vals = []
for k in range(self.rows):
val = random.randrange(self.base)
val = self.np_random.randint(self.base)
self.content[ha(np.array([i, k]))] = val
vals.append(val)
total = sum(vals) + curry
@@ -26,5 +25,3 @@ class ReversedAdditionEnv(algorithmic_env.AlgorithmicEnv):
if curry > 0:
self.target[self.total_len] = curry
self.total_reward = self.total_len

View File

@@ -3,6 +3,7 @@ import os
import gym
from gym import error, spaces
from gym import utils
from gym.utils import seeding
try:
import atari_py
@@ -30,29 +31,42 @@ class AtariEnv(gym.Env, utils.EzPickle):
def __init__(self, game='pong', obs_type='ram'):
utils.EzPickle.__init__(self, game, obs_type)
assert obs_type in ('ram', 'image')
game_path = atari_py.get_game_path(game)
if not os.path.exists(game_path):
raise IOError('You asked for game %s but path %s does not exist'%(game, game_path))
self.ale = atari_py.ALEInterface()
self.ale.loadROM(game_path)
self.game_path = atari_py.get_game_path(game)
if not os.path.exists(self.game_path):
raise IOError('You asked for game %s but path %s does not exist'%(game, self.game_path))
self._obs_type = obs_type
self._action_set = self.ale.getMinimalActionSet()
self.ale = atari_py.ALEInterface()
self.viewer = None
(screen_width,screen_height) = self.ale.getScreenDims()
self._seed()
self.action_space = spaces.Discrete(len(self._action_set))
def _seed(self, seed=None):
self.np_random, seed1 = seeding.np_random(seed)
# Derive a random seed. This gets passed as a uint, but gets
# checked as an int elsewhere, so we need to keep it below
# 2**31.
seed2 = seeding.hash_seed(seed1 + 1) % 2**31
# Empirically, we need to seed before loading the ROM.
self.ale.setInt(b'random_seed', seed2)
self.ale.loadROM(self.game_path)
self._action_set = self.ale.getMinimalActionSet()
self.action_space = spaces.Discrete(len(self._action_set), np_random=self.np_random)
(screen_width,screen_height) = self.ale.getScreenDims()
if self._obs_type == 'ram':
self.observation_space = spaces.Box(low=np.zeros(128), high=np.zeros(128)+255)
self.observation_space = spaces.Box(low=np.zeros(128), high=np.zeros(128)+255, np_random=self.np_random)
elif self._obs_type == 'image':
self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3))
self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3), np_random=self.np_random)
else:
raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))
return [seed1, seed2]
def _step(self, a):
reward = 0.0
action = self._action_set[a]
num_steps = np.random.randint(2, 5)
num_steps = self.np_random.randint(2, 5)
for _ in range(num_steps):
reward += self.ale.act(action)
ob = self._get_obs()
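The seed1/seed2 split above is the commit's recipe for seeding a second, external generator (ALE here; pachi and the Doom engine below). A sketch of the derivation:

from gym.utils import seeding

np_random, seed1 = seeding.np_random(42)      # numpy generator used by gym itself
# hash_seed spreads nearby inputs (seed1, seed1 + 1, ...) across the full
# integer range, so the emulator's stream is not correlated with numpy's.
# ALE reads the value back as a signed 32-bit int, hence the 2**31 cap.
seed2 = seeding.hash_seed(seed1 + 1) % 2**31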

View File

@@ -8,6 +8,7 @@ except ImportError as e:
import numpy as np
import gym
from gym import spaces
from gym.utils import seeding
from six import StringIO
import sys
import six
@@ -71,10 +72,12 @@ class GoState(object):
### Adversary policies ###
def make_random_policy(np_random):
def random_policy(curr_state, prev_state, prev_action):
b = curr_state.board
legal_coords = b.get_legal_coords(curr_state.color)
return _coord_to_action(b, np.random.choice(legal_coords))
return _coord_to_action(b, np_random.choice(legal_coords))
return random_policy
def make_pachi_policy(board, engine_type='uct', threads=1, pachi_timestr=''):
engine = pachi_py.PyPachiEngine(board, engine_type, six.b('threads=%d' % threads))
@@ -122,16 +125,18 @@ class GoEnv(gym.Env):
metadata = {"render.modes": ["human", "ansi"]}
def __init__(self, player_color, opponent, observation_type, illegal_move_mode, board_size):
'''
"""
Args:
player_color: Stone color for the agent. Either 'black' or 'white'
opponent: An opponent policy
observation_type: State encoding
illegal_move_mode: What to do when the agent makes an illegal move. Choices: 'raise' or 'lose'
'''
"""
assert isinstance(board_size, int) and board_size >= 1, 'Invalid board size: {}'.format(board_size)
self.board_size = board_size
self._seed()
colormap = {
'black': pachi_py.BLACK,
'white': pachi_py.WHITE,
@@ -150,17 +155,22 @@ class GoEnv(gym.Env):
assert illegal_move_mode in ['lose', 'raise']
self.illegal_move_mode = illegal_move_mode
# One action for each board position, pass, and resign
self.action_space = spaces.Discrete(self.board_size**2 + 2)
if self.observation_type == 'image3c':
shape = pachi_py.CreateBoard(self.board_size).encode().shape
self.observation_space = spaces.Box(np.zeros(shape), np.ones(shape))
else:
if self.observation_type != 'image3c':
raise error.Error('Unsupported observation type: {}'.format(self.observation_type))
self.reset()
def _seed(self, seed=None):
self.np_random, seed1 = seeding.np_random(seed)
# Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
pachi_py.pachi_srand(seed2)
shape = pachi_py.CreateBoard(self.board_size).encode().shape
self.observation_space = spaces.Box(np.zeros(shape), np.ones(shape), np_random=self.np_random)
# One action for each board position, pass, and resign
self.action_space = spaces.Discrete(self.board_size**2 + 2, np_random=self.np_random)
return [seed1, seed2]
def _reset(self):
self.state = GoState(pachi_py.CreateBoard(self.board_size), pachi_py.BLACK)
@@ -250,7 +260,7 @@ class GoEnv(gym.Env):
def _reset_opponent(self, board):
if self.opponent == 'random':
self.opponent_policy = random_policy
self.opponent_policy = make_random_policy(self.np_random)
elif self.opponent == 'pachi:uct:_2400':
self.opponent_policy = make_pachi_policy(board=board, engine_type=six.b('uct'), pachi_timestr=six.b('_2400')) # TODO: strength as argument
else:

View File

@@ -8,13 +8,14 @@ import gym
from gym import spaces
import numpy as np
from gym import error
from gym.utils import seeding
def make_random_policy(np_random):
def random_policy(state):
possible_moves = HexEnv.get_possible_actions(state)
a = np.random.randint(len(possible_moves))
a = np_random.randint(len(possible_moves))
return possible_moves[a]
return random_policy
class HexEnv(gym.Env):
"""
@@ -46,13 +47,6 @@ class HexEnv(gym.Env):
raise error.Error("player_color must be 'black' or 'white', not {}".format(player_color))
self.opponent = opponent
if isinstance(self.opponent, str):
if opponent == 'random':
self.opponent_policy = random_policy
else:
raise error.Error('Unrecognized opponent policy {}'.format(self.opponent))
else:
self.opponent_policy = opponent
assert observation_type in ['numpy3c']
self.observation_type = observation_type
@@ -60,14 +54,28 @@ class HexEnv(gym.Env):
assert illegal_move_mode in ['lose', 'raise']
self.illegal_move_mode = illegal_move_mode
# One action for each board position and resign
self.action_space = spaces.Discrete(self.board_size ** 2 + 1)
if self.observation_type != 'numpy3c':
raise error.Error('Unsupported observation type: {}'.format(self.observation_type))
self._seed()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
# One action for each board position and resign
self.action_space = spaces.Discrete(self.board_size ** 2 + 1, np_random=self.np_random)
observation = self.reset()
self.observation_space = spaces.Box(np.zeros(observation.shape), np.ones(observation.shape))
self.observation_space = spaces.Box(np.zeros(observation.shape), np.ones(observation.shape), np_random=self.np_random)
# Update the random policy if needed
if isinstance(self.opponent, str):
if self.opponent == 'random':
self.opponent_policy = make_random_policy(self.np_random)
else:
raise error.Error('Unrecognized opponent policy {}'.format(self.opponent))
else:
self.opponent_policy = self.opponent
return [seed]
def _reset(self):
self.state = np.zeros((3, self.board_size, self.board_size))

View File

@@ -6,6 +6,7 @@ from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revolute
import gym
from gym import spaces
from gym.utils import colorize, seeding
# This is a simple 4-joint walker robot environment.
#
@@ -86,12 +87,9 @@ class BipedalWalker(gym.Env):
hardcore = False
def __init__(self):
self._seed()
self.viewer = None
high = np.array([np.inf]*24)
self.action_space = spaces.Box( np.array([-1,-1,-1,-1]), np.array([+1,+1,+1,+1]) )
self.observation_space = spaces.Box(-high, high)
self.world = Box2D.b2World()
self.terrain = None
self.hull = None
@@ -99,6 +97,13 @@ class BipedalWalker(gym.Env):
self.prev_shaping = None
self._reset()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
high = np.array([np.inf]*24)
self.action_space = spaces.Box(np.array([-1,-1,-1,-1]), np.array([+1,+1,+1,+1]), np_random=self.np_random)
self.observation_space = spaces.Box(-high, high, np_random=self.np_random)
return [seed]
def _destroy(self):
if not self.terrain: return
self.world.contactListener = None
@@ -128,11 +133,11 @@ class BipedalWalker(gym.Env):
if state==GRASS and not oneshot:
velocity = 0.8*velocity + 0.01*np.sign(TERRAIN_HEIGHT - y)
if i > TERRAIN_STARTPAD: velocity += np.random.uniform(-1, 1)/SCALE #1
if i > TERRAIN_STARTPAD: velocity += self.np_random.uniform(-1, 1)/SCALE #1
y += velocity
elif state==PIT and oneshot:
counter = np.random.randint(3, 5)
counter = self.np_random.randint(3, 5)
poly = [
(x, y),
(x+TERRAIN_STEP, y),
@@ -162,7 +167,7 @@ class BipedalWalker(gym.Env):
y -= 4*TERRAIN_STEP
elif state==STUMP and oneshot:
counter = np.random.randint(1, 3)
counter = self.np_random.randint(1, 3)
poly = [
(x, y),
(x+counter*TERRAIN_STEP, y),
@@ -178,9 +183,9 @@ class BipedalWalker(gym.Env):
self.terrain.append(t)
elif state==STAIRS and oneshot:
stair_height = +1 if np.random.ranf() > 0.5 else -1
stair_width = np.random.randint(4, 5)
stair_steps = np.random.randint(3, 5)
stair_height = +1 if self.np_random.rand() > 0.5 else -1
stair_width = self.np_random.randint(4, 5)
stair_steps = self.np_random.randint(3, 5)
original_y = y
for s in range(stair_steps):
poly = [
@@ -207,9 +212,9 @@ class BipedalWalker(gym.Env):
self.terrain_y.append(y)
counter -= 1
if counter==0:
counter = np.random.randint(TERRAIN_GRASS/2, TERRAIN_GRASS)
counter = self.np_random.randint(TERRAIN_GRASS/2, TERRAIN_GRASS)
if state==GRASS and hardcore:
state = np.random.randint(1, _STATES_)
state = self.np_random.randint(1, _STATES_)
oneshot = True
else:
state = GRASS
@@ -240,11 +245,11 @@ class BipedalWalker(gym.Env):
# Sorry for the clouds, couldn't resist
self.cloud_poly = []
for i in range(TERRAIN_LENGTH//20):
x = np.random.uniform(0, TERRAIN_LENGTH)*TERRAIN_STEP
x = self.np_random.uniform(0, TERRAIN_LENGTH)*TERRAIN_STEP
y = VIEWPORT_H/SCALE*3/4
poly = [
(x+15*TERRAIN_STEP*math.sin(3.14*2*a/5)+np.random.uniform(0,5*TERRAIN_STEP),
y+ 5*TERRAIN_STEP*math.cos(3.14*2*a/5)+np.random.uniform(0,5*TERRAIN_STEP) )
(x+15*TERRAIN_STEP*math.sin(3.14*2*a/5)+self.np_random.uniform(0,5*TERRAIN_STEP),
y+ 5*TERRAIN_STEP*math.cos(3.14*2*a/5)+self.np_random.uniform(0,5*TERRAIN_STEP) )
for a in range(5) ]
x1 = min( [p[0] for p in poly] )
x2 = max( [p[0] for p in poly] )
@@ -278,7 +283,7 @@ class BipedalWalker(gym.Env):
)
self.hull.color1 = (0.5,0.4,0.9)
self.hull.color2 = (0.3,0.3,0.5)
self.hull.ApplyForceToCenter((np.random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), 0), True)
self.hull.ApplyForceToCenter((self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), 0), True)
self.legs = []
self.joints = []

View File

@@ -7,6 +7,7 @@ from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revolute
import gym
from gym import spaces
from gym.envs.classic_control import rendering
from gym.utils import colorize, seeding
import pyglet
from pyglet.gl import *
@@ -106,8 +107,7 @@ class CarRacing(gym.Env):
}
def __init__(self):
self.action_space = spaces.Box( np.array([-1,0,0]), np.array([+1,+1,+1]) ) # steer, gas, brake
self.observation_space = spaces.Box(low=0, high=255, shape=(STATE_H, STATE_W, 3))
self._seed()
self.world = Box2D.b2World((0,0), contactListener=FrictionDetector(self))
self.viewer = None
self.invisible_state_window = None
@@ -117,6 +117,12 @@ class CarRacing(gym.Env):
self.reward = 0.0
self.prev_reward = 0.0
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
self.action_space = spaces.Box( np.array([-1,0,0]), np.array([+1,+1,+1]), np_random=self.np_random) # steer, gas, brake
self.observation_space = spaces.Box(low=0, high=255, shape=(STATE_H, STATE_W, 3), np_random=self.np_random)
return [seed]
def _destroy(self):
if not self.road: return
for t in self.road:
@@ -130,8 +136,8 @@ class CarRacing(gym.Env):
# Create checkpoints
checkpoints = []
for c in range(CHECKPOINTS):
alpha = 2*math.pi*c/CHECKPOINTS + np.random.uniform(0, 2*math.pi*1/CHECKPOINTS)
rad = np.random.uniform(TRACK_RAD/3, TRACK_RAD)
alpha = 2*math.pi*c/CHECKPOINTS + self.np_random.uniform(0, 2*math.pi*1/CHECKPOINTS)
rad = self.np_random.uniform(TRACK_RAD/3, TRACK_RAD)
if c==0:
alpha = 0
rad = 1.5*TRACK_RAD

View File

@@ -6,6 +6,7 @@ from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revolute
import gym
from gym import spaces
from gym.utils import seeding
# Rocket trajectory optimization is a classic topic in Optimal Control.
#
@@ -76,12 +77,9 @@ class LunarLander(gym.Env):
}
def __init__(self):
self._seed()
self.viewer = None
high = np.array([np.inf]*8) # useful range is -1 .. +1
self.action_space = spaces.Discrete(4) # nop, fire left engine, main engine, right engine
self.observation_space = spaces.Box(-high, high)
self.world = Box2D.b2World()
self.moon = None
self.lander = None
@@ -90,6 +88,16 @@ class LunarLander(gym.Env):
self.prev_reward = None
self._reset()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
# useful range is -1 .. +1
high = np.array([np.inf]*8)
# nop, fire left engine, main engine, right engine
self.action_space = spaces.Discrete(4, np_random=self.np_random)
self.observation_space = spaces.Box(-high, high, np_random=self.np_random)
return [seed]
def _destroy(self):
if not self.moon: return
self.world.contactListener = None
@@ -112,7 +120,7 @@ class LunarLander(gym.Env):
# terrain
CHUNKS = 11
height = np.random.uniform(0, H/2, size=(CHUNKS+1,) )
height = self.np_random.uniform(0, H/2, size=(CHUNKS+1,) )
chunk_x = [W/(CHUNKS-1)*i for i in range(CHUNKS)]
self.helipad_x1 = chunk_x[CHUNKS//2-1]
self.helipad_x2 = chunk_x[CHUNKS//2+1]
@@ -153,8 +161,8 @@ class LunarLander(gym.Env):
self.lander.color1 = (0.5,0.4,0.9)
self.lander.color2 = (0.3,0.3,0.5)
self.lander.ApplyForceToCenter( (
np.random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM),
np.random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM)
self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM),
self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM)
), True)
self.legs = []
@@ -222,7 +230,7 @@ class LunarLander(gym.Env):
# Engines
tip = (math.sin(self.lander.angle), math.cos(self.lander.angle))
side = (-tip[1], tip[0]);
dispersion = [np.random.uniform(-1.0, +1.0) / SCALE for _ in range(2)]
dispersion = [self.np_random.uniform(-1.0, +1.0) / SCALE for _ in range(2)]
if action==2: # Main engine
ox = tip[0]*(4/SCALE + 2*dispersion[0]) + side[0]*dispersion[1] # 4 is move a bit downwards, +-2 for randomness
oy = -tip[1]*(4/SCALE + 2*dispersion[0]) - side[1]*dispersion[1]
@@ -368,4 +376,3 @@ if __name__=="__main__":
env.render()
if done: break

View File

@@ -1,5 +1,6 @@
"""classic Acrobot task"""
from gym import core, spaces
from gym.utils import seeding
import numpy as np
import time
@@ -78,14 +79,20 @@ class AcrobotEnv(core.Env):
actions_num = 3
def __init__(self):
self.viewer = None
self._seed()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
high = np.array([np.pi, np.pi, self.MAX_VEL_1, self.MAX_VEL_2])
low = -high
self.observation_space = spaces.Box(low, high)
self.action_space = spaces.Discrete(3)
self.viewer = None
self.observation_space = spaces.Box(low, high, np_random=self.np_random)
self.action_space = spaces.Discrete(3, np_random=self.np_random)
return [seed]
def _reset(self):
self.state = np.random.uniform(low=-0.1, high=0.1, size=(4,))
self.state = self.np_random.uniform(low=-0.1, high=0.1, size=(4,))
return self.state
def _step(self, a):
@@ -94,7 +101,7 @@ class AcrobotEnv(core.Env):
# Add noise to the force action
if self.torque_noise_max > 0:
torque += np.random.uniform(-self.torque_noise_max, self.torque_noise_max)
torque += self.np_random.uniform(-self.torque_noise_max, self.torque_noise_max)
# Now, augment the state with our force action so it can be passed to
# _dsdt

View File

@@ -7,6 +7,7 @@ import logging
import math
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
logger = logging.getLogger(__name__)
@@ -30,15 +31,20 @@ class CartPoleEnv(gym.Env):
# Angle at which to fail the episode
self.theta_threshold_radians = 12 * 2 * math.pi / 360
self.x_threshold = 2.4
self._seed()
self.reset()
self.viewer = None
self.steps_beyond_done = None
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
# Angle limit set to 2 * theta_threshold_radians so failing observation is still within bounds
high = np.array([self.x_threshold, np.inf, self.theta_threshold_radians * 2, np.inf])
self.action_space = spaces.Discrete(2)
self.observation_space = spaces.Box(-high, high)
self.steps_beyond_done = None
self.action_space = spaces.Discrete(2, np_random=self.np_random)
self.observation_space = spaces.Box(-high, high, np_random=self.np_random)
return [seed]
def _step(self, action):
action = action
@@ -77,7 +83,7 @@ class CartPoleEnv(gym.Env):
return np.array(self.state), reward, done, {}
def _reset(self):
self.state = np.random.uniform(low=-0.05, high=0.05, size=(4,))
self.state = self.np_random.uniform(low=-0.05, high=0.05, size=(4,))
self.steps_beyond_done = None
return np.array(self.state)

View File

@@ -5,6 +5,7 @@ https://webdocs.cs.ualberta.ca/~sutton/MountainCar/MountainCar1.cp
import math
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
class MountainCarEnv(gym.Env):
@@ -14,10 +15,6 @@ class MountainCarEnv(gym.Env):
}
def __init__(self):
self.reset()
self.viewer = None
self.reset()
self.min_position = -1.2
self.max_position = 0.6
self.max_speed = 0.07
@@ -26,8 +23,16 @@ class MountainCarEnv(gym.Env):
self.low = np.array([self.min_position, -self.max_speed])
self.high = np.array([self.max_position, self.max_speed])
self.action_space = spaces.Discrete(3)
self.observation_space = spaces.Box(self.low, self.high)
self.viewer = None
self._seed()
self.reset()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
self.action_space = spaces.Discrete(3, np_random=self.np_random)
self.observation_space = spaces.Box(self.low, self.high, np_random=self.np_random)
return [seed]
def _step(self, action):
# action = np.sign((self.state[0]+math.pi/2) * self.state[1])+1
@@ -48,7 +53,7 @@ class MountainCarEnv(gym.Env):
return np.array(self.state), reward, done, {}
def _reset(self):
self.state = np.array([np.random.uniform(low=-0.6, high=-0.4), 0])
self.state = np.array([self.np_random.uniform(low=-0.6, high=-0.4), 0])
return np.array(self.state)
def _height(self, xs):

View File

@@ -1,5 +1,6 @@
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
from os import path
@@ -14,10 +15,15 @@ class PendulumEnv(gym.Env):
self.max_torque=2.
self.dt=.05
self.viewer = None
self._seed()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
high = np.array([1., 1., self.max_speed])
self.action_space = spaces.Box(low=-self.max_torque, high=self.max_torque, shape=(1,))
self.observation_space = spaces.Box(low=-high, high=high)
self.action_space = spaces.Box(low=-self.max_torque, high=self.max_torque, shape=(1,), np_random=self.np_random)
self.observation_space = spaces.Box(low=-high, high=high, np_random=self.np_random)
return [seed]
def _step(self,u):
th, thdot = self.state # th := theta
@@ -40,7 +46,7 @@ class PendulumEnv(gym.Env):
def _reset(self):
high = np.array([np.pi, 1])
self.state = np.random.uniform(low=-high, high=high)
self.state = self.np_random.uniform(low=-high, high=high)
self.last_u = None
return self._get_obs()

View File

@@ -6,6 +6,7 @@ import numpy as np
from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
from gym import error, spaces
from gym.envs.doom import doom_env
from gym.utils import seeding
logger = logging.getLogger(__name__)
@@ -49,10 +50,19 @@ class DoomBasicEnv(doom_env.DoomEnv):
self.game.set_doom_map('map01')
self.screen_height = 480 # Must match .cfg file
self.screen_width = 640 # Must match .cfg file
# 3 allowed actions [0, 9, 10] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
self.game.set_window_visible(False)
self.viewer = None
self.game.init()
self.game.new_episode()
self._seed()
def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed)
# Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
self.game.set_seed(seed2)
# 3 allowed actions [0, 9, 10] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]

View File

@@ -6,6 +6,7 @@ import numpy as np
from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
from gym import error, spaces
from gym.envs.doom import doom_env
from gym.utils import seeding
logger = logging.getLogger(__name__)
@@ -50,10 +51,20 @@ class DoomCorridorEnv(doom_env.DoomEnv):
self.game.set_doom_scenario_path(self.loader.get_scenario_path('deadly_corridor.wad'))
self.screen_height = 480 # Must match .cfg file
self.screen_width = 640 # Must match .cfg file
# action indexes are [0, 9, 10, 12, 13, 14]
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 6))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
self.game.set_window_visible(False)
self.viewer = None
self.game.init()
self.game.new_episode()
self._seed()
def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed)
# Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
self.game.set_seed(seed2)
# action indexes are [0, 9, 10, 12, 13, 14]
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 6), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]

View File

@@ -5,6 +5,7 @@ import numpy as np
from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
from gym import error, spaces
from gym.utils import seeding
from gym.envs.doom import doom_env
logger = logging.getLogger(__name__)
@@ -40,10 +41,20 @@ class DoomDeathmatchEnv(doom_env.DoomEnv):
self.game.set_doom_scenario_path(self.loader.get_scenario_path('deathmatch.wad'))
self.screen_height = 480 # Must match .cfg file
self.screen_width = 640 # Must match .cfg file
# 41 allowed actions (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 37 + [[0, 10, 0]] * 5))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
self.game.set_window_visible(False)
self.viewer = None
self.game.init()
self.game.new_episode()
self._seed()
def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed)
# Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
self.game.set_seed(seed2)
# 41 allowed actions (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 37 + [[0, 10, 0]] * 5), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]

View File

@@ -6,6 +6,7 @@ import numpy as np
from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
from gym import error, spaces
from gym.envs.doom import doom_env
from gym.utils import seeding
logger = logging.getLogger(__name__)
@@ -49,10 +50,20 @@ class DoomDefendCenterEnv(doom_env.DoomEnv):
self.game.set_doom_scenario_path(self.loader.get_scenario_path('defend_the_center.wad'))
self.screen_height = 480 # Must match .cfg file
self.screen_width = 640 # Must match .cfg file
# 3 allowed actions [0, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
self.game.set_window_visible(False)
self.viewer = None
self.game.init()
self.game.new_episode()
self._seed()
def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed)
# Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
self.game.set_seed(seed2)
# 3 allowed actions [0, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]

View File

@@ -6,6 +6,7 @@ import numpy as np
from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
from gym import error, spaces
from gym.envs.doom import doom_env
from gym.utils import seeding
logger = logging.getLogger(__name__)
@@ -49,10 +50,19 @@ class DoomDefendLineEnv(doom_env.DoomEnv):
self.game.set_doom_scenario_path(self.loader.get_scenario_path('defend_the_line.wad'))
self.screen_height = 480 # Must match .cfg file
self.screen_width = 640 # Must match .cfg file
# 3 allowed actions [0, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
self.game.set_window_visible(False)
self.viewer = None
self._seed()
self.game.init()
self.game.new_episode()
def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed)
# Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
self.game.set_seed(seed2)
# 3 allowed actions [0, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]

View File

@@ -6,6 +6,7 @@ import numpy as np
from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
from gym import error, spaces
from gym.envs.doom import doom_env
from gym.utils import seeding
logger = logging.getLogger(__name__)
@@ -47,10 +48,20 @@ class DoomHealthGatheringEnv(doom_env.DoomEnv):
self.game.set_doom_map('map01')
self.screen_height = 480 # Must match .cfg file
self.screen_width = 640 # Must match .cfg file
# 3 allowed actions [12, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
self.game.set_window_visible(False)
self.viewer = None
self.game.init()
self.game.new_episode()
self._seed()
def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed)
# Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
self.game.set_seed(seed2)
# 3 allowed actions [12, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]

View File

@@ -6,6 +6,7 @@ import numpy as np
from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
from gym import error, spaces
from gym.envs.doom import doom_env
from gym.utils import seeding
logger = logging.getLogger(__name__)
@@ -46,10 +47,20 @@ class DoomMyWayHomeEnv(doom_env.DoomEnv):
self.game.set_doom_scenario_path(self.loader.get_scenario_path('my_way_home.wad'))
self.screen_height = 480 # Must match .cfg file
self.screen_width = 640 # Must match .cfg file
# 3 allowed actions [12, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
self.game.set_window_visible(False)
self.viewer = None
self.game.init()
self.game.new_episode()
self._seed()
def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed)
# Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
self.game.set_seed(seed2)
# 3 allowed actions [12, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]

View File

@@ -6,6 +6,7 @@ import numpy as np
from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
from gym import error, spaces
from gym.envs.doom import doom_env
from gym.utils import seeding
logger = logging.getLogger(__name__)
@@ -51,10 +52,20 @@ class DoomPredictPositionEnv(doom_env.DoomEnv):
self.game.set_doom_map('map01')
self.screen_height = 480 # Must match .cfg file
self.screen_width = 640 # Must match .cfg file
# 3 allowed actions [0, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
self.game.set_window_visible(False)
self.viewer = None
self.game.init()
self.game.new_episode()
self._seed()
def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed)
# Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
self.game.set_seed(seed2)
# 3 allowed actions [0, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]

View File

@@ -6,6 +6,7 @@ import numpy as np
from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
from gym import error, spaces
from gym.envs.doom import doom_env
from gym.utils import seeding
logger = logging.getLogger(__name__)
@@ -44,10 +45,21 @@ class DoomTakeCoverEnv(doom_env.DoomEnv):
self.game.set_doom_map('map01')
self.screen_height = 480 # Must match .cfg file
self.screen_width = 640 # Must match .cfg file
# 2 allowed actions [9, 10] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 2))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
self.game.set_window_visible(False)
self.viewer = None
self.game.init()
self.game.new_episode()
self._seed()
def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed)
# Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
self.game.set_seed(seed2)
# 2 allowed actions [9, 10] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 2), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]

View File

@@ -36,8 +36,8 @@ class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
])
def reset_model(self):
qpos = self.init_qpos + np.random.uniform(size=self.model.nq,low=-.1,high=.1)
qvel = self.init_qvel + np.random.randn(self.model.nv) * .1
qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq,low=-.1,high=.1)
qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1
self.set_state(qpos, qvel)
return self._get_obs()

View File

@@ -25,8 +25,8 @@ class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle):
])
def reset_model(self):
qpos = self.init_qpos + np.random.uniform(low=-.1, high=.1, size=self.model.nq)
qvel = self.init_qvel + np.random.randn(self.model.nv) * .1
qpos = self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq)
qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1
self.set_state(qpos, qvel)
return self._get_obs()

View File

@@ -28,8 +28,8 @@ class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
])
def reset_model(self):
qpos = self.init_qpos + np.random.uniform(low=-.005, high=.005, size=self.model.nq)
qvel = self.init_qvel + np.random.uniform(low=-.005, high=.005, size=self.model.nv)
qpos = self.init_qpos + self.np_random.uniform(low=-.005, high=.005, size=self.model.nq)
qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
self.set_state(qpos, qvel)
return self._get_obs()

View File

@@ -39,8 +39,8 @@ class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def reset_model(self):
c = 0.01
self.set_state(
self.init_qpos + np.random.uniform(low=-c, high=c, size=self.model.nq),
self.init_qvel + np.random.uniform(low=-c, high=c, size=self.model.nv,)
self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv,)
)
return self._get_obs()

View File

@@ -40,8 +40,8 @@ class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def reset_model(self):
c = 0.01
self.set_state(
self.init_qpos + np.random.uniform(low=-c, high=c, size=self.model.nq),
self.init_qvel + np.random.uniform(low=-c, high=c, size=self.model.nv,)
self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv,)
)
return self._get_obs()

View File

@@ -31,8 +31,8 @@ class InvertedDoublePendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def reset_model(self):
self.set_state(
self.init_qpos + np.random.uniform(low=-.1, high=.1, size=self.model.nq),
self.init_qvel + np.random.randn(self.model.nv) * .1
self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq),
self.init_qvel + self.np_random.randn(self.model.nv) * .1
)
return self._get_obs()

View File

@@ -16,8 +16,8 @@ class InvertedPendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
return ob, reward, done, {}
def reset_model(self):
qpos = self.init_qpos + np.random.uniform(size=self.model.nq, low=-0.01, high=0.01)
qvel = self.init_qvel + np.random.uniform(size=self.model.nv, low=-0.01, high=0.01)
qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq, low=-0.01, high=0.01)
qvel = self.init_qvel + self.np_random.uniform(size=self.model.nv, low=-0.01, high=0.01)
self.set_state(qpos, qvel)
return self._get_obs()

View File

@@ -1,6 +1,7 @@
import os
from gym import error, spaces
from gym.utils import seeding
import numpy as np
from os import path
import gym
@@ -13,9 +14,7 @@ except ImportError as e:
raise error.DependencyNotInstalled("{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(e))
class MujocoEnv(gym.Env):
"""
Superclass of MuJoCo environments.
"""Superclass for all MuJoCo environments.
"""
def __init__(self, model_path, frame_skip):
@@ -40,15 +39,20 @@ class MujocoEnv(gym.Env):
observation, _reward, done, _info = self._step(np.zeros(self.model.nu))
assert not done
self.obs_dim = observation.size
self._seed()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
bounds = self.model.actuator_ctrlrange.copy()
low = bounds[:, 0]
high = bounds[:, 1]
self.action_space = spaces.Box(low, high)
self.action_space = spaces.Box(low, high, np_random=self.np_random)
high = np.inf*np.ones(self.obs_dim)
low = -high
self.observation_space = spaces.Box(low, high)
self.observation_space = spaces.Box(low, high, np_random=self.np_random)
return [seed]
# methods to override:
# ----------------------------

View File

@@ -21,12 +21,12 @@ class ReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
self.viewer.cam.trackbodyid=0
def reset_model(self):
qpos = np.random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
while True:
self.goal = np.random.uniform(low=-.2, high=.2, size=2)
self.goal = self.np_random.uniform(low=-.2, high=.2, size=2)
if np.linalg.norm(self.goal) < 2: break
qpos[-2:] = self.goal
qvel = self.init_qvel + np.random.uniform(low=-.005, high=.005, size=self.model.nv)
qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
qvel[-2:] = 0
self.set_state(qpos, qvel)
return self._get_obs()

View File

@@ -26,7 +26,7 @@ class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def reset_model(self):
self.set_state(
self.init_qpos + np.random.uniform(low=-.1, high=.1, size=self.model.nq),
self.init_qvel + np.random.uniform(low=-.1, high=.1, size=self.model.nv)
self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq),
self.init_qvel + self.np_random.uniform(low=-.1, high=.1, size=self.model.nv)
)
return self._get_obs()

View File

@@ -28,8 +28,8 @@ class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def reset_model(self):
self.set_state(
self.init_qpos + np.random.uniform(low=-.005, high=.005, size=self.model.nq),
self.init_qvel + np.random.uniform(low=-.005, high=.005, size=self.model.nv)
self.init_qpos + self.np_random.uniform(low=-.005, high=.005, size=self.model.nq),
self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
)
return self._get_obs()

View File

@@ -24,6 +24,7 @@ class EnvSpec(object):
trials (int): The number of trials to average reward over
reward_threshold (Optional[int]): The reward threshold before the task is considered solved
kwargs (dict): The kwargs to pass to the environment class
nondeterministic (bool): Whether this environment is non-deterministic even after seeding
Attributes:
id (str): The official environment ID
@@ -31,7 +32,7 @@ class EnvSpec(object):
trials (int): The number of trials run in official evaluation
"""
def __init__(self, id, entry_point=None, timestep_limit=1000, trials=100, reward_threshold=None, kwargs=None):
def __init__(self, id, entry_point=None, timestep_limit=1000, trials=100, reward_threshold=None, kwargs=None, nondeterministic=False):
self.id = id
# Evaluation parameters
self.timestep_limit = timestep_limit
@@ -46,6 +47,7 @@ class EnvSpec(object):
self._env_name = match.group(1)
self._entry_point = entry_point
self._kwargs = {} if kwargs is None else kwargs
self.nondeterministic = nondeterministic
def make(self):
"""Instantiates an instance of the environment with appropriate kwargs"""

View File

@@ -0,0 +1,77 @@
import numpy as np
from nose2 import tools
import os
import logging
logger = logging.getLogger(__name__)
import gym
from gym import envs
specs = [spec for spec in envs.registry.all() if spec._entry_point is not None]
@tools.params(*specs)
def test_env(spec):
# Skip mujoco tests for pull request CI
skip_mujoco = not (os.environ.get('MUJOCO_KEY_BUNDLE') or os.path.exists(os.path.expanduser('~/.mujoco')))
if skip_mujoco and spec._entry_point.startswith('gym.envs.mujoco:'):
return
# TODO(jonas 2016-05-11): Re-enable these tests after fixing box2d-py
if spec._entry_point.startswith('gym.envs.box2d:'):
logger.warn("Skipping tests for box2d env {}".format(spec._entry_point))
return
env1 = spec.make()
env1.seed(0)
action_samples1 = [env1.action_space.sample() for i in range(4)]
observation_samples1 = [env1.observation_space.sample() for i in range(4)]
initial_observation1 = env1.reset()
step_responses1 = [env1.step(action) for action in action_samples1]
env1.close()
env2 = spec.make()
env2.seed(0)
action_samples2 = [env2.action_space.sample() for i in range(4)]
observation_samples2 = [env2.observation_space.sample() for i in range(4)]
initial_observation2 = env2.reset()
step_responses2 = [env2.step(action) for action in action_samples2]
env2.close()
for i, (action_sample1, action_sample2) in enumerate(zip(action_samples1, action_samples2)):
assert np.array_equal(action_sample1, action_sample2), '[{}] action_sample1: {}, action_sample2: {}'.format(i, action_sample1, action_sample2)
for i, (observation_sample1, observation_sample2) in enumerate(zip(observation_samples1, observation_samples2)):
# Allows for NaNs
np.testing.assert_array_equal(observation_sample1, observation_sample2)
# Don't check rollout equality if it's a nondeterministic
# environment.
if spec.nondeterministic:
return
assert np.array_equal(initial_observation1, initial_observation2), 'initial_observation1: {}, initial_observation2: {}'.format(initial_observation1, initial_observation2)
for i, ((o1, r1, d1, i1), (o2, r2, d2, i2)) in enumerate(zip(step_responses1, step_responses2)):
assert_equals(o1, o2, '[{}] '.format(i))
assert r1 == r2, '[{}] r1: {}, r2: {}'.format(i, r1, r2)
assert d1 == d2, '[{}] d1: {}, d2: {}'.format(i, d1, d2)
# Go returns a Pachi game board in info, which doesn't
# properly check equality. For now, we hack around this by
# just skipping Go.
if spec.id not in ['Go9x9-v0', 'Go19x19-v0']:
assert_equals(i1, i2, '[{}] '.format(i))
def assert_equals(a, b, prefix=''):
assert type(a) == type(b), "{}Differing types: {} and {}".format(prefix, a, b)
if isinstance(a, dict):
assert list(a.keys()) == list(b.keys()), "{}Key sets differ: {} and {}".format(prefix, a, b)
for k in a.keys():
v_a = a[k]
v_b = b[k]
assert_equals(v_a, v_b, prefix)
elif isinstance(a, np.ndarray):
np.testing.assert_array_equal(a, b)
else:
assert a == b

View File

@@ -15,7 +15,7 @@ specs = [spec for spec in envs.registry.all() if spec._entry_point is not None]
@tools.params(*specs)
def test_env(spec):
# Skip mujoco tests for pull request CI
skip_mujoco = not os.environ.get('MUJOCO_KEY_BUNDLE')
skip_mujoco = not (os.environ.get('MUJOCO_KEY_BUNDLE') or os.path.exists(os.path.expanduser('~/.mujoco')))
if skip_mujoco and spec._entry_point.startswith('gym.envs.mujoco:'):
return

View File

@@ -1,6 +1,6 @@
import gym
import random
from gym import spaces
from gym.utils import seeding
def cmp(a, b):
return (a > b) - (a < b)
@@ -9,12 +9,12 @@ def cmp(a, b):
deck = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 10, 10]
def draw_card():
return random.choice(deck)
def draw_card(np_random):
return np_random.choice(deck)
def draw_hand():
return [draw_card(), draw_card()]
def draw_hand(np_random):
return [draw_card(np_random), draw_card(np_random)]
def usable_ace(hand): # Does this hand have a usable ace?
@@ -71,20 +71,27 @@ class BlackjackEnv(gym.Env):
https://webdocs.cs.ualberta.ca/~sutton/book/the-book.html
"""
def __init__(self, natural=False):
self.action_space = spaces.Discrete(2)
self.observation_space = spaces.Tuple((spaces.Discrete(32),
spaces.Discrete(11),
spaces.Discrete(2)))
self._seed()
# Flag to payout 1.5 on a "natural" blackjack win, like casino rules
# Ref: http://www.bicyclecards.com/how-to-play/blackjack/
self.natural = natural
# Start the first game
self._reset()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
self.action_space = spaces.Discrete(2, np_random=self.np_random)
self.observation_space = spaces.Tuple((
spaces.Discrete(32, np_random=self.np_random),
spaces.Discrete(11, np_random=self.np_random),
spaces.Discrete(2, np_random=self.np_random)))
return [seed]
def _step(self, action):
assert(self.action_space.contains(action))
if action: # hit: add a card to players hand and return
self.player.append(draw_card())
self.player.append(draw_card(self.np_random))
if is_bust(self.player):
done = True
reward = -1
@@ -94,7 +101,7 @@ class BlackjackEnv(gym.Env):
else: # stick: play out the dealers hand, and score
done = True
while sum_hand(self.dealer) < 17:
self.dealer.append(draw_card())
self.dealer.append(draw_card(self.np_random))
reward = cmp(score(self.player), score(self.dealer))
if self.natural and is_natural(self.player) and reward == 1:
reward = 1.5
@@ -104,6 +111,6 @@ class BlackjackEnv(gym.Env):
return (sum_hand(self.player), self.dealer[0], usable_ace(self.player))
def _reset(self):
self.dealer = draw_hand()
self.player = draw_hand()
self.dealer = draw_hand(self.np_random)
self.player = draw_hand(self.np_random)
return self._get_obs()

View File

@@ -1,15 +1,15 @@
from gym import Env
from gym import spaces
from gym import Env, spaces
from gym.utils import seeding
import numpy as np
def categorical_sample(prob_n):
def categorical_sample(prob_n, np_random):
"""
Sample from categorical distribution
Each row specifies class probabilities
"""
prob_n = np.asarray(prob_n)
csprob_n = np.cumsum(prob_n)
return (csprob_n > np.random.rand()).argmax()
return (csprob_n > np_random.rand()).argmax()
class DiscreteEnv(Env):
@@ -28,24 +28,27 @@ class DiscreteEnv(Env):
"""
def __init__(self, nS, nA, P, isd):
self.action_space = spaces.Discrete(nA)
self.observation_space = spaces.Discrete(nS)
self.nA = nA
self.P = P
self.isd = isd
self.lastaction = None  # for rendering
self.nS = nS
self.nA = nA
@property
def nS(self):
return self.observation_space.n
self._seed()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
self.action_space = spaces.Discrete(self.nA, np_random=self.np_random)
self.observation_space = spaces.Discrete(self.nS, np_random=self.np_random)
return [seed]
def _reset(self):
self.s = categorical_sample(self.isd)
self.s = categorical_sample(self.isd, self.np_random)
return self.s
def _step(self, a):
transitions = self.P[self.s][a]
i = categorical_sample([t[0] for t in transitions])
i = categorical_sample([t[0] for t in transitions], self.np_random)
p, s, r, d = transitions[i]
self.s = s
self.lastaction = a
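A worked sketch (not part of the diff) of the inverse-CDF trick categorical_sample uses: draw u uniformly from [0, 1) and take the first index whose cumulative probability exceeds u.

import numpy as np

prob_n = np.asarray([0.2, 0.5, 0.3])
csprob_n = np.cumsum(prob_n)          # [0.2, 0.7, 1.0]
rng = np.random.RandomState(0)
u = rng.rand()
index = int((csprob_n > u).argmax())  # 0, 1, or 2, with the given probabilities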

View File

@@ -111,7 +111,7 @@ class FrozenLakeEnv(discrete.DiscreteEnv):
rew = float(newletter == b'G')
li.append((1.0, newstate, rew, done))
super(FrozenLakeEnv, self).__init__(nrow * ncol, 4, P, isd)
super(FrozenLakeEnv, self).__init__(nS, nA, P, isd)
def _render(self, mode='human', close=False):
if close:

View File

@@ -1,7 +1,6 @@
import gym
import random
from gym import spaces
from gym.utils import seeding
class NChainEnv(gym.Env):
"""n-Chain environment
@@ -27,13 +26,18 @@ class NChainEnv(gym.Env):
self.slip = slip # probability of 'slipping' an action
self.small = small # payout for 'backwards' action
self.large = large # payout at end of chain for 'forwards' action
self.action_space = spaces.Discrete(2)
self.observation_space = spaces.Discrete(n)
self.state = 0 # Start at beginning of the chain
self._seed()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
self.action_space = spaces.Discrete(2, np_random=self.np_random)
self.observation_space = spaces.Discrete(self.n, np_random=self.np_random)
return [seed]
def _step(self, action):
assert(self.action_space.contains(action))
if random.random() < self.slip:
if self.np_random.rand() < self.slip:
action = not action # agent slipped, reverse action taken
if action: # 'backwards': go back to the beginning, get small reward
reward = self.small
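The slip check above is just a Bernoulli trial; a quick sketch (not part of the diff) of why seeding makes it reproducible:

import numpy as np

slip = 0.2
rng = np.random.RandomState(7)
slips = [rng.rand() < slip for _ in range(10000)]
print(sum(slips) / len(slips))  # roughly 0.2, and identical on every run with seed 7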

View File

@@ -2,6 +2,7 @@ import numpy as np
import gym
from gym import spaces
from gym.utils import seeding
class RouletteEnv(gym.Env):
@@ -17,8 +18,15 @@ class RouletteEnv(gym.Env):
"""
def __init__(self, spots=37):
self.n = spots + 1
self.action_space = spaces.Discrete(self.n)
self.observation_space = spaces.Discrete(1)
self._seed()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
self.action_space = spaces.Discrete(self.n, np_random=self.np_random)
self.observation_space = spaces.Discrete(1, np_random=self.np_random)
return [seed]
def _step(self, action):
assert(action >= 0 and action < self.n)
@@ -27,7 +35,7 @@ class RouletteEnv(gym.Env):
return 0, 0, True, {}
# N.B. np.random.randint draws from [A, B) while random.randint draws from [A,B]
val = np.random.randint(0, self.n - 1)
val = self.np_random.randint(0, self.n - 1)
if val == action == 0:
reward = self.n - 2.0
elif val != 0 and action != 0 and val % 2 == action % 2:
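The N.B. above is worth spelling out, since mixing the two conventions is an easy off-by-one. A quick check (not part of the diff):

import random
import numpy as np

rng = np.random.RandomState(0)
assert all(0 <= rng.randint(0, 37) <= 36 for _ in range(1000))     # numpy: [low, high)
assert all(0 <= random.randint(0, 36) <= 36 for _ in range(1000))  # stdlib: [low, high]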

View File

@@ -84,9 +84,6 @@ class TaxiEnv(discrete.DiscreteEnv):
isd /= isd.sum()
discrete.DiscreteEnv.__init__(self, nS, nA, P, isd)
self.observation_space = spaces.Discrete(500)
self.action_space = spaces.Discrete(6)
def encode(self, taxirow, taxicol, passloc, destidx):
# (5) 5, 5, 4
i = taxirow

View File

@@ -12,8 +12,14 @@ class UnregisteredEnv(Error):
pass
class DeprecatedEnv(Error):
"""Raised when the user requests an env from the registry with an older version
number than the latest env with the same name.
"""Raised when the user requests an env from the registry with an
older version number than the latest env with the same name.
"""
pass
class UnseedableEnv(Error):
"""Raised when the user tries to seed an env that does not support
seeding.
"""
pass

View File

@@ -10,7 +10,7 @@ import weakref
from gym import error, version
from gym.monitoring import stats_recorder, video_recorder
from gym.utils import atomic_write, closer
from gym.utils import atomic_write, closer, seeding
logger = logging.getLogger(__name__)
@@ -83,8 +83,9 @@ class Monitor(object):
self.enabled = False
self.episode_id = 0
self._monitor_id = None
self.seeds = None
def start(self, directory, video_callable=None, force=False, resume=False):
def start(self, directory, video_callable=None, force=False, resume=False, seed=None):
"""Start monitoring.
Args:
@@ -92,6 +93,7 @@ class Monitor(object):
video_callable (Optional[function]): function that takes in the index of the episode and outputs a boolean, indicating whether we should record a video on this episode. The default (when video_callable is None) is to record on perfect-cube episode indices.
force (bool): Clear out existing training data from this directory (by deleting every file prefixed with "openaigym.").
resume (bool): Retain the training data already in this directory, which will be merged with our new data
seed (Optional[int]): The seed to run this environment with. By default, a random seed will be chosen.
"""
if self.env.spec is None:
logger.warn("Trying to monitor an environment which has no 'spec' set. This usually means you did not create it via 'gym.make', and is recommended only for advanced users.")
@@ -116,6 +118,8 @@ class Monitor(object):
You should use a unique directory for each training run, or use 'force=True' to automatically clear previous monitor files.'''.format(directory, ', '.join(training_manifests[:5])))
self._monitor_id = monitor_closer.register(self)
self.enabled = True
self.directory = os.path.abspath(directory)
# We use the 'openai-gym' prefix to determine if a file is
@@ -126,7 +130,9 @@ class Monitor(object):
self.configure(video_callable=video_callable)
if not os.path.exists(directory):
os.mkdir(directory)
self._monitor_id = monitor_closer.register(self)
seeds = self.env.seed(seed)
self.seeds = seeds
def flush(self):
"""Flush all relevant monitor information to disk."""
@@ -146,6 +152,7 @@ class Monitor(object):
'videos': [(os.path.basename(v), os.path.basename(m))
for v, m in self.videos],
'env_info': self._env_info(),
'seeds': self.seeds,
}, f)
def close(self):
@@ -249,13 +256,12 @@ class Monitor(object):
return self.video_callable(self.episode_id)
def _env_info(self):
if self.env.spec:
return {
'env_id': self.env.spec.id,
env_info = {
'gym_version': version.VERSION,
}
else:
return {}
if self.env.spec:
env_info['env_id'] = self.env.spec.id
return env_info
def __del__(self):
# Make sure we've closed up shop when garbage collecting
@@ -274,6 +280,8 @@ def load_results(training_dir):
# Load up stats + video files
stats_files = []
videos = []
main_seeds = []
seeds = []
env_infos = []
for manifest in manifests:
@@ -284,6 +292,13 @@ def load_results(training_dir):
videos += [(os.path.join(training_dir, v), os.path.join(training_dir, m))
for v, m in contents['videos']]
env_infos.append(contents['env_info'])
current_seeds = contents.get('seeds', [])
seeds += current_seeds
if current_seeds:
main_seeds.append(current_seeds[0])
else:
# current_seeds could be None or []
main_seeds.append(None)
env_info = collapse_env_infos(env_infos, training_dir)
timestamps, episode_lengths, episode_rewards, initial_reset_timestamp = merge_stats_files(stats_files)
@@ -296,6 +311,8 @@ def load_results(training_dir):
'episode_rewards': episode_rewards,
'initial_reset_timestamp': initial_reset_timestamp,
'videos': videos,
'main_seeds': main_seeds,
'seeds': seeds,
}
def merge_stats_files(stats_files):
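A sketch (not part of the diff; the directory is hypothetical) of reading the recorded seeds back out of a monitored run, using the keys load_results now returns:

from gym.monitoring import monitor

results = monitor.load_results('/tmp/random-agent-results')
print(results['main_seeds'])  # first seed per manifest, or None when absent
print(results['seeds'])       # every seed recorded across manifests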

View File

@@ -19,7 +19,7 @@ class StatsRecorder(object):
self.done = None
self.closed = False
filename = '{}.{}.stats.json'.format(self.file_prefix, os.getpid())
filename = '{}.stats.json'.format(self.file_prefix)
self.path = os.path.join(self.directory, filename)
def before_step(self, action):

View File

@@ -89,6 +89,8 @@ def upload_training_data(training_dir, api_key=None):
timestamps = results['timestamps']
episode_lengths = results['episode_lengths']
episode_rewards = results['episode_rewards']
main_seeds = results['main_seeds']
seeds = results['seeds']
videos = results['videos']
env_id = env_info['env_id']
@@ -96,7 +98,7 @@ def upload_training_data(training_dir, api_key=None):
# Do the relevant uploads
if len(episode_lengths) > 0:
training_episode_batch = upload_training_episode_batch(episode_lengths, episode_rewards, timestamps, api_key, env_id=env_id)
training_episode_batch = upload_training_episode_batch(episode_lengths, episode_rewards, timestamps, main_seeds, seeds, api_key, env_id=env_id)
else:
training_episode_batch = None
@@ -112,13 +114,15 @@ def upload_training_data(training_dir, api_key=None):
return env_info, training_episode_batch, training_video
def upload_training_episode_batch(episode_lengths, episode_rewards, timestamps, api_key=None, env_id=None):
def upload_training_episode_batch(episode_lengths, episode_rewards, timestamps, main_seeds, seeds, api_key=None, env_id=None):
logger.info('[%s] Uploading %d episodes of training data', env_id, len(episode_lengths))
file_upload = resource.FileUpload.create(purpose='episode_batch', api_key=api_key)
file_upload.put({
'episode_lengths': episode_lengths,
'episode_rewards': episode_rewards,
'timestamps': timestamps,
'main_seeds': main_seeds,
'seeds': seeds,
})
return file_upload
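For orientation, the shape of the episode-batch payload now uploaded (values invented for illustration; the field names follow the file_upload.put call above):

payload = {
    'episode_lengths': [200, 187],
    'episode_rewards': [200.0, 187.0],
    'timestamps': [1464480000.0, 1464480042.5],
    'main_seeds': [42],
    'seeds': [42, 1337],
}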

View File

@@ -6,12 +6,15 @@ class Box(Space):
A box in R^n.
I.e., each coordinate is bounded.
"""
def __init__(self, low, high, shape=None):
def __init__(self, low, high, shape=None, np_random=None):
"""
Two kinds of valid input:
Box(-1.0, 1.0, (3,4)) # low and high are scalars, and shape is provided
Box(np.array([-1.0,-2.0]), np.array([2.0,4.0])) # low and high are arrays of the same shape
"""
if np_random is None:
np_random = np.random
self.np_random = np_random
if shape is None:
assert low.shape == high.shape
self.low = low
@@ -21,7 +24,7 @@ class Box(Space):
self.low = low + np.zeros(shape)
self.high = high + np.zeros(shape)
def sample(self):
return np.random.uniform(low=self.low, high=self.high, size=self.low.shape)
return self.np_random.uniform(low=self.low, high=self.high, size=self.low.shape)
def contains(self, x):
return x.shape == self.shape and (x >= self.low).all() and (x <= self.high).all()
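A minimal sketch (not part of the diff) of the new np_random hook: identically-seeded spaces draw identical samples.

import numpy as np
from gym import spaces

box_a = spaces.Box(-1.0, 1.0, (3,), np_random=np.random.RandomState(123))
box_b = spaces.Box(-1.0, 1.0, (3,), np_random=np.random.RandomState(123))
assert (box_a.sample() == box_b.sample()).all()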

View File

@@ -5,10 +5,13 @@ class Discrete(Space):
"""
{0,1,...,n-1}
"""
def __init__(self, n):
def __init__(self, n, np_random=None):
if np_random is None:
np_random = np.random
self.np_random = np_random
self.n = n
def sample(self):
return np.random.randint(self.n)
return self.np_random.randint(self.n)
def contains(self, x):
if isinstance(x, int):
as_int = x

View File

@@ -13,13 +13,17 @@ class HighLow(Space):
e.g. if the space is composed of ATTACK (values: 0-100), MOVE_LEFT (values: 0-1), and MOVE_RIGHT (values: 0-1),
the space would be [[0.0, 100.0, 2], [0, 1, 0], [0, 1, 0]]
"""
def __init__(self, matrix):
def __init__(self, matrix, np_random=None):
"""
A matrix of shape (n, 3), where the first column is the minimum (inclusive), the second column
is the maximum (inclusive), and the third column is the precision (number of decimals to keep)
e.g. np.matrix([[0, 1, 0], [0, 1, 0], [0.0, 100.0, 2]])
"""
if np_random is None:
np_random = np.random
self.np_random = np_random
(num_rows, num_cols) = matrix.shape
assert num_rows >= 1
assert num_cols == 3
@@ -29,7 +33,7 @@ class HighLow(Space):
def sample(self):
# For each row: round(random .* (max - min) + min, precision)
max_minus_min = self.matrix[:, 1] - self.matrix[:, 0]
random_matrix = np.multiply(max_minus_min, np.random.rand(self.num_rows, 1)) + self.matrix[:, 0]
random_matrix = np.multiply(max_minus_min, self.np_random.rand(self.num_rows, 1)) + self.matrix[:, 0]
rounded_matrix = np.zeros(self.num_rows)
for i in range(self.num_rows):
rounded_matrix[i] = round(random_matrix[i, 0], int(self.matrix[i, 2]))
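A worked sketch (not part of the diff) of the per-row computation in HighLow.sample(): scale a uniform draw into [low, high], then round to the row's precision.

import numpy as np

low, high, precision = 0.0, 100.0, 2
rng = np.random.RandomState(0)
value = round(float(rng.rand()) * (high - low) + low, int(precision))
assert low <= value <= high  # 54.88 for seed 0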

97
gym/utils/seeding.py Normal file
View File

@@ -0,0 +1,97 @@
import hashlib
import numpy as np
import os
import random as _random
import struct
import sys
from gym import error
if sys.version_info < (3,):
integer_types = (int, long)
else:
integer_types = (int,)
# Fortunately not needed right now!
#
# def random(seed=None):
# seed = _seed(seed)
#
# rng = _random.Random()
# rng.seed(hash_seed(seed))
# return rng, seed
def np_random(seed=None):
if seed is not None and not (isinstance(seed, integer_types) and 0 <= seed):
raise error.Error('Seed must be a non-negative integer or omitted, not {}'.format(seed))
seed = _seed(seed)
rng = np.random.RandomState()
rng.seed(_int_list_from_bigint(hash_seed(seed)))
return rng, seed
def hash_seed(seed, max_bytes=8):
"""Any given evaluation is likely to have many PRNG's active at
once. (Most commonly, because the environment is running in
multiple processes.) There's literature indicating that having
linear correlations between seeds of multiple PRNG's can correlate
the outputs:
http://blogs.unity3d.com/2015/01/07/a-primer-on-repeatable-random-numbers/
http://stackoverflow.com/questions/1554958/how-different-do-random-seeds-need-to-be
http://dl.acm.org/citation.cfm?id=1276928
Thus, for sanity we hash the seeds before using them. (This scheme
is likely not crypto-strength, but it should be good enough to get
rid of simple correlations.)
"""
hash = hashlib.sha512(str(seed).encode('utf8')).digest()
return _bigint_from_bytes(hash[:max_bytes])
def _seed(a=None, max_bytes=8):
"""Create a strong random seed. Otherwise, Python 2 would seed using
the system time, which might be non-robust especially in the
presence of concurrency.
Args:
a (Optional[int, str]): None seeds from an operating system specific randomness source. If an int or str passed, all of the bits are used.
"""
# Adapted from https://svn.python.org/projects/python/tags/r32/Lib/random.py
if a is None:
a = _bigint_from_bytes(os.urandom(max_bytes))
elif isinstance(a, str):
a = a.encode('utf8')
a += hashlib.sha512(a).digest()
a = _bigint_from_bytes(a[:max_bytes])
elif isinstance(a, integer_types):
a = a % 2**(8 * max_bytes)
else:
raise error.Error('Invalid type for seed: {} ({})'.format(type(a), a))
return a
# TODO: don't hardcode sizeof_int here
def _bigint_from_bytes(bytes):
sizeof_int = 4
    padding = -len(bytes) % sizeof_int  # pad to a whole number of ints
    bytes += b'\0' * padding
    int_count = len(bytes) // sizeof_int
unpacked = struct.unpack("{}I".format(int_count), bytes)
accum = 0
for i, val in enumerate(unpacked):
accum += 2 ** (sizeof_int * 8 * i) * val
return accum
def _int_list_from_bigint(bigint):
# Special case 0
if bigint < 0:
raise error.Error('Seed must be non-negative, not {}'.format(bigint))
elif bigint == 0:
return [0]
ints = []
while bigint > 0:
bigint, mod = divmod(bigint, 2 ** 32)
ints.append(mod)
return ints
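Putting the pieces above together, a minimal usage sketch (not part of the diff):

from gym.utils import seeding

rng_a, seed_a = seeding.np_random(42)
rng_b, seed_b = seeding.np_random(42)
assert seed_a == seed_b == 42
assert rng_a.randint(1000) == rng_b.randint(1000)  # identical random streams

# hash_seed decorrelates nearby seeds before they reach the PRNG:
print(seeding.hash_seed(1))
print(seeding.hash_seed(2))  # an unrelated bigint, despite the adjacent input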

View File

@@ -0,0 +1,16 @@
from gym import error
from gym.utils import seeding
def test_invalid_seeds():
for seed in [-1, 'test']:
try:
seeding.np_random(seed)
except error.Error:
pass
else:
assert False, 'Invalid seed {} passed validation'.format(seed)
def test_valid_seeds():
for seed in [0, 1]:
random, seed1 = seeding.np_random(seed)
assert seed == seed1