mirror of
https://github.com/Farama-Foundation/Gymnasium.git
synced 2025-08-22 07:02:19 +00:00
[WIP] add support for seeding environments (#135)
* Make environments seedable * Fix monitor bugs - Set monitor_id before setting the infix. This was a bug that would yield incorrect results with multiple monitors. - Remove extra pid from stats recorder filename. This should be purely cosmetic. * Start uploading seeds in episode_batch * Fix _bigint_from_bytes for python3 * Set seed explicitly in random_agent * Pass through seed argument * Also pass through random state to spaces * Pass random state into the observation/action spaces * Make all _seed methods return the list of used seeds * Switch over to np.random where possible * Start hashing seeds, and also seed doom engine * Fixup seeding determinism in many cases * Seed before loading the ROM * Make seeding more Python3 friendly * Make the MuJoCo skipping a bit more forgiving * Remove debugging PDB calls * Make setInt argument into raw bytes * Validate and upload seeds * Skip box2d * Make seeds smaller, and change representation of seeds in upload * Handle long seeds * Fix RandomAgent example to be deterministic * Handle integer types correctly in Python2 and Python3 * Try caching pip * Try adding swap * Add df and free calls * Bump swap * Bump swap size * Try setting overcommit * Try other sysctls * Try fixing overcommit * Try just setting overcommit_memory=1 * Add explanatory comment * Add what's new section to readme * BUG: Mark ElevatorAction-ram-v0 as non-deterministic for now * Document seed * Move nondetermistic check into spec
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -23,8 +23,6 @@ ghostdriver.log
|
||||
|
||||
junk
|
||||
MUJOCO_LOG.txt
|
||||
mujoco-bundle
|
||||
|
||||
|
||||
rllab_mujoco
|
||||
|
||||
@@ -36,3 +34,4 @@ tutorial/*.html
|
||||
|
||||
# PyCharm project files
|
||||
.idea
|
||||
vizdoom.ini
|
||||
|
@@ -2,7 +2,7 @@ dist: trusty
|
||||
sudo: required
|
||||
cache:
|
||||
apt: true
|
||||
pip: false
|
||||
pip: true
|
||||
language: python
|
||||
addons:
|
||||
apt:
|
||||
@@ -30,6 +30,11 @@ before_install:
|
||||
# In a pull request, there are no secrets, and hence no MuJoCo:
|
||||
# https://docs.travis-ci.com/user/pull-requests#Security-Restrictions-when-testing-Pull-Requests.
|
||||
- '[ -z ${MUJOCO_KEY_BUNDLE+x} ] || ( curl https://openai-public.s3-us-west-2.amazonaws.com/mujoco/$MUJOCO_KEY_BUNDLE.tar.gz | tar xz -C ~/.mujoco )'
|
||||
# Without this line, Travis has fork()s fail with an out of memory
|
||||
# error. (These fork()s are for spawning the subprocess for video
|
||||
# recording.) We should debug the memory usage at some stage, but
|
||||
# simply setting overcommit is a good starting point.
|
||||
- sudo sysctl -w vm.overcommit_memory=1
|
||||
env:
|
||||
- DISPLAY=:12
|
||||
install: pip install tox-travis
|
||||
|
@@ -251,3 +251,10 @@ We are using `nose2 <https://github.com/nose-devs/nose2>`_ for tests. You can ru
|
||||
nose2
|
||||
|
||||
You can also run tests in a specific directory by using the ``-s`` option, or by passing in the specific name of the test. See the `nose2 docs <http://nose2.readthedocs.org/en/latest/usage.html#naming-tests>`_ for more details.
|
||||
|
||||
What's new
|
||||
----------
|
||||
|
||||
- 2016-05-28: For controlled reproducibility, envs now support seeding
|
||||
(cf #91 and #135). The monitor records which seeds are used. We will
|
||||
soon add seed information to the display on the scoreboard.
|
||||
|
@@ -19,14 +19,18 @@ if __name__ == '__main__':
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
env = gym.make('CartPole-v0' if len(sys.argv)<2 else sys.argv[1])
|
||||
agent = RandomAgent(env.action_space)
|
||||
|
||||
# You provide the directory to write to (can be an existing
|
||||
# directory, including one with existing data -- all monitor files
|
||||
# will be namespaced). You can also dump to a tempdir if you'd
|
||||
# like: tempfile.mkdtemp().
|
||||
outdir = '/tmp/random-agent-results'
|
||||
env.monitor.start(outdir, force=True)
|
||||
env.monitor.start(outdir, force=True, seed=0)
|
||||
|
||||
# This declaration must go *after* the monitor call, since the
|
||||
# monitor's seeding creates a new action_space instance with the
|
||||
# appropriate pseudorandom number generator.
|
||||
agent = RandomAgent(env.action_space)
|
||||
|
||||
episode_count = 100
|
||||
max_steps = 200
|
||||
|
@@ -1,9 +1,4 @@
|
||||
import hashlib
|
||||
import numpy as np
|
||||
import logging
|
||||
import os
|
||||
import random
|
||||
import struct
|
||||
import sys
|
||||
|
||||
import gym
|
||||
@@ -40,48 +35,3 @@ def undo_logger_setup():
|
||||
root_logger.removeHandler(handler)
|
||||
gym.logger.setLevel(logging.NOTSET)
|
||||
requests_logger.setLevel(logging.NOTSET)
|
||||
|
||||
def seed(a=None):
    """Seed the global 'random' and 'numpy.random' generators.

    By default, Python seeds these with the system time. Call this if
    you are using multiple processes.

    Notes:
        SECURITY SENSITIVE: a bug here would allow people to generate fake results. Please let us know if you find one :).

    Args:
        a (Optional[int, str, bytes]): None or no argument seeds from an operating-system-specific randomness source. If an int, str or bytes is passed, then all of its bits are used.

    Returns:
        The integer seed that was actually handed to the generators.

    Raises:
        ValueError: If an integer seed is negative; such a value cannot be
            split into the unsigned words passed to numpy.
    """
    # Adapted from https://svn.python.org/projects/python/tags/r32/Lib/random.py
    if a is None:
        a = bigint_from_bytes(os.urandom(32))

    if isinstance(a, str):
        a = a.encode('utf8')
    # On Python 2 'str' is already 'bytes', so both branches run there; on
    # Python 3 this additionally accepts raw bytes seeds.
    if isinstance(a, bytes):
        a = bigint_from_bytes(a + hashlib.sha512(a).digest())

    if a < 0:
        raise ValueError('Seed must be non-negative, not {}'.format(a))

    # Actually seed the generators
    random.seed(a)
    # A seed of 0 may decompose into an empty word list, which is not a
    # usable numpy seed sequence; fall back to the single word 0.
    np.random.seed(int_list_from_bigint(a) or [0])

    return a
|
||||
|
||||
def bigint_from_bytes(bytes):
    """Interpret a byte string as a single non-negative integer.

    The input is zero-padded to a whole number of C ``unsigned int`` words,
    unpacked with ``struct`` in native byte order, and the words are
    combined with the first word as the least significant one.

    Args:
        bytes: The raw bytes to convert (may be empty). The argument is not
            modified, even if it is a mutable ``bytearray``.

    Returns:
        int: The accumulated integer; 0 for empty input.
    """
    # Ask struct for the word size instead of hard-coding it, so it always
    # agrees with the "I" format used below.
    sizeof_int = struct.calcsize("I")
    # Pad only as far as the next word boundary (0 when already aligned).
    padding = -len(bytes) % sizeof_int
    # Build a new object rather than using '+=' so a caller's bytearray is
    # left untouched; b'\0' works for byte strings on Python 2 and 3.
    padded = bytes + b'\0' * padding
    # Floor division keeps the count an int on Python 3.
    int_count = len(padded) // sizeof_int
    unpacked = struct.unpack("{}I".format(int_count), padded)
    bits_per_int = sizeof_int * 8
    return sum(val << (bits_per_int * i) for i, val in enumerate(unpacked))
|
||||
|
||||
def int_list_from_bigint(bigint):
    """Split a non-negative integer into 32-bit words, least significant first.

    Args:
        bigint (int): The non-negative integer to decompose.

    Returns:
        list: The base-2**32 digits of ``bigint``, lowest word first. Zero
        yields ``[0]`` so the result is never empty.

    Raises:
        ValueError: If ``bigint`` is negative.
    """
    if bigint < 0:
        raise ValueError('Seed must be non-negative, not {}'.format(bigint))
    if bigint == 0:
        # The loop below would produce an empty list for zero.
        return [0]

    ints = []
    while bigint > 0:
        bigint, mod = divmod(bigint, 2 ** 32)
        ints.append(mod)
    return ints
|
||||
|
27
gym/core.py
27
gym/core.py
@@ -17,10 +17,11 @@ class Env(object):
|
||||
|
||||
The main API methods that users of this class need to know are:
|
||||
|
||||
reset
|
||||
step
|
||||
reset
|
||||
render
|
||||
close
|
||||
seed
|
||||
|
||||
When implementing an environment, override the following methods
|
||||
in your subclass:
|
||||
@@ -28,6 +29,8 @@ class Env(object):
|
||||
_step
|
||||
_reset
|
||||
_render
|
||||
_close
|
||||
_seed
|
||||
|
||||
And set the following attributes:
|
||||
|
||||
@@ -70,6 +73,7 @@ class Env(object):
|
||||
if close:
|
||||
return
|
||||
raise NotImplementedError
|
||||
def _seed(self, seed=None): return []
|
||||
|
||||
@property
|
||||
def monitor(self):
|
||||
@@ -172,7 +176,9 @@ class Env(object):
|
||||
Environments will automatically close() themselves when
|
||||
garbage collected or when the program exits.
|
||||
"""
|
||||
if self._closed:
|
||||
# _closed will be missing if this instance is still
|
||||
# initializing.
|
||||
if not hasattr(self, '_closed') or self._closed:
|
||||
return
|
||||
|
||||
self._close()
|
||||
@@ -181,6 +187,23 @@ class Env(object):
|
||||
# end up with double close.
|
||||
self._closed = True
|
||||
|
||||
def seed(self, seed=None):
    """Seed this env's random number generator(s) by delegating to ``_seed``.

    Note:
        An environment may rely on more than one pseudorandom number
        generator. Every seed used is reported so that accidental
        correlations between those generators can be ruled out.

    Args:
        seed: The value forwarded unchanged to ``self._seed``.

    Returns:
        list<bigint>: The seeds used by this env's random number
          generators. The first entry should be the "main" seed, i.e. the
          value a reproducer should pass to 'seed'. It often equals the
          provided 'seed', but not, for example, when seed=None.
    """
    used_seeds = self._seed(seed)
    return used_seeds
|
||||
|
||||
def __del__(self):
|
||||
self.close()
|
||||
|
||||
|
@@ -228,11 +228,21 @@ for game in ['air_raid', 'alien', 'amidar', 'assault', 'asterix', 'asteroids', '
|
||||
name = ''.join([g.capitalize() for g in game.split('_')])
|
||||
if obs_type == 'ram':
|
||||
name = '{}-ram'.format(name)
|
||||
|
||||
nondeterministic = False
|
||||
if game == 'elevator_action' and obs_type == 'ram':
|
||||
# ElevatorAction-ram-v0 seems to yield slightly
|
||||
# non-deterministic observations about 10% of the time. We
|
||||
# should track this down eventually, but for now we just
|
||||
# mark it as nondeterministic.
|
||||
nondeterministic = True
|
||||
|
||||
register(
|
||||
id='{}-v0'.format(name),
|
||||
entry_point='gym.envs.atari:AtariEnv',
|
||||
kwargs={'game': game, 'obs_type': obs_type},
|
||||
timestep_limit=10000,
|
||||
nondeterministic=nondeterministic,
|
||||
)
|
||||
|
||||
# Board games
|
||||
@@ -248,6 +258,11 @@ register(
|
||||
'illegal_move_mode': 'lose',
|
||||
'board_size': 9,
|
||||
},
|
||||
# The pachi player seems not to be deterministic given a fixed seed.
|
||||
# (Reproduce by running 'import gym; h = gym.make('Go9x9-v0'); h.seed(1); h.reset(); h.step(15); h.step(16); h.step(17)' a few times.)
|
||||
#
|
||||
# This is probably due to a computation time limit.
|
||||
nondetermistic=True,
|
||||
)
|
||||
|
||||
register(
|
||||
@@ -260,6 +275,7 @@ register(
|
||||
'illegal_move_mode': 'lose',
|
||||
'board_size': 19,
|
||||
},
|
||||
nondetermistic=True,
|
||||
)
|
||||
|
||||
register(
|
||||
|
@@ -1,8 +1,7 @@
|
||||
from gym import Env
|
||||
from gym.spaces import Discrete, Tuple
|
||||
from gym.utils import colorize
|
||||
from gym.utils import colorize, seeding
|
||||
import numpy as np
|
||||
import random
|
||||
from six import StringIO
|
||||
import sys
|
||||
import math
|
||||
@@ -17,6 +16,7 @@ class AlgorithmicEnv(Env):
|
||||
|
||||
def __init__(self, inp_dim=1, base=10, chars=False):
|
||||
global hash_base
|
||||
|
||||
hash_base = 50 ** np.arange(inp_dim)
|
||||
self.base = base
|
||||
self.last = 10
|
||||
@@ -27,10 +27,17 @@ class AlgorithmicEnv(Env):
|
||||
self.inp_dim = inp_dim
|
||||
AlgorithmicEnv.current_length = 2
|
||||
tape_control = []
|
||||
self.action_space = Tuple(([Discrete(2 * inp_dim), Discrete(2), Discrete(self.base)]))
|
||||
self.observation_space = Discrete(self.base + 1)
|
||||
|
||||
self._seed()
|
||||
self.reset()
|
||||
|
||||
def _seed(self, seed=None):
    """Create this env's generator and rebuild both spaces around it.

    Args:
        seed: Forwarded to ``seeding.np_random``.

    Returns:
        list: A one-element list holding the seed reported by
        ``seeding.np_random``.
    """
    rng, used_seed = seeding.np_random(seed)
    self.np_random = rng

    # The action is a 3-tuple of discrete choices, each sharing the same
    # generator as the env itself.
    sizes = (2 * self.inp_dim, 2, self.base)
    self.action_space = Tuple([Discrete(size, np_random=rng) for size in sizes])
    self.observation_space = Discrete(self.base + 1, np_random=rng)
    return [used_seed]
|
||||
|
||||
def _get_obs(self, pos=None):
|
||||
if pos is None:
|
||||
pos = self.x
|
||||
@@ -198,6 +205,6 @@ class AlgorithmicEnv(Env):
|
||||
AlgorithmicEnv.sum_rewards = []
|
||||
self.sum_reward = 0.0
|
||||
self.time = 0
|
||||
self.total_len = random.randrange(3) + AlgorithmicEnv.current_length
|
||||
self.total_len = self.np_random.randint(3) + AlgorithmicEnv.current_length
|
||||
self.set_data()
|
||||
return self._get_obs()
|
||||
|
@@ -2,7 +2,6 @@
|
||||
Task is to copy content from the input tape to
|
||||
the output tape. http://arxiv.org/abs/1511.07275
|
||||
"""
|
||||
import random
|
||||
import numpy as np
|
||||
from gym.envs.algorithmic import algorithmic_env
|
||||
from gym.envs.algorithmic.algorithmic_env import ha
|
||||
@@ -17,8 +16,7 @@ class CopyEnv(algorithmic_env.AlgorithmicEnv):
|
||||
self.content = {}
|
||||
self.target = {}
|
||||
for i in range(self.total_len):
|
||||
val = random.randrange(self.base)
|
||||
val = self.np_random.randint(self.base)
|
||||
self.content[ha(np.array([i]))] = val
|
||||
self.target[i] = val
|
||||
self.total_reward = self.total_len
|
||||
|
||||
|
@@ -3,7 +3,6 @@ Task is to return every second character from the input tape.
|
||||
http://arxiv.org/abs/1511.07275
|
||||
"""
|
||||
|
||||
import random
|
||||
import numpy as np
|
||||
from gym.envs.algorithmic import algorithmic_env
|
||||
from gym.envs.algorithmic.algorithmic_env import ha
|
||||
@@ -20,7 +19,7 @@ class DuplicatedInputEnv(algorithmic_env.AlgorithmicEnv):
|
||||
self.target = {}
|
||||
copies = int(self.total_len / self.duplication)
|
||||
for i in range(copies):
|
||||
val = random.randrange(self.base)
|
||||
val = self.np_random.randint(self.base)
|
||||
self.target[i] = val
|
||||
for d in range(self.duplication):
|
||||
self.content[ha(np.array([i * self.duplication + d]))] = val
|
||||
|
@@ -2,7 +2,6 @@
|
||||
Task is to copy content multiple-times from the input tape to
|
||||
the output tape. http://arxiv.org/abs/1511.07275
|
||||
"""
|
||||
import random
|
||||
import numpy as np
|
||||
from gym.envs.algorithmic import algorithmic_env
|
||||
from gym.envs.algorithmic.algorithmic_env import ha
|
||||
@@ -20,10 +19,9 @@ class RepeatCopyEnv(algorithmic_env.AlgorithmicEnv):
|
||||
self.target = {}
|
||||
unique = set()
|
||||
for i in range(self.total_len):
|
||||
val = random.randrange(self.base)
|
||||
val = self.np_random.randint(self.base)
|
||||
self.content[ha(np.array([i]))] = val
|
||||
self.target[i] = val
|
||||
self.target[2 * self.total_len - i - 1] = val
|
||||
self.target[2 * self.total_len + i] = val
|
||||
self.total_reward = 3.0 * self.total_len + 0.9
|
||||
|
||||
|
@@ -3,7 +3,6 @@ Task is to reverse content over the input tape.
|
||||
http://arxiv.org/abs/1511.07275
|
||||
"""
|
||||
|
||||
import random
|
||||
import numpy as np
|
||||
from gym.envs.algorithmic import algorithmic_env
|
||||
from gym.envs.algorithmic.algorithmic_env import ha
|
||||
@@ -21,7 +20,7 @@ class ReverseEnv(algorithmic_env.AlgorithmicEnv):
|
||||
self.content = {}
|
||||
self.target = {}
|
||||
for i in range(self.total_len):
|
||||
val = random.randrange(self.base)
|
||||
val = self.np_random.randint(self.base)
|
||||
self.content[ha(np.array([i]))] = val
|
||||
self.target[self.total_len - i - 1] = val
|
||||
self.total_reward = self.total_len + 0.9
|
||||
|
@@ -1,4 +1,3 @@
|
||||
import random
|
||||
import numpy as np
|
||||
from gym.envs.algorithmic import algorithmic_env
|
||||
from gym.envs.algorithmic.algorithmic_env import ha
|
||||
@@ -17,7 +16,7 @@ class ReversedAdditionEnv(algorithmic_env.AlgorithmicEnv):
|
||||
for i in range(self.total_len):
|
||||
vals = []
|
||||
for k in range(self.rows):
|
||||
val = random.randrange(self.base)
|
||||
val = self.np_random.randint(self.base)
|
||||
self.content[ha(np.array([i, k]))] = val
|
||||
vals.append(val)
|
||||
total = sum(vals) + curry
|
||||
@@ -26,5 +25,3 @@ class ReversedAdditionEnv(algorithmic_env.AlgorithmicEnv):
|
||||
if curry > 0:
|
||||
self.target[self.total_len] = curry
|
||||
self.total_reward = self.total_len
|
||||
|
||||
|
||||
|
@@ -3,6 +3,7 @@ import os
|
||||
import gym
|
||||
from gym import error, spaces
|
||||
from gym import utils
|
||||
from gym.utils import seeding
|
||||
|
||||
try:
|
||||
import atari_py
|
||||
@@ -30,29 +31,42 @@ class AtariEnv(gym.Env, utils.EzPickle):
|
||||
def __init__(self, game='pong', obs_type='ram'):
|
||||
utils.EzPickle.__init__(self, game, obs_type)
|
||||
assert obs_type in ('ram', 'image')
|
||||
game_path = atari_py.get_game_path(game)
|
||||
if not os.path.exists(game_path):
|
||||
raise IOError('You asked for game %s but path %s does not exist'%(game, game_path))
|
||||
self.ale = atari_py.ALEInterface()
|
||||
self.ale.loadROM(game_path)
|
||||
|
||||
self.game_path = atari_py.get_game_path(game)
|
||||
if not os.path.exists(self.game_path):
|
||||
raise IOError('You asked for game %s but path %s does not exist'%(game, self.game_path))
|
||||
self._obs_type = obs_type
|
||||
self._action_set = self.ale.getMinimalActionSet()
|
||||
self.ale = atari_py.ALEInterface()
|
||||
self.viewer = None
|
||||
|
||||
(screen_width,screen_height) = self.ale.getScreenDims()
|
||||
self._seed()
|
||||
|
||||
self.action_space = spaces.Discrete(len(self._action_set))
|
||||
def _seed(self, seed=None):
|
||||
self.np_random, seed1 = seeding.np_random(seed)
|
||||
# Derive a random seed. This gets passed as a uint, but gets
|
||||
# checked as an int elsewhere, so we need to keep it below
|
||||
# 2**31.
|
||||
seed2 = seeding.hash_seed(seed1 + 1) % 2**31
|
||||
# Empirically, we need to seed before loading the ROM.
|
||||
self.ale.setInt(b'random_seed', seed2)
|
||||
self.ale.loadROM(self.game_path)
|
||||
self._action_set = self.ale.getMinimalActionSet()
|
||||
|
||||
self.action_space = spaces.Discrete(len(self._action_set), np_random=self.np_random)
|
||||
|
||||
(screen_width,screen_height) = self.ale.getScreenDims()
|
||||
if self._obs_type == 'ram':
|
||||
self.observation_space = spaces.Box(low=np.zeros(128), high=np.zeros(128)+255)
|
||||
self.observation_space = spaces.Box(low=np.zeros(128), high=np.zeros(128)+255, np_random=self.np_random)
|
||||
elif self._obs_type == 'image':
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3))
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3), np_random=self.np_random)
|
||||
else:
|
||||
raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))
|
||||
return [seed1, seed2]
|
||||
|
||||
def _step(self, a):
|
||||
reward = 0.0
|
||||
action = self._action_set[a]
|
||||
num_steps = np.random.randint(2, 5)
|
||||
num_steps = self.np_random.randint(2, 5)
|
||||
for _ in range(num_steps):
|
||||
reward += self.ale.act(action)
|
||||
ob = self._get_obs()
|
||||
|
@@ -8,6 +8,7 @@ except ImportError as e:
|
||||
import numpy as np
|
||||
import gym
|
||||
from gym import spaces
|
||||
from gym.utils import seeding
|
||||
from six import StringIO
|
||||
import sys
|
||||
import six
|
||||
@@ -71,10 +72,12 @@ class GoState(object):
|
||||
|
||||
|
||||
### Adversary policies ###
|
||||
def make_random_policy(np_random):
|
||||
def random_policy(curr_state, prev_state, prev_action):
|
||||
b = curr_state.board
|
||||
legal_coords = b.get_legal_coords(curr_state.color)
|
||||
return _coord_to_action(b, np.random.choice(legal_coords))
|
||||
return _coord_to_action(b, np_random.choice(legal_coords))
|
||||
return random_policy
|
||||
|
||||
def make_pachi_policy(board, engine_type='uct', threads=1, pachi_timestr=''):
|
||||
engine = pachi_py.PyPachiEngine(board, engine_type, six.b('threads=%d' % threads))
|
||||
@@ -122,16 +125,18 @@ class GoEnv(gym.Env):
|
||||
metadata = {"render.modes": ["human", "ansi"]}
|
||||
|
||||
def __init__(self, player_color, opponent, observation_type, illegal_move_mode, board_size):
|
||||
'''
|
||||
"""
|
||||
Args:
|
||||
player_color: Stone color for the agent. Either 'black' or 'white'
|
||||
opponent: An opponent policy
|
||||
observation_type: State encoding
|
||||
illegal_move_mode: What to do when the agent makes an illegal move. Choices: 'raise' or 'lose'
|
||||
'''
|
||||
"""
|
||||
assert isinstance(board_size, int) and board_size >= 1, 'Invalid board size: {}'.format(board_size)
|
||||
self.board_size = board_size
|
||||
|
||||
self._seed()
|
||||
|
||||
colormap = {
|
||||
'black': pachi_py.BLACK,
|
||||
'white': pachi_py.WHITE,
|
||||
@@ -150,17 +155,22 @@ class GoEnv(gym.Env):
|
||||
assert illegal_move_mode in ['lose', 'raise']
|
||||
self.illegal_move_mode = illegal_move_mode
|
||||
|
||||
# One action for each board position, pass, and resign
|
||||
self.action_space = spaces.Discrete(self.board_size**2 + 2)
|
||||
|
||||
if self.observation_type == 'image3c':
|
||||
shape = pachi_py.CreateBoard(self.board_size).encode().shape
|
||||
self.observation_space = spaces.Box(np.zeros(shape), np.ones(shape))
|
||||
else:
|
||||
if self.observation_type != 'image3c':
|
||||
raise error.Error('Unsupported observation type: {}'.format(self.observation_type))
|
||||
|
||||
self.reset()
|
||||
|
||||
def _seed(self, seed=None):
    """Seed this env's generator and the pachi engine, then rebuild the spaces.

    Args:
        seed: Forwarded to ``seeding.np_random``.

    Returns:
        list: ``[seed1, seed2]`` -- the seed reported by
        ``seeding.np_random`` and the derived value handed to
        ``pachi_py.pachi_srand``.
    """
    rng, seed1 = seeding.np_random(seed)
    self.np_random = rng

    # Derive a second seed from the first; it is reduced modulo 2**32
    # before being passed to pachi.
    seed2 = seeding.hash_seed(seed1 + 1) % 2**32
    pachi_py.pachi_srand(seed2)

    # The observation bounds take their shape from an encoded empty board.
    board_shape = pachi_py.CreateBoard(self.board_size).encode().shape
    self.observation_space = spaces.Box(np.zeros(board_shape), np.ones(board_shape), np_random=rng)

    # One action for each board position, pass, and resign
    num_actions = self.board_size**2 + 2
    self.action_space = spaces.Discrete(num_actions, np_random=rng)
    return [seed1, seed2]
|
||||
|
||||
def _reset(self):
|
||||
self.state = GoState(pachi_py.CreateBoard(self.board_size), pachi_py.BLACK)
|
||||
|
||||
@@ -250,7 +260,7 @@ class GoEnv(gym.Env):
|
||||
|
||||
def _reset_opponent(self, board):
|
||||
if self.opponent == 'random':
|
||||
self.opponent_policy = random_policy
|
||||
self.opponent_policy = make_random_policy(self.np_random)
|
||||
elif self.opponent == 'pachi:uct:_2400':
|
||||
self.opponent_policy = make_pachi_policy(board=board, engine_type=six.b('uct'), pachi_timestr=six.b('_2400')) # TODO: strength as argument
|
||||
else:
|
||||
|
@@ -8,13 +8,14 @@ import gym
|
||||
from gym import spaces
|
||||
import numpy as np
|
||||
from gym import error
|
||||
from gym.utils import seeding
|
||||
|
||||
|
||||
def make_random_policy(np_random):
|
||||
def random_policy(state):
|
||||
possible_moves = HexEnv.get_possible_actions(state)
|
||||
a = np.random.randint(len(possible_moves))
|
||||
a = np_random.randint(len(possible_moves))
|
||||
return possible_moves[a]
|
||||
|
||||
return random_policy
|
||||
|
||||
class HexEnv(gym.Env):
|
||||
"""
|
||||
@@ -46,13 +47,6 @@ class HexEnv(gym.Env):
|
||||
raise error.Error("player_color must be 'black' or 'white', not {}".format(player_color))
|
||||
|
||||
self.opponent = opponent
|
||||
if isinstance(self.opponent, str):
|
||||
if opponent == 'random':
|
||||
self.opponent_policy = random_policy
|
||||
else:
|
||||
raise error.Error('Unrecognized opponent policy {}'.format(self.opponent))
|
||||
else:
|
||||
self.opponent_policy = opponent
|
||||
|
||||
assert observation_type in ['numpy3c']
|
||||
self.observation_type = observation_type
|
||||
@@ -60,14 +54,28 @@ class HexEnv(gym.Env):
|
||||
assert illegal_move_mode in ['lose', 'raise']
|
||||
self.illegal_move_mode = illegal_move_mode
|
||||
|
||||
# One action for each board position and resign
|
||||
self.action_space = spaces.Discrete(self.board_size ** 2 + 1)
|
||||
|
||||
if self.observation_type != 'numpy3c':
|
||||
raise error.Error('Unsupported observation type: {}'.format(self.observation_type))
|
||||
self._seed()
|
||||
|
||||
def _seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
|
||||
# One action for each board position and resign
|
||||
self.action_space = spaces.Discrete(self.board_size ** 2 + 1, np_random=self.np_random)
|
||||
observation = self.reset()
|
||||
self.observation_space = spaces.Box(np.zeros(observation.shape), np.ones(observation.shape))
|
||||
self.observation_space = spaces.Box(np.zeros(observation.shape), np.ones(observation.shape), np_random=self.np_random)
|
||||
|
||||
# Update the random policy if needed
|
||||
if isinstance(self.opponent, str):
|
||||
if self.opponent == 'random':
|
||||
self.opponent_policy = make_random_policy(self.np_random)
|
||||
else:
|
||||
raise error.Error('Unrecognized opponent policy {}'.format(self.opponent))
|
||||
else:
|
||||
self.opponent_policy = self.opponent
|
||||
|
||||
return [seed]
|
||||
|
||||
def _reset(self):
|
||||
self.state = np.zeros((3, self.board_size, self.board_size))
|
||||
|
@@ -6,6 +6,7 @@ from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revolute
|
||||
|
||||
import gym
|
||||
from gym import spaces
|
||||
from gym.utils import colorize, seeding
|
||||
|
||||
# This is simple 4-joints walker robot environment.
|
||||
#
|
||||
@@ -86,12 +87,9 @@ class BipedalWalker(gym.Env):
|
||||
hardcore = False
|
||||
|
||||
def __init__(self):
|
||||
self._seed()
|
||||
self.viewer = None
|
||||
|
||||
high = np.array([np.inf]*24)
|
||||
self.action_space = spaces.Box( np.array([-1,-1,-1,-1]), np.array([+1,+1,+1,+1]) )
|
||||
self.observation_space = spaces.Box(-high, high)
|
||||
|
||||
self.world = Box2D.b2World()
|
||||
self.terrain = None
|
||||
self.hull = None
|
||||
@@ -99,6 +97,13 @@ class BipedalWalker(gym.Env):
|
||||
self.prev_shaping = None
|
||||
self._reset()
|
||||
|
||||
def _seed(self, seed=None):
    """Create this env's generator and rebuild both spaces around it.

    Args:
        seed: Forwarded to ``seeding.np_random``.

    Returns:
        list: A one-element list holding the seed reported by
        ``seeding.np_random``.
    """
    rng, used_seed = seeding.np_random(seed)
    self.np_random = rng

    # Four action components bounded to [-1, +1]; 24 unbounded observation
    # components.
    action_low = np.array([-1, -1, -1, -1])
    action_high = np.array([+1, +1, +1, +1])
    obs_high = np.array([np.inf] * 24)

    self.action_space = spaces.Box(action_low, action_high, np_random=rng)
    self.observation_space = spaces.Box(-obs_high, obs_high, np_random=rng)
    return [used_seed]
|
||||
|
||||
def _destroy(self):
|
||||
if not self.terrain: return
|
||||
self.world.contactListener = None
|
||||
@@ -128,11 +133,11 @@ class BipedalWalker(gym.Env):
|
||||
|
||||
if state==GRASS and not oneshot:
|
||||
velocity = 0.8*velocity + 0.01*np.sign(TERRAIN_HEIGHT - y)
|
||||
if i > TERRAIN_STARTPAD: velocity += np.random.uniform(-1, 1)/SCALE #1
|
||||
if i > TERRAIN_STARTPAD: velocity += self.np_random.uniform(-1, 1)/SCALE #1
|
||||
y += velocity
|
||||
|
||||
elif state==PIT and oneshot:
|
||||
counter = np.random.randint(3, 5)
|
||||
counter = self.np_random.randint(3, 5)
|
||||
poly = [
|
||||
(x, y),
|
||||
(x+TERRAIN_STEP, y),
|
||||
@@ -162,7 +167,7 @@ class BipedalWalker(gym.Env):
|
||||
y -= 4*TERRAIN_STEP
|
||||
|
||||
elif state==STUMP and oneshot:
|
||||
counter = np.random.randint(1, 3)
|
||||
counter = self.np_random.randint(1, 3)
|
||||
poly = [
|
||||
(x, y),
|
||||
(x+counter*TERRAIN_STEP, y),
|
||||
@@ -178,9 +183,9 @@ class BipedalWalker(gym.Env):
|
||||
self.terrain.append(t)
|
||||
|
||||
elif state==STAIRS and oneshot:
|
||||
stair_height = +1 if np.random.ranf() > 0.5 else -1
|
||||
stair_width = np.random.randint(4, 5)
|
||||
stair_steps = np.random.randint(3, 5)
|
||||
stair_height = +1 if self.np_random.rand() > 0.5 else -1
|
||||
stair_width = self.np_random.randint(4, 5)
|
||||
stair_steps = self.np_random.randint(3, 5)
|
||||
original_y = y
|
||||
for s in range(stair_steps):
|
||||
poly = [
|
||||
@@ -207,9 +212,9 @@ class BipedalWalker(gym.Env):
|
||||
self.terrain_y.append(y)
|
||||
counter -= 1
|
||||
if counter==0:
|
||||
counter = np.random.randint(TERRAIN_GRASS/2, TERRAIN_GRASS)
|
||||
counter = self.np_random.randint(TERRAIN_GRASS/2, TERRAIN_GRASS)
|
||||
if state==GRASS and hardcore:
|
||||
state = np.random.randint(1, _STATES_)
|
||||
state = self.np_random.randint(1, _STATES_)
|
||||
oneshot = True
|
||||
else:
|
||||
state = GRASS
|
||||
@@ -240,11 +245,11 @@ class BipedalWalker(gym.Env):
|
||||
# Sorry for the clouds, couldn't resist
|
||||
self.cloud_poly = []
|
||||
for i in range(TERRAIN_LENGTH//20):
|
||||
x = np.random.uniform(0, TERRAIN_LENGTH)*TERRAIN_STEP
|
||||
x = self.np_random.uniform(0, TERRAIN_LENGTH)*TERRAIN_STEP
|
||||
y = VIEWPORT_H/SCALE*3/4
|
||||
poly = [
|
||||
(x+15*TERRAIN_STEP*math.sin(3.14*2*a/5)+np.random.uniform(0,5*TERRAIN_STEP),
|
||||
y+ 5*TERRAIN_STEP*math.cos(3.14*2*a/5)+np.random.uniform(0,5*TERRAIN_STEP) )
|
||||
(x+15*TERRAIN_STEP*math.sin(3.14*2*a/5)+self.np_random.uniform(0,5*TERRAIN_STEP),
|
||||
y+ 5*TERRAIN_STEP*math.cos(3.14*2*a/5)+self.np_random.uniform(0,5*TERRAIN_STEP) )
|
||||
for a in range(5) ]
|
||||
x1 = min( [p[0] for p in poly] )
|
||||
x2 = max( [p[0] for p in poly] )
|
||||
@@ -278,7 +283,7 @@ class BipedalWalker(gym.Env):
|
||||
)
|
||||
self.hull.color1 = (0.5,0.4,0.9)
|
||||
self.hull.color2 = (0.3,0.3,0.5)
|
||||
self.hull.ApplyForceToCenter((np.random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), 0), True)
|
||||
self.hull.ApplyForceToCenter((self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), 0), True)
|
||||
|
||||
self.legs = []
|
||||
self.joints = []
|
||||
|
@@ -7,6 +7,7 @@ from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revolute
|
||||
import gym
|
||||
from gym import spaces
|
||||
from gym.envs.classic_control import rendering
|
||||
from gym.utils import colorize, seeding
|
||||
|
||||
import pyglet
|
||||
from pyglet.gl import *
|
||||
@@ -106,8 +107,7 @@ class CarRacing(gym.Env):
|
||||
}
|
||||
|
||||
def __init__(self):
|
||||
self.action_space = spaces.Box( np.array([-1,0,0]), np.array([+1,+1,+1]) ) # steer, gas, brake
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(STATE_H, STATE_W, 3))
|
||||
self._seed()
|
||||
self.world = Box2D.b2World((0,0), contactListener=FrictionDetector(self))
|
||||
self.viewer = None
|
||||
self.invisible_state_window = None
|
||||
@@ -117,6 +117,12 @@ class CarRacing(gym.Env):
|
||||
self.reward = 0.0
|
||||
self.prev_reward = 0.0
|
||||
|
||||
def _seed(self, seed=None):
    """Create this env's generator and rebuild both spaces around it.

    Args:
        seed: Forwarded to ``seeding.np_random``.

    Returns:
        list: A one-element list holding the seed reported by
        ``seeding.np_random``.
    """
    rng, used_seed = seeding.np_random(seed)
    self.np_random = rng

    # steer, gas, brake
    action_low = np.array([-1, 0, 0])
    action_high = np.array([+1, +1, +1])
    self.action_space = spaces.Box(action_low, action_high, np_random=rng)

    frame_shape = (STATE_H, STATE_W, 3)
    self.observation_space = spaces.Box(low=0, high=255, shape=frame_shape, np_random=rng)
    return [used_seed]
|
||||
|
||||
def _destroy(self):
|
||||
if not self.road: return
|
||||
for t in self.road:
|
||||
@@ -130,8 +136,8 @@ class CarRacing(gym.Env):
|
||||
# Create checkpoints
|
||||
checkpoints = []
|
||||
for c in range(CHECKPOINTS):
|
||||
alpha = 2*math.pi*c/CHECKPOINTS + np.random.uniform(0, 2*math.pi*1/CHECKPOINTS)
|
||||
rad = np.random.uniform(TRACK_RAD/3, TRACK_RAD)
|
||||
alpha = 2*math.pi*c/CHECKPOINTS + self.np_random.uniform(0, 2*math.pi*1/CHECKPOINTS)
|
||||
rad = self.np_random.uniform(TRACK_RAD/3, TRACK_RAD)
|
||||
if c==0:
|
||||
alpha = 0
|
||||
rad = 1.5*TRACK_RAD
|
||||
|
@@ -6,6 +6,7 @@ from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revolute
|
||||
|
||||
import gym
|
||||
from gym import spaces
|
||||
from gym.utils import seeding
|
||||
|
||||
# Rocket trajectory optimization is a classic topic in Optimal Control.
|
||||
#
|
||||
@@ -76,12 +77,9 @@ class LunarLander(gym.Env):
|
||||
}
|
||||
|
||||
def __init__(self):
|
||||
self._seed()
|
||||
self.viewer = None
|
||||
|
||||
high = np.array([np.inf]*8) # useful range is -1 .. +1
|
||||
self.action_space = spaces.Discrete(4) # nop, fire left engine, main engine, right engine
|
||||
self.observation_space = spaces.Box(-high, high)
|
||||
|
||||
self.world = Box2D.b2World()
|
||||
self.moon = None
|
||||
self.lander = None
|
||||
@@ -90,6 +88,16 @@ class LunarLander(gym.Env):
|
||||
self.prev_reward = None
|
||||
self._reset()
|
||||
|
||||
def _seed(self, seed=None):
    """Create this env's generator and rebuild both spaces around it.

    Args:
        seed: Forwarded to ``seeding.np_random``.

    Returns:
        list: A one-element list holding the seed reported by
        ``seeding.np_random``.
    """
    rng, used_seed = seeding.np_random(seed)
    self.np_random = rng

    # nop, fire left engine, main engine, right engine
    self.action_space = spaces.Discrete(4, np_random=rng)

    # Declared bounds are infinite; the useful range is -1 .. +1
    obs_high = np.array([np.inf] * 8)
    self.observation_space = spaces.Box(-obs_high, obs_high, np_random=rng)
    return [used_seed]
|
||||
|
||||
def _destroy(self):
|
||||
if not self.moon: return
|
||||
self.world.contactListener = None
|
||||
@@ -112,7 +120,7 @@ class LunarLander(gym.Env):
|
||||
|
||||
# terrain
|
||||
CHUNKS = 11
|
||||
height = np.random.uniform(0, H/2, size=(CHUNKS+1,) )
|
||||
height = self.np_random.uniform(0, H/2, size=(CHUNKS+1,) )
|
||||
chunk_x = [W/(CHUNKS-1)*i for i in range(CHUNKS)]
|
||||
self.helipad_x1 = chunk_x[CHUNKS//2-1]
|
||||
self.helipad_x2 = chunk_x[CHUNKS//2+1]
|
||||
@@ -153,8 +161,8 @@ class LunarLander(gym.Env):
|
||||
self.lander.color1 = (0.5,0.4,0.9)
|
||||
self.lander.color2 = (0.3,0.3,0.5)
|
||||
self.lander.ApplyForceToCenter( (
|
||||
np.random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM),
|
||||
np.random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM)
|
||||
self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM),
|
||||
self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM)
|
||||
), True)
|
||||
|
||||
self.legs = []
|
||||
@@ -222,7 +230,7 @@ class LunarLander(gym.Env):
|
||||
# Engines
|
||||
tip = (math.sin(self.lander.angle), math.cos(self.lander.angle))
|
||||
side = (-tip[1], tip[0]);
|
||||
dispersion = [np.random.uniform(-1.0, +1.0) / SCALE for _ in range(2)]
|
||||
dispersion = [self.np_random.uniform(-1.0, +1.0) / SCALE for _ in range(2)]
|
||||
if action==2: # Main engine
|
||||
ox = tip[0]*(4/SCALE + 2*dispersion[0]) + side[0]*dispersion[1] # 4 is move a bit downwards, +-2 for randomness
|
||||
oy = -tip[1]*(4/SCALE + 2*dispersion[0]) - side[1]*dispersion[1]
|
||||
@@ -368,4 +376,3 @@ if __name__=="__main__":
|
||||
|
||||
env.render()
|
||||
if done: break
|
||||
|
||||
|
@@ -1,5 +1,6 @@
|
||||
"""classic Acrobot task"""
|
||||
from gym import core, spaces
|
||||
from gym.utils import seeding
|
||||
import numpy as np
|
||||
import time
|
||||
|
||||
@@ -78,14 +79,20 @@ class AcrobotEnv(core.Env):
|
||||
actions_num = 3
|
||||
|
||||
def __init__(self):
|
||||
self.viewer = None
|
||||
self._seed()
|
||||
|
||||
def _seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
|
||||
high = np.array([np.pi, np.pi, self.MAX_VEL_1, self.MAX_VEL_2])
|
||||
low = -high
|
||||
self.observation_space = spaces.Box(low, high)
|
||||
self.action_space = spaces.Discrete(3)
|
||||
self.viewer = None
|
||||
self.observation_space = spaces.Box(low, high, np_random=self.np_random)
|
||||
self.action_space = spaces.Discrete(3, np_random=self.np_random)
|
||||
return [seed]
|
||||
|
||||
def _reset(self):
|
||||
self.state = np.random.uniform(low=-0.1, high=0.1, size=(4,))
|
||||
self.state = self.np_random.uniform(low=-0.1, high=0.1, size=(4,))
|
||||
return self.state
|
||||
|
||||
def _step(self, a):
|
||||
@@ -94,7 +101,7 @@ class AcrobotEnv(core.Env):
|
||||
|
||||
# Add noise to the force action
|
||||
if self.torque_noise_max > 0:
|
||||
torque += np.random.uniform(-self.torque_noise_max, self.torque_noise_max)
|
||||
torque += self.np_random.uniform(-self.torque_noise_max, self.torque_noise_max)
|
||||
|
||||
# Now, augment the state with our force action so it can be passed to
|
||||
# _dsdt
|
||||
|
@@ -7,6 +7,7 @@ import logging
|
||||
import math
|
||||
import gym
|
||||
from gym import spaces
|
||||
from gym.utils import seeding
|
||||
import numpy as np
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -30,15 +31,20 @@ class CartPoleEnv(gym.Env):
|
||||
# Angle at which to fail the episode
|
||||
self.theta_threshold_radians = 12 * 2 * math.pi / 360
|
||||
self.x_threshold = 2.4
|
||||
|
||||
self._seed()
|
||||
self.reset()
|
||||
self.viewer = None
|
||||
|
||||
self.steps_beyond_done = None
|
||||
|
||||
def _seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
# Angle limit set to 2 * theta_threshold_radians so failing observation is still within bounds
|
||||
high = np.array([self.x_threshold, np.inf, self.theta_threshold_radians * 2, np.inf])
|
||||
self.action_space = spaces.Discrete(2)
|
||||
self.observation_space = spaces.Box(-high, high)
|
||||
|
||||
self.steps_beyond_done = None
|
||||
self.action_space = spaces.Discrete(2, np_random=self.np_random)
|
||||
self.observation_space = spaces.Box(-high, high, np_random=self.np_random)
|
||||
return [seed]
|
||||
|
||||
def _step(self, action):
|
||||
action = action
|
||||
@@ -77,7 +83,7 @@ class CartPoleEnv(gym.Env):
|
||||
return np.array(self.state), reward, done, {}
|
||||
|
||||
def _reset(self):
|
||||
self.state = np.random.uniform(low=-0.05, high=0.05, size=(4,))
|
||||
self.state = self.np_random.uniform(low=-0.05, high=0.05, size=(4,))
|
||||
self.steps_beyond_done = None
|
||||
return np.array(self.state)
|
||||
|
||||
|
@@ -5,6 +5,7 @@ https://webdocs.cs.ualberta.ca/~sutton/MountainCar/MountainCar1.cp
|
||||
import math
|
||||
import gym
|
||||
from gym import spaces
|
||||
from gym.utils import seeding
|
||||
import numpy as np
|
||||
|
||||
class MountainCarEnv(gym.Env):
|
||||
@@ -14,10 +15,6 @@ class MountainCarEnv(gym.Env):
|
||||
}
|
||||
|
||||
def __init__(self):
|
||||
self.reset()
|
||||
self.viewer = None
|
||||
self.reset()
|
||||
|
||||
self.min_position = -1.2
|
||||
self.max_position = 0.6
|
||||
self.max_speed = 0.07
|
||||
@@ -26,8 +23,16 @@ class MountainCarEnv(gym.Env):
|
||||
self.low = np.array([self.min_position, -self.max_speed])
|
||||
self.high = np.array([self.max_position, self.max_speed])
|
||||
|
||||
self.action_space = spaces.Discrete(3)
|
||||
self.observation_space = spaces.Box(self.low, self.high)
|
||||
self.viewer = None
|
||||
|
||||
self._seed()
|
||||
self.reset()
|
||||
|
||||
def _seed(self, seed=None):
    """Re-seed the environment's random generator and rebuild its spaces.

    Args:
        seed: Value forwarded unchanged to ``seeding.np_random``.

    Returns:
        list: A one-element list with the seed reported by
        ``seeding.np_random``.
    """
    rng, seed = seeding.np_random(seed)
    self.np_random = rng
    # Both spaces share the freshly created generator.
    self.action_space = spaces.Discrete(3, np_random=rng)
    self.observation_space = spaces.Box(self.low, self.high, np_random=rng)
    return [seed]
|
||||
|
||||
def _step(self, action):
|
||||
# action = np.sign((self.state[0]+math.pi/2) * self.state[1])+1
|
||||
@@ -48,7 +53,7 @@ class MountainCarEnv(gym.Env):
|
||||
return np.array(self.state), reward, done, {}
|
||||
|
||||
def _reset(self):
|
||||
self.state = np.array([np.random.uniform(low=-0.6, high=-0.4), 0])
|
||||
self.state = np.array([self.np_random.uniform(low=-0.6, high=-0.4), 0])
|
||||
return np.array(self.state)
|
||||
|
||||
def _height(self, xs):
|
||||
|
@@ -1,5 +1,6 @@
|
||||
import gym
|
||||
from gym import spaces
|
||||
from gym.utils import seeding
|
||||
import numpy as np
|
||||
from os import path
|
||||
|
||||
@@ -14,10 +15,15 @@ class PendulumEnv(gym.Env):
|
||||
self.max_torque=2.
|
||||
self.dt=.05
|
||||
self.viewer = None
|
||||
self._seed()
|
||||
|
||||
def _seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
|
||||
high = np.array([1., 1., self.max_speed])
|
||||
self.action_space = spaces.Box(low=-self.max_torque, high=self.max_torque, shape=(1,))
|
||||
self.observation_space = spaces.Box(low=-high, high=high)
|
||||
self.action_space = spaces.Box(low=-self.max_torque, high=self.max_torque, shape=(1,), np_random=self.np_random)
|
||||
self.observation_space = spaces.Box(low=-high, high=high, np_random=self.np_random)
|
||||
return [seed]
|
||||
|
||||
def _step(self,u):
|
||||
th, thdot = self.state # th := theta
|
||||
@@ -40,7 +46,7 @@ class PendulumEnv(gym.Env):
|
||||
|
||||
def _reset(self):
|
||||
high = np.array([np.pi, 1])
|
||||
self.state = np.random.uniform(low=-high, high=high)
|
||||
self.state = self.np_random.uniform(low=-high, high=high)
|
||||
self.last_u = None
|
||||
return self._get_obs()
|
||||
|
||||
|
@@ -6,6 +6,7 @@ import numpy as np
|
||||
from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
|
||||
from gym import error, spaces
|
||||
from gym.envs.doom import doom_env
|
||||
from gym.utils import seeding
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -49,10 +50,19 @@ class DoomBasicEnv(doom_env.DoomEnv):
|
||||
self.game.set_doom_map('map01')
|
||||
self.screen_height = 480 # Must match .cfg file
|
||||
self.screen_width = 640 # Must match .cfg file
|
||||
# 3 allowed actions [0, 9, 10] (must match .cfg file)
|
||||
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
|
||||
self.game.set_window_visible(False)
|
||||
self.viewer = None
|
||||
self.game.init()
|
||||
self.game.new_episode()
|
||||
self._seed()
|
||||
|
||||
def _seed(self, seed=None):
    """Seed the spaces' generator and the Doom engine, then rebuild the spaces.

    A generator and its seed come from ``seeding.np_random``; a second
    seed is derived with ``seeding.hash_seed`` (reduced modulo 2**32) and
    handed to ``self.game.set_seed``.

    Args:
        seed: Value forwarded unchanged to ``seeding.np_random``.

    Returns:
        list: ``[generator_seed, engine_seed]``.
    """
    rng, generator_seed = seeding.np_random(seed)
    # The engine gets its own seed, derived from the generator seed.
    engine_seed = seeding.hash_seed(generator_seed + 1) % 2**32
    self.game.set_seed(engine_seed)

    # 3 allowed actions [0, 9, 10] (must match .cfg file)
    action_matrix = np.matrix([[0, 1, 0]] * 3)
    self.action_space = spaces.HighLow(action_matrix, np_random=rng)
    frame_shape = (self.screen_height, self.screen_width, 3)
    self.observation_space = spaces.Box(low=0, high=255, shape=frame_shape, np_random=rng)
    return [generator_seed, engine_seed]
|
||||
|
@@ -6,6 +6,7 @@ import numpy as np
|
||||
from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
|
||||
from gym import error, spaces
|
||||
from gym.envs.doom import doom_env
|
||||
from gym.utils import seeding
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -50,10 +51,20 @@ class DoomCorridorEnv(doom_env.DoomEnv):
|
||||
self.game.set_doom_scenario_path(self.loader.get_scenario_path('deadly_corridor.wad'))
|
||||
self.screen_height = 480 # Must match .cfg file
|
||||
self.screen_width = 640 # Must match .cfg file
|
||||
# action indexes are [0, 9, 10, 12, 13, 14]
|
||||
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 6))
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
|
||||
self.game.set_window_visible(False)
|
||||
self.viewer = None
|
||||
self.game.init()
|
||||
self.game.new_episode()
|
||||
|
||||
self._seed()
|
||||
|
||||
def _seed(self, seed=None):
    """Seed the spaces' generator and the Doom engine, then rebuild the spaces.

    A generator and its seed come from ``seeding.np_random``; a second
    seed is derived with ``seeding.hash_seed`` (reduced modulo 2**32) and
    handed to ``self.game.set_seed``.

    Args:
        seed: Value forwarded unchanged to ``seeding.np_random``.

    Returns:
        list: ``[generator_seed, engine_seed]``.
    """
    rng, generator_seed = seeding.np_random(seed)
    # The engine gets its own seed, derived from the generator seed.
    engine_seed = seeding.hash_seed(generator_seed + 1) % 2**32
    self.game.set_seed(engine_seed)

    # action indexes are [0, 9, 10, 12, 13, 14]
    action_matrix = np.matrix([[0, 1, 0]] * 6)
    self.action_space = spaces.HighLow(action_matrix, np_random=rng)
    frame_shape = (self.screen_height, self.screen_width, 3)
    self.observation_space = spaces.Box(low=0, high=255, shape=frame_shape, np_random=rng)
    return [generator_seed, engine_seed]
|
||||
|
@@ -5,6 +5,7 @@ import numpy as np
|
||||
|
||||
from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
|
||||
from gym import error, spaces
|
||||
from gym.utils import seeding
|
||||
from gym.envs.doom import doom_env
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -40,10 +41,20 @@ class DoomDeathmatchEnv(doom_env.DoomEnv):
|
||||
self.game.set_doom_scenario_path(self.loader.get_scenario_path('deathmatch.wad'))
|
||||
self.screen_height = 480 # Must match .cfg file
|
||||
self.screen_width = 640 # Must match .cfg file
|
||||
# 41 allowed actions (must match .cfg file)
|
||||
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 37 + [[0, 10, 0]] * 5))
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
|
||||
self.game.set_window_visible(False)
|
||||
self.viewer = None
|
||||
self.game.init()
|
||||
self.game.new_episode()
|
||||
|
||||
self._seed()
|
||||
|
||||
def _seed(self, seed=None):
    """Seed the spaces' generator and the Doom engine, then rebuild the spaces.

    A generator and its seed come from ``seeding.np_random``; a second
    seed is derived with ``seeding.hash_seed`` (reduced modulo 2**32) and
    handed to ``self.game.set_seed``.

    Args:
        seed: Value forwarded unchanged to ``seeding.np_random``.

    Returns:
        list: ``[generator_seed, engine_seed]``.
    """
    rng, generator_seed = seeding.np_random(seed)
    # The engine gets its own seed, derived from the generator seed.
    engine_seed = seeding.hash_seed(generator_seed + 1) % 2**32
    self.game.set_seed(engine_seed)

    # 41 allowed actions (must match .cfg file)
    # NOTE(review): the matrix below has 37 + 5 = 42 rows, not 41 --
    # confirm the intended count against the .cfg file.
    action_matrix = np.matrix([[0, 1, 0]] * 37 + [[0, 10, 0]] * 5)
    self.action_space = spaces.HighLow(action_matrix, np_random=rng)
    frame_shape = (self.screen_height, self.screen_width, 3)
    self.observation_space = spaces.Box(low=0, high=255, shape=frame_shape, np_random=rng)
    return [generator_seed, engine_seed]
|
||||
|
@@ -6,6 +6,7 @@ import numpy as np
|
||||
from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
|
||||
from gym import error, spaces
|
||||
from gym.envs.doom import doom_env
|
||||
from gym.utils import seeding
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -49,10 +50,20 @@ class DoomDefendCenterEnv(doom_env.DoomEnv):
|
||||
self.game.set_doom_scenario_path(self.loader.get_scenario_path('defend_the_center.wad'))
|
||||
self.screen_height = 480 # Must match .cfg file
|
||||
self.screen_width = 640 # Must match .cfg file
|
||||
# 3 allowed actions [0, 13, 14] (must match .cfg file)
|
||||
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
|
||||
self.game.set_window_visible(False)
|
||||
self.viewer = None
|
||||
self.game.init()
|
||||
self.game.new_episode()
|
||||
|
||||
self._seed()
|
||||
|
||||
def _seed(self, seed=None):
    """Seed the spaces' generator and the Doom engine, then rebuild the spaces.

    A generator and its seed come from ``seeding.np_random``; a second
    seed is derived with ``seeding.hash_seed`` (reduced modulo 2**32) and
    handed to ``self.game.set_seed``.

    Args:
        seed: Value forwarded unchanged to ``seeding.np_random``.

    Returns:
        list: ``[generator_seed, engine_seed]``.
    """
    rng, generator_seed = seeding.np_random(seed)
    # The engine gets its own seed, derived from the generator seed.
    engine_seed = seeding.hash_seed(generator_seed + 1) % 2**32
    self.game.set_seed(engine_seed)

    # 3 allowed actions [0, 13, 14] (must match .cfg file)
    action_matrix = np.matrix([[0, 1, 0]] * 3)
    self.action_space = spaces.HighLow(action_matrix, np_random=rng)
    frame_shape = (self.screen_height, self.screen_width, 3)
    self.observation_space = spaces.Box(low=0, high=255, shape=frame_shape, np_random=rng)
    return [generator_seed, engine_seed]
|
||||
|
@@ -6,6 +6,7 @@ import numpy as np
|
||||
from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
|
||||
from gym import error, spaces
|
||||
from gym.envs.doom import doom_env
|
||||
from gym.utils import seeding
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -49,10 +50,19 @@ class DoomDefendLineEnv(doom_env.DoomEnv):
|
||||
self.game.set_doom_scenario_path(self.loader.get_scenario_path('defend_the_line.wad'))
|
||||
self.screen_height = 480 # Must match .cfg file
|
||||
self.screen_width = 640 # Must match .cfg file
|
||||
# 3 allowed actions [0, 13, 14] (must match .cfg file)
|
||||
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
|
||||
self.game.set_window_visible(False)
|
||||
self.viewer = None
|
||||
self._seed()
|
||||
self.game.init()
|
||||
self.game.new_episode()
|
||||
|
||||
def _seed(self, seed=None):
    """Seed the spaces' generator and the Doom engine, then rebuild the spaces.

    A generator and its seed come from ``seeding.np_random``; a second
    seed is derived with ``seeding.hash_seed`` (reduced modulo 2**32) and
    handed to ``self.game.set_seed``.

    Args:
        seed: Value forwarded unchanged to ``seeding.np_random``.

    Returns:
        list: ``[generator_seed, engine_seed]``.
    """
    rng, generator_seed = seeding.np_random(seed)
    # The engine gets its own seed, derived from the generator seed.
    engine_seed = seeding.hash_seed(generator_seed + 1) % 2**32
    self.game.set_seed(engine_seed)

    # 3 allowed actions [0, 13, 14] (must match .cfg file)
    action_matrix = np.matrix([[0, 1, 0]] * 3)
    self.action_space = spaces.HighLow(action_matrix, np_random=rng)
    frame_shape = (self.screen_height, self.screen_width, 3)
    self.observation_space = spaces.Box(low=0, high=255, shape=frame_shape, np_random=rng)
    return [generator_seed, engine_seed]
|
||||
|
@@ -6,6 +6,7 @@ import numpy as np
|
||||
from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
|
||||
from gym import error, spaces
|
||||
from gym.envs.doom import doom_env
|
||||
from gym.utils import seeding
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -47,10 +48,20 @@ class DoomHealthGatheringEnv(doom_env.DoomEnv):
|
||||
self.game.set_doom_map('map01')
|
||||
self.screen_height = 480 # Must match .cfg file
|
||||
self.screen_width = 640 # Must match .cfg file
|
||||
# 3 allowed actions [12, 13, 14] (must match .cfg file)
|
||||
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
|
||||
self.game.set_window_visible(False)
|
||||
self.viewer = None
|
||||
self.game.init()
|
||||
self.game.new_episode()
|
||||
|
||||
self._seed()
|
||||
|
||||
def _seed(self, seed=None):
    """Seed the spaces' generator and the Doom engine, then rebuild the spaces.

    A generator and its seed come from ``seeding.np_random``; a second
    seed is derived with ``seeding.hash_seed`` (reduced modulo 2**32) and
    handed to ``self.game.set_seed``.

    Args:
        seed: Value forwarded unchanged to ``seeding.np_random``.

    Returns:
        list: ``[generator_seed, engine_seed]``.
    """
    rng, generator_seed = seeding.np_random(seed)
    # The engine gets its own seed, derived from the generator seed.
    engine_seed = seeding.hash_seed(generator_seed + 1) % 2**32
    self.game.set_seed(engine_seed)

    # 3 allowed actions [12, 13, 14] (must match .cfg file)
    action_matrix = np.matrix([[0, 1, 0]] * 3)
    self.action_space = spaces.HighLow(action_matrix, np_random=rng)
    frame_shape = (self.screen_height, self.screen_width, 3)
    self.observation_space = spaces.Box(low=0, high=255, shape=frame_shape, np_random=rng)
    return [generator_seed, engine_seed]
|
||||
|
@@ -6,6 +6,7 @@ import numpy as np
|
||||
from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
|
||||
from gym import error, spaces
|
||||
from gym.envs.doom import doom_env
|
||||
from gym.utils import seeding
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -46,10 +47,20 @@ class DoomMyWayHomeEnv(doom_env.DoomEnv):
|
||||
self.game.set_doom_scenario_path(self.loader.get_scenario_path('my_way_home.wad'))
|
||||
self.screen_height = 480 # Must match .cfg file
|
||||
self.screen_width = 640 # Must match .cfg file
|
||||
# 3 allowed actions [12, 13, 14] (must match .cfg file)
|
||||
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
|
||||
self.game.set_window_visible(False)
|
||||
self.viewer = None
|
||||
self.game.init()
|
||||
self.game.new_episode()
|
||||
|
||||
self._seed()
|
||||
|
||||
def _seed(self, seed=None):
    """Seed the spaces' generator and the Doom engine, then rebuild the spaces.

    A generator and its seed come from ``seeding.np_random``; a second
    seed is derived with ``seeding.hash_seed`` (reduced modulo 2**32) and
    handed to ``self.game.set_seed``.

    Args:
        seed: Value forwarded unchanged to ``seeding.np_random``.

    Returns:
        list: ``[generator_seed, engine_seed]``.
    """
    rng, generator_seed = seeding.np_random(seed)
    # The engine gets its own seed, derived from the generator seed.
    engine_seed = seeding.hash_seed(generator_seed + 1) % 2**32
    self.game.set_seed(engine_seed)

    # 3 allowed actions [12, 13, 14] (must match .cfg file)
    action_matrix = np.matrix([[0, 1, 0]] * 3)
    self.action_space = spaces.HighLow(action_matrix, np_random=rng)
    frame_shape = (self.screen_height, self.screen_width, 3)
    self.observation_space = spaces.Box(low=0, high=255, shape=frame_shape, np_random=rng)
    return [generator_seed, engine_seed]
|
||||
|
@@ -6,6 +6,7 @@ import numpy as np
|
||||
from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
|
||||
from gym import error, spaces
|
||||
from gym.envs.doom import doom_env
|
||||
from gym.utils import seeding
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -51,10 +52,20 @@ class DoomPredictPositionEnv(doom_env.DoomEnv):
|
||||
self.game.set_doom_map('map01')
|
||||
self.screen_height = 480 # Must match .cfg file
|
||||
self.screen_width = 640 # Must match .cfg file
|
||||
# 3 allowed actions [0, 13, 14] (must match .cfg file)
|
||||
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
|
||||
self.game.set_window_visible(False)
|
||||
self.viewer = None
|
||||
self.game.init()
|
||||
self.game.new_episode()
|
||||
|
||||
self._seed()
|
||||
|
||||
def _seed(self, seed=None):
    """Seed the spaces' generator and the Doom engine, then rebuild the spaces.

    A generator and its seed come from ``seeding.np_random``; a second
    seed is derived with ``seeding.hash_seed`` (reduced modulo 2**32) and
    handed to ``self.game.set_seed``.

    Args:
        seed: Value forwarded unchanged to ``seeding.np_random``.

    Returns:
        list: ``[generator_seed, engine_seed]``.
    """
    rng, generator_seed = seeding.np_random(seed)
    # The engine gets its own seed, derived from the generator seed.
    engine_seed = seeding.hash_seed(generator_seed + 1) % 2**32
    self.game.set_seed(engine_seed)

    # 3 allowed actions [0, 13, 14] (must match .cfg file)
    action_matrix = np.matrix([[0, 1, 0]] * 3)
    self.action_space = spaces.HighLow(action_matrix, np_random=rng)
    frame_shape = (self.screen_height, self.screen_width, 3)
    self.observation_space = spaces.Box(low=0, high=255, shape=frame_shape, np_random=rng)
    return [generator_seed, engine_seed]
|
||||
|
@@ -6,6 +6,7 @@ import numpy as np
|
||||
from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
|
||||
from gym import error, spaces
|
||||
from gym.envs.doom import doom_env
|
||||
from gym.utils import seeding
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -44,10 +45,21 @@ class DoomTakeCoverEnv(doom_env.DoomEnv):
|
||||
self.game.set_doom_map('map01')
|
||||
self.screen_height = 480 # Must match .cfg file
|
||||
self.screen_width = 640 # Must match .cfg file
|
||||
# 2 allowed actions [9, 10] (must match .cfg file)
|
||||
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 2))
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
|
||||
self.game.set_window_visible(False)
|
||||
self.viewer = None
|
||||
self.game.init()
|
||||
self.game.new_episode()
|
||||
|
||||
self._seed()
|
||||
|
||||
def _seed(self, seed=None):
    """Seed the spaces' generator and the Doom engine, then rebuild the spaces.

    A generator and its seed come from ``seeding.np_random``; a second
    seed is derived with ``seeding.hash_seed`` (reduced modulo 2**32) and
    handed to ``self.game.set_seed``.

    Args:
        seed: Value forwarded unchanged to ``seeding.np_random``.

    Returns:
        list: ``[generator_seed, engine_seed]``.
    """
    rng, generator_seed = seeding.np_random(seed)
    # The engine gets its own seed, derived from the generator seed.
    engine_seed = seeding.hash_seed(generator_seed + 1) % 2**32
    self.game.set_seed(engine_seed)

    # 2 allowed actions [9, 10] (must match .cfg file)
    action_matrix = np.matrix([[0, 1, 0]] * 2)
    self.action_space = spaces.HighLow(action_matrix, np_random=rng)
    frame_shape = (self.screen_height, self.screen_width, 3)
    self.observation_space = spaces.Box(low=0, high=255, shape=frame_shape, np_random=rng)

    return [generator_seed, engine_seed]
|
||||
|
@@ -36,8 +36,8 @@ class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
|
||||
])
|
||||
|
||||
def reset_model(self):
|
||||
qpos = self.init_qpos + np.random.uniform(size=self.model.nq,low=-.1,high=.1)
|
||||
qvel = self.init_qvel + np.random.randn(self.model.nv) * .1
|
||||
qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq,low=-.1,high=.1)
|
||||
qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1
|
||||
self.set_state(qpos, qvel)
|
||||
return self._get_obs()
|
||||
|
||||
|
@@ -25,8 +25,8 @@ class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle):
|
||||
])
|
||||
|
||||
def reset_model(self):
|
||||
qpos = self.init_qpos + np.random.uniform(low=-.1, high=.1, size=self.model.nq)
|
||||
qvel = self.init_qvel + np.random.randn(self.model.nv) * .1
|
||||
qpos = self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq)
|
||||
qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1
|
||||
self.set_state(qpos, qvel)
|
||||
return self._get_obs()
|
||||
|
||||
|
@@ -28,8 +28,8 @@ class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
|
||||
])
|
||||
|
||||
def reset_model(self):
|
||||
qpos = self.init_qpos + np.random.uniform(low=-.005, high=.005, size=self.model.nq)
|
||||
qvel = self.init_qvel + np.random.uniform(low=-.005, high=.005, size=self.model.nv)
|
||||
qpos = self.init_qpos + self.np_random.uniform(low=-.005, high=.005, size=self.model.nq)
|
||||
qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
|
||||
self.set_state(qpos, qvel)
|
||||
return self._get_obs()
|
||||
|
||||
|
@@ -39,8 +39,8 @@ class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
|
||||
def reset_model(self):
|
||||
c = 0.01
|
||||
self.set_state(
|
||||
self.init_qpos + np.random.uniform(low=-c, high=c, size=self.model.nq),
|
||||
self.init_qvel + np.random.uniform(low=-c, high=c, size=self.model.nv,)
|
||||
self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
|
||||
self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv,)
|
||||
)
|
||||
return self._get_obs()
|
||||
|
||||
|
@@ -40,8 +40,8 @@ class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
|
||||
def reset_model(self):
|
||||
c = 0.01
|
||||
self.set_state(
|
||||
self.init_qpos + np.random.uniform(low=-c, high=c, size=self.model.nq),
|
||||
self.init_qvel + np.random.uniform(low=-c, high=c, size=self.model.nv,)
|
||||
self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
|
||||
self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv,)
|
||||
)
|
||||
return self._get_obs()
|
||||
|
||||
|
@@ -31,8 +31,8 @@ class InvertedDoublePendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
|
||||
|
||||
def reset_model(self):
|
||||
self.set_state(
|
||||
self.init_qpos + np.random.uniform(low=-.1, high=.1, size=self.model.nq),
|
||||
self.init_qvel + np.random.randn(self.model.nv) * .1
|
||||
self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq),
|
||||
self.init_qvel + self.np_random.randn(self.model.nv) * .1
|
||||
)
|
||||
return self._get_obs()
|
||||
|
||||
|
@@ -16,8 +16,8 @@ class InvertedPendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
|
||||
return ob, reward, done, {}
|
||||
|
||||
def reset_model(self):
|
||||
qpos = self.init_qpos + np.random.uniform(size=self.model.nq, low=-0.01, high=0.01)
|
||||
qvel = self.init_qvel + np.random.uniform(size=self.model.nv, low=-0.01, high=0.01)
|
||||
qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq, low=-0.01, high=0.01)
|
||||
qvel = self.init_qvel + self.np_random.uniform(size=self.model.nv, low=-0.01, high=0.01)
|
||||
self.set_state(qpos, qvel)
|
||||
return self._get_obs()
|
||||
|
||||
|
@@ -1,6 +1,7 @@
|
||||
import os
|
||||
|
||||
from gym import error, spaces
|
||||
from gym.utils import seeding
|
||||
import numpy as np
|
||||
from os import path
|
||||
import gym
|
||||
@@ -13,9 +14,7 @@ except ImportError as e:
|
||||
raise error.DependencyNotInstalled("{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(e))
|
||||
|
||||
class MujocoEnv(gym.Env):
|
||||
|
||||
"""
|
||||
Superclass of MuJoCo environments.
|
||||
"""Superclass for all MuJoCo environments.
|
||||
"""
|
||||
|
||||
def __init__(self, model_path, frame_skip):
|
||||
@@ -40,15 +39,20 @@ class MujocoEnv(gym.Env):
|
||||
observation, _reward, done, _info = self._step(np.zeros(self.model.nu))
|
||||
assert not done
|
||||
self.obs_dim = observation.size
|
||||
self._seed()
|
||||
|
||||
def _seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
|
||||
bounds = self.model.actuator_ctrlrange.copy()
|
||||
low = bounds[:, 0]
|
||||
high = bounds[:, 1]
|
||||
self.action_space = spaces.Box(low, high)
|
||||
self.action_space = spaces.Box(low, high, np_random=self.np_random)
|
||||
|
||||
high = np.inf*np.ones(self.obs_dim)
|
||||
low = -high
|
||||
self.observation_space = spaces.Box(low, high)
|
||||
self.observation_space = spaces.Box(low, high, np_random=self.np_random)
|
||||
return [seed]
|
||||
|
||||
# methods to override:
|
||||
# ----------------------------
|
||||
|
@@ -21,12 +21,12 @@ class ReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
|
||||
self.viewer.cam.trackbodyid=0
|
||||
|
||||
def reset_model(self):
|
||||
qpos = np.random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
|
||||
qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
|
||||
while True:
|
||||
self.goal = np.random.uniform(low=-.2, high=.2, size=2)
|
||||
self.goal = self.np_random.uniform(low=-.2, high=.2, size=2)
|
||||
if np.linalg.norm(self.goal) < 2: break
|
||||
qpos[-2:] = self.goal
|
||||
qvel = self.init_qvel + np.random.uniform(low=-.005, high=.005, size=self.model.nv)
|
||||
qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
|
||||
qvel[-2:] = 0
|
||||
self.set_state(qpos, qvel)
|
||||
return self._get_obs()
|
||||
|
@@ -26,7 +26,7 @@ class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
|
||||
|
||||
def reset_model(self):
|
||||
self.set_state(
|
||||
self.init_qpos + np.random.uniform(low=-.1, high=.1, size=self.model.nq),
|
||||
self.init_qvel + np.random.uniform(low=-.1, high=.1, size=self.model.nv)
|
||||
self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq),
|
||||
self.init_qvel + self.np_random.uniform(low=-.1, high=.1, size=self.model.nv)
|
||||
)
|
||||
return self._get_obs()
|
||||
|
@@ -28,8 +28,8 @@ class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
|
||||
|
||||
def reset_model(self):
|
||||
self.set_state(
|
||||
self.init_qpos + np.random.uniform(low=-.005, high=.005, size=self.model.nq),
|
||||
self.init_qvel + np.random.uniform(low=-.005, high=.005, size=self.model.nv)
|
||||
self.init_qpos + self.np_random.uniform(low=-.005, high=.005, size=self.model.nq),
|
||||
self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
|
||||
)
|
||||
return self._get_obs()
|
||||
|
||||
|
@@ -24,6 +24,7 @@ class EnvSpec(object):
|
||||
trials (int): The number of trials to average reward over
|
||||
reward_threshold (Optional[int]): The reward threshold before the task is considered solved
|
||||
kwargs (dict): The kwargs to pass to the environment class
|
||||
nondeterministic (bool): Whether this environment is non-deterministic even after seeding
|
||||
|
||||
Attributes:
|
||||
id (str): The official environment ID
|
||||
@@ -31,7 +32,7 @@ class EnvSpec(object):
|
||||
trials (int): The number of trials run in official evaluation
|
||||
"""
|
||||
|
||||
def __init__(self, id, entry_point=None, timestep_limit=1000, trials=100, reward_threshold=None, kwargs=None):
|
||||
def __init__(self, id, entry_point=None, timestep_limit=1000, trials=100, reward_threshold=None, kwargs=None, nondeterministic=False):
|
||||
self.id = id
|
||||
# Evaluation parameters
|
||||
self.timestep_limit = timestep_limit
|
||||
@@ -46,6 +47,7 @@ class EnvSpec(object):
|
||||
self._env_name = match.group(1)
|
||||
self._entry_point = entry_point
|
||||
self._kwargs = {} if kwargs is None else kwargs
|
||||
self._nondeterministic = nondeterministic
|
||||
|
||||
def make(self):
|
||||
"""Instantiates an instance of the environment with appropriate kwargs"""
|
||||
|
77
gym/envs/tests/test_determinism.py
Normal file
77
gym/envs/tests/test_determinism.py
Normal file
@@ -0,0 +1,77 @@
|
||||
import numpy as np
|
||||
from nose2 import tools
|
||||
import os
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
import gym
|
||||
from gym import envs
|
||||
|
||||
specs = [spec for spec in envs.registry.all() if spec._entry_point is not None]
|
||||
@tools.params(*specs)
def test_env(spec):
    """Check that two instances of an environment seeded alike behave alike.

    Two environments are built from ``spec`` and both seeded with 0. Samples
    drawn from their action and observation spaces must match. Unless the
    spec is flagged as nondeterministic, the initial observation and the
    responses to the first four sampled actions must match as well.

    Args:
        spec: A registry entry exposing ``_entry_point``, ``id``, ``make()``
            and the ``_nondeterministic`` flag.
    """
    # Skip mujoco tests for pull request CI
    skip_mujoco = not (os.environ.get('MUJOCO_KEY_BUNDLE') or os.path.exists(os.path.expanduser('~/.mujoco')))
    if skip_mujoco and spec._entry_point.startswith('gym.envs.mujoco:'):
        return

    # TODO(jonas 2016-05-11): Re-enable these tests after fixing box2d-py
    if spec._entry_point.startswith('gym.envs.box2d:'):
        logger.warning("Skipping tests for box2d env {}".format(spec._entry_point))
        return

    env1 = spec.make()
    env1.seed(0)
    action_samples1 = [env1.action_space.sample() for i in range(4)]
    observation_samples1 = [env1.observation_space.sample() for i in range(4)]
    initial_observation1 = env1.reset()
    step_responses1 = [env1.step(action) for action in action_samples1]
    env1.close()

    env2 = spec.make()
    env2.seed(0)
    action_samples2 = [env2.action_space.sample() for i in range(4)]
    observation_samples2 = [env2.observation_space.sample() for i in range(4)]
    initial_observation2 = env2.reset()
    step_responses2 = [env2.step(action) for action in action_samples2]
    env2.close()

    for i, (action_sample1, action_sample2) in enumerate(zip(action_samples1, action_samples2)):
        assert np.array_equal(action_sample1, action_sample2), '[{}] action_sample1: {}, action_sample2: {}'.format(i, action_sample1, action_sample2)

    for i, (observation_sample1, observation_sample2) in enumerate(zip(observation_samples1, observation_samples2)):
        # Allows for NaNs
        np.testing.assert_array_equal(observation_sample1, observation_sample2)

    # Don't check rollout equality if it's a nondeterministic
    # environment. EnvSpec stores the constructor flag as
    # `_nondeterministic`; the previous spelling `nondetermistic` named an
    # attribute that is never set.
    if spec._nondeterministic:
        return

    assert np.array_equal(initial_observation1, initial_observation2), 'initial_observation1: {}, initial_observation2: {}'.format(initial_observation1, initial_observation2)

    for i, ((o1, r1, d1, i1), (o2, r2, d2, i2)) in enumerate(zip(step_responses1, step_responses2)):
        assert_equals(o1, o2, '[{}] '.format(i))
        assert r1 == r2, '[{}] r1: {}, r2: {}'.format(i, r1, r2)
        assert d1 == d2, '[{}] d1: {}, d2: {}'.format(i, d1, d2)

        # Go returns a Pachi game board in info, which doesn't
        # properly check equality. For now, we hack around this by
        # just skipping Go.
        if spec.id not in ['Go9x9-v0', 'Go19x19-v0']:
            assert_equals(i1, i2, '[{}] '.format(i))
|
||||
|
||||
def assert_equals(a, b, prefix=None):
    """Assert that ``a`` and ``b`` are equal, recursing into dictionaries.

    Args:
        a: First value to compare.
        b: Second value to compare; must have exactly the same type as ``a``.
        prefix (Optional[str]): Text prepended to every failure message
            (e.g. a step index). ``None`` is treated as an empty string.

    Raises:
        AssertionError: If the types differ, the dict keys differ (order
            included), or any compared value differs.
    """
    # Normalise once so messages never start with the literal text "None".
    prefix = '' if prefix is None else prefix

    assert type(a) == type(b), "{}Differing types: {} and {}".format(prefix, a, b)
    if isinstance(a, dict):
        assert list(a.keys()) == list(b.keys()), "{}Key sets differ: {} and {}".format(prefix, a, b)

        for k in a.keys():
            # Carry the prefix down so nested failures keep their context.
            assert_equals(a[k], b[k], prefix)
    elif isinstance(a, np.ndarray):
        # assert_array_equal treats NaNs in matching positions as equal.
        np.testing.assert_array_equal(a, b, err_msg=prefix)
    else:
        assert a == b, "{}Differing values: {} and {}".format(prefix, a, b)
|
@@ -15,7 +15,7 @@ specs = [spec for spec in envs.registry.all() if spec._entry_point is not None]
|
||||
@tools.params(*specs)
|
||||
def test_env(spec):
|
||||
# Skip mujoco tests for pull request CI
|
||||
skip_mujoco = not os.environ.get('MUJOCO_KEY_BUNDLE')
|
||||
skip_mujoco = not (os.environ.get('MUJOCO_KEY_BUNDLE') or os.path.exists(os.path.expanduser('~/.mujoco')))
|
||||
if skip_mujoco and spec._entry_point.startswith('gym.envs.mujoco:'):
|
||||
return
|
||||
|
||||
|
@@ -1,6 +1,6 @@
|
||||
import gym
|
||||
import random
|
||||
from gym import spaces
|
||||
from gym.utils import seeding
|
||||
|
||||
def cmp(a, b):
|
||||
return (a > b) - (a < b)
|
||||
@@ -9,12 +9,12 @@ def cmp(a, b):
|
||||
deck = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 10, 10]
|
||||
|
||||
|
||||
def draw_card():
|
||||
return random.choice(deck)
|
||||
def draw_card(np_random):
|
||||
return np_random.choice(deck)
|
||||
|
||||
|
||||
def draw_hand():
|
||||
return [draw_card(), draw_card()]
|
||||
def draw_hand(np_random):
|
||||
return [draw_card(np_random), draw_card(np_random)]
|
||||
|
||||
|
||||
def usable_ace(hand): # Does this hand have a usable ace?
|
||||
@@ -71,20 +71,27 @@ class BlackjackEnv(gym.Env):
|
||||
https://webdocs.cs.ualberta.ca/~sutton/book/the-book.html
|
||||
"""
|
||||
def __init__(self, natural=False):
|
||||
self.action_space = spaces.Discrete(2)
|
||||
self.observation_space = spaces.Tuple((spaces.Discrete(32),
|
||||
spaces.Discrete(11),
|
||||
spaces.Discrete(2)))
|
||||
self._seed()
|
||||
|
||||
# Flag to payout 1.5 on a "natural" blackjack win, like casino rules
|
||||
# Ref: http://www.bicyclecards.com/how-to-play/blackjack/
|
||||
self.natural = natural
|
||||
# Start the first game
|
||||
self._reset()
|
||||
|
||||
def _seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
self.action_space = spaces.Discrete(2, np_random=self.np_random)
|
||||
self.observation_space = spaces.Tuple((
|
||||
spaces.Discrete(32, np_random=self.np_random),
|
||||
spaces.Discrete(11, np_random=self.np_random),
|
||||
spaces.Discrete(2, np_random=self.np_random)))
|
||||
return [seed]
|
||||
|
||||
def _step(self, action):
|
||||
assert(self.action_space.contains(action))
|
||||
if action: # hit: add a card to players hand and return
|
||||
self.player.append(draw_card())
|
||||
self.player.append(draw_card(self.np_random))
|
||||
if is_bust(self.player):
|
||||
done = True
|
||||
reward = -1
|
||||
@@ -94,7 +101,7 @@ class BlackjackEnv(gym.Env):
|
||||
else: # stick: play out the dealers hand, and score
|
||||
done = True
|
||||
while sum_hand(self.dealer) < 17:
|
||||
self.dealer.append(draw_card())
|
||||
self.dealer.append(draw_card(self.np_random))
|
||||
reward = cmp(score(self.player), score(self.dealer))
|
||||
if self.natural and is_natural(self.player) and reward == 1:
|
||||
reward = 1.5
|
||||
@@ -104,6 +111,6 @@ class BlackjackEnv(gym.Env):
|
||||
return (sum_hand(self.player), self.dealer[0], usable_ace(self.player))
|
||||
|
||||
def _reset(self):
|
||||
self.dealer = draw_hand()
|
||||
self.player = draw_hand()
|
||||
self.dealer = draw_hand(self.np_random)
|
||||
self.player = draw_hand(self.np_random)
|
||||
return self._get_obs()
|
||||
|
@@ -1,15 +1,15 @@
|
||||
from gym import Env
|
||||
from gym import spaces
|
||||
from gym import Env, spaces
|
||||
from gym.utils import seeding
|
||||
import numpy as np
|
||||
|
||||
def categorical_sample(prob_n):
|
||||
def categorical_sample(prob_n, np_random):
|
||||
"""
|
||||
Sample from categorical distribution
|
||||
Each row specifies class probabilities
|
||||
"""
|
||||
prob_n = np.asarray(prob_n)
|
||||
csprob_n = np.cumsum(prob_n)
|
||||
return (csprob_n > np.random.rand()).argmax()
|
||||
return (csprob_n > np_random.rand()).argmax()
|
||||
|
||||
|
||||
class DiscreteEnv(Env):
|
||||
@@ -28,24 +28,27 @@ class DiscreteEnv(Env):
|
||||
|
||||
"""
|
||||
def __init__(self, nS, nA, P, isd):
|
||||
self.action_space = spaces.Discrete(nA)
|
||||
self.observation_space = spaces.Discrete(nS)
|
||||
self.nA = nA
|
||||
self.P = P
|
||||
self.isd = isd
|
||||
self.lastaction=None # for rendering
|
||||
self.nS = nS
|
||||
self.nA = nA
|
||||
|
||||
@property
|
||||
def nS(self):
|
||||
return self.observation_space.n
|
||||
self._seed()
|
||||
|
||||
def _seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
self.action_space = spaces.Discrete(self.nA, np_random=self.np_random)
|
||||
self.observation_space = spaces.Discrete(self.nS, np_random=self.np_random)
|
||||
return [seed]
|
||||
|
||||
def _reset(self):
|
||||
self.s = categorical_sample(self.isd)
|
||||
self.s = categorical_sample(self.isd, self.np_random)
|
||||
return self.s
|
||||
|
||||
def _step(self, a):
|
||||
transitions = self.P[self.s][a]
|
||||
i = categorical_sample([t[0] for t in transitions])
|
||||
i = categorical_sample([t[0] for t in transitions], self.np_random)
|
||||
p, s, r, d= transitions[i]
|
||||
self.s = s
|
||||
self.lastaction=a
|
||||
|
@@ -111,7 +111,7 @@ class FrozenLakeEnv(discrete.DiscreteEnv):
|
||||
rew = float(newletter == b'G')
|
||||
li.append((1.0, newstate, rew, done))
|
||||
|
||||
super(FrozenLakeEnv, self).__init__(nrow * ncol, 4, P, isd)
|
||||
super(FrozenLakeEnv, self).__init__(nS, nA, P, isd)
|
||||
|
||||
def _render(self, mode='human', close=False):
|
||||
if close:
|
||||
|
@@ -1,7 +1,6 @@
|
||||
import gym
|
||||
import random
|
||||
from gym import spaces
|
||||
|
||||
from gym.utils import seeding
|
||||
|
||||
class NChainEnv(gym.Env):
|
||||
"""n-Chain environment
|
||||
@@ -27,13 +26,18 @@ class NChainEnv(gym.Env):
|
||||
self.slip = slip # probability of 'slipping' an action
|
||||
self.small = small # payout for 'backwards' action
|
||||
self.large = large # payout at end of chain for 'forwards' action
|
||||
self.action_space = spaces.Discrete(2)
|
||||
self.observation_space = spaces.Discrete(n)
|
||||
self.state = 0 # Start at beginning of the chain
|
||||
self._seed()
|
||||
|
||||
def _seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
self.action_space = spaces.Discrete(2, np_random=self.np_random)
|
||||
self.observation_space = spaces.Discrete(self.n, np_random=self.np_random)
|
||||
return [seed]
|
||||
|
||||
def _step(self, action):
|
||||
assert(self.action_space.contains(action))
|
||||
if random.random() < self.slip:
|
||||
if self.np_random.rand() < self.slip:
|
||||
action = not action # agent slipped, reverse action taken
|
||||
if action: # 'backwards': go back to the beginning, get small reward
|
||||
reward = self.small
|
||||
|
@@ -2,6 +2,7 @@ import numpy as np
|
||||
|
||||
import gym
|
||||
from gym import spaces
|
||||
from gym.utils import seeding
|
||||
|
||||
|
||||
class RouletteEnv(gym.Env):
|
||||
@@ -17,8 +18,15 @@ class RouletteEnv(gym.Env):
|
||||
"""
|
||||
def __init__(self, spots=37):
|
||||
self.n = spots + 1
|
||||
self.action_space = spaces.Discrete(self.n)
|
||||
self.observation_space = spaces.Discrete(1)
|
||||
self._seed()
|
||||
|
||||
def _seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
|
||||
self.action_space = spaces.Discrete(self.n, np_random=self.np_random)
|
||||
self.observation_space = spaces.Discrete(1, np_random=self.np_random)
|
||||
|
||||
return [seed]
|
||||
|
||||
def _step(self, action):
|
||||
assert(action >= 0 and action < self.n)
|
||||
@@ -27,7 +35,7 @@ class RouletteEnv(gym.Env):
|
||||
return 0, 0, True, {}
|
||||
|
||||
# N.B. np.random.randint draws from [A, B) while random.randint draws from [A,B]
|
||||
val = np.random.randint(0, self.n - 1)
|
||||
val = self.np_random.randint(0, self.n - 1)
|
||||
if val == action == 0:
|
||||
reward = self.n - 2.0
|
||||
elif val != 0 and action != 0 and val % 2 == action % 2:
|
||||
|
@@ -84,9 +84,6 @@ class TaxiEnv(discrete.DiscreteEnv):
|
||||
isd /= isd.sum()
|
||||
discrete.DiscreteEnv.__init__(self, nS, nA, P, isd)
|
||||
|
||||
self.observation_space = spaces.Discrete(500)
|
||||
self.action_space = spaces.Discrete(6)
|
||||
|
||||
def encode(self, taxirow, taxicol, passloc, destidx):
|
||||
# (5) 5, 5, 4
|
||||
i = taxirow
|
||||
|
10
gym/error.py
10
gym/error.py
@@ -12,8 +12,14 @@ class UnregisteredEnv(Error):
|
||||
pass
|
||||
|
||||
class DeprecatedEnv(Error):
|
||||
"""Raised when the user requests an env from the registry with an older version
|
||||
number than the latest env with the same name.
|
||||
"""Raised when the user requests an env from the registry with an
|
||||
older version number than the latest env with the same name.
|
||||
"""
|
||||
pass
|
||||
|
||||
class UnseedableEnv(Error):
|
||||
"""Raised when the user tries to seed an env that does not support
|
||||
seeding.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
@@ -10,7 +10,7 @@ import weakref
|
||||
|
||||
from gym import error, version
|
||||
from gym.monitoring import stats_recorder, video_recorder
|
||||
from gym.utils import atomic_write, closer
|
||||
from gym.utils import atomic_write, closer, seeding
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -83,8 +83,9 @@ class Monitor(object):
|
||||
self.enabled = False
|
||||
self.episode_id = 0
|
||||
self._monitor_id = None
|
||||
self.seeds = None
|
||||
|
||||
def start(self, directory, video_callable=None, force=False, resume=False):
|
||||
def start(self, directory, video_callable=None, force=False, resume=False, seed=None):
|
||||
"""Start monitoring.
|
||||
|
||||
Args:
|
||||
@@ -92,6 +93,7 @@ class Monitor(object):
|
||||
video_callable (Optional[function]): function that takes in the index of the episode and outputs a boolean, indicating whether we should record a video on this episode. The default (for video_callable is None) is to take perfect cubes.
|
||||
force (bool): Clear out existing training data from this directory (by deleting every file prefixed with "openaigym.").
|
||||
resume (bool): Retain the training data already in this directory, which will be merged with our new data
|
||||
seed (Optional[int]): The seed to run this environment with. By default, a random seed will be chosen.
|
||||
"""
|
||||
if self.env.spec is None:
|
||||
logger.warn("Trying to monitor an environment which has no 'spec' set. This usually means you did not create it via 'gym.make', and is recommended only for advanced users.")
|
||||
@@ -116,6 +118,8 @@ class Monitor(object):
|
||||
You should use a unique directory for each training run, or use 'force=True' to automatically clear previous monitor files.'''.format(directory, ', '.join(training_manifests[:5])))
|
||||
|
||||
|
||||
self._monitor_id = monitor_closer.register(self)
|
||||
|
||||
self.enabled = True
|
||||
self.directory = os.path.abspath(directory)
|
||||
# We use the 'openai-gym' prefix to determine if a file is
|
||||
@@ -126,7 +130,9 @@ class Monitor(object):
|
||||
self.configure(video_callable=video_callable)
|
||||
if not os.path.exists(directory):
|
||||
os.mkdir(directory)
|
||||
self._monitor_id = monitor_closer.register(self)
|
||||
|
||||
seeds = self.env.seed(seed)
|
||||
self.seeds = seeds
|
||||
|
||||
def flush(self):
|
||||
"""Flush all relevant monitor information to disk."""
|
||||
@@ -146,6 +152,7 @@ class Monitor(object):
|
||||
'videos': [(os.path.basename(v), os.path.basename(m))
|
||||
for v, m in self.videos],
|
||||
'env_info': self._env_info(),
|
||||
'seeds': self.seeds,
|
||||
}, f)
|
||||
|
||||
def close(self):
|
||||
@@ -249,13 +256,12 @@ class Monitor(object):
|
||||
return self.video_callable(self.episode_id)
|
||||
|
||||
def _env_info(self):
|
||||
if self.env.spec:
|
||||
return {
|
||||
'env_id': self.env.spec.id,
|
||||
env_info = {
|
||||
'gym_version': version.VERSION,
|
||||
}
|
||||
else:
|
||||
return {}
|
||||
if self.env.spec:
|
||||
env_info['env_id'] = self.env.spec.id
|
||||
return env_info
|
||||
|
||||
def __del__(self):
|
||||
# Make sure we've closed up shop when garbage collecting
|
||||
@@ -274,6 +280,8 @@ def load_results(training_dir):
|
||||
# Load up stats + video files
|
||||
stats_files = []
|
||||
videos = []
|
||||
main_seeds = []
|
||||
seeds = []
|
||||
env_infos = []
|
||||
|
||||
for manifest in manifests:
|
||||
@@ -284,6 +292,13 @@ def load_results(training_dir):
|
||||
videos += [(os.path.join(training_dir, v), os.path.join(training_dir, m))
|
||||
for v, m in contents['videos']]
|
||||
env_infos.append(contents['env_info'])
|
||||
current_seeds = contents.get('seeds', [])
|
||||
seeds += current_seeds
|
||||
if current_seeds:
|
||||
main_seeds.append(current_seeds[0])
|
||||
else:
|
||||
# current_seeds could be None or []
|
||||
main_seeds.append(None)
|
||||
|
||||
env_info = collapse_env_infos(env_infos, training_dir)
|
||||
timestamps, episode_lengths, episode_rewards, initial_reset_timestamp = merge_stats_files(stats_files)
|
||||
@@ -296,6 +311,8 @@ def load_results(training_dir):
|
||||
'episode_rewards': episode_rewards,
|
||||
'initial_reset_timestamp': initial_reset_timestamp,
|
||||
'videos': videos,
|
||||
'main_seeds': main_seeds,
|
||||
'seeds': seeds,
|
||||
}
|
||||
|
||||
def merge_stats_files(stats_files):
|
||||
|
@@ -19,7 +19,7 @@ class StatsRecorder(object):
|
||||
self.done = None
|
||||
self.closed = False
|
||||
|
||||
filename = '{}.{}.stats.json'.format(self.file_prefix, os.getpid())
|
||||
filename = '{}.stats.json'.format(self.file_prefix)
|
||||
self.path = os.path.join(self.directory, filename)
|
||||
|
||||
def before_step(self, action):
|
||||
|
@@ -89,6 +89,8 @@ def upload_training_data(training_dir, api_key=None):
|
||||
timestamps = results['timestamps']
|
||||
episode_lengths = results['episode_lengths']
|
||||
episode_rewards = results['episode_rewards']
|
||||
main_seeds = results['main_seeds']
|
||||
seeds = results['seeds']
|
||||
videos = results['videos']
|
||||
|
||||
env_id = env_info['env_id']
|
||||
@@ -96,7 +98,7 @@ def upload_training_data(training_dir, api_key=None):
|
||||
|
||||
# Do the relevant uploads
|
||||
if len(episode_lengths) > 0:
|
||||
training_episode_batch = upload_training_episode_batch(episode_lengths, episode_rewards, timestamps, api_key, env_id=env_id)
|
||||
training_episode_batch = upload_training_episode_batch(episode_lengths, episode_rewards, timestamps, main_seeds, seeds, api_key, env_id=env_id)
|
||||
else:
|
||||
training_episode_batch = None
|
||||
|
||||
@@ -112,13 +114,15 @@ def upload_training_data(training_dir, api_key=None):
|
||||
|
||||
return env_info, training_episode_batch, training_video
|
||||
|
||||
def upload_training_episode_batch(episode_lengths, episode_rewards, timestamps, api_key=None, env_id=None):
|
||||
def upload_training_episode_batch(episode_lengths, episode_rewards, timestamps, main_seeds, seeds, api_key=None, env_id=None):
|
||||
logger.info('[%s] Uploading %d episodes of training data', env_id, len(episode_lengths))
|
||||
file_upload = resource.FileUpload.create(purpose='episode_batch', api_key=api_key)
|
||||
file_upload.put({
|
||||
'episode_lengths': episode_lengths,
|
||||
'episode_rewards': episode_rewards,
|
||||
'timestamps': timestamps,
|
||||
'main_seeds': main_seeds,
|
||||
'seeds': seeds,
|
||||
})
|
||||
return file_upload
|
||||
|
||||
|
@@ -6,12 +6,15 @@ class Box(Space):
|
||||
A box in R^n.
|
||||
I.e., each coordinate is bounded.
|
||||
"""
|
||||
def __init__(self, low, high, shape=None):
|
||||
def __init__(self, low, high, shape=None, np_random=None):
|
||||
"""
|
||||
Two kinds of valid input:
|
||||
Box(-1.0, 1.0, (3,4)) # low and high are scalars, and shape is provided
|
||||
Box(np.array([-1.0,-2.0]), np.array([2.0,4.0])) # low and high are arrays of the same shape
|
||||
"""
|
||||
if np_random is None:
|
||||
np_random = np.random
|
||||
self.np_random = np_random
|
||||
if shape is None:
|
||||
assert low.shape == high.shape
|
||||
self.low = low
|
||||
@@ -21,7 +24,7 @@ class Box(Space):
|
||||
self.low = low + np.zeros(shape)
|
||||
self.high = high + np.zeros(shape)
|
||||
def sample(self):
|
||||
return np.random.uniform(low=self.low, high=self.high, size=self.low.shape)
|
||||
return self.np_random.uniform(low=self.low, high=self.high, size=self.low.shape)
|
||||
def contains(self, x):
|
||||
return x.shape == self.shape and (x >= self.low).all() and (x <= self.high).all()
|
||||
|
||||
|
@@ -5,10 +5,13 @@ class Discrete(Space):
|
||||
"""
|
||||
{0,1,...,n-1}
|
||||
"""
|
||||
def __init__(self, n):
|
||||
def __init__(self, n, np_random=None):
|
||||
if np_random is None:
|
||||
np_random = np.random
|
||||
self.np_random = np_random
|
||||
self.n = n
|
||||
def sample(self):
|
||||
return np.random.randint(self.n)
|
||||
return self.np_random.randint(self.n)
|
||||
def contains(self, x):
|
||||
if isinstance(x, int):
|
||||
as_int = x
|
||||
|
@@ -13,13 +13,17 @@ class HighLow(Space):
|
||||
e.g. if the space is composed of ATTACK (values: 0-100), MOVE_LEFT(0-1), MOVE_RIGHT(0,1)
|
||||
the space would be [ [0.0, 100.0, 2], [0, 1, 0], [0, 1, 0] ]
|
||||
"""
|
||||
def __init__(self, matrix):
|
||||
def __init__(self, matrix, np_random=None):
|
||||
"""
|
||||
A matrix of shape (n, 3), where the first column is the minimum (inclusive), the second column
|
||||
is the maximum (inclusive), and the third column is the precision (number of decimals to keep)
|
||||
|
||||
e.g. np.matrix([[0, 1, 0], [0, 1, 0], [0.0, 100.0, 2]])
|
||||
"""
|
||||
if np_random is None:
|
||||
np_random = np.random
|
||||
self.np_random = np_random
|
||||
|
||||
(num_rows, num_cols) = matrix.shape
|
||||
assert num_rows >= 1
|
||||
assert num_cols == 3
|
||||
@@ -29,7 +33,7 @@ class HighLow(Space):
|
||||
def sample(self):
|
||||
# For each row: round(random .* (max - min) + min, precision)
|
||||
max_minus_min = self.matrix[:, 1] - self.matrix[:, 0]
|
||||
random_matrix = np.multiply(max_minus_min, np.random.rand(self.num_rows, 1)) + self.matrix[:, 0]
|
||||
random_matrix = np.multiply(max_minus_min, self.np_random.rand(self.num_rows, 1)) + self.matrix[:, 0]
|
||||
rounded_matrix = np.zeros(self.num_rows)
|
||||
for i in range(self.num_rows):
|
||||
rounded_matrix[i] = round(random_matrix[i, 0], int(self.matrix[i, 2]))
|
||||
|
97
gym/utils/seeding.py
Normal file
97
gym/utils/seeding.py
Normal file
@@ -0,0 +1,97 @@
|
||||
import hashlib
|
||||
import numpy as np
|
||||
import os
|
||||
import random as _random
|
||||
import struct
|
||||
import sys
|
||||
|
||||
from gym import error
|
||||
|
||||
# Python 2 has a separate ``long`` type; Python 3 only has ``int``. The
# conditional expression is lazy, so ``long`` is never looked up on Python 3.
integer_types = (int, long) if sys.version_info < (3,) else (int,)
|
||||
|
||||
# Fortunately not needed right now!
|
||||
#
|
||||
# def random(seed=None):
|
||||
# seed = _seed(seed)
|
||||
#
|
||||
# rng = _random.Random()
|
||||
# rng.seed(hash_seed(seed))
|
||||
# return rng, seed
|
||||
|
||||
def np_random(seed=None):
    """Build a seeded ``np.random.RandomState``.

    Args:
        seed (Optional[int]): A non-negative integer, or None to have a
            seed generated by ``_seed``.

    Returns:
        A ``(rng, seed)`` tuple: the seeded generator and the seed value
        returned by ``_seed``.

    Raises:
        error.Error: If ``seed`` is given but is not a non-negative integer.
    """
    if seed is not None:
        is_valid = isinstance(seed, integer_types) and seed >= 0
        if not is_valid:
            raise error.Error('Seed must be a non-negative integer or omitted, not {}'.format(seed))

    seed = _seed(seed)

    rng = np.random.RandomState()
    # The generator is seeded with the hashed seed, split into 32-bit words.
    hashed_words = _int_list_from_bigint(hash_seed(seed))
    rng.seed(hashed_words)
    return rng, seed
|
||||
|
||||
def hash_seed(seed, max_bytes=8):
    """Hash ``seed`` into an integer built from ``max_bytes`` digest bytes.

    A single evaluation usually has several PRNGs running at the same
    time (most often because the environment runs in multiple
    processes). There is literature suggesting that linearly correlated
    seeds across PRNGs can lead to correlated outputs:

    http://blogs.unity3d.com/2015/01/07/a-primer-on-repeatable-random-numbers/
    http://stackoverflow.com/questions/1554958/how-different-do-random-seeds-need-to-be
    http://dl.acm.org/citation.cfm?id=1276928

    Seeds are therefore hashed before use. (The scheme is probably not
    crypto-strength, but should be enough to remove simple
    correlations.)

    Args:
        seed: The seed to hash; its ``str()`` form is what gets hashed.
        max_bytes (int): Number of leading digest bytes to keep.

    Returns:
        The integer decoded from the first ``max_bytes`` bytes of the
        SHA-512 digest.
    """
    seed_text = str(seed).encode('utf8')
    digest = hashlib.sha512(seed_text).digest()
    return _bigint_from_bytes(digest[:max_bytes])
|
||||
|
||||
def _seed(a=None, max_bytes=8):
    """Create a strong random seed.

    Without this, Python 2 would seed from the system time, which might
    be non-robust, especially in the presence of concurrency.

    Args:
        a (Optional[int, str]): None seeds from an operating-system
            specific randomness source. If an int or str is passed, the
            seed is derived from it.
        max_bytes (int): Size of the resulting seed in bytes.

    Returns:
        The seed as an integer.

    Raises:
        error.Error: If ``a`` is not None, a str, or an integer.
    """
    # Adapted from https://svn.python.org/projects/python/tags/r32/Lib/random.py
    if a is None:
        return _bigint_from_bytes(os.urandom(max_bytes))

    if isinstance(a, str):
        # Append the digest, then keep only the leading max_bytes bytes.
        encoded = a.encode('utf8')
        encoded += hashlib.sha512(encoded).digest()
        return _bigint_from_bytes(encoded[:max_bytes])

    if isinstance(a, integer_types):
        # Reduce the integer so it fits into max_bytes bytes.
        return a % 2**(8 * max_bytes)

    raise error.Error('Invalid type for seed: {} ({})'.format(type(a), a))
|
||||
|
||||
# TODO: don't hardcode sizeof_int here
|
||||
def _bigint_from_bytes(bytes):
|
||||
sizeof_int = 4
|
||||
padding = sizeof_int - len(bytes) % sizeof_int
|
||||
bytes += b'\0' * padding
|
||||
int_count = int(len(bytes) / sizeof_int)
|
||||
unpacked = struct.unpack("{}I".format(int_count), bytes)
|
||||
accum = 0
|
||||
for i, val in enumerate(unpacked):
|
||||
accum += 2 ** (sizeof_int * 8 * i) * val
|
||||
return accum
|
||||
|
||||
def _int_list_from_bigint(bigint):
|
||||
# Special case 0
|
||||
if bigint < 0:
|
||||
raise error.Error('Seed must be non-negative, not {}'.format(bigint))
|
||||
elif bigint == 0:
|
||||
return [0]
|
||||
|
||||
ints = []
|
||||
while bigint > 0:
|
||||
bigint, mod = divmod(bigint, 2 ** 32)
|
||||
ints.append(mod)
|
||||
return ints
|
16
gym/utils/tests/test_seeding.py
Normal file
16
gym/utils/tests/test_seeding.py
Normal file
@@ -0,0 +1,16 @@
|
||||
from gym import error
|
||||
from gym.utils import seeding
|
||||
|
||||
def test_invalid_seeds():
    """Check that a negative integer and a string are rejected as seeds."""
    for bad_seed in (-1, 'test'):
        rejected = False
        try:
            seeding.np_random(bad_seed)
        except error.Error:
            rejected = True
        assert rejected, 'Invalid seed {} passed validation'.format(bad_seed)
|
||||
|
||||
def test_valid_seeds():
    """Check that np_random returns the seed it was given for 0 and 1."""
    for requested in (0, 1):
        # Only the returned seed matters here; the generator is discarded.
        _, returned = seeding.np_random(requested)
        assert requested == returned
|
Reference in New Issue
Block a user