Switch to a global PRNG for action/observation spaces (#144)

cf 58e6aa95e5 (commitcomment-17669277)
This commit is contained in:
Greg Brockman
2016-05-30 18:07:59 -07:00
parent 5bb2337585
commit 8a535ca6f2
32 changed files with 198 additions and 179 deletions

View File

@@ -28,14 +28,14 @@ class AlgorithmicEnv(Env):
AlgorithmicEnv.current_length = 2
tape_control = []
self.action_space = Tuple(([Discrete(2 * self.inp_dim), Discrete(2), Discrete(self.base)]))
self.observation_space = Discrete(self.base + 1)
self._seed()
self.reset()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
self.action_space = Tuple(([Discrete(2 * self.inp_dim, np_random=self.np_random), Discrete(2, np_random=self.np_random), Discrete(self.base, np_random=self.np_random)]))
self.observation_space = Discrete(self.base + 1, np_random=self.np_random)
return [seed]
def _get_obs(self, pos=None):

View File

@@ -41,6 +41,17 @@ class AtariEnv(gym.Env, utils.EzPickle):
self._seed()
self._action_set = self.ale.getMinimalActionSet()
self.action_space = spaces.Discrete(len(self._action_set))
(screen_width,screen_height) = self.ale.getScreenDims()
if self._obs_type == 'ram':
self.observation_space = spaces.Box(low=np.zeros(128), high=np.zeros(128)+255)
elif self._obs_type == 'image':
self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3))
else:
raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))
def _seed(self, seed=None):
self.np_random, seed1 = seeding.np_random(seed)
# Derive a random seed. This gets passed as a uint, but gets
@@ -50,17 +61,6 @@ class AtariEnv(gym.Env, utils.EzPickle):
# Empirically, we need to seed before loading the ROM.
self.ale.setInt(b'random_seed', seed2)
self.ale.loadROM(self.game_path)
self._action_set = self.ale.getMinimalActionSet()
self.action_space = spaces.Discrete(len(self._action_set), np_random=self.np_random)
(screen_width,screen_height) = self.ale.getScreenDims()
if self._obs_type == 'ram':
self.observation_space = spaces.Box(low=np.zeros(128), high=np.zeros(128)+255, np_random=self.np_random)
elif self._obs_type == 'image':
self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3), np_random=self.np_random)
else:
raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))
return [seed1, seed2]
def _step(self, a):

View File

@@ -159,16 +159,16 @@ class GoEnv(gym.Env):
raise error.Error('Unsupported observation type: {}'.format(self.observation_type))
self.reset()
shape = pachi_py.CreateBoard(self.board_size).encode().shape
self.observation_space = spaces.Box(np.zeros(shape), np.ones(shape))
# One action for each board position, pass, and resign
self.action_space = spaces.Discrete(self.board_size**2 + 2)
def _seed(self, seed=None):
self.np_random, seed1 = seeding.np_random(seed)
# Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
pachi_py.pachi_srand(seed2)
shape = pachi_py.CreateBoard(self.board_size).encode().shape
self.observation_space = spaces.Box(np.zeros(shape), np.ones(shape), np_random=self.np_random)
# One action for each board position, pass, and resign
self.action_space = spaces.Discrete(self.board_size**2 + 2, np_random=self.np_random)
return [seed1, seed2]
def _reset(self):

View File

@@ -56,16 +56,17 @@ class HexEnv(gym.Env):
if self.observation_type != 'numpy3c':
raise error.Error('Unsupported observation type: {}'.format(self.observation_type))
# One action for each board position and resign
self.action_space = spaces.Discrete(self.board_size ** 2 + 1)
observation = self.reset()
self.observation_space = spaces.Box(np.zeros(observation.shape), np.ones(observation.shape))
self._seed()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
# One action for each board position and resign
self.action_space = spaces.Discrete(self.board_size ** 2 + 1, np_random=self.np_random)
observation = self.reset()
self.observation_space = spaces.Box(np.zeros(observation.shape), np.ones(observation.shape), np_random=self.np_random)
# Update the random policy if needed
if isinstance(self.opponent, str):
if self.opponent == 'random':

View File

@@ -97,11 +97,12 @@ class BipedalWalker(gym.Env):
self.prev_shaping = None
self._reset()
high = np.array([np.inf]*24)
self.action_space = spaces.Box(np.array([-1,-1,-1,-1]), np.array([+1,+1,+1,+1]))
self.observation_space = spaces.Box(-high, high)
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
high = np.array([np.inf]*24)
self.action_space = spaces.Box(np.array([-1,-1,-1,-1]), np.array([+1,+1,+1,+1]), np_random=self.np_random)
self.observation_space = spaces.Box(-high, high, np_random=self.np_random)
return [seed]
def _destroy(self):

View File

@@ -117,10 +117,11 @@ class CarRacing(gym.Env):
self.reward = 0.0
self.prev_reward = 0.0
self.action_space = spaces.Box( np.array([-1,0,0]), np.array([+1,+1,+1])) # steer, gas, brake
self.observation_space = spaces.Box(low=0, high=255, shape=(STATE_H, STATE_W, 3))
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
self.action_space = spaces.Box( np.array([-1,0,0]), np.array([+1,+1,+1]), np_random=self.np_random) # steer, gas, brake
self.observation_space = spaces.Box(low=0, high=255, shape=(STATE_H, STATE_W, 3), np_random=self.np_random)
return [seed]
def _destroy(self):

View File

@@ -88,14 +88,14 @@ class LunarLander(gym.Env):
self.prev_reward = None
self._reset()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
# useful range is -1 .. +1
high = np.array([np.inf]*8)
# nop, fire left engine, main engine, right engine
self.action_space = spaces.Discrete(4, np_random=self.np_random)
self.observation_space = spaces.Box(-high, high, np_random=self.np_random)
self.action_space = spaces.Discrete(4)
self.observation_space = spaces.Box(-high, high)
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
return [seed]
def _destroy(self):

View File

@@ -80,15 +80,14 @@ class AcrobotEnv(core.Env):
def __init__(self):
self.viewer = None
high = np.array([np.pi, np.pi, self.MAX_VEL_1, self.MAX_VEL_2])
low = -high
self.observation_space = spaces.Box(low, high)
self.action_space = spaces.Discrete(3)
self._seed()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
high = np.array([np.pi, np.pi, self.MAX_VEL_1, self.MAX_VEL_2])
low = -high
self.observation_space = spaces.Box(low, high, np_random=self.np_random)
self.action_space = spaces.Discrete(3, np_random=self.np_random)
return [seed]
def _reset(self):

View File

@@ -32,6 +32,11 @@ class CartPoleEnv(gym.Env):
self.theta_threshold_radians = 12 * 2 * math.pi / 360
self.x_threshold = 2.4
# Angle limit set to 2 * theta_threshold_radians so failing observation is still within bounds
high = np.array([self.x_threshold, np.inf, self.theta_threshold_radians * 2, np.inf])
self.action_space = spaces.Discrete(2)
self.observation_space = spaces.Box(-high, high)
self._seed()
self.reset()
self.viewer = None
@@ -40,10 +45,6 @@ class CartPoleEnv(gym.Env):
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
# Angle limit set to 2 * theta_threshold_radians so failing observation is still within bounds
high = np.array([self.x_threshold, np.inf, self.theta_threshold_radians * 2, np.inf])
self.action_space = spaces.Discrete(2, np_random=self.np_random)
self.observation_space = spaces.Box(-high, high, np_random=self.np_random)
return [seed]
def _step(self, action):

View File

@@ -25,13 +25,14 @@ class MountainCarEnv(gym.Env):
self.viewer = None
self.action_space = spaces.Discrete(3)
self.observation_space = spaces.Box(self.low, self.high)
self._seed()
self.reset()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
self.action_space = spaces.Discrete(3, np_random=self.np_random)
self.observation_space = spaces.Box(self.low, self.high, np_random=self.np_random)
return [seed]
def _step(self, action):

View File

@@ -15,14 +15,15 @@ class PendulumEnv(gym.Env):
self.max_torque=2.
self.dt=.05
self.viewer = None
high = np.array([1., 1., self.max_speed])
self.action_space = spaces.Box(low=-self.max_torque, high=self.max_torque, shape=(1,))
self.observation_space = spaces.Box(low=-high, high=high)
self._seed()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
high = np.array([1., 1., self.max_speed])
self.action_space = spaces.Box(low=-self.max_torque, high=self.max_torque, shape=(1,), np_random=self.np_random)
self.observation_space = spaces.Box(low=-high, high=high, np_random=self.np_random)
return [seed]
def _step(self,u):

View File

@@ -54,15 +54,14 @@ class DoomBasicEnv(doom_env.DoomEnv):
self.viewer = None
self.game.init()
self.game.new_episode()
# 3 allowed actions [0, 9, 10] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
self._seed()
def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed)
# Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
self.game.set_seed(seed2)
# 3 allowed actions [0, 9, 10] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]
seed = seeding.hash_seed(seed) % 2**32
self.game.set_seed(seed)
return [seed]

View File

@@ -56,15 +56,13 @@ class DoomCorridorEnv(doom_env.DoomEnv):
self.game.init()
self.game.new_episode()
# action indexes are [0, 9, 10, 12, 13, 14]
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 6))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
self._seed()
def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed)
# Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
self.game.set_seed(seed2)
# action indexes are [0, 9, 10, 12, 13, 14]
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 6), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]
seed = seeding.hash_seed(seed) % 2**32
self.game.set_seed(seed)
return [seed]

View File

@@ -46,15 +46,13 @@ class DoomDeathmatchEnv(doom_env.DoomEnv):
self.game.init()
self.game.new_episode()
# 41 allowed actions (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 37 + [[0, 10, 0]] * 5))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
self._seed()
def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed)
# Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
self.game.set_seed(seed2)
# 41 allowed actions (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 37 + [[0, 10, 0]] * 5), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]
seed = seeding.hash_seed(seed) % 2**32
self.game.set_seed(seed)
return [seed]

View File

@@ -55,15 +55,13 @@ class DoomDefendCenterEnv(doom_env.DoomEnv):
self.game.init()
self.game.new_episode()
# 3 allowed actions [0, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
self._seed()
def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed)
# Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
self.game.set_seed(seed2)
# 3 allowed actions [0, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]
seed = seeding.hash_seed(seed) % 2**32
self.game.set_seed(seed)
return [seed]

View File

@@ -52,17 +52,14 @@ class DoomDefendLineEnv(doom_env.DoomEnv):
self.screen_width = 640 # Must match .cfg file
self.game.set_window_visible(False)
self.viewer = None
# 3 allowed actions [0, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
self._seed()
self.game.init()
self.game.new_episode()
def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed)
# Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
self.game.set_seed(seed2)
# 3 allowed actions [0, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]
seed = seeding.hash_seed(seed) % 2**32
self.game.set_seed(seed)
return [seed]

View File

@@ -53,15 +53,13 @@ class DoomHealthGatheringEnv(doom_env.DoomEnv):
self.game.init()
self.game.new_episode()
# 3 allowed actions [12, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
self._seed()
def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed)
# Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
self.game.set_seed(seed2)
# 3 allowed actions [12, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]
seed = seeding.hash_seed(seed) % 2**32
self.game.set_seed(seed)
return [seed]

View File

@@ -52,15 +52,13 @@ class DoomMyWayHomeEnv(doom_env.DoomEnv):
self.game.init()
self.game.new_episode()
# 3 allowed actions [12, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
self._seed()
def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed)
# Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
self.game.set_seed(seed2)
# 3 allowed actions [12, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]
seed = seeding.hash_seed(seed) % 2**32
self.game.set_seed(seed)
return [seed]

View File

@@ -57,15 +57,14 @@ class DoomPredictPositionEnv(doom_env.DoomEnv):
self.game.init()
self.game.new_episode()
# 3 allowed actions [0, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
self._seed()
def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed)
# Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
self.game.set_seed(seed2)
# 3 allowed actions [0, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]
seed = seeding.hash_seed(seed) % 2**32
self.game.set_seed(seed)
return [seed]

View File

@@ -50,16 +50,13 @@ class DoomTakeCoverEnv(doom_env.DoomEnv):
self.game.init()
self.game.new_episode()
# 2 allowed actions [9, 10] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 2))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
self._seed()
def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed)
# Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
self.game.set_seed(seed2)
# 2 allowed actions [9, 10] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 2), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]
seed = seeding.hash_seed(seed) % 2**32
self.game.set_seed(seed)
return [seed]

View File

@@ -39,19 +39,20 @@ class MujocoEnv(gym.Env):
observation, _reward, done, _info = self._step(np.zeros(self.model.nu))
assert not done
self.obs_dim = observation.size
self._seed()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
bounds = self.model.actuator_ctrlrange.copy()
low = bounds[:, 0]
high = bounds[:, 1]
self.action_space = spaces.Box(low, high, np_random=self.np_random)
self.action_space = spaces.Box(low, high)
high = np.inf*np.ones(self.obs_dim)
low = -high
self.observation_space = spaces.Box(low, high, np_random=self.np_random)
self.observation_space = spaces.Box(low, high)
self._seed()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
return [seed]
# methods to override:

View File

@@ -6,7 +6,7 @@ import logging
logger = logging.getLogger(__name__)
import gym
from gym import envs
from gym import envs, spaces
specs = [spec for spec in envs.registry.all() if spec._entry_point is not None]
@tools.params(*specs)
@@ -21,6 +21,11 @@ def test_env(spec):
logger.warn("Skipping tests for box2d env {}".format(spec._entry_point))
return
# Note that this precludes running this test in multiple
# threads. However, we probably already can't do multithreading
# due to some environments.
spaces.seed(0)
env1 = spec.make()
env1.seed(0)
action_samples1 = [env1.action_space.sample() for i in range(4)]
@@ -29,6 +34,8 @@ def test_env(spec):
step_responses1 = [env1.step(action) for action in action_samples1]
env1.close()
spaces.seed(0)
env2 = spec.make()
env2.seed(0)
action_samples2 = [env2.action_space.sample() for i in range(4)]

View File

@@ -71,6 +71,11 @@ class BlackjackEnv(gym.Env):
https://webdocs.cs.ualberta.ca/~sutton/book/the-book.html
"""
def __init__(self, natural=False):
self.action_space = spaces.Discrete(2)
self.observation_space = spaces.Tuple((
spaces.Discrete(32),
spaces.Discrete(11),
spaces.Discrete(2)))
self._seed()
# Flag to payout 1.5 on a "natural" blackjack win, like casino rules
@@ -81,11 +86,6 @@ class BlackjackEnv(gym.Env):
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
self.action_space = spaces.Discrete(2, np_random=self.np_random)
self.observation_space = spaces.Tuple((
spaces.Discrete(32, np_random=self.np_random),
spaces.Discrete(11, np_random=self.np_random),
spaces.Discrete(2, np_random=self.np_random)))
return [seed]
def _step(self, action):

View File

@@ -1,6 +1,7 @@
import numpy as np
from gym import Env, spaces
from gym.utils import seeding
import numpy as np
def categorical_sample(prob_n, np_random):
"""
@@ -34,12 +35,13 @@ class DiscreteEnv(Env):
self.nS = nS
self.nA = nA
self.action_space = spaces.Discrete(self.nA)
self.observation_space = spaces.Discrete(self.nS)
self._seed()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
self.action_space = spaces.Discrete(self.nA, np_random=self.np_random)
self.observation_space = spaces.Discrete(self.nS, np_random=self.np_random)
return [seed]
def _reset(self):

View File

@@ -27,12 +27,12 @@ class NChainEnv(gym.Env):
self.small = small # payout for 'backwards' action
self.large = large # payout at end of chain for 'forwards' action
self.state = 0 # Start at beginning of the chain
self.action_space = spaces.Discrete(2)
self.observation_space = spaces.Discrete(self.n)
self._seed()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
self.action_space = spaces.Discrete(2, np_random=self.np_random)
self.observation_space = spaces.Discrete(self.n, np_random=self.np_random)
return [seed]
def _step(self, action):

View File

@@ -18,14 +18,12 @@ class RouletteEnv(gym.Env):
"""
def __init__(self, spots=37):
self.n = spots + 1
self.action_space = spaces.Discrete(self.n)
self.observation_space = spaces.Discrete(1)
self._seed()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
self.action_space = spaces.Discrete(self.n, np_random=self.np_random)
self.observation_space = spaces.Discrete(1, np_random=self.np_random)
return [seed]
def _step(self, action):

View File

@@ -1,6 +1,7 @@
from .box import Box
from .discrete import Discrete
from .high_low import HighLow
from .tuple_space import Tuple
from gym.spaces.box import Box
from gym.spaces.discrete import Discrete
from gym.spaces.high_low import HighLow
from gym.spaces.prng import seed
from gym.spaces.tuple_space import Tuple
__all__ = ["Box", "Discrete", "HighLow", "Tuple"]

View File

@@ -1,20 +1,19 @@
from gym import Space
import numpy as np
class Box(Space):
import gym
from gym.spaces import prng
class Box(gym.Space):
"""
A box in R^n.
I.e., each coordinate is bounded.
"""
def __init__(self, low, high, shape=None, np_random=None):
def __init__(self, low, high, shape=None):
"""
Two kinds of valid input:
Box(-1.0, 1.0, (3,4)) # low and high are scalars, and shape is provided
Box(np.array([-1.0,-2.0]), np.array([2.0,4.0])) # low and high are arrays of the same shape
"""
if np_random is None:
np_random = np.random
self.np_random = np_random
if shape is None:
assert low.shape == high.shape
self.low = low
@@ -24,7 +23,7 @@ class Box(Space):
self.low = low + np.zeros(shape)
self.high = high + np.zeros(shape)
def sample(self):
return self.np_random.uniform(low=self.low, high=self.high, size=self.low.shape)
return prng.np_random.uniform(low=self.low, high=self.high, size=self.low.shape)
def contains(self, x):
return x.shape == self.shape and (x >= self.low).all() and (x <= self.high).all()

View File

@@ -1,17 +1,16 @@
import numpy as np
from gym import Space
class Discrete(Space):
import gym
from gym.spaces import prng
class Discrete(gym.Space):
"""
{0,1,...,n-1}
"""
def __init__(self, n, np_random=None):
if np_random is None:
np_random = np.random
self.np_random = np_random
def __init__(self, n):
self.n = n
def sample(self):
return self.np_random.randint(self.n)
return prng.np_random.randint(self.n)
def contains(self, x):
if isinstance(x, int):
as_int = x

View File

@@ -1,7 +1,9 @@
from gym import Space
import numpy as np
class HighLow(Space):
import gym
from gym.spaces import prng
class HighLow(gym.Space):
"""
A matrix of dimensions n x 3, where
@@ -13,17 +15,13 @@ class HighLow(Space):
e.g. if the space is composed of ATTACK (values: 0-100), MOVE_LEFT(0-1), MOVE_RIGHT(0,1)
the space would be [ [0.0, 100.0, 2], [0, 1, 0], [0, 1, 0] ]
"""
def __init__(self, matrix, np_random=None):
def __init__(self, matrix):
"""
A matrix of shape (n, 3), where the first column is the minimum (inclusive), the second column
is the maximum (inclusive), and the third column is the precision (number of decimals to keep)
e.g. np.matrix([[0, 1, 0], [0, 1, 0], [0.0, 100.0, 2]])
"""
if np_random is None:
np_random = np.random
self.np_random = np_random
(num_rows, num_cols) = matrix.shape
assert num_rows >= 1
assert num_cols == 3
@@ -33,7 +31,7 @@ class HighLow(Space):
def sample(self):
# For each row: round(random .* (max - min) + min, precision)
max_minus_min = self.matrix[:, 1] - self.matrix[:, 0]
random_matrix = np.multiply(max_minus_min, self.np_random.rand(self.num_rows, 1)) + self.matrix[:, 0]
random_matrix = np.multiply(max_minus_min, prng.np_random.rand(self.num_rows, 1)) + self.matrix[:, 0]
rounded_matrix = np.zeros(self.num_rows)
for i in range(self.num_rows):
rounded_matrix[i] = round(random_matrix[i, 0], int(self.matrix[i, 2]))

20
gym/spaces/prng.py Normal file
View File

@@ -0,0 +1,20 @@
import numpy
# Single numpy.random.RandomState shared by every space's `sample` method.
# Spaces draw from this generator rather than from an environment's own
# PRNG, so seeding an environment does not affect space sampling.
np_random = numpy.random.RandomState()


def seed(seed=None):
    """Seed the common numpy.random.RandomState used in spaces.

    Spaces are seeded separately from the envs: it is pretty uncommon for
    space sampling to be used within an actual algorithm, and the code
    becomes simpler when every space just uses this one shared
    numpy.random.RandomState. For more background, see
    https://github.com/openai/gym/commit/58e6aa95e5af2c738557431f812abb81c505a7cf#commitcomment-17669277

    Args:
        seed: Value passed straight through to
            ``numpy.random.RandomState.seed``. ``None`` lets numpy pick
            its own entropy source.
    """
    # Reseed in place so that modules which already imported `np_random`
    # keep drawing from the same (now reseeded) generator object.
    np_random.seed(seed)


# This numpy.random.RandomState gets used in all spaces for their
# 'sample' method. It's not really expected that people will be using
# these in their algorithms.
# Seeding with a fixed value at import time makes sampling reproducible by
# default; call seed() again to choose a different stream.
seed(0)

View File

@@ -31,7 +31,7 @@ def np_random(seed=None):
rng.seed(_int_list_from_bigint(hash_seed(seed)))
return rng, seed
def hash_seed(seed, max_bytes=8):
def hash_seed(seed=None, max_bytes=8):
"""Any given evaluation is likely to have many PRNG's active at
once. (Most commonly, because the environment is running in
multiple processes.) There's literature indicating that having
@@ -45,7 +45,13 @@ def hash_seed(seed, max_bytes=8):
Thus, for sanity we hash the seeds before using them. (This scheme
is likely not crypto-strength, but it should be good enough to get
rid of simple correlations.)
Args:
seed (Optional[int]): None seeds from an operating system specific randomness source.
max_bytes: Maximum number of bytes to use in the hashed seed.
"""
if seed is None:
seed = _seed(max_bytes=max_bytes)
hash = hashlib.sha512(str(seed).encode('utf8')).digest()
return _bigint_from_bytes(hash[:max_bytes])
@@ -55,7 +61,8 @@ def _seed(a=None, max_bytes=8):
presence of concurrency.
Args:
a (Optional[int, str]): None seeds from an operating system specific randomness source. If an int or str passed, all of the bits are used.
a (Optional[int, str]): None seeds from an operating system specific randomness source.
max_bytes: Maximum number of bytes to use in the seed.
"""
# Adapted from https://svn.python.org/projects/python/tags/r32/Lib/random.py
if a is None: