Switch to a global PRNG for action/observation spaces (#144)

cf 58e6aa95e5 (commitcomment-17669277)
This commit is contained in:
Greg Brockman
2016-05-30 18:07:59 -07:00
parent 5bb2337585
commit 8a535ca6f2
32 changed files with 198 additions and 179 deletions

View File

@@ -28,14 +28,14 @@ class AlgorithmicEnv(Env):
AlgorithmicEnv.current_length = 2 AlgorithmicEnv.current_length = 2
tape_control = [] tape_control = []
self.action_space = Tuple(([Discrete(2 * self.inp_dim), Discrete(2), Discrete(self.base)]))
self.observation_space = Discrete(self.base + 1)
self._seed() self._seed()
self.reset() self.reset()
def _seed(self, seed=None): def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed) self.np_random, seed = seeding.np_random(seed)
self.action_space = Tuple(([Discrete(2 * self.inp_dim, np_random=self.np_random), Discrete(2, np_random=self.np_random), Discrete(self.base, np_random=self.np_random)]))
self.observation_space = Discrete(self.base + 1, np_random=self.np_random)
return [seed] return [seed]
def _get_obs(self, pos=None): def _get_obs(self, pos=None):

View File

@@ -41,6 +41,17 @@ class AtariEnv(gym.Env, utils.EzPickle):
self._seed() self._seed()
self._action_set = self.ale.getMinimalActionSet()
self.action_space = spaces.Discrete(len(self._action_set))
(screen_width,screen_height) = self.ale.getScreenDims()
if self._obs_type == 'ram':
self.observation_space = spaces.Box(low=np.zeros(128), high=np.zeros(128)+255)
elif self._obs_type == 'image':
self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3))
else:
raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))
def _seed(self, seed=None): def _seed(self, seed=None):
self.np_random, seed1 = seeding.np_random(seed) self.np_random, seed1 = seeding.np_random(seed)
# Derive a random seed. This gets passed as a uint, but gets # Derive a random seed. This gets passed as a uint, but gets
@@ -50,17 +61,6 @@ class AtariEnv(gym.Env, utils.EzPickle):
# Empirically, we need to seed before loading the ROM. # Empirically, we need to seed before loading the ROM.
self.ale.setInt(b'random_seed', seed2) self.ale.setInt(b'random_seed', seed2)
self.ale.loadROM(self.game_path) self.ale.loadROM(self.game_path)
self._action_set = self.ale.getMinimalActionSet()
self.action_space = spaces.Discrete(len(self._action_set), np_random=self.np_random)
(screen_width,screen_height) = self.ale.getScreenDims()
if self._obs_type == 'ram':
self.observation_space = spaces.Box(low=np.zeros(128), high=np.zeros(128)+255, np_random=self.np_random)
elif self._obs_type == 'image':
self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3), np_random=self.np_random)
else:
raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))
return [seed1, seed2] return [seed1, seed2]
def _step(self, a): def _step(self, a):

View File

@@ -159,16 +159,16 @@ class GoEnv(gym.Env):
raise error.Error('Unsupported observation type: {}'.format(self.observation_type)) raise error.Error('Unsupported observation type: {}'.format(self.observation_type))
self.reset() self.reset()
shape = pachi_py.CreateBoard(self.board_size).encode().shape
self.observation_space = spaces.Box(np.zeros(shape), np.ones(shape))
# One action for each board position, pass, and resign
self.action_space = spaces.Discrete(self.board_size**2 + 2)
def _seed(self, seed=None): def _seed(self, seed=None):
self.np_random, seed1 = seeding.np_random(seed) self.np_random, seed1 = seeding.np_random(seed)
# Derive a random seed. # Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32 seed2 = seeding.hash_seed(seed1 + 1) % 2**32
pachi_py.pachi_srand(seed2) pachi_py.pachi_srand(seed2)
shape = pachi_py.CreateBoard(self.board_size).encode().shape
self.observation_space = spaces.Box(np.zeros(shape), np.ones(shape), np_random=self.np_random)
# One action for each board position, pass, and resign
self.action_space = spaces.Discrete(self.board_size**2 + 2, np_random=self.np_random)
return [seed1, seed2] return [seed1, seed2]
def _reset(self): def _reset(self):

View File

@@ -56,16 +56,17 @@ class HexEnv(gym.Env):
if self.observation_type != 'numpy3c': if self.observation_type != 'numpy3c':
raise error.Error('Unsupported observation type: {}'.format(self.observation_type)) raise error.Error('Unsupported observation type: {}'.format(self.observation_type))
# One action for each board position and resign
self.action_space = spaces.Discrete(self.board_size ** 2 + 1)
observation = self.reset()
self.observation_space = spaces.Box(np.zeros(observation.shape), np.ones(observation.shape))
self._seed() self._seed()
def _seed(self, seed=None): def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed) self.np_random, seed = seeding.np_random(seed)
# One action for each board position and resign
self.action_space = spaces.Discrete(self.board_size ** 2 + 1, np_random=self.np_random)
observation = self.reset()
self.observation_space = spaces.Box(np.zeros(observation.shape), np.ones(observation.shape), np_random=self.np_random)
# Update the random policy if needed # Update the random policy if needed
if isinstance(self.opponent, str): if isinstance(self.opponent, str):
if self.opponent == 'random': if self.opponent == 'random':

View File

@@ -97,11 +97,12 @@ class BipedalWalker(gym.Env):
self.prev_shaping = None self.prev_shaping = None
self._reset() self._reset()
high = np.array([np.inf]*24)
self.action_space = spaces.Box(np.array([-1,-1,-1,-1]), np.array([+1,+1,+1,+1]))
self.observation_space = spaces.Box(-high, high)
def _seed(self, seed=None): def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed) self.np_random, seed = seeding.np_random(seed)
high = np.array([np.inf]*24)
self.action_space = spaces.Box(np.array([-1,-1,-1,-1]), np.array([+1,+1,+1,+1]), np_random=self.np_random)
self.observation_space = spaces.Box(-high, high, np_random=self.np_random)
return [seed] return [seed]
def _destroy(self): def _destroy(self):

View File

@@ -117,10 +117,11 @@ class CarRacing(gym.Env):
self.reward = 0.0 self.reward = 0.0
self.prev_reward = 0.0 self.prev_reward = 0.0
self.action_space = spaces.Box( np.array([-1,0,0]), np.array([+1,+1,+1])) # steer, gas, brake
self.observation_space = spaces.Box(low=0, high=255, shape=(STATE_H, STATE_W, 3))
def _seed(self, seed=None): def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed) self.np_random, seed = seeding.np_random(seed)
self.action_space = spaces.Box( np.array([-1,0,0]), np.array([+1,+1,+1]), np_random=self.np_random) # steer, gas, brake
self.observation_space = spaces.Box(low=0, high=255, shape=(STATE_H, STATE_W, 3), np_random=self.np_random)
return [seed] return [seed]
def _destroy(self): def _destroy(self):

View File

@@ -88,14 +88,14 @@ class LunarLander(gym.Env):
self.prev_reward = None self.prev_reward = None
self._reset() self._reset()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
# useful range is -1 .. +1 # useful range is -1 .. +1
high = np.array([np.inf]*8) high = np.array([np.inf]*8)
# nop, fire left engine, main engine, right engine # nop, fire left engine, main engine, right engine
self.action_space = spaces.Discrete(4, np_random=self.np_random) self.action_space = spaces.Discrete(4)
self.observation_space = spaces.Box(-high, high, np_random=self.np_random) self.observation_space = spaces.Box(-high, high)
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
return [seed] return [seed]
def _destroy(self): def _destroy(self):

View File

@@ -80,15 +80,14 @@ class AcrobotEnv(core.Env):
def __init__(self): def __init__(self):
self.viewer = None self.viewer = None
high = np.array([np.pi, np.pi, self.MAX_VEL_1, self.MAX_VEL_2])
low = -high
self.observation_space = spaces.Box(low, high)
self.action_space = spaces.Discrete(3)
self._seed() self._seed()
def _seed(self, seed=None): def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed) self.np_random, seed = seeding.np_random(seed)
high = np.array([np.pi, np.pi, self.MAX_VEL_1, self.MAX_VEL_2])
low = -high
self.observation_space = spaces.Box(low, high, np_random=self.np_random)
self.action_space = spaces.Discrete(3, np_random=self.np_random)
return [seed] return [seed]
def _reset(self): def _reset(self):

View File

@@ -32,6 +32,11 @@ class CartPoleEnv(gym.Env):
self.theta_threshold_radians = 12 * 2 * math.pi / 360 self.theta_threshold_radians = 12 * 2 * math.pi / 360
self.x_threshold = 2.4 self.x_threshold = 2.4
# Angle limit set to 2 * theta_threshold_radians so failing observation is still within bounds
high = np.array([self.x_threshold, np.inf, self.theta_threshold_radians * 2, np.inf])
self.action_space = spaces.Discrete(2)
self.observation_space = spaces.Box(-high, high)
self._seed() self._seed()
self.reset() self.reset()
self.viewer = None self.viewer = None
@@ -40,10 +45,6 @@ class CartPoleEnv(gym.Env):
def _seed(self, seed=None): def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed) self.np_random, seed = seeding.np_random(seed)
# Angle limit set to 2 * theta_threshold_radians so failing observation is still within bounds
high = np.array([self.x_threshold, np.inf, self.theta_threshold_radians * 2, np.inf])
self.action_space = spaces.Discrete(2, np_random=self.np_random)
self.observation_space = spaces.Box(-high, high, np_random=self.np_random)
return [seed] return [seed]
def _step(self, action): def _step(self, action):

View File

@@ -25,13 +25,14 @@ class MountainCarEnv(gym.Env):
self.viewer = None self.viewer = None
self.action_space = spaces.Discrete(3)
self.observation_space = spaces.Box(self.low, self.high)
self._seed() self._seed()
self.reset() self.reset()
def _seed(self, seed=None): def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed) self.np_random, seed = seeding.np_random(seed)
self.action_space = spaces.Discrete(3, np_random=self.np_random)
self.observation_space = spaces.Box(self.low, self.high, np_random=self.np_random)
return [seed] return [seed]
def _step(self, action): def _step(self, action):

View File

@@ -15,14 +15,15 @@ class PendulumEnv(gym.Env):
self.max_torque=2. self.max_torque=2.
self.dt=.05 self.dt=.05
self.viewer = None self.viewer = None
high = np.array([1., 1., self.max_speed])
self.action_space = spaces.Box(low=-self.max_torque, high=self.max_torque, shape=(1,))
self.observation_space = spaces.Box(low=-high, high=high)
self._seed() self._seed()
def _seed(self, seed=None): def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed) self.np_random, seed = seeding.np_random(seed)
high = np.array([1., 1., self.max_speed])
self.action_space = spaces.Box(low=-self.max_torque, high=self.max_torque, shape=(1,), np_random=self.np_random)
self.observation_space = spaces.Box(low=-high, high=high, np_random=self.np_random)
return [seed] return [seed]
def _step(self,u): def _step(self,u):

View File

@@ -54,15 +54,14 @@ class DoomBasicEnv(doom_env.DoomEnv):
self.viewer = None self.viewer = None
self.game.init() self.game.init()
self.game.new_episode() self.game.new_episode()
# 3 allowed actions [0, 9, 10] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
self._seed() self._seed()
def _seed(self, seed=None): def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed) seed = seeding.hash_seed(seed) % 2**32
# Derive a random seed. self.game.set_seed(seed)
seed2 = seeding.hash_seed(seed1 + 1) % 2**32 return [seed]
self.game.set_seed(seed2)
# 3 allowed actions [0, 9, 10] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]

View File

@@ -56,15 +56,13 @@ class DoomCorridorEnv(doom_env.DoomEnv):
self.game.init() self.game.init()
self.game.new_episode() self.game.new_episode()
# action indexes are [0, 9, 10, 12, 13, 14]
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 6))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
self._seed() self._seed()
def _seed(self, seed=None): def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed) seed = seeding.hash_seed(seed) % 2**32
# Derive a random seed. self.game.set_seed(seed)
seed2 = seeding.hash_seed(seed1 + 1) % 2**32 return [seed]
self.game.set_seed(seed2)
# action indexes are [0, 9, 10, 12, 13, 14]
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 6), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]

View File

@@ -46,15 +46,13 @@ class DoomDeathmatchEnv(doom_env.DoomEnv):
self.game.init() self.game.init()
self.game.new_episode() self.game.new_episode()
# 41 allowed actions (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 37 + [[0, 10, 0]] * 5))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
self._seed() self._seed()
def _seed(self, seed=None): def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed) seed = seeding.hash_seed(seed) % 2**32
# Derive a random seed. self.game.set_seed(seed)
seed2 = seeding.hash_seed(seed1 + 1) % 2**32 return [seed]
self.game.set_seed(seed2)
# 41 allowed actions (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 37 + [[0, 10, 0]] * 5), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]

View File

@@ -55,15 +55,13 @@ class DoomDefendCenterEnv(doom_env.DoomEnv):
self.game.init() self.game.init()
self.game.new_episode() self.game.new_episode()
# 3 allowed actions [0, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
self._seed() self._seed()
def _seed(self, seed=None): def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed) seed = seeding.hash_seed(seed) % 2**32
# Derive a random seed. self.game.set_seed(seed)
seed2 = seeding.hash_seed(seed1 + 1) % 2**32 return [seed]
self.game.set_seed(seed2)
# 3 allowed actions [0, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]

View File

@@ -52,17 +52,14 @@ class DoomDefendLineEnv(doom_env.DoomEnv):
self.screen_width = 640 # Must match .cfg file self.screen_width = 640 # Must match .cfg file
self.game.set_window_visible(False) self.game.set_window_visible(False)
self.viewer = None self.viewer = None
# 3 allowed actions [0, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
self._seed() self._seed()
self.game.init() self.game.init()
self.game.new_episode() self.game.new_episode()
def _seed(self, seed=None): def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed) seed = seeding.hash_seed(seed) % 2**32
# Derive a random seed. self.game.set_seed(seed)
seed2 = seeding.hash_seed(seed1 + 1) % 2**32 return [seed]
self.game.set_seed(seed2)
# 3 allowed actions [0, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]

View File

@@ -53,15 +53,13 @@ class DoomHealthGatheringEnv(doom_env.DoomEnv):
self.game.init() self.game.init()
self.game.new_episode() self.game.new_episode()
# 3 allowed actions [12, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
self._seed() self._seed()
def _seed(self, seed=None): def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed) seed = seeding.hash_seed(seed) % 2**32
# Derive a random seed. self.game.set_seed(seed)
seed2 = seeding.hash_seed(seed1 + 1) % 2**32 return [seed]
self.game.set_seed(seed2)
# 3 allowed actions [12, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]

View File

@@ -52,15 +52,13 @@ class DoomMyWayHomeEnv(doom_env.DoomEnv):
self.game.init() self.game.init()
self.game.new_episode() self.game.new_episode()
# 3 allowed actions [12, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
self._seed() self._seed()
def _seed(self, seed=None): def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed) seed = seeding.hash_seed(seed) % 2**32
# Derive a random seed. self.game.set_seed(seed)
seed2 = seeding.hash_seed(seed1 + 1) % 2**32 return [seed]
self.game.set_seed(seed2)
# 3 allowed actions [12, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]

View File

@@ -57,15 +57,14 @@ class DoomPredictPositionEnv(doom_env.DoomEnv):
self.game.init() self.game.init()
self.game.new_episode() self.game.new_episode()
# 3 allowed actions [0, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
self._seed() self._seed()
def _seed(self, seed=None): def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed)
# Derive a random seed. # Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32 seed = seeding.hash_seed(seed) % 2**32
self.game.set_seed(seed2) self.game.set_seed(seed)
return [seed]
# 3 allowed actions [0, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]

View File

@@ -50,16 +50,13 @@ class DoomTakeCoverEnv(doom_env.DoomEnv):
self.game.init() self.game.init()
self.game.new_episode() self.game.new_episode()
# 2 allowed actions [9, 10] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 2))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
self._seed() self._seed()
def _seed(self, seed=None): def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed) seed = seeding.hash_seed(seed) % 2**32
# Derive a random seed. self.game.set_seed(seed)
seed2 = seeding.hash_seed(seed1 + 1) % 2**32 return [seed]
self.game.set_seed(seed2)
# 2 allowed actions [9, 10] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 2), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]

View File

@@ -39,19 +39,20 @@ class MujocoEnv(gym.Env):
observation, _reward, done, _info = self._step(np.zeros(self.model.nu)) observation, _reward, done, _info = self._step(np.zeros(self.model.nu))
assert not done assert not done
self.obs_dim = observation.size self.obs_dim = observation.size
self._seed()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
bounds = self.model.actuator_ctrlrange.copy() bounds = self.model.actuator_ctrlrange.copy()
low = bounds[:, 0] low = bounds[:, 0]
high = bounds[:, 1] high = bounds[:, 1]
self.action_space = spaces.Box(low, high, np_random=self.np_random) self.action_space = spaces.Box(low, high)
high = np.inf*np.ones(self.obs_dim) high = np.inf*np.ones(self.obs_dim)
low = -high low = -high
self.observation_space = spaces.Box(low, high, np_random=self.np_random) self.observation_space = spaces.Box(low, high)
self._seed()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
return [seed] return [seed]
# methods to override: # methods to override:

View File

@@ -6,7 +6,7 @@ import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
import gym import gym
from gym import envs from gym import envs, spaces
specs = [spec for spec in envs.registry.all() if spec._entry_point is not None] specs = [spec for spec in envs.registry.all() if spec._entry_point is not None]
@tools.params(*specs) @tools.params(*specs)
@@ -21,6 +21,11 @@ def test_env(spec):
logger.warn("Skipping tests for box2d env {}".format(spec._entry_point)) logger.warn("Skipping tests for box2d env {}".format(spec._entry_point))
return return
# Note that this precludes running this test in multiple
# threads. However, we probably already can't do multithreading
# due to some environments.
spaces.seed(0)
env1 = spec.make() env1 = spec.make()
env1.seed(0) env1.seed(0)
action_samples1 = [env1.action_space.sample() for i in range(4)] action_samples1 = [env1.action_space.sample() for i in range(4)]
@@ -29,6 +34,8 @@ def test_env(spec):
step_responses1 = [env1.step(action) for action in action_samples1] step_responses1 = [env1.step(action) for action in action_samples1]
env1.close() env1.close()
spaces.seed(0)
env2 = spec.make() env2 = spec.make()
env2.seed(0) env2.seed(0)
action_samples2 = [env2.action_space.sample() for i in range(4)] action_samples2 = [env2.action_space.sample() for i in range(4)]

View File

@@ -71,6 +71,11 @@ class BlackjackEnv(gym.Env):
https://webdocs.cs.ualberta.ca/~sutton/book/the-book.html https://webdocs.cs.ualberta.ca/~sutton/book/the-book.html
""" """
def __init__(self, natural=False): def __init__(self, natural=False):
self.action_space = spaces.Discrete(2)
self.observation_space = spaces.Tuple((
spaces.Discrete(32),
spaces.Discrete(11),
spaces.Discrete(2)))
self._seed() self._seed()
# Flag to payout 1.5 on a "natural" blackjack win, like casino rules # Flag to payout 1.5 on a "natural" blackjack win, like casino rules
@@ -81,11 +86,6 @@ class BlackjackEnv(gym.Env):
def _seed(self, seed=None): def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed) self.np_random, seed = seeding.np_random(seed)
self.action_space = spaces.Discrete(2, np_random=self.np_random)
self.observation_space = spaces.Tuple((
spaces.Discrete(32, np_random=self.np_random),
spaces.Discrete(11, np_random=self.np_random),
spaces.Discrete(2, np_random=self.np_random)))
return [seed] return [seed]
def _step(self, action): def _step(self, action):

View File

@@ -1,6 +1,7 @@
import numpy as np
from gym import Env, spaces from gym import Env, spaces
from gym.utils import seeding from gym.utils import seeding
import numpy as np
def categorical_sample(prob_n, np_random): def categorical_sample(prob_n, np_random):
""" """
@@ -34,12 +35,13 @@ class DiscreteEnv(Env):
self.nS = nS self.nS = nS
self.nA = nA self.nA = nA
self.action_space = spaces.Discrete(self.nA)
self.observation_space = spaces.Discrete(self.nS)
self._seed() self._seed()
def _seed(self, seed=None): def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed) self.np_random, seed = seeding.np_random(seed)
self.action_space = spaces.Discrete(self.nA, np_random=self.np_random)
self.observation_space = spaces.Discrete(self.nS, np_random=self.np_random)
return [seed] return [seed]
def _reset(self): def _reset(self):

View File

@@ -27,12 +27,12 @@ class NChainEnv(gym.Env):
self.small = small # payout for 'backwards' action self.small = small # payout for 'backwards' action
self.large = large # payout at end of chain for 'forwards' action self.large = large # payout at end of chain for 'forwards' action
self.state = 0 # Start at beginning of the chain self.state = 0 # Start at beginning of the chain
self.action_space = spaces.Discrete(2)
self.observation_space = spaces.Discrete(self.n)
self._seed() self._seed()
def _seed(self, seed=None): def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed) self.np_random, seed = seeding.np_random(seed)
self.action_space = spaces.Discrete(2, np_random=self.np_random)
self.observation_space = spaces.Discrete(self.n, np_random=self.np_random)
return [seed] return [seed]
def _step(self, action): def _step(self, action):

View File

@@ -18,14 +18,12 @@ class RouletteEnv(gym.Env):
""" """
def __init__(self, spots=37): def __init__(self, spots=37):
self.n = spots + 1 self.n = spots + 1
self.action_space = spaces.Discrete(self.n)
self.observation_space = spaces.Discrete(1)
self._seed() self._seed()
def _seed(self, seed=None): def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed) self.np_random, seed = seeding.np_random(seed)
self.action_space = spaces.Discrete(self.n, np_random=self.np_random)
self.observation_space = spaces.Discrete(1, np_random=self.np_random)
return [seed] return [seed]
def _step(self, action): def _step(self, action):

View File

@@ -1,6 +1,7 @@
from .box import Box from gym.spaces.box import Box
from .discrete import Discrete from gym.spaces.discrete import Discrete
from .high_low import HighLow from gym.spaces.high_low import HighLow
from .tuple_space import Tuple from gym.spaces.prng import seed
from gym.spaces.tuple_space import Tuple
__all__ = ["Box", "Discrete", "HighLow", "Tuple"] __all__ = ["Box", "Discrete", "HighLow", "Tuple"]

View File

@@ -1,20 +1,19 @@
from gym import Space
import numpy as np import numpy as np
class Box(Space): import gym
from gym.spaces import prng
class Box(gym.Space):
""" """
A box in R^n. A box in R^n.
I.e., each coordinate is bounded. I.e., each coordinate is bounded.
""" """
def __init__(self, low, high, shape=None, np_random=None): def __init__(self, low, high, shape=None):
""" """
Two kinds of valid input: Two kinds of valid input:
Box(-1.0, 1.0, (3,4)) # low and high are scalars, and shape is provided Box(-1.0, 1.0, (3,4)) # low and high are scalars, and shape is provided
Box(np.array([-1.0,-2.0]), np.array([2.0,4.0])) # low and high are arrays of the same shape Box(np.array([-1.0,-2.0]), np.array([2.0,4.0])) # low and high are arrays of the same shape
""" """
if np_random is None:
np_random = np.random
self.np_random = np_random
if shape is None: if shape is None:
assert low.shape == high.shape assert low.shape == high.shape
self.low = low self.low = low
@@ -24,7 +23,7 @@ class Box(Space):
self.low = low + np.zeros(shape) self.low = low + np.zeros(shape)
self.high = high + np.zeros(shape) self.high = high + np.zeros(shape)
def sample(self): def sample(self):
return self.np_random.uniform(low=self.low, high=self.high, size=self.low.shape) return prng.np_random.uniform(low=self.low, high=self.high, size=self.low.shape)
def contains(self, x): def contains(self, x):
return x.shape == self.shape and (x >= self.low).all() and (x <= self.high).all() return x.shape == self.shape and (x >= self.low).all() and (x <= self.high).all()

View File

@@ -1,17 +1,16 @@
import numpy as np import numpy as np
from gym import Space
class Discrete(Space): import gym
from gym.spaces import prng
class Discrete(gym.Space):
""" """
{0,1,...,n-1} {0,1,...,n-1}
""" """
def __init__(self, n, np_random=None): def __init__(self, n):
if np_random is None:
np_random = np.random
self.np_random = np_random
self.n = n self.n = n
def sample(self): def sample(self):
return self.np_random.randint(self.n) return prng.np_random.randint(self.n)
def contains(self, x): def contains(self, x):
if isinstance(x, int): if isinstance(x, int):
as_int = x as_int = x

View File

@@ -1,7 +1,9 @@
from gym import Space
import numpy as np import numpy as np
class HighLow(Space): import gym
from gym.spaces import prng
class HighLow(gym.Space):
""" """
A matrix of dimensions n x 3, where A matrix of dimensions n x 3, where
@@ -13,17 +15,13 @@ class HighLow(Space):
e.g. if the space is composed of ATTACK (values: 0-100), MOVE_LEFT(0-1), MOVE_RIGHT(0,1) e.g. if the space is composed of ATTACK (values: 0-100), MOVE_LEFT(0-1), MOVE_RIGHT(0,1)
the space would be [ [0.0, 100.0, 2], [0, 1, 0], [0, 1, 0] ] the space would be [ [0.0, 100.0, 2], [0, 1, 0], [0, 1, 0] ]
""" """
def __init__(self, matrix, np_random=None): def __init__(self, matrix):
""" """
A matrix of shape (n, 3), where the first column is the minimum (inclusive), the second column A matrix of shape (n, 3), where the first column is the minimum (inclusive), the second column
is the maximum (inclusive), and the third column is the precision (number of decimals to keep) is the maximum (inclusive), and the third column is the precision (number of decimals to keep)
e.g. np.matrix([[0, 1, 0], [0, 1, 0], [0.0, 100.0, 2]]) e.g. np.matrix([[0, 1, 0], [0, 1, 0], [0.0, 100.0, 2]])
""" """
if np_random is None:
np_random = np.random
self.np_random = np_random
(num_rows, num_cols) = matrix.shape (num_rows, num_cols) = matrix.shape
assert num_rows >= 1 assert num_rows >= 1
assert num_cols == 3 assert num_cols == 3
@@ -33,7 +31,7 @@ class HighLow(Space):
def sample(self): def sample(self):
# For each row: round(random .* (max - min) + min, precision) # For each row: round(random .* (max - min) + min, precision)
max_minus_min = self.matrix[:, 1] - self.matrix[:, 0] max_minus_min = self.matrix[:, 1] - self.matrix[:, 0]
random_matrix = np.multiply(max_minus_min, self.np_random.rand(self.num_rows, 1)) + self.matrix[:, 0] random_matrix = np.multiply(max_minus_min, prng.np_random.rand(self.num_rows, 1)) + self.matrix[:, 0]
rounded_matrix = np.zeros(self.num_rows) rounded_matrix = np.zeros(self.num_rows)
for i in range(self.num_rows): for i in range(self.num_rows):
rounded_matrix[i] = round(random_matrix[i, 0], int(self.matrix[i, 2])) rounded_matrix[i] = round(random_matrix[i, 0], int(self.matrix[i, 2]))

20
gym/spaces/prng.py Normal file
View File

@@ -0,0 +1,20 @@
import numpy
np_random = numpy.random.RandomState()
def seed(seed=None):
    """Seed the common numpy.random.RandomState used in spaces.

    Re-seeds the module-level ``np_random`` instance in place by
    forwarding ``seed`` to its ``seed`` method; nothing is returned.

    See
    https://github.com/openai/gym/commit/58e6aa95e5af2c738557431f812abb81c505a7cf#commitcomment-17669277
    for some details about why we seed the spaces separately from the
    envs. In short: it's pretty uncommon for space sampling to be used
    within an actual algorithm, and the code becomes simpler if the
    spaces just use this common numpy.random.RandomState.

    Args:
        seed: Value passed unchanged to ``np_random.seed``. Defaults to
            None. NOTE(review): numpy documents None as "seed from an
            OS/clock entropy source" -- confirm for the numpy version
            in use.
    """
    # Within this body, `seed` is the argument (it shadows this function's
    # own name); the call below is the RandomState method, not a recursion.
    np_random.seed(seed)
# This numpy.random.RandomState gets used in all spaces for their
# 'sample' method. It's not really expected that people will be using
# these in their algorithms.
seed(0)

View File

@@ -31,7 +31,7 @@ def np_random(seed=None):
rng.seed(_int_list_from_bigint(hash_seed(seed))) rng.seed(_int_list_from_bigint(hash_seed(seed)))
return rng, seed return rng, seed
def hash_seed(seed, max_bytes=8): def hash_seed(seed=None, max_bytes=8):
"""Any given evaluation is likely to have many PRNG's active at """Any given evaluation is likely to have many PRNG's active at
once. (Most commonly, because the environment is running in once. (Most commonly, because the environment is running in
multiple processes.) There's literature indicating that having multiple processes.) There's literature indicating that having
@@ -45,7 +45,13 @@ def hash_seed(seed, max_bytes=8):
Thus, for sanity we hash the seeds before using them. (This scheme Thus, for sanity we hash the seeds before using them. (This scheme
is likely not crypto-strength, but it should be good enough to get is likely not crypto-strength, but it should be good enough to get
rid of simple correlations.) rid of simple correlations.)
Args:
seed (Optional[int]): None seeds from an operating system specific randomness source.
max_bytes: Maximum number of bytes to use in the hashed seed.
""" """
if seed is None:
seed = _seed(max_bytes=max_bytes)
hash = hashlib.sha512(str(seed).encode('utf8')).digest() hash = hashlib.sha512(str(seed).encode('utf8')).digest()
return _bigint_from_bytes(hash[:max_bytes]) return _bigint_from_bytes(hash[:max_bytes])
@@ -55,7 +61,8 @@ def _seed(a=None, max_bytes=8):
presence of concurrency. presence of concurrency.
Args: Args:
a (Optional[int, str]): None seeds from an operating system specific randomness source. If an int or str passed, all of the bits are used. a (Optional[int, str]): None seeds from an operating system specific randomness source.
max_bytes: Maximum number of bytes to use in the seed.
""" """
# Adapted from https://svn.python.org/projects/python/tags/r32/Lib/random.py # Adapted from https://svn.python.org/projects/python/tags/r32/Lib/random.py
if a is None: if a is None: