mirror of
https://github.com/Farama-Foundation/Gymnasium.git
synced 2025-08-19 13:32:03 +00:00
Switch to a global PRNG for action/observation spaces (#144)
cf 58e6aa95e5 (commitcomment-17669277)
This commit is contained in:
@@ -28,14 +28,14 @@ class AlgorithmicEnv(Env):
|
||||
AlgorithmicEnv.current_length = 2
|
||||
tape_control = []
|
||||
|
||||
self.action_space = Tuple(([Discrete(2 * self.inp_dim), Discrete(2), Discrete(self.base)]))
|
||||
self.observation_space = Discrete(self.base + 1)
|
||||
|
||||
self._seed()
|
||||
self.reset()
|
||||
|
||||
def _seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
|
||||
self.action_space = Tuple(([Discrete(2 * self.inp_dim, np_random=self.np_random), Discrete(2, np_random=self.np_random), Discrete(self.base, np_random=self.np_random)]))
|
||||
self.observation_space = Discrete(self.base + 1, np_random=self.np_random)
|
||||
return [seed]
|
||||
|
||||
def _get_obs(self, pos=None):
|
||||
|
@@ -41,6 +41,17 @@ class AtariEnv(gym.Env, utils.EzPickle):
|
||||
|
||||
self._seed()
|
||||
|
||||
self._action_set = self.ale.getMinimalActionSet()
|
||||
self.action_space = spaces.Discrete(len(self._action_set))
|
||||
|
||||
(screen_width,screen_height) = self.ale.getScreenDims()
|
||||
if self._obs_type == 'ram':
|
||||
self.observation_space = spaces.Box(low=np.zeros(128), high=np.zeros(128)+255)
|
||||
elif self._obs_type == 'image':
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3))
|
||||
else:
|
||||
raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))
|
||||
|
||||
def _seed(self, seed=None):
|
||||
self.np_random, seed1 = seeding.np_random(seed)
|
||||
# Derive a random seed. This gets passed as a uint, but gets
|
||||
@@ -50,17 +61,6 @@ class AtariEnv(gym.Env, utils.EzPickle):
|
||||
# Empirically, we need to seed before loading the ROM.
|
||||
self.ale.setInt(b'random_seed', seed2)
|
||||
self.ale.loadROM(self.game_path)
|
||||
self._action_set = self.ale.getMinimalActionSet()
|
||||
|
||||
self.action_space = spaces.Discrete(len(self._action_set), np_random=self.np_random)
|
||||
|
||||
(screen_width,screen_height) = self.ale.getScreenDims()
|
||||
if self._obs_type == 'ram':
|
||||
self.observation_space = spaces.Box(low=np.zeros(128), high=np.zeros(128)+255, np_random=self.np_random)
|
||||
elif self._obs_type == 'image':
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3), np_random=self.np_random)
|
||||
else:
|
||||
raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))
|
||||
return [seed1, seed2]
|
||||
|
||||
def _step(self, a):
|
||||
|
@@ -159,16 +159,16 @@ class GoEnv(gym.Env):
|
||||
raise error.Error('Unsupported observation type: {}'.format(self.observation_type))
|
||||
self.reset()
|
||||
|
||||
shape = pachi_py.CreateBoard(self.board_size).encode().shape
|
||||
self.observation_space = spaces.Box(np.zeros(shape), np.ones(shape))
|
||||
# One action for each board position, pass, and resign
|
||||
self.action_space = spaces.Discrete(self.board_size**2 + 2)
|
||||
|
||||
def _seed(self, seed=None):
|
||||
self.np_random, seed1 = seeding.np_random(seed)
|
||||
# Derive a random seed.
|
||||
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
|
||||
pachi_py.pachi_srand(seed2)
|
||||
|
||||
shape = pachi_py.CreateBoard(self.board_size).encode().shape
|
||||
self.observation_space = spaces.Box(np.zeros(shape), np.ones(shape), np_random=self.np_random)
|
||||
# One action for each board position, pass, and resign
|
||||
self.action_space = spaces.Discrete(self.board_size**2 + 2, np_random=self.np_random)
|
||||
return [seed1, seed2]
|
||||
|
||||
def _reset(self):
|
||||
|
@@ -56,16 +56,17 @@ class HexEnv(gym.Env):
|
||||
|
||||
if self.observation_type != 'numpy3c':
|
||||
raise error.Error('Unsupported observation type: {}'.format(self.observation_type))
|
||||
|
||||
# One action for each board position and resign
|
||||
self.action_space = spaces.Discrete(self.board_size ** 2 + 1)
|
||||
observation = self.reset()
|
||||
self.observation_space = spaces.Box(np.zeros(observation.shape), np.ones(observation.shape))
|
||||
|
||||
self._seed()
|
||||
|
||||
def _seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
|
||||
# One action for each board position and resign
|
||||
self.action_space = spaces.Discrete(self.board_size ** 2 + 1, np_random=self.np_random)
|
||||
observation = self.reset()
|
||||
self.observation_space = spaces.Box(np.zeros(observation.shape), np.ones(observation.shape), np_random=self.np_random)
|
||||
|
||||
# Update the random policy if needed
|
||||
if isinstance(self.opponent, str):
|
||||
if self.opponent == 'random':
|
||||
|
@@ -97,11 +97,12 @@ class BipedalWalker(gym.Env):
|
||||
self.prev_shaping = None
|
||||
self._reset()
|
||||
|
||||
high = np.array([np.inf]*24)
|
||||
self.action_space = spaces.Box(np.array([-1,-1,-1,-1]), np.array([+1,+1,+1,+1]))
|
||||
self.observation_space = spaces.Box(-high, high)
|
||||
|
||||
def _seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
high = np.array([np.inf]*24)
|
||||
self.action_space = spaces.Box(np.array([-1,-1,-1,-1]), np.array([+1,+1,+1,+1]), np_random=self.np_random)
|
||||
self.observation_space = spaces.Box(-high, high, np_random=self.np_random)
|
||||
return [seed]
|
||||
|
||||
def _destroy(self):
|
||||
|
@@ -117,10 +117,11 @@ class CarRacing(gym.Env):
|
||||
self.reward = 0.0
|
||||
self.prev_reward = 0.0
|
||||
|
||||
self.action_space = spaces.Box( np.array([-1,0,0]), np.array([+1,+1,+1])) # steer, gas, brake
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(STATE_H, STATE_W, 3))
|
||||
|
||||
def _seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
self.action_space = spaces.Box( np.array([-1,0,0]), np.array([+1,+1,+1]), np_random=self.np_random) # steer, gas, brake
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(STATE_H, STATE_W, 3), np_random=self.np_random)
|
||||
return [seed]
|
||||
|
||||
def _destroy(self):
|
||||
|
@@ -88,14 +88,14 @@ class LunarLander(gym.Env):
|
||||
self.prev_reward = None
|
||||
self._reset()
|
||||
|
||||
def _seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
|
||||
# useful range is -1 .. +1
|
||||
high = np.array([np.inf]*8)
|
||||
# nop, fire left engine, main engine, right engine
|
||||
self.action_space = spaces.Discrete(4, np_random=self.np_random)
|
||||
self.observation_space = spaces.Box(-high, high, np_random=self.np_random)
|
||||
self.action_space = spaces.Discrete(4)
|
||||
self.observation_space = spaces.Box(-high, high)
|
||||
|
||||
def _seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
return [seed]
|
||||
|
||||
def _destroy(self):
|
||||
|
@@ -80,15 +80,14 @@ class AcrobotEnv(core.Env):
|
||||
|
||||
def __init__(self):
|
||||
self.viewer = None
|
||||
high = np.array([np.pi, np.pi, self.MAX_VEL_1, self.MAX_VEL_2])
|
||||
low = -high
|
||||
self.observation_space = spaces.Box(low, high)
|
||||
self.action_space = spaces.Discrete(3)
|
||||
self._seed()
|
||||
|
||||
def _seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
|
||||
high = np.array([np.pi, np.pi, self.MAX_VEL_1, self.MAX_VEL_2])
|
||||
low = -high
|
||||
self.observation_space = spaces.Box(low, high, np_random=self.np_random)
|
||||
self.action_space = spaces.Discrete(3, np_random=self.np_random)
|
||||
return [seed]
|
||||
|
||||
def _reset(self):
|
||||
|
@@ -32,6 +32,11 @@ class CartPoleEnv(gym.Env):
|
||||
self.theta_threshold_radians = 12 * 2 * math.pi / 360
|
||||
self.x_threshold = 2.4
|
||||
|
||||
# Angle limit set to 2 * theta_threshold_radians so failing observation is still within bounds
|
||||
high = np.array([self.x_threshold, np.inf, self.theta_threshold_radians * 2, np.inf])
|
||||
self.action_space = spaces.Discrete(2)
|
||||
self.observation_space = spaces.Box(-high, high)
|
||||
|
||||
self._seed()
|
||||
self.reset()
|
||||
self.viewer = None
|
||||
@@ -40,10 +45,6 @@ class CartPoleEnv(gym.Env):
|
||||
|
||||
def _seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
# Angle limit set to 2 * theta_threshold_radians so failing observation is still within bounds
|
||||
high = np.array([self.x_threshold, np.inf, self.theta_threshold_radians * 2, np.inf])
|
||||
self.action_space = spaces.Discrete(2, np_random=self.np_random)
|
||||
self.observation_space = spaces.Box(-high, high, np_random=self.np_random)
|
||||
return [seed]
|
||||
|
||||
def _step(self, action):
|
||||
|
@@ -25,13 +25,14 @@ class MountainCarEnv(gym.Env):
|
||||
|
||||
self.viewer = None
|
||||
|
||||
self.action_space = spaces.Discrete(3)
|
||||
self.observation_space = spaces.Box(self.low, self.high)
|
||||
|
||||
self._seed()
|
||||
self.reset()
|
||||
|
||||
def _seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
self.action_space = spaces.Discrete(3, np_random=self.np_random)
|
||||
self.observation_space = spaces.Box(self.low, self.high, np_random=self.np_random)
|
||||
return [seed]
|
||||
|
||||
def _step(self, action):
|
||||
|
@@ -15,14 +15,15 @@ class PendulumEnv(gym.Env):
|
||||
self.max_torque=2.
|
||||
self.dt=.05
|
||||
self.viewer = None
|
||||
|
||||
high = np.array([1., 1., self.max_speed])
|
||||
self.action_space = spaces.Box(low=-self.max_torque, high=self.max_torque, shape=(1,))
|
||||
self.observation_space = spaces.Box(low=-high, high=high)
|
||||
|
||||
self._seed()
|
||||
|
||||
def _seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
|
||||
high = np.array([1., 1., self.max_speed])
|
||||
self.action_space = spaces.Box(low=-self.max_torque, high=self.max_torque, shape=(1,), np_random=self.np_random)
|
||||
self.observation_space = spaces.Box(low=-high, high=high, np_random=self.np_random)
|
||||
return [seed]
|
||||
|
||||
def _step(self,u):
|
||||
|
@@ -54,15 +54,14 @@ class DoomBasicEnv(doom_env.DoomEnv):
|
||||
self.viewer = None
|
||||
self.game.init()
|
||||
self.game.new_episode()
|
||||
|
||||
# 3 allowed actions [0, 9, 10] (must match .cfg file)
|
||||
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
|
||||
|
||||
self._seed()
|
||||
|
||||
def _seed(self, seed=None):
|
||||
np_random, seed1 = seeding.np_random(seed)
|
||||
# Derive a random seed.
|
||||
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
|
||||
self.game.set_seed(seed2)
|
||||
|
||||
# 3 allowed actions [0, 9, 10] (must match .cfg file)
|
||||
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3), np_random=np_random)
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
|
||||
return [seed1, seed2]
|
||||
seed = seeding.hash_seed(seed) % 2**32
|
||||
self.game.set_seed(seed)
|
||||
return [seed]
|
||||
|
@@ -56,15 +56,13 @@ class DoomCorridorEnv(doom_env.DoomEnv):
|
||||
self.game.init()
|
||||
self.game.new_episode()
|
||||
|
||||
# action indexes are [0, 9, 10, 12, 13, 14]
|
||||
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 6))
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
|
||||
|
||||
self._seed()
|
||||
|
||||
def _seed(self, seed=None):
|
||||
np_random, seed1 = seeding.np_random(seed)
|
||||
# Derive a random seed.
|
||||
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
|
||||
self.game.set_seed(seed2)
|
||||
|
||||
# action indexes are [0, 9, 10, 12, 13, 14]
|
||||
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 6), np_random=np_random)
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
|
||||
return [seed1, seed2]
|
||||
seed = seeding.hash_seed(seed) % 2**32
|
||||
self.game.set_seed(seed)
|
||||
return [seed]
|
||||
|
@@ -46,15 +46,13 @@ class DoomDeathmatchEnv(doom_env.DoomEnv):
|
||||
self.game.init()
|
||||
self.game.new_episode()
|
||||
|
||||
# 41 allowed actions (must match .cfg file)
|
||||
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 37 + [[0, 10, 0]] * 5))
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
|
||||
|
||||
self._seed()
|
||||
|
||||
def _seed(self, seed=None):
|
||||
np_random, seed1 = seeding.np_random(seed)
|
||||
# Derive a random seed.
|
||||
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
|
||||
self.game.set_seed(seed2)
|
||||
|
||||
# 41 allowed actions (must match .cfg file)
|
||||
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 37 + [[0, 10, 0]] * 5), np_random=np_random)
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
|
||||
return [seed1, seed2]
|
||||
seed = seeding.hash_seed(seed) % 2**32
|
||||
self.game.set_seed(seed)
|
||||
return [seed]
|
||||
|
@@ -55,15 +55,13 @@ class DoomDefendCenterEnv(doom_env.DoomEnv):
|
||||
self.game.init()
|
||||
self.game.new_episode()
|
||||
|
||||
# 3 allowed actions [0, 13, 14] (must match .cfg file)
|
||||
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
|
||||
|
||||
self._seed()
|
||||
|
||||
def _seed(self, seed=None):
|
||||
np_random, seed1 = seeding.np_random(seed)
|
||||
# Derive a random seed.
|
||||
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
|
||||
self.game.set_seed(seed2)
|
||||
|
||||
# 3 allowed actions [0, 13, 14] (must match .cfg file)
|
||||
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3), np_random=np_random)
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
|
||||
return [seed1, seed2]
|
||||
seed = seeding.hash_seed(seed) % 2**32
|
||||
self.game.set_seed(seed)
|
||||
return [seed]
|
||||
|
@@ -52,17 +52,14 @@ class DoomDefendLineEnv(doom_env.DoomEnv):
|
||||
self.screen_width = 640 # Must match .cfg file
|
||||
self.game.set_window_visible(False)
|
||||
self.viewer = None
|
||||
# 3 allowed actions [0, 13, 14] (must match .cfg file)
|
||||
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
|
||||
self._seed()
|
||||
self.game.init()
|
||||
self.game.new_episode()
|
||||
|
||||
def _seed(self, seed=None):
|
||||
np_random, seed1 = seeding.np_random(seed)
|
||||
# Derive a random seed.
|
||||
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
|
||||
self.game.set_seed(seed2)
|
||||
|
||||
# 3 allowed actions [0, 13, 14] (must match .cfg file)
|
||||
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3), np_random=np_random)
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
|
||||
return [seed1, seed2]
|
||||
seed = seeding.hash_seed(seed) % 2**32
|
||||
self.game.set_seed(seed)
|
||||
return [seed]
|
||||
|
@@ -53,15 +53,13 @@ class DoomHealthGatheringEnv(doom_env.DoomEnv):
|
||||
self.game.init()
|
||||
self.game.new_episode()
|
||||
|
||||
# 3 allowed actions [12, 13, 14] (must match .cfg file)
|
||||
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
|
||||
|
||||
self._seed()
|
||||
|
||||
def _seed(self, seed=None):
|
||||
np_random, seed1 = seeding.np_random(seed)
|
||||
# Derive a random seed.
|
||||
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
|
||||
self.game.set_seed(seed2)
|
||||
|
||||
# 3 allowed actions [12, 13, 14] (must match .cfg file)
|
||||
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3), np_random=np_random)
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
|
||||
return [seed1, seed2]
|
||||
seed = seeding.hash_seed(seed) % 2**32
|
||||
self.game.set_seed(seed)
|
||||
return [seed]
|
||||
|
@@ -52,15 +52,13 @@ class DoomMyWayHomeEnv(doom_env.DoomEnv):
|
||||
self.game.init()
|
||||
self.game.new_episode()
|
||||
|
||||
# 3 allowed actions [12, 13, 14] (must match .cfg file)
|
||||
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
|
||||
|
||||
self._seed()
|
||||
|
||||
def _seed(self, seed=None):
|
||||
np_random, seed1 = seeding.np_random(seed)
|
||||
# Derive a random seed.
|
||||
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
|
||||
self.game.set_seed(seed2)
|
||||
|
||||
# 3 allowed actions [12, 13, 14] (must match .cfg file)
|
||||
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3), np_random=np_random)
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
|
||||
return [seed1, seed2]
|
||||
seed = seeding.hash_seed(seed) % 2**32
|
||||
self.game.set_seed(seed)
|
||||
return [seed]
|
||||
|
@@ -57,15 +57,14 @@ class DoomPredictPositionEnv(doom_env.DoomEnv):
|
||||
self.game.init()
|
||||
self.game.new_episode()
|
||||
|
||||
# 3 allowed actions [0, 13, 14] (must match .cfg file)
|
||||
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
|
||||
|
||||
self._seed()
|
||||
|
||||
def _seed(self, seed=None):
|
||||
np_random, seed1 = seeding.np_random(seed)
|
||||
# Derive a random seed.
|
||||
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
|
||||
self.game.set_seed(seed2)
|
||||
|
||||
# 3 allowed actions [0, 13, 14] (must match .cfg file)
|
||||
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3), np_random=np_random)
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
|
||||
return [seed1, seed2]
|
||||
seed = seeding.hash_seed(seed) % 2**32
|
||||
self.game.set_seed(seed)
|
||||
return [seed]
|
||||
|
@@ -50,16 +50,13 @@ class DoomTakeCoverEnv(doom_env.DoomEnv):
|
||||
self.game.init()
|
||||
self.game.new_episode()
|
||||
|
||||
# 2 allowed actions [9, 10] (must match .cfg file)
|
||||
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 2))
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
|
||||
|
||||
self._seed()
|
||||
|
||||
def _seed(self, seed=None):
|
||||
np_random, seed1 = seeding.np_random(seed)
|
||||
# Derive a random seed.
|
||||
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
|
||||
self.game.set_seed(seed2)
|
||||
|
||||
# 2 allowed actions [9, 10] (must match .cfg file)
|
||||
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 2), np_random=np_random)
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
|
||||
|
||||
return [seed1, seed2]
|
||||
seed = seeding.hash_seed(seed) % 2**32
|
||||
self.game.set_seed(seed)
|
||||
return [seed]
|
||||
|
@@ -39,19 +39,20 @@ class MujocoEnv(gym.Env):
|
||||
observation, _reward, done, _info = self._step(np.zeros(self.model.nu))
|
||||
assert not done
|
||||
self.obs_dim = observation.size
|
||||
self._seed()
|
||||
|
||||
def _seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
|
||||
bounds = self.model.actuator_ctrlrange.copy()
|
||||
low = bounds[:, 0]
|
||||
high = bounds[:, 1]
|
||||
self.action_space = spaces.Box(low, high, np_random=self.np_random)
|
||||
self.action_space = spaces.Box(low, high)
|
||||
|
||||
high = np.inf*np.ones(self.obs_dim)
|
||||
low = -high
|
||||
self.observation_space = spaces.Box(low, high, np_random=self.np_random)
|
||||
self.observation_space = spaces.Box(low, high)
|
||||
|
||||
self._seed()
|
||||
|
||||
def _seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
return [seed]
|
||||
|
||||
# methods to override:
|
||||
|
@@ -6,7 +6,7 @@ import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
import gym
|
||||
from gym import envs
|
||||
from gym import envs, spaces
|
||||
|
||||
specs = [spec for spec in envs.registry.all() if spec._entry_point is not None]
|
||||
@tools.params(*specs)
|
||||
@@ -21,6 +21,11 @@ def test_env(spec):
|
||||
logger.warn("Skipping tests for box2d env {}".format(spec._entry_point))
|
||||
return
|
||||
|
||||
# Note that this precludes running this test in multiple
|
||||
# threads. However, we probably already can't do multithreading
|
||||
# due to some environments.
|
||||
spaces.seed(0)
|
||||
|
||||
env1 = spec.make()
|
||||
env1.seed(0)
|
||||
action_samples1 = [env1.action_space.sample() for i in range(4)]
|
||||
@@ -29,6 +34,8 @@ def test_env(spec):
|
||||
step_responses1 = [env1.step(action) for action in action_samples1]
|
||||
env1.close()
|
||||
|
||||
spaces.seed(0)
|
||||
|
||||
env2 = spec.make()
|
||||
env2.seed(0)
|
||||
action_samples2 = [env2.action_space.sample() for i in range(4)]
|
||||
|
@@ -71,6 +71,11 @@ class BlackjackEnv(gym.Env):
|
||||
https://webdocs.cs.ualberta.ca/~sutton/book/the-book.html
|
||||
"""
|
||||
def __init__(self, natural=False):
|
||||
self.action_space = spaces.Discrete(2)
|
||||
self.observation_space = spaces.Tuple((
|
||||
spaces.Discrete(32),
|
||||
spaces.Discrete(11),
|
||||
spaces.Discrete(2)))
|
||||
self._seed()
|
||||
|
||||
# Flag to payout 1.5 on a "natural" blackjack win, like casino rules
|
||||
@@ -81,11 +86,6 @@ class BlackjackEnv(gym.Env):
|
||||
|
||||
def _seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
self.action_space = spaces.Discrete(2, np_random=self.np_random)
|
||||
self.observation_space = spaces.Tuple((
|
||||
spaces.Discrete(32, np_random=self.np_random),
|
||||
spaces.Discrete(11, np_random=self.np_random),
|
||||
spaces.Discrete(2, np_random=self.np_random)))
|
||||
return [seed]
|
||||
|
||||
def _step(self, action):
|
||||
|
@@ -1,6 +1,7 @@
|
||||
import numpy as np
|
||||
|
||||
from gym import Env, spaces
|
||||
from gym.utils import seeding
|
||||
import numpy as np
|
||||
|
||||
def categorical_sample(prob_n, np_random):
|
||||
"""
|
||||
@@ -34,12 +35,13 @@ class DiscreteEnv(Env):
|
||||
self.nS = nS
|
||||
self.nA = nA
|
||||
|
||||
self.action_space = spaces.Discrete(self.nA)
|
||||
self.observation_space = spaces.Discrete(self.nS)
|
||||
|
||||
self._seed()
|
||||
|
||||
def _seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
self.action_space = spaces.Discrete(self.nA, np_random=self.np_random)
|
||||
self.observation_space = spaces.Discrete(self.nS, np_random=self.np_random)
|
||||
return [seed]
|
||||
|
||||
def _reset(self):
|
||||
|
@@ -27,12 +27,12 @@ class NChainEnv(gym.Env):
|
||||
self.small = small # payout for 'backwards' action
|
||||
self.large = large # payout at end of chain for 'forwards' action
|
||||
self.state = 0 # Start at beginning of the chain
|
||||
self.action_space = spaces.Discrete(2)
|
||||
self.observation_space = spaces.Discrete(self.n)
|
||||
self._seed()
|
||||
|
||||
def _seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
self.action_space = spaces.Discrete(2, np_random=self.np_random)
|
||||
self.observation_space = spaces.Discrete(self.n, np_random=self.np_random)
|
||||
return [seed]
|
||||
|
||||
def _step(self, action):
|
||||
|
@@ -18,14 +18,12 @@ class RouletteEnv(gym.Env):
|
||||
"""
|
||||
def __init__(self, spots=37):
|
||||
self.n = spots + 1
|
||||
self.action_space = spaces.Discrete(self.n)
|
||||
self.observation_space = spaces.Discrete(1)
|
||||
self._seed()
|
||||
|
||||
def _seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
|
||||
self.action_space = spaces.Discrete(self.n, np_random=self.np_random)
|
||||
self.observation_space = spaces.Discrete(1, np_random=self.np_random)
|
||||
|
||||
return [seed]
|
||||
|
||||
def _step(self, action):
|
||||
|
@@ -1,6 +1,7 @@
|
||||
from .box import Box
|
||||
from .discrete import Discrete
|
||||
from .high_low import HighLow
|
||||
from .tuple_space import Tuple
|
||||
from gym.spaces.box import Box
|
||||
from gym.spaces.discrete import Discrete
|
||||
from gym.spaces.high_low import HighLow
|
||||
from gym.spaces.prng import seed
|
||||
from gym.spaces.tuple_space import Tuple
|
||||
|
||||
__all__ = ["Box", "Discrete", "HighLow", "Tuple"]
|
||||
|
@@ -1,20 +1,19 @@
|
||||
from gym import Space
|
||||
import numpy as np
|
||||
|
||||
class Box(Space):
|
||||
import gym
|
||||
from gym.spaces import prng
|
||||
|
||||
class Box(gym.Space):
|
||||
"""
|
||||
A box in R^n.
|
||||
I.e., each coordinate is bounded.
|
||||
"""
|
||||
def __init__(self, low, high, shape=None, np_random=None):
|
||||
def __init__(self, low, high, shape=None):
|
||||
"""
|
||||
Two kinds of valid input:
|
||||
Box(-1.0, 1.0, (3,4)) # low and high are scalars, and shape is provided
|
||||
Box(np.array([-1.0,-2.0]), np.array([2.0,4.0])) # low and high are arrays of the same shape
|
||||
"""
|
||||
if np_random is None:
|
||||
np_random = np.random
|
||||
self.np_random = np_random
|
||||
if shape is None:
|
||||
assert low.shape == high.shape
|
||||
self.low = low
|
||||
@@ -24,7 +23,7 @@ class Box(Space):
|
||||
self.low = low + np.zeros(shape)
|
||||
self.high = high + np.zeros(shape)
|
||||
def sample(self):
|
||||
return self.np_random.uniform(low=self.low, high=self.high, size=self.low.shape)
|
||||
return prng.np_random.uniform(low=self.low, high=self.high, size=self.low.shape)
|
||||
def contains(self, x):
|
||||
return x.shape == self.shape and (x >= self.low).all() and (x <= self.high).all()
|
||||
|
||||
|
@@ -1,17 +1,16 @@
|
||||
import numpy as np
|
||||
from gym import Space
|
||||
|
||||
class Discrete(Space):
|
||||
import gym
|
||||
from gym.spaces import prng
|
||||
|
||||
class Discrete(gym.Space):
|
||||
"""
|
||||
{0,1,...,n-1}
|
||||
"""
|
||||
def __init__(self, n, np_random=None):
|
||||
if np_random is None:
|
||||
np_random = np.random
|
||||
self.np_random = np_random
|
||||
def __init__(self, n):
|
||||
self.n = n
|
||||
def sample(self):
|
||||
return self.np_random.randint(self.n)
|
||||
return prng.np_random.randint(self.n)
|
||||
def contains(self, x):
|
||||
if isinstance(x, int):
|
||||
as_int = x
|
||||
|
@@ -1,7 +1,9 @@
|
||||
from gym import Space
|
||||
import numpy as np
|
||||
|
||||
class HighLow(Space):
|
||||
import gym
|
||||
from gym.spaces import prng
|
||||
|
||||
class HighLow(gym.Space):
|
||||
"""
|
||||
A matrix of dimensions n x 3, where
|
||||
|
||||
@@ -13,17 +15,13 @@ class HighLow(Space):
|
||||
e.g. if the space is composed of ATTACK (values: 0-100), MOVE_LEFT(0-1), MOVE_RIGHT(0,1)
|
||||
the space would be [ [0.0, 100.0, 2], [0, 1, 0], [0, 1, 0] ]
|
||||
"""
|
||||
def __init__(self, matrix, np_random=None):
|
||||
def __init__(self, matrix):
|
||||
"""
|
||||
A matrix of shape (n, 3), where the first column is the minimum (inclusive), the second column
|
||||
is the maximum (inclusive), and the third column is the precision (number of decimals to keep)
|
||||
|
||||
e.g. np.matrix([[0, 1, 0], [0, 1, 0], [0.0, 100.0, 2]])
|
||||
"""
|
||||
if np_random is None:
|
||||
np_random = np.random
|
||||
self.np_random = np_random
|
||||
|
||||
(num_rows, num_cols) = matrix.shape
|
||||
assert num_rows >= 1
|
||||
assert num_cols == 3
|
||||
@@ -33,7 +31,7 @@ class HighLow(Space):
|
||||
def sample(self):
|
||||
# For each row: round(random .* (max - min) + min, precision)
|
||||
max_minus_min = self.matrix[:, 1] - self.matrix[:, 0]
|
||||
random_matrix = np.multiply(max_minus_min, self.np_random.rand(self.num_rows, 1)) + self.matrix[:, 0]
|
||||
random_matrix = np.multiply(max_minus_min, prng.np_random.rand(self.num_rows, 1)) + self.matrix[:, 0]
|
||||
rounded_matrix = np.zeros(self.num_rows)
|
||||
for i in range(self.num_rows):
|
||||
rounded_matrix[i] = round(random_matrix[i, 0], int(self.matrix[i, 2]))
|
||||
|
20
gym/spaces/prng.py
Normal file
20
gym/spaces/prng.py
Normal file
@@ -0,0 +1,20 @@
|
||||
import numpy
|
||||
|
||||
np_random = numpy.random.RandomState()
|
||||
|
||||
def seed(seed=None):
|
||||
"""Seed the common numpy.random.RandomState used in spaces
|
||||
|
||||
CF
|
||||
https://github.com/openai/gym/commit/58e6aa95e5af2c738557431f812abb81c505a7cf#commitcomment-17669277
|
||||
for some details about why we seed the spaces separately from the
|
||||
envs, but tl;dr is that it's pretty uncommon for them to be used
|
||||
within an actual algorithm, and the code becomes simpler to just
|
||||
use this common numpy.random.RandomState.
|
||||
"""
|
||||
np_random.seed(seed)
|
||||
|
||||
# This numpy.random.RandomState gets used in all spaces for their
|
||||
# 'sample' method. It's not really expected that people will be using
|
||||
# these in their algorithms.
|
||||
seed(0)
|
@@ -31,7 +31,7 @@ def np_random(seed=None):
|
||||
rng.seed(_int_list_from_bigint(hash_seed(seed)))
|
||||
return rng, seed
|
||||
|
||||
def hash_seed(seed, max_bytes=8):
|
||||
def hash_seed(seed=None, max_bytes=8):
|
||||
"""Any given evaluation is likely to have many PRNG's active at
|
||||
once. (Most commonly, because the environment is running in
|
||||
multiple processes.) There's literature indicating that having
|
||||
@@ -45,7 +45,13 @@ def hash_seed(seed, max_bytes=8):
|
||||
Thus, for sanity we hash the seeds before using them. (This scheme
|
||||
is likely not crypto-strength, but it should be good enough to get
|
||||
rid of simple correlations.)
|
||||
|
||||
Args:
|
||||
seed (Optional[int]): None seeds from an operating system specific randomness source.
|
||||
max_bytes: Maximum number of bytes to use in the hashed seed.
|
||||
"""
|
||||
if seed is None:
|
||||
seed = _seed(max_bytes=max_bytes)
|
||||
hash = hashlib.sha512(str(seed).encode('utf8')).digest()
|
||||
return _bigint_from_bytes(hash[:max_bytes])
|
||||
|
||||
@@ -55,7 +61,8 @@ def _seed(a=None, max_bytes=8):
|
||||
presence of concurrency.
|
||||
|
||||
Args:
|
||||
a (Optional[int, str]): None seeds from an operating system specific randomness source. If an int or str passed, all of the bits are used.
|
||||
a (Optional[int, str]): None seeds from an operating system specific randomness source.
|
||||
max_bytes: Maximum number of bytes to use in the seed.
|
||||
"""
|
||||
# Adapted from https://svn.python.org/projects/python/tags/r32/Lib/random.py
|
||||
if a is None:
|
||||
|
Reference in New Issue
Block a user