[WIP] add support for seeding environments (#135)

* Make environments seedable

* Fix monitor bugs

- Set monitor_id before setting the infix. This was a bug that would yield incorrect results with multiple monitors.
- Remove extra pid from stats recorder filename. This should be purely cosmetic.

* Start uploading seeds in episode_batch

* Fix _bigint_from_bytes for python3

* Set seed explicitly in random_agent

* Pass through seed argument

* Also pass through random state to spaces

* Pass random state into the observation/action spaces

* Make all _seed methods return the list of used seeds

* Switch over to np.random where possible

* Start hashing seeds, and also seed doom engine

* Fixup seeding determinism in many cases

* Seed before loading the ROM

* Make seeding more Python3 friendly

* Make the MuJoCo skipping a bit more forgiving

* Remove debugging PDB calls

* Make setInt argument into raw bytes

* Validate and upload seeds

* Skip box2d

* Make seeds smaller, and change representation of seeds in upload

* Handle long seeds

* Fix RandomAgent example to be deterministic

* Handle integer types correctly in Python2 and Python3

* Try caching pip

* Try adding swap

* Add df and free calls

* Bump swap

* Bump swap size

* Try setting overcommit

* Try other sysctls

* Try fixing overcommit

* Try just setting overcommit_memory=1

* Add explanatory comment

* Add what's new section to readme

* BUG: Mark ElevatorAction-ram-v0 as non-deterministic for now

* Document seed

* Move nondeterministic check into spec
This commit is contained in:
Greg Brockman
2016-05-29 09:07:09 -07:00
parent 2e26518b4f
commit 58e6aa95e5
61 changed files with 711 additions and 285 deletions

.gitignore
View File

@@ -23,8 +23,6 @@ ghostdriver.log
junk
MUJOCO_LOG.txt
mujoco-bundle
rllab_mujoco
@@ -36,3 +34,4 @@ tutorial/*.html
# PyCharm project files
.idea
vizdoom.ini

View File

@@ -2,7 +2,7 @@ dist: trusty
sudo: required
cache:
apt: true
pip: false
pip: true
language: python
addons:
apt:
@@ -30,6 +30,11 @@ before_install:
# In a pull request, there are no secrets, and hence no MuJoCo:
# https://docs.travis-ci.com/user/pull-requests#Security-Restrictions-when-testing-Pull-Requests.
- '[ -z ${MUJOCO_KEY_BUNDLE+x} ] || ( curl https://openai-public.s3-us-west-2.amazonaws.com/mujoco/$MUJOCO_KEY_BUNDLE.tar.gz | tar xz -C ~/.mujoco )'
# Without this line, fork() calls on Travis fail with an out-of-memory
# error. (These fork()s spawn the subprocess used for video
# recording.) We should debug the memory usage at some stage, but
# simply setting overcommit is a good starting point.
- sudo sysctl -w vm.overcommit_memory=1
env:
- DISPLAY=:12
install: pip install tox-travis

View File

@@ -251,3 +251,10 @@ We are using `nose2 <https://github.com/nose-devs/nose2>`_ for tests. You can ru
nose2
You can also run tests in a specific directory by using the ``-s`` option, or by passing in the specific name of the test. See the `nose2 docs <http://nose2.readthedocs.org/en/latest/usage.html#naming-tests>`_ for more details.
What's new
----------
- 2016-05-28: For controlled reproducibility, envs now support seeding
(cf #91 and #135). The monitor records which seeds are used. We will
soon add seed information to the display on the scoreboard.
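A minimal usage sketch of the new API (illustrative, not part of this diff):

import gym

env = gym.make('CartPole-v0')
seeds = env.seed(0)        # list of seeds actually used by the env's generators
observation = env.reset()  # the rollout below this point is reproducible across runs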

View File

@@ -19,14 +19,18 @@ if __name__ == '__main__':
logger.setLevel(logging.INFO)
env = gym.make('CartPole-v0' if len(sys.argv)<2 else sys.argv[1])
agent = RandomAgent(env.action_space)
# You provide the directory to write to (can be an existing
# directory, including one with existing data -- all monitor files
# will be namespaced). You can also dump to a tempdir if you'd
# like: tempfile.mkdtemp().
outdir = '/tmp/random-agent-results'
env.monitor.start(outdir, force=True)
env.monitor.start(outdir, force=True, seed=0)
# This declaration must go *after* the monitor call, since the
# monitor's seeding creates a new action_space instance with the
# appropriate pseudorandom number generator.
agent = RandomAgent(env.action_space)
episode_count = 100
max_steps = 200

View File

@@ -1,9 +1,4 @@
import hashlib
import numpy as np
import logging
import os
import random
import struct
import sys
import gym
@@ -40,48 +35,3 @@ def undo_logger_setup():
root_logger.removeHandler(handler)
gym.logger.setLevel(logging.NOTSET)
requests_logger.setLevel(logging.NOTSET)
def seed(a=None):
"""Seeds the 'random' and 'numpy.random' generators. By default,
Python seeds these with the system time. Call this if you are
using multiple processes.
Notes:
SECURITY SENSITIVE: a bug here would allow people to generate fake results. Please let us know if you find one :).
Args:
a (Optional[int, str]): None or no argument seeds from an operating-system-specific randomness source. If an int or str is passed, then all of its bits are used.
"""
# Adapted from https://svn.python.org/projects/python/tags/r32/Lib/random.py
if a is None:
a = bigint_from_bytes(os.urandom(32))
if isinstance(a, str):
a = a.encode('utf8')
a += hashlib.sha512(a).digest()
a = bigint_from_bytes(a)
# Actually seed the generators
random.seed(a)
np.random.seed(int_list_from_bigint(a))
return a
# TODO: don't hardcode sizeof_int here
def bigint_from_bytes(bytes):
sizeof_int = 4
padding = sizeof_int - len(bytes) % sizeof_int
bytes += '\0' * padding
int_count = len(bytes) / sizeof_int
unpacked = struct.unpack("{}I".format(int_count), bytes)
accum = 0
for i, val in enumerate(unpacked):
accum += 2 ** (sizeof_int * 8 * i) * val
return accum
def int_list_from_bigint(bigint):
ints = []
while bigint > 0:
bigint, mod = divmod(bigint, 2 ** 32)
ints.append(mod)
return ints
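The replacement module, gym/utils/seeding.py, is not shown in this excerpt. As a reading aid, here is a minimal sketch of what it plausibly contains, reconstructed from the call sites below (seeding.np_random, seeding.hash_seed) and from the commit messages about Python 3 byte handling; everything beyond those two names is an assumption:

import hashlib
import os
import struct

import numpy as np

def np_random(seed=None):
    # Return a freshly seeded RandomState plus the seed that was used.
    seed = _seed(seed)
    rng = np.random.RandomState()
    rng.seed(_int_list_from_bigint(hash_seed(seed)))
    return rng, seed

def hash_seed(seed=None, max_bytes=8):
    # Hash the seed so that nearby values (0, 1, 2, ...) yield
    # uncorrelated generator states.
    if seed is None:
        seed = _seed(max_bytes=max_bytes)
    hashed = hashlib.sha512(str(seed).encode('utf8')).digest()
    return _bigint_from_bytes(hashed[:max_bytes])

def _seed(a=None, max_bytes=8):
    # None: draw entropy from the OS. str: use all of its bits via sha512.
    if a is None:
        return _bigint_from_bytes(os.urandom(max_bytes))
    if isinstance(a, str):
        a = a.encode('utf8')
        a += hashlib.sha512(a).digest()
        return _bigint_from_bytes(a[:max_bytes])
    return a

def _bigint_from_bytes(bytes_):
    # The Python 3 fixes: pad with b'\0' (not '\0') and use integer division.
    sizeof_int = 4
    padding = sizeof_int - len(bytes_) % sizeof_int
    bytes_ += b'\0' * padding
    int_count = len(bytes_) // sizeof_int
    unpacked = struct.unpack("{}I".format(int_count), bytes_)
    accum = 0
    for i, val in enumerate(unpacked):
        accum += 2 ** (sizeof_int * 8 * i) * val
    return accum

def _int_list_from_bigint(bigint):
    # np.random.seed accepts a list of uint32s; decompose in base 2**32.
    ints = []
    while bigint > 0:
        bigint, mod = divmod(bigint, 2 ** 32)
        ints.append(mod)
    return ints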

View File

@@ -17,10 +17,11 @@ class Env(object):
The main API methods that users of this class need to know are:
reset
step
reset
render
close
seed
When implementing an environment, override the following methods
in your subclass:
@@ -28,6 +29,8 @@ class Env(object):
_step
_reset
_render
_close
_seed
And set the following attributes:
@@ -70,6 +73,7 @@ class Env(object):
if close:
return
raise NotImplementedError
def _seed(self, seed=None): return []
@property
def monitor(self):
@@ -172,7 +176,9 @@ class Env(object):
Environments will automatically close() themselves when
garbage collected or when the program exits.
"""
if self._closed:
# _closed will be missing if this instance is still
# initializing.
if not hasattr(self, '_closed') or self._closed:
return
self._close()
@@ -181,6 +187,23 @@ class Env(object):
# end up with double close.
self._closed = True
def seed(self, seed=None):
"""Sets the seed for this env's random number generator(s).
Note:
Some environments use multiple pseudorandom number generators.
We want to capture all such seeds used in order to ensure that
there aren't accidental correlations between multiple generators.
Returns:
list<bigint>: Returns the list of seeds used in this env's random
number generators. The first value in the list should be the
"main" seed, or the value which a reproducer should pass to
'seed'. Often, the main seed equals the provided 'seed', but
this won't be true if seed=None, for example.
"""
return self._seed(seed)
def __del__(self):
self.close()
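A usage sketch of this contract (values illustrative):

import gym

env = gym.make('CartPole-v0')
used = env.seed()   # no argument: seeds from OS entropy, e.g. [8802657584989822724]
env.seed(used[0])   # reproduce a recorded run by passing back the "main" seed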

View File

@@ -228,11 +228,21 @@ for game in ['air_raid', 'alien', 'amidar', 'assault', 'asterix', 'asteroids', '
name = ''.join([g.capitalize() for g in game.split('_')])
if obs_type == 'ram':
name = '{}-ram'.format(name)
nondeterministic = False
if game == 'elevator_action' and obs_type == 'ram':
# ElevatorAction-ram-v0 seems to yield slightly
# non-deterministic observations about 10% of the time. We
# should track this down eventually, but for now we just
# mark it as nondeterministic.
nondeterministic = True
register(
id='{}-v0'.format(name),
entry_point='gym.envs.atari:AtariEnv',
kwargs={'game': game, 'obs_type': obs_type},
timestep_limit=10000,
nondeterministic=nondeterministic,
)
# Board games
@@ -248,6 +258,11 @@ register(
'illegal_move_mode': 'lose',
'board_size': 9,
},
# The pachi player seems not to be deterministic given a fixed seed.
# (Reproduce by running 'import gym; h = gym.make('Go9x9-v0'); h.seed(1); h.reset(); h.step(15); h.step(16); h.step(17)' a few times.)
#
# This is probably due to a computation time limit.
nondeterministic=True,
)
register(
@@ -260,6 +275,7 @@ register(
'illegal_move_mode': 'lose',
'board_size': 19,
},
nondeterministic=True,
)
register(

View File

@@ -1,8 +1,7 @@
from gym import Env
from gym.spaces import Discrete, Tuple
from gym.utils import colorize
from gym.utils import colorize, seeding
import numpy as np
import random
from six import StringIO
import sys
import math
@@ -17,6 +16,7 @@ class AlgorithmicEnv(Env):
def __init__(self, inp_dim=1, base=10, chars=False):
global hash_base
hash_base = 50 ** np.arange(inp_dim)
self.base = base
self.last = 10
@@ -27,10 +27,17 @@ class AlgorithmicEnv(Env):
self.inp_dim = inp_dim
AlgorithmicEnv.current_length = 2
tape_control = []
self.action_space = Tuple(([Discrete(2 * inp_dim), Discrete(2), Discrete(self.base)]))
self.observation_space = Discrete(self.base + 1)
self._seed()
self.reset()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
self.action_space = Tuple(([Discrete(2 * self.inp_dim, np_random=self.np_random), Discrete(2, np_random=self.np_random), Discrete(self.base, np_random=self.np_random)]))
self.observation_space = Discrete(self.base + 1, np_random=self.np_random)
return [seed]
def _get_obs(self, pos=None):
if pos is None:
pos = self.x
@@ -198,6 +205,6 @@ class AlgorithmicEnv(Env):
AlgorithmicEnv.sum_rewards = []
self.sum_reward = 0.0
self.time = 0
self.total_len = random.randrange(3) + AlgorithmicEnv.current_length
self.total_len = self.np_random.randint(3) + AlgorithmicEnv.current_length
self.set_data()
return self._get_obs()
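This file establishes the _seed pattern the rest of the commit repeats: create self.np_random via seeding.np_random, rebuild the spaces against that generator, and return the list of seeds used. A distilled example showing just the pattern (CoinFlipEnv and its dynamics are invented for illustration; the np_random keyword on the spaces is the one this commit adds):

import gym
from gym import spaces
from gym.utils import seeding

class CoinFlipEnv(gym.Env):
    def __init__(self):
        self._seed()  # build the seeded spaces before anything samples from them

    def _seed(self, seed=None):
        # seeding.np_random returns a RandomState plus the seed actually used.
        self.np_random, seed = seeding.np_random(seed)
        # Recreate the spaces so that action_space.sample() draws from the
        # same seeded generator as the environment dynamics below.
        self.action_space = spaces.Discrete(2, np_random=self.np_random)
        self.observation_space = spaces.Discrete(2, np_random=self.np_random)
        return [seed]

    def _reset(self):
        self.state = self.np_random.randint(2)
        return self.state

    def _step(self, action):
        reward = float(action == self.state)
        self.state = self.np_random.randint(2)
        return self.state, reward, False, {}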

View File

@@ -2,7 +2,6 @@
Task is to copy content from the input tape to
the output tape. http://arxiv.org/abs/1511.07275
"""
import random
import numpy as np
from gym.envs.algorithmic import algorithmic_env
from gym.envs.algorithmic.algorithmic_env import ha
@@ -17,8 +16,7 @@ class CopyEnv(algorithmic_env.AlgorithmicEnv):
self.content = {}
self.target = {}
for i in range(self.total_len):
val = random.randrange(self.base)
val = self.np_random.randint(self.base)
self.content[ha(np.array([i]))] = val
self.target[i] = val
self.total_reward = self.total_len

View File

@@ -3,7 +3,6 @@ Task is to return every second character from the input tape.
http://arxiv.org/abs/1511.07275
"""
import random
import numpy as np
from gym.envs.algorithmic import algorithmic_env
from gym.envs.algorithmic.algorithmic_env import ha
@@ -20,7 +19,7 @@ class DuplicatedInputEnv(algorithmic_env.AlgorithmicEnv):
self.target = {}
copies = int(self.total_len / self.duplication)
for i in range(copies):
val = random.randrange(self.base)
val = self.np_random.randint(self.base)
self.target[i] = val
for d in range(self.duplication):
self.content[ha(np.array([i * self.duplication + d]))] = val

View File

@@ -2,7 +2,6 @@
Task is to copy content multiple-times from the input tape to
the output tape. http://arxiv.org/abs/1511.07275
"""
import random
import numpy as np
from gym.envs.algorithmic import algorithmic_env
from gym.envs.algorithmic.algorithmic_env import ha
@@ -20,10 +19,9 @@ class RepeatCopyEnv(algorithmic_env.AlgorithmicEnv):
self.target = {}
unique = set()
for i in range(self.total_len):
val = random.randrange(self.base)
val = self.np_random.randint(self.base)
self.content[ha(np.array([i]))] = val
self.target[i] = val
self.target[2 * self.total_len - i - 1] = val
self.target[2 * self.total_len + i] = val
self.total_reward = 3.0 * self.total_len + 0.9

View File

@@ -3,7 +3,6 @@ Task is to reverse content over the input tape.
http://arxiv.org/abs/1511.07275
"""
import random
import numpy as np
from gym.envs.algorithmic import algorithmic_env
from gym.envs.algorithmic.algorithmic_env import ha
@@ -21,7 +20,7 @@ class ReverseEnv(algorithmic_env.AlgorithmicEnv):
self.content = {}
self.target = {}
for i in range(self.total_len):
val = random.randrange(self.base)
val = self.np_random.randint(self.base)
self.content[ha(np.array([i]))] = val
self.target[self.total_len - i - 1] = val
self.total_reward = self.total_len + 0.9

View File

@@ -1,4 +1,3 @@
import random
import numpy as np
from gym.envs.algorithmic import algorithmic_env
from gym.envs.algorithmic.algorithmic_env import ha
@@ -17,7 +16,7 @@ class ReversedAdditionEnv(algorithmic_env.AlgorithmicEnv):
for i in range(self.total_len):
vals = []
for k in range(self.rows):
val = random.randrange(self.base)
val = self.np_random.randint(self.base)
self.content[ha(np.array([i, k]))] = val
vals.append(val)
total = sum(vals) + curry
@@ -26,5 +25,3 @@ class ReversedAdditionEnv(algorithmic_env.AlgorithmicEnv):
if curry > 0:
self.target[self.total_len] = curry
self.total_reward = self.total_len

View File

@@ -3,6 +3,7 @@ import os
import gym
from gym import error, spaces
from gym import utils
from gym.utils import seeding
try:
import atari_py
@@ -30,29 +31,42 @@ class AtariEnv(gym.Env, utils.EzPickle):
def __init__(self, game='pong', obs_type='ram'):
utils.EzPickle.__init__(self, game, obs_type)
assert obs_type in ('ram', 'image')
game_path = atari_py.get_game_path(game)
if not os.path.exists(game_path):
raise IOError('You asked for game %s but path %s does not exist'%(game, game_path))
self.ale = atari_py.ALEInterface()
self.ale.loadROM(game_path)
self.game_path = atari_py.get_game_path(game)
if not os.path.exists(self.game_path):
raise IOError('You asked for game %s but path %s does not exist'%(game, self.game_path))
self._obs_type = obs_type
self._action_set = self.ale.getMinimalActionSet()
self.ale = atari_py.ALEInterface()
self.viewer = None
(screen_width,screen_height) = self.ale.getScreenDims()
self._seed()
self.action_space = spaces.Discrete(len(self._action_set))
def _seed(self, seed=None):
self.np_random, seed1 = seeding.np_random(seed)
# Derive a random seed. This gets passed as a uint, but gets
# checked as an int elsewhere, so we need to keep it below
# 2**31.
seed2 = seeding.hash_seed(seed1 + 1) % 2**31
# Empirically, we need to seed before loading the ROM.
self.ale.setInt(b'random_seed', seed2)
self.ale.loadROM(self.game_path)
self._action_set = self.ale.getMinimalActionSet()
self.action_space = spaces.Discrete(len(self._action_set), np_random=self.np_random)
(screen_width,screen_height) = self.ale.getScreenDims()
if self._obs_type == 'ram':
self.observation_space = spaces.Box(low=np.zeros(128), high=np.zeros(128)+255)
self.observation_space = spaces.Box(low=np.zeros(128), high=np.zeros(128)+255, np_random=self.np_random)
elif self._obs_type == 'image':
self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3))
self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3), np_random=self.np_random)
else:
raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))
return [seed1, seed2]
def _step(self, a):
reward = 0.0
action = self._action_set[a]
num_steps = np.random.randint(2, 5)
num_steps = self.np_random.randint(2, 5)
for _ in range(num_steps):
reward += self.ale.act(action)
ob = self._get_obs()
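The seed1/seed2 split above is the commit's recipe for seeding a second, external generator (ALE here; pachi and the Doom engine below). A sketch of the derivation:

from gym.utils import seeding

np_random, seed1 = seeding.np_random(42)      # numpy generator used by gym itself
# hash_seed spreads nearby inputs (seed1, seed1 + 1, ...) across the full
# integer range, so the emulator's stream is not correlated with numpy's.
# ALE reads the value back as a signed 32-bit int, hence the 2**31 cap.
seed2 = seeding.hash_seed(seed1 + 1) % 2**31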

View File

@@ -8,6 +8,7 @@ except ImportError as e:
import numpy as np
import gym
from gym import spaces
from gym.utils import seeding
from six import StringIO
import sys
import six
@@ -71,10 +72,12 @@ class GoState(object):
### Adversary policies ###
def make_random_policy(np_random):
def random_policy(curr_state, prev_state, prev_action):
b = curr_state.board
legal_coords = b.get_legal_coords(curr_state.color)
return _coord_to_action(b, np.random.choice(legal_coords))
return _coord_to_action(b, np_random.choice(legal_coords))
return random_policy
def make_pachi_policy(board, engine_type='uct', threads=1, pachi_timestr=''):
engine = pachi_py.PyPachiEngine(board, engine_type, six.b('threads=%d' % threads))
@@ -122,16 +125,18 @@ class GoEnv(gym.Env):
metadata = {"render.modes": ["human", "ansi"]}
def __init__(self, player_color, opponent, observation_type, illegal_move_mode, board_size):
'''
"""
Args:
player_color: Stone color for the agent. Either 'black' or 'white'
opponent: An opponent policy
observation_type: State encoding
illegal_move_mode: What to do when the agent makes an illegal move. Choices: 'raise' or 'lose'
'''
"""
assert isinstance(board_size, int) and board_size >= 1, 'Invalid board size: {}'.format(board_size)
self.board_size = board_size
self._seed()
colormap = {
'black': pachi_py.BLACK,
'white': pachi_py.WHITE,
@@ -150,17 +155,22 @@ class GoEnv(gym.Env):
assert illegal_move_mode in ['lose', 'raise']
self.illegal_move_mode = illegal_move_mode
# One action for each board position, pass, and resign
self.action_space = spaces.Discrete(self.board_size**2 + 2)
if self.observation_type == 'image3c':
shape = pachi_py.CreateBoard(self.board_size).encode().shape
self.observation_space = spaces.Box(np.zeros(shape), np.ones(shape))
else:
if self.observation_type != 'image3c':
raise error.Error('Unsupported observation type: {}'.format(self.observation_type))
self.reset()
def _seed(self, seed=None):
self.np_random, seed1 = seeding.np_random(seed)
# Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
pachi_py.pachi_srand(seed2)
shape = pachi_py.CreateBoard(self.board_size).encode().shape
self.observation_space = spaces.Box(np.zeros(shape), np.ones(shape), np_random=self.np_random)
# One action for each board position, pass, and resign
self.action_space = spaces.Discrete(self.board_size**2 + 2, np_random=self.np_random)
return [seed1, seed2]
def _reset(self):
self.state = GoState(pachi_py.CreateBoard(self.board_size), pachi_py.BLACK)
@@ -250,7 +260,7 @@ class GoEnv(gym.Env):
def _reset_opponent(self, board):
if self.opponent == 'random':
self.opponent_policy = random_policy
self.opponent_policy = make_random_policy(self.np_random)
elif self.opponent == 'pachi:uct:_2400':
self.opponent_policy = make_pachi_policy(board=board, engine_type=six.b('uct'), pachi_timestr=six.b('_2400')) # TODO: strength as argument
else:

View File

@@ -8,13 +8,14 @@ import gym
from gym import spaces
import numpy as np
from gym import error
from gym.utils import seeding
def make_random_policy(np_random):
def random_policy(state):
possible_moves = HexEnv.get_possible_actions(state)
a = np.random.randint(len(possible_moves))
a = np_random.randint(len(possible_moves))
return possible_moves[a]
return random_policy
class HexEnv(gym.Env):
"""
@@ -46,13 +47,6 @@ class HexEnv(gym.Env):
raise error.Error("player_color must be 'black' or 'white', not {}".format(player_color))
self.opponent = opponent
if isinstance(self.opponent, str):
if opponent == 'random':
self.opponent_policy = random_policy
else:
raise error.Error('Unrecognized opponent policy {}'.format(self.opponent))
else:
self.opponent_policy = opponent
assert observation_type in ['numpy3c']
self.observation_type = observation_type
@@ -60,14 +54,28 @@ class HexEnv(gym.Env):
assert illegal_move_mode in ['lose', 'raise']
self.illegal_move_mode = illegal_move_mode
# One action for each board position and resign
self.action_space = spaces.Discrete(self.board_size ** 2 + 1)
if self.observation_type != 'numpy3c':
raise error.Error('Unsupported observation type: {}'.format(self.observation_type))
self._seed()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
# One action for each board position and resign
self.action_space = spaces.Discrete(self.board_size ** 2 + 1, np_random=self.np_random)
observation = self.reset()
self.observation_space = spaces.Box(np.zeros(observation.shape), np.ones(observation.shape))
self.observation_space = spaces.Box(np.zeros(observation.shape), np.ones(observation.shape), np_random=self.np_random)
# Update the random policy if needed
if isinstance(self.opponent, str):
if self.opponent == 'random':
self.opponent_policy = make_random_policy(self.np_random)
else:
raise error.Error('Unrecognized opponent policy {}'.format(self.opponent))
else:
self.opponent_policy = self.opponent
return [seed]
def _reset(self):
self.state = np.zeros((3, self.board_size, self.board_size))

View File

@@ -6,6 +6,7 @@ from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revolute
import gym
from gym import spaces
from gym.utils import colorize, seeding
# This is a simple 4-joint walker robot environment.
#
@@ -86,12 +87,9 @@ class BipedalWalker(gym.Env):
hardcore = False
def __init__(self):
self._seed()
self.viewer = None
high = np.array([np.inf]*24)
self.action_space = spaces.Box( np.array([-1,-1,-1,-1]), np.array([+1,+1,+1,+1]) )
self.observation_space = spaces.Box(-high, high)
self.world = Box2D.b2World()
self.terrain = None
self.hull = None
@@ -99,6 +97,13 @@ class BipedalWalker(gym.Env):
self.prev_shaping = None
self._reset()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
high = np.array([np.inf]*24)
self.action_space = spaces.Box(np.array([-1,-1,-1,-1]), np.array([+1,+1,+1,+1]), np_random=self.np_random)
self.observation_space = spaces.Box(-high, high, np_random=self.np_random)
return [seed]
def _destroy(self):
if not self.terrain: return
self.world.contactListener = None
@@ -128,11 +133,11 @@ class BipedalWalker(gym.Env):
if state==GRASS and not oneshot:
velocity = 0.8*velocity + 0.01*np.sign(TERRAIN_HEIGHT - y)
if i > TERRAIN_STARTPAD: velocity += np.random.uniform(-1, 1)/SCALE #1
if i > TERRAIN_STARTPAD: velocity += self.np_random.uniform(-1, 1)/SCALE #1
y += velocity
elif state==PIT and oneshot:
counter = np.random.randint(3, 5)
counter = self.np_random.randint(3, 5)
poly = [
(x, y),
(x+TERRAIN_STEP, y),
@@ -162,7 +167,7 @@ class BipedalWalker(gym.Env):
y -= 4*TERRAIN_STEP
elif state==STUMP and oneshot:
counter = np.random.randint(1, 3)
counter = self.np_random.randint(1, 3)
poly = [
(x, y),
(x+counter*TERRAIN_STEP, y),
@@ -178,9 +183,9 @@ class BipedalWalker(gym.Env):
self.terrain.append(t)
elif state==STAIRS and oneshot:
stair_height = +1 if np.random.ranf() > 0.5 else -1
stair_width = np.random.randint(4, 5)
stair_steps = np.random.randint(3, 5)
stair_height = +1 if self.np_random.rand() > 0.5 else -1
stair_width = self.np_random.randint(4, 5)
stair_steps = self.np_random.randint(3, 5)
original_y = y
for s in range(stair_steps):
poly = [
@@ -207,9 +212,9 @@ class BipedalWalker(gym.Env):
self.terrain_y.append(y)
counter -= 1
if counter==0:
counter = np.random.randint(TERRAIN_GRASS/2, TERRAIN_GRASS)
counter = self.np_random.randint(TERRAIN_GRASS/2, TERRAIN_GRASS)
if state==GRASS and hardcore:
state = np.random.randint(1, _STATES_)
state = self.np_random.randint(1, _STATES_)
oneshot = True
else:
state = GRASS
@@ -240,11 +245,11 @@ class BipedalWalker(gym.Env):
# Sorry for the clouds, couldn't resist
self.cloud_poly = []
for i in range(TERRAIN_LENGTH//20):
x = np.random.uniform(0, TERRAIN_LENGTH)*TERRAIN_STEP
x = self.np_random.uniform(0, TERRAIN_LENGTH)*TERRAIN_STEP
y = VIEWPORT_H/SCALE*3/4
poly = [
(x+15*TERRAIN_STEP*math.sin(3.14*2*a/5)+np.random.uniform(0,5*TERRAIN_STEP),
y+ 5*TERRAIN_STEP*math.cos(3.14*2*a/5)+np.random.uniform(0,5*TERRAIN_STEP) )
(x+15*TERRAIN_STEP*math.sin(3.14*2*a/5)+self.np_random.uniform(0,5*TERRAIN_STEP),
y+ 5*TERRAIN_STEP*math.cos(3.14*2*a/5)+self.np_random.uniform(0,5*TERRAIN_STEP) )
for a in range(5) ]
x1 = min( [p[0] for p in poly] )
x2 = max( [p[0] for p in poly] )
@@ -278,7 +283,7 @@ class BipedalWalker(gym.Env):
)
self.hull.color1 = (0.5,0.4,0.9)
self.hull.color2 = (0.3,0.3,0.5)
self.hull.ApplyForceToCenter((np.random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), 0), True)
self.hull.ApplyForceToCenter((self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), 0), True)
self.legs = []
self.joints = []

View File

@@ -7,6 +7,7 @@ from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revolute
import gym
from gym import spaces
from gym.envs.classic_control import rendering
from gym.utils import colorize, seeding
import pyglet
from pyglet.gl import *
@@ -106,8 +107,7 @@ class CarRacing(gym.Env):
}
def __init__(self):
self.action_space = spaces.Box( np.array([-1,0,0]), np.array([+1,+1,+1]) ) # steer, gas, brake
self.observation_space = spaces.Box(low=0, high=255, shape=(STATE_H, STATE_W, 3))
self._seed()
self.world = Box2D.b2World((0,0), contactListener=FrictionDetector(self))
self.viewer = None
self.invisible_state_window = None
@@ -117,6 +117,12 @@ class CarRacing(gym.Env):
self.reward = 0.0
self.prev_reward = 0.0
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
self.action_space = spaces.Box( np.array([-1,0,0]), np.array([+1,+1,+1]), np_random=self.np_random) # steer, gas, brake
self.observation_space = spaces.Box(low=0, high=255, shape=(STATE_H, STATE_W, 3), np_random=self.np_random)
return [seed]
def _destroy(self):
if not self.road: return
for t in self.road:
@@ -130,8 +136,8 @@ class CarRacing(gym.Env):
# Create checkpoints
checkpoints = []
for c in range(CHECKPOINTS):
alpha = 2*math.pi*c/CHECKPOINTS + np.random.uniform(0, 2*math.pi*1/CHECKPOINTS)
rad = np.random.uniform(TRACK_RAD/3, TRACK_RAD)
alpha = 2*math.pi*c/CHECKPOINTS + self.np_random.uniform(0, 2*math.pi*1/CHECKPOINTS)
rad = self.np_random.uniform(TRACK_RAD/3, TRACK_RAD)
if c==0:
alpha = 0
rad = 1.5*TRACK_RAD

View File

@@ -6,6 +6,7 @@ from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revolute
import gym
from gym import spaces
from gym.utils import seeding
# Rocket trajectory optimization is a classic topic in Optimal Control.
#
@@ -76,12 +77,9 @@ class LunarLander(gym.Env):
}
def __init__(self):
self._seed()
self.viewer = None
high = np.array([np.inf]*8) # useful range is -1 .. +1
self.action_space = spaces.Discrete(4) # nop, fire left engine, main engine, right engine
self.observation_space = spaces.Box(-high, high)
self.world = Box2D.b2World()
self.moon = None
self.lander = None
@@ -90,6 +88,16 @@ class LunarLander(gym.Env):
self.prev_reward = None
self._reset()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
# useful range is -1 .. +1
high = np.array([np.inf]*8)
# nop, fire left engine, main engine, right engine
self.action_space = spaces.Discrete(4, np_random=self.np_random)
self.observation_space = spaces.Box(-high, high, np_random=self.np_random)
return [seed]
def _destroy(self):
if not self.moon: return
self.world.contactListener = None
@@ -112,7 +120,7 @@ class LunarLander(gym.Env):
# terrain
CHUNKS = 11
height = np.random.uniform(0, H/2, size=(CHUNKS+1,) )
height = self.np_random.uniform(0, H/2, size=(CHUNKS+1,) )
chunk_x = [W/(CHUNKS-1)*i for i in range(CHUNKS)]
self.helipad_x1 = chunk_x[CHUNKS//2-1]
self.helipad_x2 = chunk_x[CHUNKS//2+1]
@@ -153,8 +161,8 @@ class LunarLander(gym.Env):
self.lander.color1 = (0.5,0.4,0.9)
self.lander.color2 = (0.3,0.3,0.5)
self.lander.ApplyForceToCenter( (
np.random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM),
np.random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM)
self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM),
self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM)
), True)
self.legs = []
@@ -222,7 +230,7 @@ class LunarLander(gym.Env):
# Engines
tip = (math.sin(self.lander.angle), math.cos(self.lander.angle))
side = (-tip[1], tip[0]);
dispersion = [np.random.uniform(-1.0, +1.0) / SCALE for _ in range(2)]
dispersion = [self.np_random.uniform(-1.0, +1.0) / SCALE for _ in range(2)]
if action==2: # Main engine
ox = tip[0]*(4/SCALE + 2*dispersion[0]) + side[0]*dispersion[1] # 4 is move a bit downwards, +-2 for randomness
oy = -tip[1]*(4/SCALE + 2*dispersion[0]) - side[1]*dispersion[1]
@@ -368,4 +376,3 @@ if __name__=="__main__":
env.render()
if done: break

View File

@@ -1,5 +1,6 @@
"""classic Acrobot task"""
from gym import core, spaces
from gym.utils import seeding
import numpy as np
import time
@@ -78,14 +79,20 @@ class AcrobotEnv(core.Env):
actions_num = 3
def __init__(self):
self.viewer = None
self._seed()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
high = np.array([np.pi, np.pi, self.MAX_VEL_1, self.MAX_VEL_2])
low = -high
self.observation_space = spaces.Box(low, high)
self.action_space = spaces.Discrete(3)
self.viewer = None
self.observation_space = spaces.Box(low, high, np_random=self.np_random)
self.action_space = spaces.Discrete(3, np_random=self.np_random)
return [seed]
def _reset(self):
self.state = np.random.uniform(low=-0.1, high=0.1, size=(4,))
self.state = self.np_random.uniform(low=-0.1, high=0.1, size=(4,))
return self.state
def _step(self, a):
@@ -94,7 +101,7 @@ class AcrobotEnv(core.Env):
# Add noise to the force action
if self.torque_noise_max > 0:
torque += np.random.uniform(-self.torque_noise_max, self.torque_noise_max)
torque += self.np_random.uniform(-self.torque_noise_max, self.torque_noise_max)
# Now, augment the state with our force action so it can be passed to
# _dsdt

View File

@@ -7,6 +7,7 @@ import logging
import math
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
logger = logging.getLogger(__name__)
@@ -30,15 +31,20 @@ class CartPoleEnv(gym.Env):
# Angle at which to fail the episode
self.theta_threshold_radians = 12 * 2 * math.pi / 360
self.x_threshold = 2.4
self._seed()
self.reset()
self.viewer = None
self.steps_beyond_done = None
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
# Angle limit set to 2 * theta_threshold_radians so failing observation is still within bounds
high = np.array([self.x_threshold, np.inf, self.theta_threshold_radians * 2, np.inf])
self.action_space = spaces.Discrete(2)
self.observation_space = spaces.Box(-high, high)
self.steps_beyond_done = None
self.action_space = spaces.Discrete(2, np_random=self.np_random)
self.observation_space = spaces.Box(-high, high, np_random=self.np_random)
return [seed]
def _step(self, action):
action = action
@@ -77,7 +83,7 @@ class CartPoleEnv(gym.Env):
return np.array(self.state), reward, done, {}
def _reset(self):
self.state = np.random.uniform(low=-0.05, high=0.05, size=(4,))
self.state = self.np_random.uniform(low=-0.05, high=0.05, size=(4,))
self.steps_beyond_done = None
return np.array(self.state)

View File

@@ -5,6 +5,7 @@ https://webdocs.cs.ualberta.ca/~sutton/MountainCar/MountainCar1.cp
import math
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
class MountainCarEnv(gym.Env):
@@ -14,10 +15,6 @@ class MountainCarEnv(gym.Env):
}
def __init__(self):
self.reset()
self.viewer = None
self.reset()
self.min_position = -1.2
self.max_position = 0.6
self.max_speed = 0.07
@@ -26,8 +23,16 @@ class MountainCarEnv(gym.Env):
self.low = np.array([self.min_position, -self.max_speed])
self.high = np.array([self.max_position, self.max_speed])
self.action_space = spaces.Discrete(3)
self.observation_space = spaces.Box(self.low, self.high)
self.viewer = None
self._seed()
self.reset()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
self.action_space = spaces.Discrete(3, np_random=self.np_random)
self.observation_space = spaces.Box(self.low, self.high, np_random=self.np_random)
return [seed]
def _step(self, action):
# action = np.sign((self.state[0]+math.pi/2) * self.state[1])+1
@@ -48,7 +53,7 @@ class MountainCarEnv(gym.Env):
return np.array(self.state), reward, done, {}
def _reset(self):
self.state = np.array([np.random.uniform(low=-0.6, high=-0.4), 0])
self.state = np.array([self.np_random.uniform(low=-0.6, high=-0.4), 0])
return np.array(self.state)
def _height(self, xs):

View File

@@ -1,5 +1,6 @@
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
from os import path
@@ -14,10 +15,15 @@ class PendulumEnv(gym.Env):
self.max_torque=2.
self.dt=.05
self.viewer = None
self._seed()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
high = np.array([1., 1., self.max_speed])
self.action_space = spaces.Box(low=-self.max_torque, high=self.max_torque, shape=(1,))
self.observation_space = spaces.Box(low=-high, high=high)
self.action_space = spaces.Box(low=-self.max_torque, high=self.max_torque, shape=(1,), np_random=self.np_random)
self.observation_space = spaces.Box(low=-high, high=high, np_random=self.np_random)
return [seed]
def _step(self,u):
th, thdot = self.state # th := theta
@@ -40,7 +46,7 @@ class PendulumEnv(gym.Env):
def _reset(self):
high = np.array([np.pi, 1])
self.state = np.random.uniform(low=-high, high=high)
self.state = self.np_random.uniform(low=-high, high=high)
self.last_u = None
return self._get_obs()

View File

@@ -6,6 +6,7 @@ import numpy as np
from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
from gym import error, spaces
from gym.envs.doom import doom_env
from gym.utils import seeding
logger = logging.getLogger(__name__)
@@ -49,10 +50,19 @@ class DoomBasicEnv(doom_env.DoomEnv):
self.game.set_doom_map('map01')
self.screen_height = 480 # Must match .cfg file
self.screen_width = 640 # Must match .cfg file
# 3 allowed actions [0, 9, 10] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
self.game.set_window_visible(False)
self.viewer = None
self.game.init()
self.game.new_episode()
self._seed()
def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed)
# Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
self.game.set_seed(seed2)
# 3 allowed actions [0, 9, 10] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]

View File

@@ -6,6 +6,7 @@ import numpy as np
from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
from gym import error, spaces
from gym.envs.doom import doom_env
from gym.utils import seeding
logger = logging.getLogger(__name__)
@@ -50,10 +51,20 @@ class DoomCorridorEnv(doom_env.DoomEnv):
self.game.set_doom_scenario_path(self.loader.get_scenario_path('deadly_corridor.wad'))
self.screen_height = 480 # Must match .cfg file
self.screen_width = 640 # Must match .cfg file
# action indexes are [0, 9, 10, 12, 13, 14]
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 6))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
self.game.set_window_visible(False)
self.viewer = None
self.game.init()
self.game.new_episode()
self._seed()
def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed)
# Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
self.game.set_seed(seed2)
# action indexes are [0, 9, 10, 12, 13, 14]
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 6), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]

View File

@@ -5,6 +5,7 @@ import numpy as np
from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
from gym import error, spaces
from gym.utils import seeding
from gym.envs.doom import doom_env
logger = logging.getLogger(__name__)
@@ -40,10 +41,20 @@ class DoomDeathmatchEnv(doom_env.DoomEnv):
self.game.set_doom_scenario_path(self.loader.get_scenario_path('deathmatch.wad'))
self.screen_height = 480 # Must match .cfg file
self.screen_width = 640 # Must match .cfg file
# 41 allowed actions (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 37 + [[0, 10, 0]] * 5))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
self.game.set_window_visible(False)
self.viewer = None
self.game.init()
self.game.new_episode()
self._seed()
def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed)
# Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
self.game.set_seed(seed2)
# 41 allowed actions (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 37 + [[0, 10, 0]] * 5), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]

View File

@@ -6,6 +6,7 @@ import numpy as np
from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
from gym import error, spaces
from gym.envs.doom import doom_env
from gym.utils import seeding
logger = logging.getLogger(__name__)
@@ -49,10 +50,20 @@ class DoomDefendCenterEnv(doom_env.DoomEnv):
self.game.set_doom_scenario_path(self.loader.get_scenario_path('defend_the_center.wad'))
self.screen_height = 480 # Must match .cfg file
self.screen_width = 640 # Must match .cfg file
# 3 allowed actions [0, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
self.game.set_window_visible(False)
self.viewer = None
self.game.init()
self.game.new_episode()
self._seed()
def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed)
# Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
self.game.set_seed(seed2)
# 3 allowed actions [0, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]

View File

@@ -6,6 +6,7 @@ import numpy as np
from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
from gym import error, spaces
from gym.envs.doom import doom_env
from gym.utils import seeding
logger = logging.getLogger(__name__)
@@ -49,10 +50,19 @@ class DoomDefendLineEnv(doom_env.DoomEnv):
self.game.set_doom_scenario_path(self.loader.get_scenario_path('defend_the_line.wad'))
self.screen_height = 480 # Must match .cfg file
self.screen_width = 640 # Must match .cfg file
# 3 allowed actions [0, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
self.game.set_window_visible(False)
self.viewer = None
self._seed()
self.game.init()
self.game.new_episode()
def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed)
# Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
self.game.set_seed(seed2)
# 3 allowed actions [0, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]

View File

@@ -6,6 +6,7 @@ import numpy as np
from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
from gym import error, spaces
from gym.envs.doom import doom_env
from gym.utils import seeding
logger = logging.getLogger(__name__)
@@ -47,10 +48,20 @@ class DoomHealthGatheringEnv(doom_env.DoomEnv):
self.game.set_doom_map('map01')
self.screen_height = 480 # Must match .cfg file
self.screen_width = 640 # Must match .cfg file
# 3 allowed actions [12, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
self.game.set_window_visible(False)
self.viewer = None
self.game.init()
self.game.new_episode()
self._seed()
def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed)
# Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
self.game.set_seed(seed2)
# 3 allowed actions [12, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]

View File

@@ -6,6 +6,7 @@ import numpy as np
from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
from gym import error, spaces
from gym.envs.doom import doom_env
from gym.utils import seeding
logger = logging.getLogger(__name__)
@@ -46,10 +47,20 @@ class DoomMyWayHomeEnv(doom_env.DoomEnv):
self.game.set_doom_scenario_path(self.loader.get_scenario_path('my_way_home.wad'))
self.screen_height = 480 # Must match .cfg file
self.screen_width = 640 # Must match .cfg file
# 3 allowed actions [12, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
self.game.set_window_visible(False)
self.viewer = None
self.game.init()
self.game.new_episode()
self._seed()
def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed)
# Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
self.game.set_seed(seed2)
# 3 allowed actions [12, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]

View File

@@ -6,6 +6,7 @@ import numpy as np
from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
from gym import error, spaces
from gym.envs.doom import doom_env
from gym.utils import seeding
logger = logging.getLogger(__name__)
@@ -51,10 +52,20 @@ class DoomPredictPositionEnv(doom_env.DoomEnv):
self.game.set_doom_map('map01')
self.screen_height = 480 # Must match .cfg file
self.screen_width = 640 # Must match .cfg file
# 3 allowed actions [0, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
self.game.set_window_visible(False)
self.viewer = None
self.game.init()
self.game.new_episode()
self._seed()
def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed)
# Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
self.game.set_seed(seed2)
# 3 allowed actions [0, 13, 14] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]

View File

@@ -6,6 +6,7 @@ import numpy as np
from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
from gym import error, spaces
from gym.envs.doom import doom_env
from gym.utils import seeding
logger = logging.getLogger(__name__)
@@ -44,10 +45,21 @@ class DoomTakeCoverEnv(doom_env.DoomEnv):
self.game.set_doom_map('map01')
self.screen_height = 480 # Must match .cfg file
self.screen_width = 640 # Must match .cfg file
# 2 allowed actions [9, 10] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 2))
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
self.game.set_window_visible(False)
self.viewer = None
self.game.init()
self.game.new_episode()
self._seed()
def _seed(self, seed=None):
np_random, seed1 = seeding.np_random(seed)
# Derive a random seed.
seed2 = seeding.hash_seed(seed1 + 1) % 2**32
self.game.set_seed(seed2)
# 2 allowed actions [9, 10] (must match .cfg file)
self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 2), np_random=np_random)
self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3), np_random=np_random)
return [seed1, seed2]

View File

@@ -36,8 +36,8 @@ class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
])
def reset_model(self):
qpos = self.init_qpos + np.random.uniform(size=self.model.nq,low=-.1,high=.1)
qvel = self.init_qvel + np.random.randn(self.model.nv) * .1
qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq,low=-.1,high=.1)
qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1
self.set_state(qpos, qvel)
return self._get_obs()

View File

@@ -25,8 +25,8 @@ class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle):
])
def reset_model(self):
qpos = self.init_qpos + np.random.uniform(low=-.1, high=.1, size=self.model.nq)
qvel = self.init_qvel + np.random.randn(self.model.nv) * .1
qpos = self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq)
qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1
self.set_state(qpos, qvel)
return self._get_obs()

View File

@@ -28,8 +28,8 @@ class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
])
def reset_model(self):
qpos = self.init_qpos + np.random.uniform(low=-.005, high=.005, size=self.model.nq)
qvel = self.init_qvel + np.random.uniform(low=-.005, high=.005, size=self.model.nv)
qpos = self.init_qpos + self.np_random.uniform(low=-.005, high=.005, size=self.model.nq)
qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
self.set_state(qpos, qvel)
return self._get_obs()

View File

@@ -39,8 +39,8 @@ class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def reset_model(self):
c = 0.01
self.set_state(
self.init_qpos + np.random.uniform(low=-c, high=c, size=self.model.nq),
self.init_qvel + np.random.uniform(low=-c, high=c, size=self.model.nv,)
self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv,)
)
return self._get_obs()

View File

@@ -40,8 +40,8 @@ class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def reset_model(self):
c = 0.01
self.set_state(
self.init_qpos + np.random.uniform(low=-c, high=c, size=self.model.nq),
self.init_qvel + np.random.uniform(low=-c, high=c, size=self.model.nv,)
self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv,)
)
return self._get_obs()

View File

@@ -31,8 +31,8 @@ class InvertedDoublePendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def reset_model(self):
self.set_state(
self.init_qpos + np.random.uniform(low=-.1, high=.1, size=self.model.nq),
self.init_qvel + np.random.randn(self.model.nv) * .1
self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq),
self.init_qvel + self.np_random.randn(self.model.nv) * .1
)
return self._get_obs()

View File

@@ -16,8 +16,8 @@ class InvertedPendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
return ob, reward, done, {}
def reset_model(self):
qpos = self.init_qpos + np.random.uniform(size=self.model.nq, low=-0.01, high=0.01)
qvel = self.init_qvel + np.random.uniform(size=self.model.nv, low=-0.01, high=0.01)
qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq, low=-0.01, high=0.01)
qvel = self.init_qvel + self.np_random.uniform(size=self.model.nv, low=-0.01, high=0.01)
self.set_state(qpos, qvel)
return self._get_obs()

View File

@@ -1,6 +1,7 @@
import os
from gym import error, spaces
from gym.utils import seeding
import numpy as np
from os import path
import gym
@@ -13,9 +14,7 @@ except ImportError as e:
raise error.DependencyNotInstalled("{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(e))
class MujocoEnv(gym.Env):
"""
Superclass of MuJoCo environments.
"""Superclass for all MuJoCo environments.
"""
def __init__(self, model_path, frame_skip):
@@ -40,15 +39,20 @@ class MujocoEnv(gym.Env):
observation, _reward, done, _info = self._step(np.zeros(self.model.nu))
assert not done
self.obs_dim = observation.size
self._seed()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
bounds = self.model.actuator_ctrlrange.copy()
low = bounds[:, 0]
high = bounds[:, 1]
self.action_space = spaces.Box(low, high)
self.action_space = spaces.Box(low, high, np_random=self.np_random)
high = np.inf*np.ones(self.obs_dim)
low = -high
self.observation_space = spaces.Box(low, high)
self.observation_space = spaces.Box(low, high, np_random=self.np_random)
return [seed]
# methods to override:
# ----------------------------

View File

@@ -21,12 +21,12 @@ class ReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
self.viewer.cam.trackbodyid=0
def reset_model(self):
qpos = np.random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
while True:
self.goal = np.random.uniform(low=-.2, high=.2, size=2)
self.goal = self.np_random.uniform(low=-.2, high=.2, size=2)
if np.linalg.norm(self.goal) < 2: break
qpos[-2:] = self.goal
qvel = self.init_qvel + np.random.uniform(low=-.005, high=.005, size=self.model.nv)
qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
qvel[-2:] = 0
self.set_state(qpos, qvel)
return self._get_obs()

View File

@@ -26,7 +26,7 @@ class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def reset_model(self):
self.set_state(
self.init_qpos + np.random.uniform(low=-.1, high=.1, size=self.model.nq),
self.init_qvel + np.random.uniform(low=-.1, high=.1, size=self.model.nv)
self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq),
self.init_qvel + self.np_random.uniform(low=-.1, high=.1, size=self.model.nv)
)
return self._get_obs()

View File

@@ -28,8 +28,8 @@ class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def reset_model(self):
self.set_state(
self.init_qpos + np.random.uniform(low=-.005, high=.005, size=self.model.nq),
self.init_qvel + np.random.uniform(low=-.005, high=.005, size=self.model.nv)
self.init_qpos + self.np_random.uniform(low=-.005, high=.005, size=self.model.nq),
self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
)
return self._get_obs()

View File

@@ -24,6 +24,7 @@ class EnvSpec(object):
trials (int): The number of trials to average reward over
reward_threshold (Optional[int]): The reward threshold before the task is considered solved
kwargs (dict): The kwargs to pass to the environment class
nondeterministic (bool): Whether this environment is non-deterministic even after seeding
Attributes:
id (str): The official environment ID
@@ -31,7 +32,7 @@ class EnvSpec(object):
trials (int): The number of trials run in official evaluation
"""
def __init__(self, id, entry_point=None, timestep_limit=1000, trials=100, reward_threshold=None, kwargs=None):
def __init__(self, id, entry_point=None, timestep_limit=1000, trials=100, reward_threshold=None, kwargs=None, nondeterministic=False):
self.id = id
# Evaluation parameters
self.timestep_limit = timestep_limit
@@ -46,6 +47,7 @@ class EnvSpec(object):
self._env_name = match.group(1)
self._entry_point = entry_point
self._kwargs = {} if kwargs is None else kwargs
self.nondeterministic = nondeterministic
def make(self):
"""Instantiates an instance of the environment with appropriate kwargs"""

View File

@@ -0,0 +1,77 @@
import numpy as np
from nose2 import tools
import os
import logging
logger = logging.getLogger(__name__)
import gym
from gym import envs
specs = [spec for spec in envs.registry.all() if spec._entry_point is not None]
@tools.params(*specs)
def test_env(spec):
# Skip mujoco tests for pull request CI
skip_mujoco = not (os.environ.get('MUJOCO_KEY_BUNDLE') or os.path.exists(os.path.expanduser('~/.mujoco')))
if skip_mujoco and spec._entry_point.startswith('gym.envs.mujoco:'):
return
# TODO(jonas 2016-05-11): Re-enable these tests after fixing box2d-py
if spec._entry_point.startswith('gym.envs.box2d:'):
logger.warn("Skipping tests for box2d env {}".format(spec._entry_point))
return
env1 = spec.make()
env1.seed(0)
action_samples1 = [env1.action_space.sample() for i in range(4)]
observation_samples1 = [env1.observation_space.sample() for i in range(4)]
initial_observation1 = env1.reset()
step_responses1 = [env1.step(action) for action in action_samples1]
env1.close()
env2 = spec.make()
env2.seed(0)
action_samples2 = [env2.action_space.sample() for i in range(4)]
observation_samples2 = [env2.observation_space.sample() for i in range(4)]
initial_observation2 = env2.reset()
step_responses2 = [env2.step(action) for action in action_samples2]
env2.close()
for i, (action_sample1, action_sample2) in enumerate(zip(action_samples1, action_samples2)):
assert np.array_equal(action_sample1, action_sample2), '[{}] action_sample1: {}, action_sample2: {}'.format(i, action_sample1, action_sample2)
for i, (observation_sample1, observation_sample2) in enumerate(zip(observation_samples1, observation_samples2)):
# Allows for NaNs
np.testing.assert_array_equal(observation_sample1, observation_sample2)
# Don't check rollout equality if it's a nondeterministic
# environment.
if spec.nondeterministic:
return
assert np.array_equal(initial_observation1, initial_observation2), 'initial_observation1: {}, initial_observation2: {}'.format(initial_observation1, initial_observation2)
for i, ((o1, r1, d1, i1), (o2, r2, d2, i2)) in enumerate(zip(step_responses1, step_responses2)):
assert_equals(o1, o2, '[{}] '.format(i))
assert r1 == r2, '[{}] r1: {}, r2: {}'.format(i, r1, r2)
assert d1 == d2, '[{}] d1: {}, d2: {}'.format(i, d1, d2)
# Go returns a Pachi game board in info, which doesn't
# properly check equality. For now, we hack around this by
# just skipping Go.
if spec.id not in ['Go9x9-v0', 'Go19x19-v0']:
assert_equals(i1, i2, '[{}] '.format(i))
def assert_equals(a, b, prefix=''):
assert type(a) == type(b), "{}Differing types: {} and {}".format(prefix, a, b)
if isinstance(a, dict):
assert list(a.keys()) == list(b.keys()), "{}Key sets differ: {} and {}".format(prefix, a, b)
for k in a.keys():
v_a = a[k]
v_b = b[k]
assert_equals(v_a, v_b, prefix)
elif isinstance(a, np.ndarray):
np.testing.assert_array_equal(a, b)
else:
assert a == b

View File

@@ -15,7 +15,7 @@ specs = [spec for spec in envs.registry.all() if spec._entry_point is not None]
@tools.params(*specs)
def test_env(spec):
# Skip mujoco tests for pull request CI
skip_mujoco = not os.environ.get('MUJOCO_KEY_BUNDLE')
skip_mujoco = not (os.environ.get('MUJOCO_KEY_BUNDLE') or os.path.exists(os.path.expanduser('~/.mujoco')))
if skip_mujoco and spec._entry_point.startswith('gym.envs.mujoco:'):
return

View File

@@ -1,6 +1,6 @@
import gym
import random
from gym import spaces
from gym.utils import seeding
def cmp(a, b):
return (a > b) - (a < b)
@@ -9,12 +9,12 @@ def cmp(a, b):
deck = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 10, 10]
def draw_card():
return random.choice(deck)
def draw_card(np_random):
return np_random.choice(deck)
def draw_hand():
return [draw_card(), draw_card()]
def draw_hand(np_random):
return [draw_card(np_random), draw_card(np_random)]
def usable_ace(hand): # Does this hand have a usable ace?
@@ -71,20 +71,27 @@ class BlackjackEnv(gym.Env):
https://webdocs.cs.ualberta.ca/~sutton/book/the-book.html
"""
def __init__(self, natural=False):
self.action_space = spaces.Discrete(2)
self.observation_space = spaces.Tuple((spaces.Discrete(32),
spaces.Discrete(11),
spaces.Discrete(2)))
self._seed()
# Flag to payout 1.5 on a "natural" blackjack win, like casino rules
# Ref: http://www.bicyclecards.com/how-to-play/blackjack/
self.natural = natural
# Start the first game
self._reset()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
self.action_space = spaces.Discrete(2, np_random=self.np_random)
self.observation_space = spaces.Tuple((
spaces.Discrete(32, np_random=self.np_random),
spaces.Discrete(11, np_random=self.np_random),
spaces.Discrete(2, np_random=self.np_random)))
return [seed]
def _step(self, action):
assert(self.action_space.contains(action))
if action: # hit: add a card to players hand and return
self.player.append(draw_card())
self.player.append(draw_card(self.np_random))
if is_bust(self.player):
done = True
reward = -1
@@ -94,7 +101,7 @@ class BlackjackEnv(gym.Env):
else: # stick: play out the dealers hand, and score
done = True
while sum_hand(self.dealer) < 17:
self.dealer.append(draw_card())
self.dealer.append(draw_card(self.np_random))
reward = cmp(score(self.player), score(self.dealer))
if self.natural and is_natural(self.player) and reward == 1:
reward = 1.5
@@ -104,6 +111,6 @@ class BlackjackEnv(gym.Env):
return (sum_hand(self.player), self.dealer[0], usable_ace(self.player))
def _reset(self):
self.dealer = draw_hand()
self.player = draw_hand()
self.dealer = draw_hand(self.np_random)
self.player = draw_hand(self.np_random)
return self._get_obs()

View File

@@ -1,15 +1,15 @@
from gym import Env
from gym import spaces
from gym import Env, spaces
from gym.utils import seeding
import numpy as np
def categorical_sample(prob_n):
def categorical_sample(prob_n, np_random):
"""
Sample from categorical distribution
Each row specifies class probabilities
"""
prob_n = np.asarray(prob_n)
csprob_n = np.cumsum(prob_n)
return (csprob_n > np.random.rand()).argmax()
return (csprob_n > np_random.rand()).argmax()
class DiscreteEnv(Env):
@@ -28,24 +28,27 @@ class DiscreteEnv(Env):
"""
def __init__(self, nS, nA, P, isd):
self.action_space = spaces.Discrete(nA)
self.observation_space = spaces.Discrete(nS)
self.nA = nA
self.P = P
self.isd = isd
self.lastaction = None  # for rendering
self.nS = nS
self.nA = nA
@property
def nS(self):
return self.observation_space.n
self._seed()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
self.action_space = spaces.Discrete(self.nA, np_random=self.np_random)
self.observation_space = spaces.Discrete(self.nS, np_random=self.np_random)
return [seed]
def _reset(self):
self.s = categorical_sample(self.isd)
self.s = categorical_sample(self.isd, self.np_random)
return self.s
def _step(self, a):
transitions = self.P[self.s][a]
i = categorical_sample([t[0] for t in transitions])
i = categorical_sample([t[0] for t in transitions], self.np_random)
p, s, r, d = transitions[i]
self.s = s
self.lastaction = a
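A worked sketch (not part of the diff) of the inverse-CDF trick categorical_sample uses: draw u uniformly from [0, 1) and take the first index whose cumulative probability exceeds u.

import numpy as np

prob_n = np.asarray([0.2, 0.5, 0.3])
csprob_n = np.cumsum(prob_n)          # [0.2, 0.7, 1.0]
rng = np.random.RandomState(0)
u = rng.rand()
index = int((csprob_n > u).argmax())  # 0, 1, or 2, with the given probabilities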

View File

@@ -111,7 +111,7 @@ class FrozenLakeEnv(discrete.DiscreteEnv):
rew = float(newletter == b'G')
li.append((1.0, newstate, rew, done))
super(FrozenLakeEnv, self).__init__(nrow * ncol, 4, P, isd)
super(FrozenLakeEnv, self).__init__(nS, nA, P, isd)
def _render(self, mode='human', close=False):
if close:

View File

@@ -1,7 +1,6 @@
import gym
import random
from gym import spaces
from gym.utils import seeding
class NChainEnv(gym.Env):
"""n-Chain environment
@@ -27,13 +26,18 @@ class NChainEnv(gym.Env):
self.slip = slip # probability of 'slipping' an action
self.small = small # payout for 'backwards' action
self.large = large # payout at end of chain for 'forwards' action
self.action_space = spaces.Discrete(2)
self.observation_space = spaces.Discrete(n)
self.state = 0 # Start at beginning of the chain
self._seed()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
self.action_space = spaces.Discrete(2, np_random=self.np_random)
self.observation_space = spaces.Discrete(self.n, np_random=self.np_random)
return [seed]
def _step(self, action):
assert(self.action_space.contains(action))
if random.random() < self.slip:
if self.np_random.rand() < self.slip:
action = not action # agent slipped, reverse action taken
if action: # 'backwards': go back to the beginning, get small reward
reward = self.small
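The slip check above is just a Bernoulli trial; a quick sketch (not part of the diff) of why seeding makes it reproducible:

import numpy as np

slip = 0.2
rng = np.random.RandomState(7)
slips = [rng.rand() < slip for _ in range(10000)]
print(sum(slips) / len(slips))  # roughly 0.2, and identical on every run with seed 7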

View File

@@ -2,6 +2,7 @@ import numpy as np
import gym
from gym import spaces
from gym.utils import seeding
class RouletteEnv(gym.Env):
@@ -17,8 +18,15 @@ class RouletteEnv(gym.Env):
"""
def __init__(self, spots=37):
self.n = spots + 1
self.action_space = spaces.Discrete(self.n)
self.observation_space = spaces.Discrete(1)
self._seed()
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
self.action_space = spaces.Discrete(self.n, np_random=self.np_random)
self.observation_space = spaces.Discrete(1, np_random=self.np_random)
return [seed]
def _step(self, action):
assert(action >= 0 and action < self.n)
@@ -27,7 +35,7 @@ class RouletteEnv(gym.Env):
return 0, 0, True, {}
# N.B. np.random.randint draws from [A, B) while random.randint draws from [A,B]
val = np.random.randint(0, self.n - 1)
val = self.np_random.randint(0, self.n - 1)
if val == action == 0:
reward = self.n - 2.0
elif val != 0 and action != 0 and val % 2 == action % 2:
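The N.B. above is worth spelling out, since mixing the two conventions is an easy off-by-one. A quick check (not part of the diff):

import random
import numpy as np

rng = np.random.RandomState(0)
assert all(0 <= rng.randint(0, 37) <= 36 for _ in range(1000))     # numpy: [low, high)
assert all(0 <= random.randint(0, 36) <= 36 for _ in range(1000))  # stdlib: [low, high]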

View File

@@ -84,9 +84,6 @@ class TaxiEnv(discrete.DiscreteEnv):
isd /= isd.sum()
discrete.DiscreteEnv.__init__(self, nS, nA, P, isd)
self.observation_space = spaces.Discrete(500)
self.action_space = spaces.Discrete(6)
def encode(self, taxirow, taxicol, passloc, destidx):
# (5) 5, 5, 4
i = taxirow

View File

@@ -12,8 +12,14 @@ class UnregisteredEnv(Error):
pass
class DeprecatedEnv(Error):
"""Raised when the user requests an env from the registry with an older version
number than the latest env with the same name.
"""Raised when the user requests an env from the registry with an
older version number than the latest env with the same name.
"""
pass
class UnseedableEnv(Error):
"""Raised when the user tries to seed an env that does not support
seeding.
"""
pass

View File

@@ -10,7 +10,7 @@ import weakref
from gym import error, version
from gym.monitoring import stats_recorder, video_recorder
from gym.utils import atomic_write, closer
from gym.utils import atomic_write, closer, seeding
logger = logging.getLogger(__name__)
@@ -83,8 +83,9 @@ class Monitor(object):
self.enabled = False
self.episode_id = 0
self._monitor_id = None
self.seeds = None
def start(self, directory, video_callable=None, force=False, resume=False):
def start(self, directory, video_callable=None, force=False, resume=False, seed=None):
"""Start monitoring.
Args:
@@ -92,6 +93,7 @@ class Monitor(object):
video_callable (Optional[function]): function that takes in the index of the episode and outputs a boolean, indicating whether we should record a video on this episode. The default (when video_callable is None) is to record on perfect-cube episode indices.
force (bool): Clear out existing training data from this directory (by deleting every file prefixed with "openaigym.").
resume (bool): Retain the training data already in this directory, which will be merged with our new data
seed (Optional[int]): The seed to run this environment with. By default, a random seed will be chosen.
"""
if self.env.spec is None:
logger.warn("Trying to monitor an environment which has no 'spec' set. This usually means you did not create it via 'gym.make', and is recommended only for advanced users.")
@@ -116,6 +118,8 @@ class Monitor(object):
You should use a unique directory for each training run, or use 'force=True' to automatically clear previous monitor files.'''.format(directory, ', '.join(training_manifests[:5])))
self._monitor_id = monitor_closer.register(self)
self.enabled = True
self.directory = os.path.abspath(directory)
# We use the 'openai-gym' prefix to determine if a file is
@@ -126,7 +130,9 @@ class Monitor(object):
self.configure(video_callable=video_callable)
if not os.path.exists(directory):
os.mkdir(directory)
self._monitor_id = monitor_closer.register(self)
seeds = self.env.seed(seed)
self.seeds = seeds
def flush(self):
"""Flush all relevant monitor information to disk."""
@@ -146,6 +152,7 @@ class Monitor(object):
'videos': [(os.path.basename(v), os.path.basename(m))
for v, m in self.videos],
'env_info': self._env_info(),
'seeds': self.seeds,
}, f)
def close(self):
@@ -249,13 +256,12 @@ class Monitor(object):
return self.video_callable(self.episode_id)
def _env_info(self):
if self.env.spec:
return {
'env_id': self.env.spec.id,
env_info = {
'gym_version': version.VERSION,
}
else:
return {}
if self.env.spec:
env_info['env_id'] = self.env.spec.id
return env_info
def __del__(self):
# Make sure we've closed up shop when garbage collecting
@@ -274,6 +280,8 @@ def load_results(training_dir):
# Load up stats + video files
stats_files = []
videos = []
main_seeds = []
seeds = []
env_infos = []
for manifest in manifests:
@@ -284,6 +292,13 @@ def load_results(training_dir):
videos += [(os.path.join(training_dir, v), os.path.join(training_dir, m))
for v, m in contents['videos']]
env_infos.append(contents['env_info'])
current_seeds = contents.get('seeds', [])
seeds += current_seeds
if current_seeds:
main_seeds.append(current_seeds[0])
else:
# current_seeds could be None or []
main_seeds.append(None)
env_info = collapse_env_infos(env_infos, training_dir)
timestamps, episode_lengths, episode_rewards, initial_reset_timestamp = merge_stats_files(stats_files)
@@ -296,6 +311,8 @@ def load_results(training_dir):
'episode_rewards': episode_rewards,
'initial_reset_timestamp': initial_reset_timestamp,
'videos': videos,
'main_seeds': main_seeds,
'seeds': seeds,
}
def merge_stats_files(stats_files):
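A sketch (not part of the diff; the directory is hypothetical) of reading the recorded seeds back out of a monitored run, using the keys load_results now returns:

from gym.monitoring import monitor

results = monitor.load_results('/tmp/random-agent-results')
print(results['main_seeds'])  # first seed per manifest, or None when absent
print(results['seeds'])       # every seed recorded across manifests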

View File

@@ -19,7 +19,7 @@ class StatsRecorder(object):
self.done = None
self.closed = False
filename = '{}.{}.stats.json'.format(self.file_prefix, os.getpid())
filename = '{}.stats.json'.format(self.file_prefix)
self.path = os.path.join(self.directory, filename)
def before_step(self, action):

View File

@@ -89,6 +89,8 @@ def upload_training_data(training_dir, api_key=None):
timestamps = results['timestamps']
episode_lengths = results['episode_lengths']
episode_rewards = results['episode_rewards']
main_seeds = results['main_seeds']
seeds = results['seeds']
videos = results['videos']
env_id = env_info['env_id']
@@ -96,7 +98,7 @@ def upload_training_data(training_dir, api_key=None):
# Do the relevant uploads
if len(episode_lengths) > 0:
training_episode_batch = upload_training_episode_batch(episode_lengths, episode_rewards, timestamps, api_key, env_id=env_id)
training_episode_batch = upload_training_episode_batch(episode_lengths, episode_rewards, timestamps, main_seeds, seeds, api_key, env_id=env_id)
else:
training_episode_batch = None
@@ -112,13 +114,15 @@ def upload_training_data(training_dir, api_key=None):
return env_info, training_episode_batch, training_video
def upload_training_episode_batch(episode_lengths, episode_rewards, timestamps, api_key=None, env_id=None):
def upload_training_episode_batch(episode_lengths, episode_rewards, timestamps, main_seeds, seeds, api_key=None, env_id=None):
logger.info('[%s] Uploading %d episodes of training data', env_id, len(episode_lengths))
file_upload = resource.FileUpload.create(purpose='episode_batch', api_key=api_key)
file_upload.put({
'episode_lengths': episode_lengths,
'episode_rewards': episode_rewards,
'timestamps': timestamps,
'main_seeds': main_seeds,
'seeds': seeds,
})
return file_upload
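For orientation, the shape of the episode-batch payload now uploaded (values invented for illustration; the field names follow the file_upload.put call above):

payload = {
    'episode_lengths': [200, 187],
    'episode_rewards': [200.0, 187.0],
    'timestamps': [1464480000.0, 1464480042.5],
    'main_seeds': [42],
    'seeds': [42, 1337],
}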

View File

@@ -6,12 +6,15 @@ class Box(Space):
A box in R^n.
I.e., each coordinate is bounded.
"""
def __init__(self, low, high, shape=None):
def __init__(self, low, high, shape=None, np_random=None):
"""
Two kinds of valid input:
Box(-1.0, 1.0, (3,4)) # low and high are scalars, and shape is provided
Box(np.array([-1.0,-2.0]), np.array([2.0,4.0])) # low and high are arrays of the same shape
"""
if np_random is None:
np_random = np.random
self.np_random = np_random
if shape is None:
assert low.shape == high.shape
self.low = low
@@ -21,7 +24,7 @@ class Box(Space):
self.low = low + np.zeros(shape)
self.high = high + np.zeros(shape)
def sample(self):
return np.random.uniform(low=self.low, high=self.high, size=self.low.shape)
return self.np_random.uniform(low=self.low, high=self.high, size=self.low.shape)
def contains(self, x):
return x.shape == self.shape and (x >= self.low).all() and (x <= self.high).all()
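A minimal sketch (not part of the diff) of the new np_random hook: identically-seeded spaces draw identical samples.

import numpy as np
from gym import spaces

box_a = spaces.Box(-1.0, 1.0, (3,), np_random=np.random.RandomState(123))
box_b = spaces.Box(-1.0, 1.0, (3,), np_random=np.random.RandomState(123))
assert (box_a.sample() == box_b.sample()).all()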

View File

@@ -5,10 +5,13 @@ class Discrete(Space):
"""
{0,1,...,n-1}
"""
def __init__(self, n):
def __init__(self, n, np_random=None):
if np_random is None:
np_random = np.random
self.np_random = np_random
self.n = n
def sample(self):
return np.random.randint(self.n)
return self.np_random.randint(self.n)
def contains(self, x):
if isinstance(x, int):
as_int = x

View File

@@ -13,13 +13,17 @@ class HighLow(Space):
e.g. if the space is composed of ATTACK (values: 0-100), MOVE_LEFT (values: 0-1), and MOVE_RIGHT (values: 0-1),
the space would be [[0.0, 100.0, 2], [0, 1, 0], [0, 1, 0]]
"""
def __init__(self, matrix):
def __init__(self, matrix, np_random=None):
"""
A matrix of shape (n, 3), where the first column is the minimum (inclusive), the second column
is the maximum (inclusive), and the third column is the precision (number of decimals to keep)
e.g. np.matrix([[0, 1, 0], [0, 1, 0], [0.0, 100.0, 2]])
"""
if np_random is None:
np_random = np.random
self.np_random = np_random
(num_rows, num_cols) = matrix.shape
assert num_rows >= 1
assert num_cols == 3
@@ -29,7 +33,7 @@ class HighLow(Space):
def sample(self):
# For each row: round(random .* (max - min) + min, precision)
max_minus_min = self.matrix[:, 1] - self.matrix[:, 0]
random_matrix = np.multiply(max_minus_min, np.random.rand(self.num_rows, 1)) + self.matrix[:, 0]
random_matrix = np.multiply(max_minus_min, self.np_random.rand(self.num_rows, 1)) + self.matrix[:, 0]
rounded_matrix = np.zeros(self.num_rows)
for i in range(self.num_rows):
rounded_matrix[i] = round(random_matrix[i, 0], int(self.matrix[i, 2]))
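A worked sketch (not part of the diff) of the per-row computation in HighLow.sample(): scale a uniform draw into [low, high], then round to the row's precision.

import numpy as np

low, high, precision = 0.0, 100.0, 2
rng = np.random.RandomState(0)
value = round(float(rng.rand()) * (high - low) + low, int(precision))
assert low <= value <= high  # 54.88 for seed 0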

97
gym/utils/seeding.py Normal file
View File

@@ -0,0 +1,97 @@
import hashlib
import numpy as np
import os
import random as _random
import struct
import sys
from gym import error
if sys.version_info < (3,):
integer_types = (int, long)
else:
integer_types = (int,)
# Fortunately not needed right now!
#
# def random(seed=None):
# seed = _seed(seed)
#
# rng = _random.Random()
# rng.seed(hash_seed(seed))
# return rng, seed
def np_random(seed=None):
if seed is not None and not (isinstance(seed, integer_types) and 0 <= seed):
raise error.Error('Seed must be a non-negative integer or omitted, not {}'.format(seed))
seed = _seed(seed)
rng = np.random.RandomState()
rng.seed(_int_list_from_bigint(hash_seed(seed)))
return rng, seed
def hash_seed(seed, max_bytes=8):
"""Any given evaluation is likely to have many PRNG's active at
once. (Most commonly, because the environment is running in
multiple processes.) There's literature indicating that having
linear correlations between seeds of multiple PRNG's can correlate
the outputs:
http://blogs.unity3d.com/2015/01/07/a-primer-on-repeatable-random-numbers/
http://stackoverflow.com/questions/1554958/how-different-do-random-seeds-need-to-be
http://dl.acm.org/citation.cfm?id=1276928
Thus, for sanity we hash the seeds before using them. (This scheme
is likely not crypto-strength, but it should be good enough to get
rid of simple correlations.)
"""
hash = hashlib.sha512(str(seed).encode('utf8')).digest()
return _bigint_from_bytes(hash[:max_bytes])
def _seed(a=None, max_bytes=8):
"""Create a strong random seed. Otherwise, Python 2 would seed using
the system time, which might be non-robust especially in the
presence of concurrency.
Args:
a (Optional[int, str]): None seeds from an operating system specific randomness source. If an int or str passed, all of the bits are used.
"""
# Adapted from https://svn.python.org/projects/python/tags/r32/Lib/random.py
if a is None:
a = _bigint_from_bytes(os.urandom(max_bytes))
elif isinstance(a, str):
a = a.encode('utf8')
a += hashlib.sha512(a).digest()
a = _bigint_from_bytes(a[:max_bytes])
elif isinstance(a, integer_types):
a = a % 2**(8 * max_bytes)
else:
raise error.Error('Invalid type for seed: {} ({})'.format(type(a), a))
return a
# TODO: don't hardcode sizeof_int here
def _bigint_from_bytes(bytes):
sizeof_int = 4
    padding = -len(bytes) % sizeof_int  # pad to a whole number of ints
    bytes += b'\0' * padding
    int_count = len(bytes) // sizeof_int
unpacked = struct.unpack("{}I".format(int_count), bytes)
accum = 0
for i, val in enumerate(unpacked):
accum += 2 ** (sizeof_int * 8 * i) * val
return accum
def _int_list_from_bigint(bigint):
# Special case 0
if bigint < 0:
raise error.Error('Seed must be non-negative, not {}'.format(bigint))
elif bigint == 0:
return [0]
ints = []
while bigint > 0:
bigint, mod = divmod(bigint, 2 ** 32)
ints.append(mod)
return ints
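Putting the pieces above together, a minimal usage sketch (not part of the diff):

from gym.utils import seeding

rng_a, seed_a = seeding.np_random(42)
rng_b, seed_b = seeding.np_random(42)
assert seed_a == seed_b == 42
assert rng_a.randint(1000) == rng_b.randint(1000)  # identical random streams

# hash_seed decorrelates nearby seeds before they reach the PRNG:
print(seeding.hash_seed(1))
print(seeding.hash_seed(2))  # an unrelated bigint, despite the adjacent input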

View File

@@ -0,0 +1,16 @@
from gym import error
from gym.utils import seeding
def test_invalid_seeds():
for seed in [-1, 'test']:
try:
seeding.np_random(seed)
except error.Error:
pass
else:
assert False, 'Invalid seed {} passed validation'.format(seed)
def test_valid_seeds():
for seed in [0, 1]:
random, seed1 = seeding.np_random(seed)
assert seed == seed1