Rewrite env tests (#2867)

2025-08-23 15:04:20 +00:00 · 2022-06-16 14:29:13 +01:00
parent 71f11a0642
commit ffbf971171
37 changed files with 469 additions and 657 deletions
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -14,4 +14,4 @@ jobs:
             --build-arg PYTHON_VERSION=${{ matrix.python-version }} \
             --tag gym-docker .
      - name: Run tests
-        run: docker run gym-docker pytest --import-mode=append
+        run: docker run gym-docker pytest
--- a/gym/envs/mujoco/init.py
+++ b/gym/envs/mujoco/init.py
@@ -1,4 +1,8 @@
 from gym.envs.mujoco.mujoco_env import MujocoEnv  # isort:skip
 from gym.envs.mujoco.mujoco_rendering import (  # isort:skip
    RenderContextOffscreen,
    Viewer,
 )
 # ^^^^^ so that user gets the correct error
 # message if mujoco is not installed correctly
@@ -9,7 +13,6 @@ from gym.envs.mujoco.humanoid import HumanoidEnv
 from gym.envs.mujoco.humanoidstandup import HumanoidStandupEnv
 from gym.envs.mujoco.inverted_double_pendulum import InvertedDoublePendulumEnv
 from gym.envs.mujoco.inverted_pendulum import InvertedPendulumEnv
 from gym.envs.mujoco.mujoco_rendering import RenderContextOffscreen, Viewer
 from gym.envs.mujoco.pusher import PusherEnv
 from gym.envs.mujoco.reacher import ReacherEnv
 from gym.envs.mujoco.swimmer import SwimmerEnv
--- a/tests/envs/register_during_make_env.py
+++ b/tests/envs/register_during_make_env.py
@@ -1,6 +1,7 @@
 import gym
 # TODO: remove this class and use a generic testing environment instead
 class RegisterDuringMakeEnv(gym.Env):
    """Used in `test_registration.py` to check if `env.make` can import and register an env"""
--- a/tests/envs/spec_list.py
+++ b/tests/envs/spec_list.py
@@ -1,70 +0,0 @@
 from gym import envs, logger
 # Human-readable reasons for skipping mujoco tests, one per binding
 # (`mujoco-py` and `mujoco`).
 SKIP_MUJOCO_V3_WARNING_MESSAGE = (
    "Cannot run mujoco test because `mujoco-py` is not installed"
 )
 SKIP_MUJOCO_V4_WARNING_MESSAGE = (
    "Cannot run mujoco test because `mujoco` is not installed"
 )
 # `skip_mujoco_v3` becomes True when `mujoco_py` cannot be imported.
 skip_mujoco_v3 = False
 try:
    import mujoco_py  # noqa:F401
 except ImportError:
    skip_mujoco_v3 = True
 # `skip_mujoco_v4` becomes True when `mujoco` cannot be imported.
 skip_mujoco_v4 = False
 try:
    import mujoco  # noqa:F401
 except ImportError:
    skip_mujoco_v4 = True
 def should_skip_env_spec_for_tests(spec):
    """Tell whether an environment spec must be left out of the test spec list.

    A spec is skipped when its entry point belongs to a family whose optional
    dependency cannot be imported (mujoco, atari, box2d), when it is a Go/Hex
    environment, or when it is an atari environment other than Pong or Seaquest.

    Args:
        spec: environment spec exposing `entry_point` (a string) and `id`

    Returns:
        True if the spec should be skipped, False otherwise.
    """
    entry_point = spec.entry_point

    # Skip mujoco tests for pull request CI
    mujoco_missing = skip_mujoco_v3 or skip_mujoco_v4
    if mujoco_missing and entry_point.startswith("gym.envs.mujoco"):
        return True

    # The optional imports are always attempted, whatever the entry point is.
    try:
        import gym.envs.atari  # noqa:F401
    except ImportError:
        atari_missing = True
    else:
        atari_missing = False
    if atari_missing and entry_point.startswith("gym.envs.atari"):
        return True

    try:
        import Box2D  # noqa:F401
    except ImportError:
        box2d_missing = True
    else:
        box2d_missing = False
    if box2d_missing and entry_point.startswith("gym.envs.box2d"):
        return True

    # Envs that are troublesome to run frequently: Go/Hex, and every atari
    # game except Pong and Seaquest.
    is_board_game = "GoEnv" in entry_point or "HexEnv" in entry_point
    if not is_board_game:
        if not entry_point.startswith("gym.envs.atari"):
            return False
        if spec.id.startswith(("Pong", "Seaquest")):
            return False

    logger.warn(f"Skipping tests for env {entry_point}")
    return True
 def skip_mujoco_py_env_for_test(spec):
    """Tell whether a spec is a mujoco environment with a version below 4.

    Args:
        spec: environment spec exposing `entry_point` (a string) and `version`

    Returns:
        True for mujoco entry points whose version is lower than 4, else False.
    """
    entry_point, version = spec.entry_point, spec.version
    # The version is only compared for mujoco entry points.
    if not entry_point.startswith("gym.envs.mujoco"):
        return False
    return True if version < 4 else False
 # Registered specs, ordered by id, that have an entry point and are not
 # excluded by `should_skip_env_spec_for_tests`.
 spec_list = list(
    filter(
        lambda spec: spec.entry_point is not None
        and not should_skip_env_spec_for_tests(spec),
        sorted(envs.registry.values(), key=lambda spec: spec.id),
    )
 )
 # Same list without the specs rejected by `skip_mujoco_py_env_for_test`.
 spec_list_no_mujoco_py = [
    env_spec for env_spec in spec_list if not skip_mujoco_py_env_for_test(env_spec)
 ]
--- a/tests/envs/test_action_dim_check.py
+++ b/tests/envs/test_action_dim_check.py
@@ -1,93 +1,102 @@
 from typing import List
 import numpy as np
 import pytest
 import gym
-from gym import Env
+from gym import spaces
 from gym.envs.registration import EnvSpec
-from gym.spaces.box import Box
+from tests.envs.utils import all_testing_initialised_envs, mujoco_testing_env_specs
 from gym.spaces.discrete import Discrete
 from tests.envs.spec_list import (
    SKIP_MUJOCO_V3_WARNING_MESSAGE,
    skip_mujoco_v3,
    spec_list,
 )
 ENVIRONMENT_IDS = ("HalfCheetah-v2",)
 def filters_envs_action_space_type(
    env_spec_list: List[EnvSpec], action_space: type
 ) -> List[Env]:
    """Make environments whose action space is of a specific type.

    Every spec in `env_spec_list` is instantiated with `gym.make`. Environments
    whose `action_space` is an instance of `action_space` are returned; all
    others are closed right away so they do not leak resources.

    Args:
        env_spec_list (list): list of registered environments' specification
        action_space (type): the space class to match

    Returns:
        The instantiated environments whose action space matches.
    """
    filtered_envs = []
    for spec in env_spec_list:
        env = gym.make(spec.id)
        if isinstance(env.action_space, action_space):
            filtered_envs.append(env)
        else:
            # A rejected environment is never handed to a caller, so this is
            # the only place where it can be closed.
            env.close()
    return filtered_envs
@pytest.mark.skipif(skip_mujoco_v3, reason=SKIP_MUJOCO_V3_WARNING_MESSAGE)
@pytest.mark.parametrize("environment_id", ENVIRONMENT_IDS)
 def test_serialize_deserialize(environment_id):
    """Check that `step` rejects actions with the wrong dimensions.

    Args:
        environment_id (str): id of the environment to create
    """
    env = gym.make(environment_id)
    env.reset()
    # First a one-element list, then a bare scalar.
    for malformed_action in ([0.1], 0.1):
        with pytest.raises(ValueError, match="Action dimension mismatch"):
            env.step(malformed_action)
@pytest.mark.parametrize("env", filters_envs_action_space_type(spec_list, Discrete))
 def test_discrete_actions_out_of_bound(env):
    """Check that a Discrete environment refuses an action above its range.

    Stepping with the value just past the largest valid action is expected to
    raise an exception.

    Args:
        env (gym.Env): the gym environment
    """
    env.reset()

    discrete_space = env.action_space
    largest_valid_action = discrete_space.start + discrete_space.n - 1
    first_invalid_action = largest_valid_action + 1

    with pytest.raises(Exception):
        env.step(first_invalid_action)
@pytest.mark.parametrize(
-    ("env", "seed"),
+    "env_spec",
-    [(env, 42) for env in filters_envs_action_space_type(spec_list, Box)],
+    mujoco_testing_env_specs,
    ids=[env_spec.id for env_spec in mujoco_testing_env_specs],
 )
-def test_box_actions_out_of_bound(env, seed):
+def test_mujoco_action_dimensions(env_spec: EnvSpec):
    """Test that for all mujoco environment, mis-dimensioned actions, an error is raised.
    Types of mis-dimensioned actions:
     * Too few actions
     * Too many actions
     * Too few dimensions
     * Too many dimensions
     * Incorrect shape
    """
    env = env_spec.make(disable_env_checker=True)
    env.reset()
    # Too few actions
    with pytest.raises(ValueError, match="Action dimension mismatch"):
        env.step(env.action_space.sample()[1:])
    # Too many actions
    with pytest.raises(ValueError, match="Action dimension mismatch"):
        env.step(np.append(env.action_space.sample(), 0))
    # Too few dimensions
    with pytest.raises(ValueError, match="Action dimension mismatch"):
        env.step(0.1)
    # Too many dimensions
    with pytest.raises(ValueError, match="Action dimension mismatch"):
        env.step(np.expand_dims(env.action_space.sample(), 0))
    # Incorrect shape
    with pytest.raises(ValueError, match="Action dimension mismatch"):
        env.step(np.expand_dims(env.action_space.sample(), 1))
    env.close()
@pytest.mark.parametrize(
    "env",
    filter(
        lambda env: isinstance(env.action_space, spaces.Discrete),
        all_testing_initialised_envs,
    ),
 )
 def test_discrete_actions_out_of_bound(env: gym.Env):
    """Check that a Discrete environment refuses an action above its range.

    Stepping with the value just past the largest valid action is expected to
    raise an exception; the environment is closed afterwards.

    Args:
        env (gym.Env): the gym environment
    """
    action_space = env.action_space
    assert isinstance(action_space, spaces.Discrete)

    largest_valid_action = action_space.start + action_space.n - 1

    env.reset()
    with pytest.raises(Exception):
        env.step(largest_valid_action + 1)

    env.close()
 OOB_VALUE = 100
@pytest.mark.parametrize(
    "env", filter(lambda env: isinstance(env, spaces.Box), all_testing_initialised_envs)
 )
 def test_box_actions_out_of_bound(env: gym.Env):
    """Test out of bound actions in Box action_space.
    Environments with Box actions spaces perform clipping inside `step`.
    The expected behaviour is that an action `out-of-bound` has the same effect
    of an action with value exactly at the upper (or lower) bound.
    Args:
        env (gym.Env): the gym environment
        seed (int): seed value for determinism
    """
-    OOB_VALUE = 100
+    env.reset(seed=42)
-    env.reset(seed=seed)
+    oob_env = gym.make(env.spec.id, disable_env_checker=True)
-
+    oob_env.reset(seed=42)
    oob_env = gym.make(env.spec.id)
    oob_env.reset(seed=seed)
    assert isinstance(env.action_space, spaces.Box)
    dtype = env.action_space.dtype
    upper_bounds = env.action_space.high
    lower_bounds = env.action_space.low
@@ -113,3 +122,5 @@ def test_box_actions_out_of_bound(env, seed):
            oob_obs, _, _, _ = oob_env.step(oob_action)
            assert np.alltrue(obs == oob_obs)
    env.close()
--- a/tests/envs/test_atari_legacy_env_specs.py
+++ b/tests/envs/test_atari_legacy_env_specs.py
@@ -1,136 +0,0 @@
 from itertools import product
 import pytest
 from gym.envs.registration import registry
 pytest.importorskip("gym.envs.atari")
 def test_ale_legacy_env_specs():
    """Check the kwargs registered for every legacy (v0 / v4) atari environment id.

    Ids are built as `<Game><obs_type><suffix><version>` from the lists below.
    Each id must be present in `registry.env_specs` with kwargs that follow the
    rules asserted in the loop.
    """
    versions = ["-v0", "-v4"]
    suffixes = ["", "NoFrameskip", "Deterministic"]
    obs_types = ["", "-ram"]
    snake_case_games = [
        "adventure",
        "air_raid",
        "alien",
        "amidar",
        "assault",
        "asterix",
        "asteroids",
        "atlantis",
        "bank_heist",
        "battle_zone",
        "beam_rider",
        "berzerk",
        "bowling",
        "boxing",
        "breakout",
        "carnival",
        "centipede",
        "chopper_command",
        "crazy_climber",
        "defender",
        "demon_attack",
        "double_dunk",
        "elevator_action",
        "enduro",
        "fishing_derby",
        "freeway",
        "frostbite",
        "gopher",
        "gravitar",
        "hero",
        "ice_hockey",
        "jamesbond",
        "journey_escape",
        "kangaroo",
        "krull",
        "kung_fu_master",
        "montezuma_revenge",
        "ms_pacman",
        "name_this_game",
        "phoenix",
        "pitfall",
        "pong",
        "pooyan",
        "private_eye",
        "qbert",
        "riverraid",
        "road_runner",
        "robotank",
        "seaquest",
        "skiing",
        "solaris",
        "space_invaders",
        "star_gunner",
        "tennis",
        "time_pilot",
        "tutankham",
        "up_n_down",
        "venture",
        "video_pinball",
        "wizard_of_wor",
        "yars_revenge",
        "zaxxon",
    ]

    # Convert snake case to camel case
    games = [name.title().replace("_", "") for name in snake_case_games]

    env_ids = [
        "".join(parts) for parts in product(games, obs_types, suffixes, versions)
    ]

    # defaults:
    #     repeat_action_probability = 0.0
    #     full_action_space = False
    #     frameskip = (2, 5)
    #     game = "Pong"
    #     obs_type = "ram"
    #     mode = None
    #     difficulty = None
    #
    # v0: repeat_action_probability = 0.25
    # v4: inherits defaults
    #
    # -NoFrameskip: frameskip = 1
    # -Deterministic: frameskip = 4 or 3 for space_invaders
    required_kwargs = (
        "frameskip",
        "game",
        "obs_type",
        "repeat_action_probability",
        "full_action_space",
    )

    for env_id in env_ids:
        assert env_id in registry.env_specs
        kwargs = registry.env_specs[env_id]._kwargs

        # Assert necessary parameters are set
        for key in required_kwargs:
            assert key in kwargs

        # Common defaults
        assert kwargs["full_action_space"] is False
        assert "mode" not in kwargs
        assert "difficulty" not in kwargs

        expected_obs_type = "ram" if "-ram" in env_id else "rgb"
        assert kwargs["obs_type"] == expected_obs_type

        frameskip = kwargs["frameskip"]
        if "NoFrameskip" in env_id:
            assert frameskip == 1
        elif "Deterministic" in env_id:
            assert isinstance(frameskip, int)
            assert frameskip == (3 if "SpaceInvaders" in env_id else 4)
        else:
            assert isinstance(frameskip, tuple) and frameskip == (2, 5)

        # Only the two legacy versions are expected here.
        assert env_id.endswith(("v0", "v4"))
        expected_repeat_probability = 0.25 if env_id.endswith("v0") else 0.0
        assert kwargs["repeat_action_probability"] == expected_repeat_probability
--- a/tests/envs/test_bipedal_walker.py
+++ b/tests/envs/test_bipedal_walker.py
@@ -1,41 +0,0 @@
 """Test BipedalWalker environment."""
 import pytest
 from gym.envs.box2d import BipedalWalker
@pytest.mark.parametrize("seed", range(10))
 def test_bipedal_walker_hardcore_creation(seed: int):
    """Check that only the hardcore BipedalWalker has hardcore-coloured terrain.

    The two terrain colours below are used as a marker: no terrain element of
    the regular environment may carry them, while the hardcore environment
    must contain at least one element with each.

    Args:
        seed (int): environment seed
    """
    hardcore_color1 = (255, 255, 255)
    hardcore_color2 = (153, 153, 153)

    plain_env = BipedalWalker(hardcore=False)
    plain_env.reset(seed=seed)

    hardcore_env = BipedalWalker(hardcore=True)
    hardcore_env.reset(seed=seed)

    for element in plain_env.terrain:
        assert element.color1 != hardcore_color1
        assert element.color2 != hardcore_color2

    color1_hits = sum(
        1 for element in hardcore_env.terrain if element.color1 == hardcore_color1
    )
    color2_hits = sum(
        1 for element in hardcore_env.terrain if element.color2 == hardcore_color2
    )

    assert color1_hits > 0
    assert color2_hits > 0
--- a/tests/envs/test_determinism.py
+++ b/tests/envs/test_determinism.py
@@ -1,83 +0,0 @@
 """Test environment determinism by performing a rollout."""
 import pytest
 from gym.utils.env_checker import data_equivalence
 from tests.envs.spec_list import spec_list
@pytest.mark.parametrize("spec", spec_list, ids=[spec.id for spec in spec_list])
 def test_env(spec):
    """Roll out two identically seeded copies of an environment and compare them.

    Over `num_steps` steps the test asserts that:
    - the observations returned by the first reset are equivalent
    - both action spaces sample equivalent actions
    - observations are contained in the observation space
    - obs, rew, done and info are equal between the two envs

    For a spec flagged as nondeterministic, the function returns right after
    the first action comparison.

    Args:
        spec (EnvSpec): Environment specification
    """
    # Note that this precludes running this test in multiple
    # threads. However, we probably already can't do multithreading
    # due to some environments.
    seed, num_steps = 0, 50

    env_a = spec.make()
    env_b = spec.make()

    first_obs_a = env_a.reset(seed=seed)
    first_obs_b = env_b.reset(seed=seed)

    env_a.action_space.seed(seed)
    env_b.action_space.seed(seed)

    assert data_equivalence(
        first_obs_a, first_obs_b
    ), f"Initial Observations 1 and 2 are not equivalent. initial obs 1={first_obs_a}, initial obs 2={first_obs_b}"

    for step in range(num_steps):
        action_a = env_a.action_space.sample()
        action_b = env_b.action_space.sample()

        try:
            assert data_equivalence(
                action_a, action_b
            ), f"Action 1 and 2 are not equivalent. action 1={action_a}, action 2={action_b}"
        except AssertionError:
            # Dump extra context before letting the failure propagate.
            print(f"env 1 action space={env_a.action_space}")
            print(f"env 2 action space={env_b.action_space}")
            print(f"[{step}] action sample 1={action_a}, action sample 2={action_b}")
            raise

        # Don't check rollout equality if it's a nondeterministic
        # environment.
        if spec.nondeterministic:
            return

        obs_a, rew_a, done_a, info_a = env_a.step(action_a)
        obs_b, rew_b, done_b, info_b = env_b.step(action_b)

        assert data_equivalence(
            obs_a, obs_b
        ), f"Observation 1 and 2 are not equivalent. obs 1={obs_a}, obs 2={obs_b}"

        assert env_a.observation_space.contains(obs_a)
        assert env_b.observation_space.contains(obs_b)

        assert rew_a == rew_b, f"[{step}] reward1: {rew_a}, reward2: {rew_b}"
        assert done_a == done_b, f"[{step}] done1: {done_a}, done2: {done_b}"
        assert data_equivalence(
            info_a, info_b
        ), f"Info 1 and 2 are not equivalent. info 1={info_a}, info 2={info_b}"

        if done_a:  # done_b verified in previous assertion
            env_a.reset(seed=seed)
            env_b.reset(seed=seed)

    env_a.close()
    env_b.close()
--- a/tests/envs/test_env_implementation.py
+++ b/tests/envs/test_env_implementation.py
@@ -0,0 +1,82 @@
 import pytest
 import gym
 from gym.envs.box2d import BipedalWalker
 from gym.envs.box2d.lunar_lander import demo_heuristic_lander
 from gym.envs.toy_text.frozen_lake import generate_random_map
 def test_lunar_lander_heuristics():
    """`demo_heuristic_lander` must return a total reward above 100 on LunarLander-v2 with seed 1."""
    env = gym.make("LunarLander-v2", disable_env_checker=True)
    episode_return = demo_heuristic_lander(env, seed=1)
    assert episode_return > 100
@pytest.mark.parametrize("seed", range(5))
 def test_bipedal_walker_hardcore_creation(seed: int):
    """Check that only the hardcore BipedalWalker has hardcore-coloured terrain.

    Both registered ids are created and unwrapped, and their `hardcore` flags
    are verified. The two colours below then act as a marker: the regular
    environment must not contain them, while the hardcore one must contain
    each at least once.

    Args:
        seed (int): environment seed
    """
    hardcore_color1 = (255, 255, 255)
    hardcore_color2 = (153, 153, 153)

    plain_env = gym.make("BipedalWalker-v3", disable_env_checker=True).unwrapped
    hardcore_env = gym.make(
        "BipedalWalkerHardcore-v3", disable_env_checker=True
    ).unwrapped
    assert isinstance(plain_env, BipedalWalker) and isinstance(
        hardcore_env, BipedalWalker
    )
    assert plain_env.hardcore is False and hardcore_env.hardcore is True

    plain_env.reset(seed=seed)
    hardcore_env.reset(seed=seed)

    for element in plain_env.terrain:
        assert element.color1 != hardcore_color1
        assert element.color2 != hardcore_color2

    color1_hits = sum(
        1 for element in hardcore_env.terrain if element.color1 == hardcore_color1
    )
    color2_hits = sum(
        1 for element in hardcore_env.terrain if element.color2 == hardcore_color2
    )

    assert color1_hits > 0
    assert color2_hits > 0
@pytest.mark.parametrize("map_size", [5, 10, 16])
 def test_frozenlake_dfs_map_generation(map_size: int):
    """Check that a randomly generated FrozenLake map has the right size and is solvable.

    The map must have `map_size` rows and a first row of `map_size` cells. A
    depth-first search from the top-left cell must then reach a "G" cell
    without stepping on "#" or "H" cells.

    Args:
        map_size (int): side length passed to `generate_random_map`
    """
    grid = generate_random_map(map_size)
    assert len(grid) == map_size
    assert len(grid[0]) == map_size

    # Runs a depth first search through the map to find the path.
    moves = ((1, 0), (0, 1), (-1, 0), (0, -1))
    visited = set()
    stack = [(0, 0)]
    while stack:
        cell = stack.pop()
        if cell in visited:
            continue
        visited.add(cell)

        row, col = cell
        for row_step, col_step in moves:
            next_row, next_col = row + row_step, col + col_step
            if not (0 <= next_row < map_size and 0 <= next_col < map_size):
                continue

            tile = grid[next_row][next_col]
            if tile == "G":
                return  # Successful, a route through the map was found
            if tile not in "#H":
                stack.append((next_row, next_col))

    raise AssertionError("No path through the frozenlake was found.")
--- a/tests/envs/test_envs.py
+++ b/tests/envs/test_envs.py
@@ -1,79 +1,84 @@
 from typing import List
 import numpy as np
 import pytest
-from gym import envs
+from gym.envs.registration import EnvSpec
 from gym.spaces import Box
 from gym.utils.env_checker import check_env
-from tests.envs.spec_list import spec_list, spec_list_no_mujoco_py
+from tests.envs.utils import all_testing_env_specs, assert_equals, gym_testing_env_specs
 # This runs a smoketest on each official registered env. We may want
 # to try also running environments which are not officially registered
 # envs.
-@pytest.mark.filterwarnings(
+
-    "ignore:.*We recommend you to use a symmetric and normalized Box action space.*"
+
 )
@pytest.mark.parametrize(
-    "spec", spec_list_no_mujoco_py, ids=[spec.id for spec in spec_list_no_mujoco_py]
+    "env_spec", gym_testing_env_specs, ids=[spec.id for spec in gym_testing_env_specs]
 )
-def test_env(spec):
+def test_run_env_checker(env_spec: EnvSpec):
-    # Capture warnings
+    """Runs the gym environment checker on the environment spec that calls the `reset`, `step` and `render`."""
-    with pytest.warns(None) as warnings:
+    env = env_spec.make(disable_env_checker=True)
-        env = spec.make()
+    check_env(env, skip_render_check=False)
    # Test if env adheres to Gym API
    check_env(env, skip_render_check=True)
    # Check that dtype is explicitly declared for gym.Box spaces
    for warning_msg in warnings:
        assert "autodetected dtype" not in str(warning_msg.message)
    ob_space = env.observation_space
    act_space = env.action_space
    ob = env.reset()
    assert ob_space.contains(ob), f"Reset observation: {ob!r} not in space"
    if isinstance(ob_space, Box):
        # Only checking dtypes for Box spaces to avoid iterating through tuple entries
        assert (
            ob.dtype == ob_space.dtype
        ), f"Reset observation dtype: {ob.dtype}, expected: {ob_space.dtype}"
    a = act_space.sample()
    observation, reward, done, _info = env.step(a)
    assert ob_space.contains(
        observation
    ), f"Step observation: {observation!r} not in space"
    assert np.isscalar(reward), f"{reward} is not a scalar for {env}"
    assert isinstance(done, bool), f"Expected {done} to be a boolean"
    if isinstance(ob_space, Box):
        assert (
            observation.dtype == ob_space.dtype
        ), f"Step observation dtype: {ob.dtype}, expected: {ob_space.dtype}"
    env.close()
-@pytest.mark.parametrize("spec", spec_list, ids=[spec.id for spec in spec_list])
+# Note that this precludes running this test in multiple threads.
-def test_reset_info(spec):
+# However, we probably already can't do multithreading due to some environments.
-
+SEED = 0
-    with pytest.warns(None):
+NUM_STEPS = 50
        env = spec.make()
    ob_space = env.observation_space
    obs = env.reset()
    assert ob_space.contains(obs)
    obs = env.reset(return_info=False)
    assert ob_space.contains(obs)
    obs, info = env.reset(return_info=True)
    assert ob_space.contains(obs)
    assert isinstance(info, dict)
    env.close()
@pytest.mark.parametrize(
-    "spec", spec_list_no_mujoco_py, ids=[spec.id for spec in spec_list_no_mujoco_py]
+    "env_spec", all_testing_env_specs, ids=[env.id for env in all_testing_env_specs]
 )
 def test_env_determinism_rollout(env_spec: EnvSpec):
    """Roll out two identically seeded copies of an environment and compare them.

    Both copies are reset with `SEED` and driven for `NUM_STEPS` steps with the
    same actions, sampled from the first copy's seeded action space. The test
    asserts that:
    - the observations returned by the first reset are the same
    - observations are contained in the observation space
    - obs, rew, done and info are equal between the two envs

    Specs flagged as nondeterministic return immediately without any check.
    """
    # Don't check rollout equality if it's a nondeterministic environment.
    if env_spec.nondeterministic is True:
        return

    first_env = env_spec.make(disable_env_checker=True)
    second_env = env_spec.make(disable_env_checker=True)

    assert_equals(first_env.reset(seed=SEED), second_env.reset(seed=SEED))

    first_env.action_space.seed(SEED)

    for time_step in range(NUM_STEPS):
        step_tag = f"[{time_step}] "

        # We don't evaluate the determinism of actions
        action = first_env.action_space.sample()

        first_obs, first_rew, first_done, first_info = first_env.step(action)
        second_obs, second_rew, second_done, second_info = second_env.step(action)

        assert_equals(first_obs, second_obs, step_tag)
        # second_obs verified by previous assertion
        assert first_env.observation_space.contains(first_obs)

        assert (
            first_rew == second_rew
        ), f"[{time_step}] reward 1={first_rew}, reward 2={second_rew}"
        assert (
            first_done == second_done
        ), f"[{time_step}] done 1={first_done}, done 2={second_done}"
        assert_equals(first_info, second_info, step_tag)

        if first_done:  # second_done verified by previous assertion
            first_env.reset(seed=SEED)
            second_env.reset(seed=SEED)

    first_env.close()
    second_env.close()
@pytest.mark.parametrize(
    "spec", gym_testing_env_specs, ids=[spec.id for spec in gym_testing_env_specs]
 )
 def test_render_modes(spec):
    env = spec.make()
@@ -85,17 +90,3 @@ def test_render_modes(spec):
            new_env.reset()
            new_env.step(new_env.action_space.sample())
            new_env.render()
 def test_env_render_result_is_immutable():
    """Check that `render()` in "ansi" mode returns a list whose first item is a string.

    Taxi-v3 and FrozenLake-v1 are both created before either one is reset.
    """
    ansi_envs = [
        envs.make(env_id, render_mode="ansi")
        for env_id in ("Taxi-v3", "FrozenLake-v1")
    ]

    for env in ansi_envs:
        env.reset()
        frames = env.render()
        assert isinstance(frames, List)
        assert isinstance(frames[0], str)
        env.close()
--- a/tests/envs/test_frozenlake_dfs.py
+++ b/tests/envs/test_frozenlake_dfs.py
@@ -1,30 +0,0 @@
 from gym.envs.toy_text.frozen_lake import generate_random_map
 # Test that FrozenLake map generation creates valid maps of various sizes.
 def test_frozenlake_dfs_map_generation():
    """Check that generated FrozenLake maps have the requested size and are solvable.

    For each size, the map must have `size` rows and a first row of `size`
    cells, and a depth-first search from the top-left cell must reach "G".
    """
    directions = [(1, 0), (0, 1), (-1, 0), (0, -1)]

    def frozenlake_dfs_path_exists(res):
        """Return True if "G" is reachable from (0, 0) without crossing "#" or "H"."""
        # Take the bound from the map itself, so the helper does not depend on
        # a loop variable of the enclosing function.
        size = len(res)
        frontier, discovered = [], set()
        frontier.append((0, 0))
        while frontier:
            r, c = frontier.pop()
            if (r, c) not in discovered:
                discovered.add((r, c))
                for x, y in directions:
                    r_new = r + x
                    c_new = c + y
                    if r_new < 0 or r_new >= size or c_new < 0 or c_new >= size:
                        continue
                    if res[r_new][c_new] == "G":
                        return True
                    if res[r_new][c_new] not in "#H":
                        frontier.append((r_new, c_new))
        return False

    map_sizes = [5, 10, 200]
    for size in map_sizes:
        new_frozenlake = generate_random_map(size)
        assert len(new_frozenlake) == size
        assert len(new_frozenlake[0]) == size
        assert frozenlake_dfs_path_exists(new_frozenlake)
--- a/tests/envs/test_lunar_lander.py
+++ b/tests/envs/test_lunar_lander.py
@@ -1,24 +0,0 @@
 import pytest
 try:
    import Box2D
    from gym.envs.box2d.lunar_lander import LunarLander, demo_heuristic_lander
 except ImportError:
    Box2D = None
@pytest.mark.skipif(Box2D is None, reason="Box2D not installed")
 def test_lunar_lander():
    """Run the heuristic lander check on a default `LunarLander()` with seed 0."""
    _test_lander(LunarLander(), seed=0)
@pytest.mark.skipif(Box2D is None, reason="Box2D not installed")
 def test_lunar_lander_continuous():
    """Run the heuristic lander check on `LunarLander(continuous=True)` with seed 0."""
    _test_lander(LunarLander(continuous=True), seed=0)
@pytest.mark.skipif(Box2D is None, reason="Box2D not installed")
 def _test_lander(env, seed=None, render=False):
    """Assert that `demo_heuristic_lander` returns a total reward above 100.

    Args:
        env: environment passed to `demo_heuristic_lander`
        seed: forwarded to `demo_heuristic_lander`
        render: forwarded to `demo_heuristic_lander`
    """
    total_reward = demo_heuristic_lander(env, seed=seed, render=render)
    assert total_reward > 100
--- a/tests/envs/test_mujoco.py
+++ b/tests/envs/test_mujoco.py
@@ -0,0 +1,57 @@
 import numpy as np
 import pytest
 import gym
 from gym import envs
 from tests.envs.utils import mujoco_testing_env_specs
 EPS = 1e-6
 def verify_environments_match(
    old_env_id: str, new_env_id: str, seed: int = 1, num_actions: int = 1000
 ):
    """Verifies that two environment ids (old and new) are identical in obs, reward and done
    (except info where all old info must be contained in new info).

    Both environments are reset with `seed` and stepped with the same actions,
    sampled from the old environment's action space. That space is seeded with
    `seed` as well, so a run is reproducible. The rollout stops after
    `num_actions` steps or as soon as the old environment reports done. Both
    environments are closed on exit, including when a check fails.

    Args:
        old_env_id: id of the reference environment
        new_env_id: id of the environment compared against the reference
        seed: seed used for both resets and for the action space
        num_actions: maximum number of steps to compare

    Raises:
        AssertionError: if observations, rewards, dones or shared info values differ
        KeyError: if an info key of the old environment is missing from the new one
    """
    from contextlib import closing

    with closing(envs.make(old_env_id, disable_env_checker=True)) as old_env, closing(
        envs.make(new_env_id, disable_env_checker=True)
    ) as new_env:
        old_reset_obs = old_env.reset(seed=seed)
        new_reset_obs = new_env.reset(seed=seed)

        np.testing.assert_allclose(old_reset_obs, new_reset_obs)

        # Without this the sampled actions would differ from run to run.
        old_env.action_space.seed(seed)

        for _ in range(num_actions):
            action = old_env.action_space.sample()
            old_obs, old_reward, old_done, old_info = old_env.step(action)
            new_obs, new_reward, new_done, new_info = new_env.step(action)

            np.testing.assert_allclose(old_obs, new_obs, atol=EPS)
            np.testing.assert_allclose(old_reward, new_reward, atol=EPS)
            np.testing.assert_equal(old_done, new_done)

            # Only the keys of the old info are checked; a key missing from
            # the new info raises KeyError, which callers rely on.
            for key in old_info:
                np.testing.assert_allclose(old_info[key], new_info[key], atol=EPS)

            if old_done:
                break
 # Names of the mujoco testing specs with version 2 for which a "<name>-v3"
 # id is also present in the registry.
 MUJOCO_V2_V3_ENVS = [
    spec.name
    for spec in mujoco_testing_env_specs
    if spec.version == 2 and f"{spec.name}-v3" in gym.envs.registry
 ]
@pytest.mark.parametrize("env_name", MUJOCO_V2_V3_ENVS)
 def test_mujoco_v2_to_v3_conversion(env_name: str):
    """Checks that the v2 version of a mujoco environment matches its v3 version.

    Args:
        env_name: environment name without the version suffix
    """
    verify_environments_match(f"{env_name}-v2", f"{env_name}-v3")
@pytest.mark.parametrize("env_name", MUJOCO_V2_V3_ENVS)
 def test_mujoco_incompatible_v3_to_v2(env_name: str):
    """Checks that comparing v3 against v2 (v3 as the reference) raises a KeyError.

    The v3 environment has additional info keys that v2 does not have.

    Args:
        env_name: environment name without the version suffix
    """
    with pytest.raises(KeyError):
        verify_environments_match(f"{env_name}-v3", f"{env_name}-v2")
--- a/tests/envs/test_mujoco_v2_to_v3_conversion.py
+++ b/tests/envs/test_mujoco_v2_to_v3_conversion.py
@@ -1,63 +0,0 @@
 import unittest
 import numpy as np
 from gym import envs
 from tests.envs.spec_list import SKIP_MUJOCO_V3_WARNING_MESSAGE, skip_mujoco_v3
 def verify_environments_match(
    old_environment_id, new_environment_id, seed=1, num_actions=1000
 ):
    """Verify that two environment ids give the same observations, rewards and dones.

    Both environments are reset with `seed` and stepped with the same actions,
    sampled from the old environment's action space. That space is seeded with
    `seed` as well, so a run is reproducible. Every info key of the old
    environment must exist in the new one with a matching value. The rollout
    stops after `num_actions` steps or as soon as the old environment reports
    done. Both environments are closed on exit, including when a check fails.

    Args:
        old_environment_id: id of the reference environment
        new_environment_id: id of the environment compared against the reference
        seed: seed used for both resets and for the action space
        num_actions: maximum number of steps to compare

    Raises:
        AssertionError: if observations, rewards, dones or shared info values differ
        KeyError: if an info key of the old environment is missing from the new one
    """
    from contextlib import closing

    eps = 1e-6

    with closing(envs.make(old_environment_id)) as old_environment, closing(
        envs.make(new_environment_id)
    ) as new_environment:
        old_reset_observation = old_environment.reset(seed=seed)
        new_reset_observation = new_environment.reset(seed=seed)

        np.testing.assert_allclose(old_reset_observation, new_reset_observation)

        # Without this the sampled actions would differ from run to run.
        old_environment.action_space.seed(seed)

        for _ in range(num_actions):
            action = old_environment.action_space.sample()
            old_observation, old_reward, old_done, old_info = old_environment.step(
                action
            )
            new_observation, new_reward, new_done, new_info = new_environment.step(
                action
            )

            np.testing.assert_allclose(old_observation, new_observation, atol=eps)
            np.testing.assert_allclose(old_reward, new_reward, atol=eps)
            # `done` is a flag, so it is compared exactly rather than with a tolerance.
            np.testing.assert_equal(old_done, new_done)

            for key in old_info:
                np.testing.assert_allclose(old_info[key], new_info[key], atol=eps)

            # Do not keep stepping an environment that has finished its episode.
            if old_done:
                break
@unittest.skipIf(skip_mujoco_v3, SKIP_MUJOCO_V3_WARNING_MESSAGE)
 class Mujocov2Tov3ConversionTest(unittest.TestCase):
    """Compares the v2 and v3 versions of the mujoco environments."""

    def test_environments_match(self):
        """Check v2 -> v3 equivalence, and that the v3 -> v2 comparison raises.

        Each case runs in its own sub-test, so one failing environment does
        not hide the results of the others.
        """
        env_names = ("Swimmer", "Hopper", "Walker2d", "HalfCheetah", "Ant", "Humanoid")

        for env_name in env_names:
            with self.subTest(old_id=f"{env_name}-v2", new_id=f"{env_name}-v3"):
                verify_environments_match(f"{env_name}-v2", f"{env_name}-v3")

        # Raises KeyError because the new envs have extra info
        for env_name in ("Swimmer", "Humanoid"):
            with self.subTest(old_id=f"{env_name}-v3", new_id=f"{env_name}-v2"):
                with self.assertRaises(KeyError):
                    verify_environments_match(f"{env_name}-v3", f"{env_name}-v2")
 if __name__ == "__main__":
    unittest.main()
--- a/tests/envs/test_registration.py
+++ b/tests/envs/test_registration.py
@@ -62,7 +62,7 @@ def register_some_envs():
 def test_make():
-    env = envs.make("CartPole-v1")
+    env = envs.make("CartPole-v1", disable_env_checker=True)
    assert env.spec.id == "CartPole-v1"
    assert isinstance(env.unwrapped, cartpole.CartPoleEnv)
@@ -128,7 +128,7 @@ def test_env_suggestions(register_some_envs, env_id_input, env_id_suggested):
    with pytest.raises(
        error.UnregisteredEnv, match=f"Did you mean: `{env_id_suggested}` ?"
    ):
-        envs.make(env_id_input)
+        gym.make(env_id_input, disable_env_checker=True)
@pytest.mark.parametrize(
@@ -151,14 +151,14 @@ def test_env_version_suggestions(
            error.DeprecatedEnv,
            match=match_str,
        ):
-            envs.make(env_id_input)
+            envs.make(env_id_input, disable_env_checker=True)
    else:
        match_str = f"versioned environments: \\[ {suggested_versions} \\]"
        with pytest.raises(
            error.UnregisteredEnv,
            match=match_str,
        ):
-            envs.make(env_id_input)
+            envs.make(env_id_input, disable_env_checker=True)
 def test_make_with_kwargs():
@@ -166,6 +166,7 @@ def test_make_with_kwargs():
        "test.ArgumentEnv-v0",
        arg2="override_arg2",
        arg3="override_arg3",
        disable_env_checker=True,
    )
    assert env.spec.id == "test.ArgumentEnv-v0"
    assert isinstance(env.unwrapped, ArgumentEnv)
@@ -194,7 +195,10 @@ def test_spec():
 def test_spec_with_kwargs():
    map_name_value = "8x8"
-    env = gym.make("FrozenLake-v1", map_name=map_name_value)
+    env = gym.make(
        "FrozenLake-v1",
        map_name=map_name_value,
    )
    assert env.spec.kwargs["map_name"] == map_name_value
@@ -277,7 +281,9 @@ def test_register_versioned_unversioned():
 def test_return_latest_versioned_env(register_some_envs):
    with pytest.warns(UserWarning):
-        env = envs.make("MyAwesomeNamespace/MyAwesomeVersionedEnv")
+        env = envs.make(
            "MyAwesomeNamespace/MyAwesomeVersionedEnv", disable_env_checker=True
        )
    assert env.spec.id == "MyAwesomeNamespace/MyAwesomeVersionedEnv-v5"
@@ -295,4 +301,7 @@ def test_namespace():
 def test_import_module_during_make():
    # Test custom environment which is registered at make
-    gym.make("tests.envs.register_during_make_env:RegisterDuringMakeEnv-v0")
+    gym.make(
        "tests.envs.register_during_make_env:RegisterDuringMakeEnv-v0",
        disable_env_checker=True,
    )
--- a/tests/envs/utils.py
+++ b/tests/envs/utils.py
@@ -0,0 +1,78 @@
 """Finds all the specs that we can test with"""
 from typing import Optional
 import numpy as np
 import gym
 from gym import logger
 from gym.envs.registration import EnvSpec
 def try_make_env(env_spec: EnvSpec) -> Optional[gym.Env]:
    """Try to make the environment described by ``env_spec``.

    Warning: the returned environment is unwrapped, so it has no wrappers,
    including time limit and order enforcing.

    Args:
        env_spec: The specification of the environment to build.

    Returns:
        The unwrapped environment, or ``None`` (after logging a warning) when
        the environment cannot be built because a dependency is missing.
    """
    try:
        return env_spec.make(disable_env_checker=True).unwrapped
    except (ImportError, gym.error.DependencyNotInstalled) as e:
        # Missing optional backends surface either as a plain ImportError or as
        # gym's own DependencyNotInstalled, which is not an ImportError subclass.
        logger.warn(f"Not testing {env_spec.id} due to error: {e}")
        return None
 # Build every registered environment once and keep those that could be created
 all_testing_initialised_envs = [
    made_env
    for made_env in [
        try_make_env(registered_spec)
        for registered_spec in gym.envs.registry.values()
    ]
    if made_env
 ]
 # All testing, mujoco and gym environment specs
 all_testing_env_specs = [env.spec for env in all_testing_initialised_envs]
 mujoco_testing_env_specs = [
    env_spec
    for env_spec in all_testing_env_specs
    if "gym.envs.mujoco" in env_spec.entry_point
 ]
 # The packages are matched under `gym.envs.`, like the mujoco filter above;
 # a bare `gym.<package>` is not a substring of such an entry point.
 gym_testing_env_specs = [
    env_spec
    for env_spec in all_testing_env_specs
    if any(
        f"gym.envs.{ep}" in env_spec.entry_point
        for ep in ["box2d", "classic_control", "toy_text"]
    )
 ]
 # TODO, add minimum testing env spec in testing
 # Select by `spec.id`: an id string is never equal to a spec object, so testing
 # the ids for membership in `all_testing_env_specs` would always give an empty list.
 minimum_testing_env_specs = [
    env_spec
    for env_spec in all_testing_env_specs
    if env_spec.id
    in {
        "CartPole-v1",
        "MountainCarContinuous-v0",
        "LunarLander-v2",
        "LunarLanderContinuous-v2",
        "CarRacing-v1",
        "Blackjack-v1",
        "Reacher-v4",
    }
 ]
 def assert_equals(a, b, prefix=None):
    """Assert equality of data structures `a` and `b`.

    Dicts and tuples are compared recursively, numpy arrays element-wise, and
    every other value with ``==``.

    Args:
        a: first data structure
        b: second data structure
        prefix: prefix for failed assertion messages; omitted when ``None``

    Raises:
        AssertionError: if the two data structures differ.
    """
    # A missing prefix must not appear as the literal text "None" in messages.
    prefix = "" if prefix is None else prefix
    assert type(a) == type(b), f"{prefix}Differing types: {a} and {b}"
    if isinstance(a, dict):
        assert list(a.keys()) == list(b.keys()), f"{prefix}Key sets differ: {a} and {b}"
        for k in a:
            assert_equals(a[k], b[k], prefix)
    elif isinstance(a, np.ndarray):
        np.testing.assert_array_equal(a, b)
    elif isinstance(a, tuple):
        # zip() alone would silently ignore the extra elements of the longer tuple.
        assert len(a) == len(b), f"{prefix}Tuple lengths differ: {a} and {b}"
        for elem_from_a, elem_from_b in zip(a, b):
            assert_equals(elem_from_a, elem_from_b, prefix)
    else:
        assert a == b, f"{prefix}Differing values: {a} and {b}"
--- a/tests/utils/test_play.py
+++ b/tests/utils/test_play.py
@@ -168,7 +168,7 @@ def test_play_loop_real_env():
        return obs_t, obs_tp1, action, rew, done, info
-    env = gym.make(ENV)
+    env = gym.make(ENV, disable_env_checker=True)
    env.reset(seed=SEED)
    keys_to_action = dummy_keys_to_action()
@@ -179,7 +179,7 @@ def test_play_loop_real_env():
        action = keys_to_action[(e.key,)]
        obs, _, _, _ = env.step(action)
-    env_play = gym.make(ENV)
+    env_play = gym.make(ENV, disable_env_checker=True)
    status = PlayStatus(callback)
    play(env_play, callback=status.callback, keys_to_action=keys_to_action, seed=SEED)
--- a/tests/vector/test_sync_vector_env.py
+++ b/tests/vector/test_sync_vector_env.py
@@ -4,7 +4,7 @@ import pytest
 from gym.envs.registration import EnvSpec
 from gym.spaces import Box, Discrete, MultiDiscrete, Tuple
 from gym.vector.sync_vector_env import SyncVectorEnv
-from tests.envs.spec_list import spec_list
+from tests.envs.utils import all_testing_env_specs
 from tests.vector.utils import (
    CustomSpace,
    assert_rng_equal,
@@ -188,7 +188,9 @@ def test_sync_vector_env_seed():
        assert np.all(env_action == vector_action)
-@pytest.mark.parametrize("spec", spec_list, ids=[spec.id for spec in spec_list])
+@pytest.mark.parametrize(
    "spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs]
 )
 def test_sync_vector_determinism(spec: EnvSpec, seed: int = 123, n: int = 3):
    """Check that for all environments, the sync vector envs produce the same action samples using the same seeds"""
    env_1 = SyncVectorEnv([make_env(spec.id, seed=seed) for _ in range(n)])
--- a/tests/vector/test_vector_env_info.py
+++ b/tests/vector/test_vector_env_info.py
@@ -13,7 +13,9 @@ SEED = 42
@pytest.mark.parametrize("asynchronous", [True, False])
 def test_vector_env_info(asynchronous):
-    env = gym.vector.make(ENV_ID, num_envs=NUM_ENVS, asynchronous=asynchronous)
+    env = gym.vector.make(
        ENV_ID, num_envs=NUM_ENVS, asynchronous=asynchronous, disable_env_checker=True
    )
    env.reset(seed=SEED)
    for _ in range(ENV_STEPS):
        env.action_space.seed(SEED)
--- a/tests/vector/utils.py
+++ b/tests/vector/utils.py
@@ -109,7 +109,7 @@ class CustomSpaceEnv(gym.Env):
 def make_env(env_name, seed, **kwargs):
    def _make():
-        env = gym.make(env_name, **kwargs)
+        env = gym.make(env_name, disable_env_checker=True, **kwargs)
        env.action_space.seed(seed)
        env.reset(seed=seed)
        return env
--- a/tests/wrappers/test_atari_preprocessing.py
+++ b/tests/wrappers/test_atari_preprocessing.py
@@ -9,7 +9,7 @@ pytest.importorskip("gym.envs.atari")
@pytest.fixture(scope="module")
 def env_fn():
-    return lambda: gym.make("PongNoFrameskip-v4")
+    return lambda: gym.make("PongNoFrameskip-v4", disable_env_checker=True)
 def test_atari_preprocessing_grayscale(env_fn):
--- a/tests/wrappers/test_autoreset.py
+++ b/tests/wrappers/test_autoreset.py
@@ -8,7 +8,7 @@ import pytest
 import gym
 from gym.wrappers import AutoResetWrapper
-from tests.envs.spec_list import spec_list
+from tests.envs.utils import all_testing_env_specs
 class DummyResetEnv(gym.Env):
@@ -61,7 +61,9 @@ def unwrap_env(env) -> Generator[gym.Wrapper, None, None]:
        env = env.env
-@pytest.mark.parametrize("spec", spec_list, ids=[spec.id for spec in spec_list])
+@pytest.mark.parametrize(
    "spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs]
 )
 def test_make_autoreset_true(spec):
    """Tests gym.make with `autoreset=True`, and check that the reset actually happens.
@@ -71,7 +73,7 @@ def test_make_autoreset_true(spec):
     amount of time with random actions, which is true as of the time of adding this test.
    """
    with pytest.warns(None):
-        env = gym.make(spec.id, autoreset=True)
+        env = gym.make(spec.id, autoreset=True, disable_env_checker=True)
    assert AutoResetWrapper in unwrap_env(env)
    env.reset(seed=0)
@@ -85,21 +87,23 @@ def test_make_autoreset_true(spec):
    env.close()
-@pytest.mark.parametrize("spec", spec_list, ids=[spec.id for spec in spec_list])
+@pytest.mark.parametrize(
    "spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs]
 )
 def test_gym_make_autoreset(spec):
    """Tests that `gym.make` autoreset wrapper is applied only when `gym.make(..., autoreset=True)`."""
    with pytest.warns(None):
-        env = gym.make(spec.id)
+        env = gym.make(spec.id, disable_env_checker=True)
    assert AutoResetWrapper not in unwrap_env(env)
    env.close()
    with pytest.warns(None):
-        env = gym.make(spec.id, autoreset=False)
+        env = gym.make(spec.id, autoreset=False, disable_env_checker=True)
    assert AutoResetWrapper not in unwrap_env(env)
    env.close()
    with pytest.warns(None):
-        env = gym.make(spec.id, autoreset=True)
+        env = gym.make(spec.id, autoreset=True, disable_env_checker=True)
    assert AutoResetWrapper in unwrap_env(env)
    env.close()
--- a/tests/wrappers/test_clip_action.py
+++ b/tests/wrappers/test_clip_action.py
@@ -6,8 +6,10 @@ from gym.wrappers import ClipAction
 def test_clip_action():
    # mountaincar: action-based rewards
-    env = gym.make("MountainCarContinuous-v0")
+    env = gym.make("MountainCarContinuous-v0", disable_env_checker=True)
-    wrapped_env = ClipAction(gym.make("MountainCarContinuous-v0"))
+    wrapped_env = ClipAction(
        gym.make("MountainCarContinuous-v0", disable_env_checker=True)
    )
    seed = 0
--- a/tests/wrappers/test_flatten_observation.py
+++ b/tests/wrappers/test_flatten_observation.py
@@ -8,7 +8,7 @@ from gym.wrappers import FlattenObservation
@pytest.mark.parametrize("env_id", ["Blackjack-v1"])
 def test_flatten_observation(env_id):
-    env = gym.make(env_id)
+    env = gym.make(env_id, disable_env_checker=True)
    wrapped_env = FlattenObservation(env)
    obs = env.reset()
--- a/tests/wrappers/test_frame_stack.py
+++ b/tests/wrappers/test_frame_stack.py
@@ -28,13 +28,13 @@ pytest.importorskip("gym.envs.atari")
    ],
 )
 def test_frame_stack(env_id, num_stack, lz4_compress):
-    env = gym.make(env_id)
+    env = gym.make(env_id, disable_env_checker=True)
    shape = env.observation_space.shape
    env = FrameStack(env, num_stack, lz4_compress)
    assert env.observation_space.shape == (num_stack,) + shape
    assert env.observation_space.dtype == env.env.observation_space.dtype
-    dup = gym.make(env_id)
+    dup = gym.make(env_id, disable_env_checker=True)
    obs = env.reset(seed=0)
    dup_obs = dup.reset(seed=0)
--- a/tests/wrappers/test_gray_scale_observation.py
+++ b/tests/wrappers/test_gray_scale_observation.py
@@ -13,8 +13,12 @@ pytest.importorskip("cv2")
 )
@pytest.mark.parametrize("keep_dim", [True, False])
 def test_gray_scale_observation(env_id, keep_dim):
-    gray_env = AtariPreprocessing(gym.make(env_id), screen_size=84, grayscale_obs=True)
+    gray_env = AtariPreprocessing(
-    rgb_env = AtariPreprocessing(gym.make(env_id), screen_size=84, grayscale_obs=False)
+        gym.make(env_id, disable_env_checker=True), screen_size=84, grayscale_obs=True
    )
    rgb_env = AtariPreprocessing(
        gym.make(env_id, disable_env_checker=True), screen_size=84, grayscale_obs=False
    )
    wrapped_env = GrayScaleObservation(rgb_env, keep_dim=keep_dim)
    assert rgb_env.observation_space.shape[-1] == 3
--- a/tests/wrappers/test_order_enforcing.py
+++ b/tests/wrappers/test_order_enforcing.py
@@ -4,14 +4,16 @@ import gym
 from gym.envs.classic_control import CartPoleEnv
 from gym.error import ResetNeeded
 from gym.wrappers import OrderEnforcing
-from tests.envs.spec_list import spec_list
+from tests.envs.utils import all_testing_env_specs
 from tests.wrappers.utils import has_wrapper
-@pytest.mark.parametrize("spec", spec_list, ids=[spec.id for spec in spec_list])
+@pytest.mark.parametrize(
    "spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs]
 )
 def test_gym_make_order_enforcing(spec):
    """Checks that gym.make wrappers the environment with the OrderEnforcing wrapper."""
-    env = gym.make(spec.id)
+    env = gym.make(spec.id, disable_env_checker=True)
    assert has_wrapper(env, OrderEnforcing)
--- a/tests/wrappers/test_record_episode_statistics.py
+++ b/tests/wrappers/test_record_episode_statistics.py
@@ -9,7 +9,7 @@ from gym.wrappers.record_episode_statistics import add_vector_episode_statistics
@pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v1"])
@pytest.mark.parametrize("deque_size", [2, 5])
 def test_record_episode_statistics(env_id, deque_size):
-    env = gym.make(env_id)
+    env = gym.make(env_id, disable_env_checker=True)
    env = RecordEpisodeStatistics(env, deque_size)
    for n in range(5):
@@ -27,7 +27,7 @@ def test_record_episode_statistics(env_id, deque_size):
 def test_record_episode_statistics_reset_info():
-    env = gym.make("CartPole-v1")
+    env = gym.make("CartPole-v1", disable_env_checker=True)
    env = RecordEpisodeStatistics(env)
    ob_space = env.observation_space
    obs = env.reset()
@@ -43,7 +43,11 @@ def test_record_episode_statistics_reset_info():
 )
 def test_record_episode_statistics_with_vectorenv(num_envs, asynchronous):
    envs = gym.vector.make(
-        "CartPole-v1", render_mode=None, num_envs=num_envs, asynchronous=asynchronous
+        "CartPole-v1",
        render_mode=None,
        num_envs=num_envs,
        asynchronous=asynchronous,
        disable_env_checker=True,
    )
    envs = RecordEpisodeStatistics(envs)
    max_episode_step = (
@@ -66,7 +70,7 @@ def test_record_episode_statistics_with_vectorenv(num_envs, asynchronous):
 def test_wrong_wrapping_order():
-    envs = gym.vector.make("CartPole-v1", num_envs=3)
+    envs = gym.vector.make("CartPole-v1", num_envs=3, disable_env_checker=True)
    wrapped_env = RecordEpisodeStatistics(VectorListInfo(envs))
    wrapped_env.reset()
--- a/tests/wrappers/test_record_video.py
+++ b/tests/wrappers/test_record_video.py
@@ -6,8 +6,7 @@ from gym.wrappers import capped_cubic_video_schedule
 def test_record_video_using_default_trigger():
-
+    env = gym.make("CartPole-v1", render_mode="rgb_array", disable_env_checker=True)
    env = gym.make("CartPole-v1", render_mode="rgb_array")
    env = gym.wrappers.RecordVideo(env, "videos")
    env.reset()
    for _ in range(199):
@@ -25,7 +24,7 @@ def test_record_video_using_default_trigger():
 def test_record_video_reset_return_info():
-    env = gym.make("CartPole-v1", render_mode="rgb_array")
+    env = gym.make("CartPole-v1", render_mode="rgb_array", disable_env_checker=True)
    env = gym.wrappers.RecordVideo(env, "videos", step_trigger=lambda x: x % 100 == 0)
    ob_space = env.observation_space
    obs, info = env.reset(return_info=True)
@@ -35,7 +34,7 @@ def test_record_video_reset_return_info():
    assert ob_space.contains(obs)
    assert isinstance(info, dict)
-    env = gym.make("CartPole-v1", render_mode="rgb_array")
+    env = gym.make("CartPole-v1", render_mode="rgb_array", disable_env_checker=True)
    env = gym.wrappers.RecordVideo(env, "videos", step_trigger=lambda x: x % 100 == 0)
    ob_space = env.observation_space
    obs = env.reset(return_info=False)
@@ -44,7 +43,7 @@ def test_record_video_reset_return_info():
    shutil.rmtree("videos")
    assert ob_space.contains(obs)
-    env = gym.make("CartPole-v1", render_mode="rgb_array")
+    env = gym.make("CartPole-v1", render_mode="rgb_array", disable_env_checker=True)
    env = gym.wrappers.RecordVideo(env, "videos", step_trigger=lambda x: x % 100 == 0)
    ob_space = env.observation_space
    obs = env.reset()
@@ -55,7 +54,7 @@ def test_record_video_reset_return_info():
 def test_record_video_step_trigger():
-    env = gym.make("CartPole-v1", render_mode="rgb_array")
+    env = gym.make("CartPole-v1", render_mode="rgb_array", disable_env_checker=True)
    env._max_episode_steps = 20
    env = gym.wrappers.RecordVideo(env, "videos", step_trigger=lambda x: x % 100 == 0)
    env.reset()
@@ -73,7 +72,7 @@ def test_record_video_step_trigger():
 def make_env(gym_id, seed, **kwargs):
    def thunk():
-        env = gym.make(gym_id, **kwargs)
+        env = gym.make(gym_id, disable_env_checker=True, **kwargs)
        env._max_episode_steps = 20
        if seed == 1:
            env = gym.wrappers.RecordVideo(
--- a/tests/wrappers/test_rescale_action.py
+++ b/tests/wrappers/test_rescale_action.py
@@ -6,13 +6,15 @@ from gym.wrappers import RescaleAction
 def test_rescale_action():
-    env = gym.make("CartPole-v1")
+    env = gym.make("CartPole-v1", disable_env_checker=True)
    with pytest.raises(AssertionError):
        env = RescaleAction(env, -1, 1)
    del env
-    env = gym.make("Pendulum-v1")
+    env = gym.make("Pendulum-v1", disable_env_checker=True)
-    wrapped_env = RescaleAction(gym.make("Pendulum-v1"), -1, 1)
+    wrapped_env = RescaleAction(
        gym.make("Pendulum-v1", disable_env_checker=True), -1, 1
    )
    seed = 0
--- a/tests/wrappers/test_resize_observation.py
+++ b/tests/wrappers/test_resize_observation.py
@@ -11,7 +11,7 @@ pytest.importorskip("gym.envs.atari")
 )
@pytest.mark.parametrize("shape", [16, 32, (8, 5), [10, 7]])
 def test_resize_observation(env_id, shape):
-    env = gym.make(env_id)
+    env = gym.make(env_id, disable_env_checker=True)
    env = ResizeObservation(env, shape)
    assert env.observation_space.shape[-1] == 3
--- a/tests/wrappers/test_time_aware_observation.py
+++ b/tests/wrappers/test_time_aware_observation.py
@@ -6,7 +6,7 @@ from gym.wrappers import TimeAwareObservation
@pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v1"])
 def test_time_aware_observation(env_id):
-    env = gym.make(env_id)
+    env = gym.make(env_id, disable_env_checker=True)
    wrapped_env = TimeAwareObservation(env)
    assert wrapped_env.observation_space.shape[0] == env.observation_space.shape[0] + 1
--- a/tests/wrappers/test_time_limit.py
+++ b/tests/wrappers/test_time_limit.py
@@ -6,7 +6,7 @@ from gym.wrappers import TimeLimit
 def test_time_limit_reset_info():
-    env = gym.make("CartPole-v1")
+    env = gym.make("CartPole-v1", disable_env_checker=True)
    env = TimeLimit(env)
    ob_space = env.observation_space
    obs = env.reset()
--- a/tests/wrappers/test_transform_observation.py
+++ b/tests/wrappers/test_transform_observation.py
@@ -10,9 +10,9 @@ def test_transform_observation(env_id):
    def affine_transform(x):
        return 3 * x + 2
-    env = gym.make(env_id)
+    env = gym.make(env_id, disable_env_checker=True)
    wrapped_env = TransformObservation(
-        gym.make(env_id), lambda obs: affine_transform(obs)
+        gym.make(env_id, disable_env_checker=True), lambda obs: affine_transform(obs)
    )
    obs = env.reset(seed=0)
--- a/tests/wrappers/test_transform_reward.py
+++ b/tests/wrappers/test_transform_reward.py
@@ -10,8 +10,10 @@ def test_transform_reward(env_id):
    # use case #1: scale
    scales = [0.1, 200]
    for scale in scales:
-        env = gym.make(env_id)
+        env = gym.make(env_id, disable_env_checker=True)
-        wrapped_env = TransformReward(gym.make(env_id), lambda r: scale * r)
+        wrapped_env = TransformReward(
            gym.make(env_id, disable_env_checker=True), lambda r: scale * r
        )
        action = env.action_space.sample()
        env.reset(seed=0)
@@ -26,8 +28,10 @@ def test_transform_reward(env_id):
    # use case #2: clip
    min_r = -0.0005
    max_r = 0.0002
-    env = gym.make(env_id)
+    env = gym.make(env_id, disable_env_checker=True)
-    wrapped_env = TransformReward(gym.make(env_id), lambda r: np.clip(r, min_r, max_r))
+    wrapped_env = TransformReward(
        gym.make(env_id, disable_env_checker=True), lambda r: np.clip(r, min_r, max_r)
    )
    action = env.action_space.sample()
    env.reset(seed=0)
@@ -41,8 +45,10 @@ def test_transform_reward(env_id):
    del env, wrapped_env
    # use case #3: sign
-    env = gym.make(env_id)
+    env = gym.make(env_id, disable_env_checker=True)
-    wrapped_env = TransformReward(gym.make(env_id), lambda r: np.sign(r))
+    wrapped_env = TransformReward(
        gym.make(env_id, disable_env_checker=True), lambda r: np.sign(r)
    )
    env.reset(seed=0)
    wrapped_env.reset(seed=0)
--- a/tests/wrappers/test_vector_list_info.py
+++ b/tests/wrappers/test_vector_list_info.py
@@ -10,8 +10,8 @@ SEED = 42
 def test_usage_in_vector_env():
-    env = gym.make(ENV_ID)
+    env = gym.make(ENV_ID, disable_env_checker=True)
-    vector_env = gym.vector.make(ENV_ID, num_envs=NUM_ENVS)
+    vector_env = gym.vector.make(ENV_ID, num_envs=NUM_ENVS, disable_env_checker=True)
    VectorListInfo(vector_env)
@@ -20,7 +20,7 @@ def test_usage_in_vector_env():
 def test_info_to_list():
-    env_to_wrap = gym.vector.make(ENV_ID, num_envs=NUM_ENVS)
+    env_to_wrap = gym.vector.make(ENV_ID, num_envs=NUM_ENVS, disable_env_checker=True)
    wrapped_env = VectorListInfo(env_to_wrap)
    wrapped_env.action_space.seed(SEED)
    _, info = wrapped_env.reset(seed=SEED, return_info=True)
@@ -38,7 +38,7 @@ def test_info_to_list():
 def test_info_to_list_statistics():
-    env_to_wrap = gym.vector.make(ENV_ID, num_envs=NUM_ENVS)
+    env_to_wrap = gym.vector.make(ENV_ID, num_envs=NUM_ENVS, disable_env_checker=True)
    wrapped_env = VectorListInfo(RecordEpisodeStatistics(env_to_wrap))
    _, info = wrapped_env.reset(seed=SEED, return_info=True)
    wrapped_env.action_space.seed(SEED)
--- a/tests/wrappers/test_video_recorder.py
+++ b/tests/wrappers/test_video_recorder.py
@@ -29,7 +29,7 @@ class UnrecordableEnv:
 def test_record_simple():
-    env = gym.make("CartPole-v1", render_mode="rgb_array")
+    env = gym.make("CartPole-v1", render_mode="rgb_array", disable_env_checker=True)
    rec = VideoRecorder(env)
    env.reset()
    rec.capture_frame()
@@ -49,7 +49,7 @@ def test_record_simple():
 def test_autoclose():
    def record():
-        env = gym.make("CartPole-v1", render_mode="rgb_array")
+        env = gym.make("CartPole-v1", render_mode="rgb_array", disable_env_checker=True)
        rec = VideoRecorder(env)
        env.reset()
        rec.capture_frame()
@@ -102,7 +102,7 @@ def test_record_breaking_render_method():
 def test_text_envs():
-    env = gym.make("FrozenLake-v1", render_mode="rgb_array")
+    env = gym.make("FrozenLake-v1", render_mode="rgb_array", disable_env_checker=True)
    video = VideoRecorder(env)
    try:
        env.reset()