Rewrite env tests (#2867)

2025-08-23 15:04:20 +00:00 · 2022-06-16 14:29:13 +01:00
parent 71f11a0642
commit ffbf971171
37 changed files with 469 additions and 657 deletions
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -14,4 +14,4 @@ jobs:
             --build-arg PYTHON_VERSION=${{ matrix.python-version }} \
             --tag gym-docker .
      - name: Run tests
-        run: docker run gym-docker pytest --import-mode=append
+        run: docker run gym-docker pytest
--- a/gym/envs/mujoco/__init__.py
+++ b/gym/envs/mujoco/__init__.py
@@ -1,4 +1,8 @@
 from gym.envs.mujoco.mujoco_env import MujocoEnv  # isort:skip
+from gym.envs.mujoco.mujoco_rendering import (  # isort:skip
+    RenderContextOffscreen,
+    Viewer,
+)

 # ^^^^^ so that user gets the correct error
 # message if mujoco is not installed correctly
@@ -9,7 +13,6 @@ from gym.envs.mujoco.humanoid import HumanoidEnv
 from gym.envs.mujoco.humanoidstandup import HumanoidStandupEnv
 from gym.envs.mujoco.inverted_double_pendulum import InvertedDoublePendulumEnv
 from gym.envs.mujoco.inverted_pendulum import InvertedPendulumEnv
-from gym.envs.mujoco.mujoco_rendering import RenderContextOffscreen, Viewer
 from gym.envs.mujoco.pusher import PusherEnv
 from gym.envs.mujoco.reacher import ReacherEnv
 from gym.envs.mujoco.swimmer import SwimmerEnv
--- a/tests/envs/register_during_make_env.py
+++ b/tests/envs/register_during_make_env.py
@@ -1,6 +1,7 @@
 import gym


+# TODO: remove this class and use a generic testing environment instead
 class RegisterDuringMakeEnv(gym.Env):
    """Used in `test_registration.py` to check if `env.make` can import and register an env"""

--- a/tests/envs/spec_list.py
+++ b/tests/envs/spec_list.py
@@ -1,70 +0,0 @@
-from gym import envs, logger
-
-SKIP_MUJOCO_V3_WARNING_MESSAGE = (
-    "Cannot run mujoco test because `mujoco-py` is not installed"
-)
-SKIP_MUJOCO_V4_WARNING_MESSAGE = (
-    "Cannot run mujoco test because `mujoco` is not installed"
-)
-
-skip_mujoco_v3 = False
-try:
-    import mujoco_py  # noqa:F401
-except ImportError:
-    skip_mujoco_v3 = True
-
-skip_mujoco_v4 = False
-try:
-    import mujoco  # noqa:F401
-except ImportError:
-    skip_mujoco_v4 = True
-
-
-def should_skip_env_spec_for_tests(spec):
-    # We skip tests for envs that require dependencies or are otherwise
-    # troublesome to run frequently
-    ep = spec.entry_point
-    # Skip mujoco tests for pull request CI
-    if (skip_mujoco_v3 or skip_mujoco_v4) and ep.startswith("gym.envs.mujoco"):
-        return True
-    try:
-        import gym.envs.atari  # noqa:F401
-    except ImportError:
-        if ep.startswith("gym.envs.atari"):
-            return True
-    try:
-        import Box2D  # noqa:F401
-    except ImportError:
-        if ep.startswith("gym.envs.box2d"):
-            return True
-
-    if (
-        "GoEnv" in ep
-        or "HexEnv" in ep
-        or (
-            ep.startswith("gym.envs.atari")
-            and not spec.id.startswith("Pong")
-            and not spec.id.startswith("Seaquest")
-        )
-    ):
-        logger.warn(f"Skipping tests for env {ep}")
-        return True
-    return False
-
-
-def skip_mujoco_py_env_for_test(spec):
-    ep = spec.entry_point
-    version = spec.version
-    if ep.startswith("gym.envs.mujoco") and version < 4:
-        return True
-    return False
-
-
-spec_list = [
-    spec
-    for spec in sorted(envs.registry.values(), key=lambda x: x.id)
-    if spec.entry_point is not None and not should_skip_env_spec_for_tests(spec)
-]
-spec_list_no_mujoco_py = [
-    spec for spec in spec_list if not skip_mujoco_py_env_for_test(spec)
-]
--- a/tests/envs/test_action_dim_check.py
+++ b/tests/envs/test_action_dim_check.py
@@ -1,93 +1,102 @@
-from typing import List
-
 import numpy as np
 import pytest

 import gym
-from gym import Env
+from gym import spaces
 from gym.envs.registration import EnvSpec
-from gym.spaces.box import Box
-from gym.spaces.discrete import Discrete
-from tests.envs.spec_list import (
-    SKIP_MUJOCO_V3_WARNING_MESSAGE,
-    skip_mujoco_v3,
-    spec_list,
-)
-
-ENVIRONMENT_IDS = ("HalfCheetah-v2",)
-
-
-def filters_envs_action_space_type(
-    env_spec_list: List[EnvSpec], action_space: type
-) -> List[Env]:
-    """Make environments of specific action_space type.
-
-    This function returns a filtered list of environment from the spec_list that matches the action_space type.
-
-    Args:
-        env_spec_list (list): list of registered environments' specification
-        action_space (gym.spaces.Space): action_space type
-    """
-    filtered_envs = []
-    for spec in env_spec_list:
-        env = gym.make(spec.id)
-        if isinstance(env.action_space, action_space):
-            filtered_envs.append(env)
-    return filtered_envs
-
-
-@pytest.mark.skipif(skip_mujoco_v3, reason=SKIP_MUJOCO_V3_WARNING_MESSAGE)
-@pytest.mark.parametrize("environment_id", ENVIRONMENT_IDS)
-def test_serialize_deserialize(environment_id):
-    env = gym.make(environment_id)
-    env.reset()
-
-    with pytest.raises(ValueError, match="Action dimension mismatch"):
-        env.step([0.1])
-
-    with pytest.raises(ValueError, match="Action dimension mismatch"):
-        env.step(0.1)
-
-
-@pytest.mark.parametrize("env", filters_envs_action_space_type(spec_list, Discrete))
-def test_discrete_actions_out_of_bound(env):
-    """Test out of bound actions in Discrete action_space.
-    In discrete action_space environments, `out-of-bound`
-    actions are not allowed and should raise an exception.
-    Args:
-        env (gym.Env): the gym environment
-    """
-    env.reset()
-
-    action_space = env.action_space
-    upper_bound = action_space.start + action_space.n - 1
-
-    with pytest.raises(Exception):
-        env.step(upper_bound + 1)
+from tests.envs.utils import all_testing_initialised_envs, mujoco_testing_env_specs


 @pytest.mark.parametrize(
-    ("env", "seed"),
-    [(env, 42) for env in filters_envs_action_space_type(spec_list, Box)],
+    "env_spec",
+    mujoco_testing_env_specs,
+    ids=[env_spec.id for env_spec in mujoco_testing_env_specs],
 )
-def test_box_actions_out_of_bound(env, seed):
+def test_mujoco_action_dimensions(env_spec: EnvSpec):
+    """Test that every mujoco environment raises an error for mis-dimensioned actions.
+
+    Types of mis-dimensioned actions:
+     * Too few actions
+     * Too many actions
+     * Too few dimensions
+     * Too many dimensions
+     * Incorrect shape
+    """
+    env = env_spec.make(disable_env_checker=True)
+    env.reset()
+
+    # Too few actions
+    with pytest.raises(ValueError, match="Action dimension mismatch"):
+        env.step(env.action_space.sample()[1:])
+
+    # Too many actions
+    with pytest.raises(ValueError, match="Action dimension mismatch"):
+        env.step(np.append(env.action_space.sample(), 0))
+
+    # Too few dimensions
+    with pytest.raises(ValueError, match="Action dimension mismatch"):
+        env.step(0.1)
+
+    # Too many dimensions
+    with pytest.raises(ValueError, match="Action dimension mismatch"):
+        env.step(np.expand_dims(env.action_space.sample(), 0))
+
+    # Incorrect shape
+    with pytest.raises(ValueError, match="Action dimension mismatch"):
+        env.step(np.expand_dims(env.action_space.sample(), 1))
+
+    env.close()
+
+
+@pytest.mark.parametrize(
+    "env",
+    filter(
+        lambda env: isinstance(env.action_space, spaces.Discrete),
+        all_testing_initialised_envs,
+    ),
+)
+def test_discrete_actions_out_of_bound(env: gym.Env):
+    """Test out of bound actions in Discrete action_space.
+
+    In discrete action_space environments, `out-of-bound`
+    actions are not allowed and should raise an exception.
+
+    Args:
+        env (gym.Env): the gym environment
+    """
+    assert isinstance(env.action_space, spaces.Discrete)
+    upper_bound = env.action_space.start + env.action_space.n - 1
+
+    env.reset()
+    with pytest.raises(Exception):
+        env.step(upper_bound + 1)
+
+    env.close()
+
+
+OOB_VALUE = 100
+
+
+@pytest.mark.parametrize(
+    "env", filter(lambda env: isinstance(env, spaces.Box), all_testing_initialised_envs)
+)
+def test_box_actions_out_of_bound(env: gym.Env):
    """Test out of bound actions in Box action_space.
+
    Environments with Box actions spaces perform clipping inside `step`.
    The expected behaviour is that an action `out-of-bound` has the same effect
    of an action with value exactly at the upper (or lower) bound.
+
    Args:
        env (gym.Env): the gym environment
-        seed (int): seed value for determinism
    """
-    OOB_VALUE = 100
+    env.reset(seed=42)

-    env.reset(seed=seed)
-
-    oob_env = gym.make(env.spec.id)
-    oob_env.reset(seed=seed)
+    oob_env = gym.make(env.spec.id, disable_env_checker=True)
+    oob_env.reset(seed=42)

+    assert isinstance(env.action_space, spaces.Box)
    dtype = env.action_space.dtype
-
    upper_bounds = env.action_space.high
    lower_bounds = env.action_space.low

@@ -113,3 +122,5 @@ def test_box_actions_out_of_bound(env, seed):
            oob_obs, _, _, _ = oob_env.step(oob_action)

            assert np.alltrue(obs == oob_obs)
+
+    env.close()
--- a/tests/envs/test_atari_legacy_env_specs.py
+++ b/tests/envs/test_atari_legacy_env_specs.py
@@ -1,136 +0,0 @@
-from itertools import product
-
-import pytest
-
-from gym.envs.registration import registry
-
-pytest.importorskip("gym.envs.atari")
-
-
-def test_ale_legacy_env_specs():
-    versions = ["-v0", "-v4"]
-    suffixes = ["", "NoFrameskip", "Deterministic"]
-    obs_types = ["", "-ram"]
-    games = [
-        "adventure",
-        "air_raid",
-        "alien",
-        "amidar",
-        "assault",
-        "asterix",
-        "asteroids",
-        "atlantis",
-        "bank_heist",
-        "battle_zone",
-        "beam_rider",
-        "berzerk",
-        "bowling",
-        "boxing",
-        "breakout",
-        "carnival",
-        "centipede",
-        "chopper_command",
-        "crazy_climber",
-        "defender",
-        "demon_attack",
-        "double_dunk",
-        "elevator_action",
-        "enduro",
-        "fishing_derby",
-        "freeway",
-        "frostbite",
-        "gopher",
-        "gravitar",
-        "hero",
-        "ice_hockey",
-        "jamesbond",
-        "journey_escape",
-        "kangaroo",
-        "krull",
-        "kung_fu_master",
-        "montezuma_revenge",
-        "ms_pacman",
-        "name_this_game",
-        "phoenix",
-        "pitfall",
-        "pong",
-        "pooyan",
-        "private_eye",
-        "qbert",
-        "riverraid",
-        "road_runner",
-        "robotank",
-        "seaquest",
-        "skiing",
-        "solaris",
-        "space_invaders",
-        "star_gunner",
-        "tennis",
-        "time_pilot",
-        "tutankham",
-        "up_n_down",
-        "venture",
-        "video_pinball",
-        "wizard_of_wor",
-        "yars_revenge",
-        "zaxxon",
-    ]
-
-    # Convert snake case to camel case
-    games = list(map(lambda x: x.title().replace("_", ""), games))
-    specs = list(map("".join, product(games, obs_types, suffixes, versions)))
-
-    """
-    defaults:
-        repeat_action_probability = 0.0
-        full_action_space = False
-        frameskip = (2, 5)
-        game = "Pong"
-        obs_type = "ram"
-        mode = None
-        difficulty = None
-
-    v0: repeat_action_probability = 0.25
-    v4: inherits defaults
-
-    -NoFrameskip: frameskip = 1
-    -Deterministic: frameskip = 4 or 3 for space_invaders
-    """
-    for spec in specs:
-        assert spec in registry.env_specs
-        kwargs = registry.env_specs[spec]._kwargs
-
-        # Assert necessary parameters are set
-        assert "frameskip" in kwargs
-        assert "game" in kwargs
-        assert "obs_type" in kwargs
-        assert "repeat_action_probability" in kwargs
-        assert "full_action_space" in kwargs
-
-        # Common defaults
-        assert kwargs["full_action_space"] is False
-        assert "mode" not in kwargs
-        assert "difficulty" not in kwargs
-
-        if "-ram" in spec:
-            assert kwargs["obs_type"] == "ram"
-        else:
-            assert kwargs["obs_type"] == "rgb"
-
-        if "NoFrameskip" in spec:
-            assert kwargs["frameskip"] == 1
-        elif "Deterministic" in spec:
-            assert isinstance(kwargs["frameskip"], int)
-            frameskip = 3 if "SpaceInvaders" in spec else 4
-            assert kwargs["frameskip"] == frameskip
-        else:
-            assert isinstance(kwargs["frameskip"], tuple) and kwargs["frameskip"] == (
-                2,
-                5,
-            )
-
-        assert spec.endswith("v0") or spec.endswith("v4")
-        if spec.endswith("v0"):
-            assert kwargs["repeat_action_probability"] == 0.25
-        elif spec.endswith("v4"):
-            assert kwargs["repeat_action_probability"] == 0.0
--- a/tests/envs/test_bipedal_walker.py
+++ b/tests/envs/test_bipedal_walker.py
@@ -1,41 +0,0 @@
-"""Test BipedalWalker environment."""
-import pytest
-
-from gym.envs.box2d import BipedalWalker
-
-
-@pytest.mark.parametrize("seed", range(10))
-def test_bipedal_walker_hardcore_creation(seed: int):
-    """Test BipedalWalker hardcore creation.
-
-    BipedalWalker with `hardcore=True` should have ladders
-    stumps and pitfalls. A convenient way to identify if ladders,
-    stumps and pitfall are created is checking whether the terrain
-    has that particular terrain color.
-
-    Args:
-        seed (int): environment seed
-    """
-    HC_TERRAINS_COLOR1 = (255, 255, 255)
-    HC_TERRAINS_COLOR2 = (153, 153, 153)
-
-    env = BipedalWalker(hardcore=False)
-    env.reset(seed=seed)
-
-    hc_env = BipedalWalker(hardcore=True)
-    hc_env.reset(seed=seed)
-
-    for terrain in env.terrain:
-        assert terrain.color1 != HC_TERRAINS_COLOR1
-        assert terrain.color2 != HC_TERRAINS_COLOR2
-
-    hc_terrains_color1_count = 0
-    hc_terrains_color2_count = 0
-    for terrain in hc_env.terrain:
-        if terrain.color1 == HC_TERRAINS_COLOR1:
-            hc_terrains_color1_count += 1
-        if terrain.color2 == HC_TERRAINS_COLOR2:
-            hc_terrains_color2_count += 1
-
-    assert hc_terrains_color1_count > 0
-    assert hc_terrains_color2_count > 0
--- a/tests/envs/test_determinism.py
+++ b/tests/envs/test_determinism.py
@@ -1,83 +0,0 @@
-"""Test environment determinism by performing a rollout."""
-
-import pytest
-
-from gym.utils.env_checker import data_equivalence
-from tests.envs.spec_list import spec_list
-
-
-@pytest.mark.parametrize("spec", spec_list, ids=[spec.id for spec in spec_list])
-def test_env(spec):
-    """Run a rollout with two environments and assert equality.
-
-    This test run a rollout of NUM_STEPS steps with two environments
-    initialized with the same seed and assert that:
-
-    - observation after first reset are the same
-    - same actions are sampled by the two envs
-    - observations are contained in the observation space
-    - obs, rew, done and info are equals between the two envs
-
-    Args:
-        spec (EnvSpec): Environment specification
-
-    """
-    # Note that this precludes running this test in multiple
-    # threads. However, we probably already can't do multithreading
-    # due to some environments.
-    SEED = 0
-    NUM_STEPS = 50
-
-    env1, env2 = spec.make(), spec.make()
-
-    initial_observation1 = env1.reset(seed=SEED)
-    initial_observation2 = env2.reset(seed=SEED)
-
-    env1.action_space.seed(SEED)
-    env2.action_space.seed(SEED)
-
-    assert data_equivalence(
-        initial_observation1, initial_observation2
-    ), f"Initial Observations 1 and 2 are not equivalent. initial obs 1={initial_observation1}, initial obs 2={initial_observation2}"
-
-    for i in range(NUM_STEPS):
-        action1 = env1.action_space.sample()
-        action2 = env2.action_space.sample()
-
-        try:
-            assert data_equivalence(
-                action1, action2
-            ), f"Action 1 and 2 are not equivalent. action 1={action1}, action 2={action2}"
-        except AssertionError:
-            print(f"env 1 action space={env1.action_space}")
-            print(f"env 2 action space={env2.action_space}")
-            print(f"[{i}] action sample 1={action1}, action sample 2={action2}")
-            raise
-
-        # Don't check rollout equality if it's a nondeterministic
-        # environment.
-        if spec.nondeterministic:
-            return
-
-        obs1, rew1, done1, info1 = env1.step(action1)
-        obs2, rew2, done2, info2 = env2.step(action2)
-
-        assert data_equivalence(
-            obs1, obs2
-        ), f"Observation 1 and 2 are not equivalent. obs 1={obs1}, obs 2={obs2}"
-
-        assert env1.observation_space.contains(obs1)
-        assert env2.observation_space.contains(obs2)
-
-        assert rew1 == rew2, f"[{i}] reward1: {rew1}, reward2: {rew2}"
-        assert done1 == done2, f"[{i}] done1: {done1}, done2: {done2}"
-        assert data_equivalence(
-            info1, info2
-        ), f"Info 1 and 2 are not equivalent. info 1={info1}, info 2={info2}"
-
-        if done1:  # done2 verified in previous assertion
-            env1.reset(seed=SEED)
-            env2.reset(seed=SEED)
-
-    env1.close()
-    env2.close()
--- a/tests/envs/test_env_implementation.py
+++ b/tests/envs/test_env_implementation.py
@@ -0,0 +1,82 @@
+import pytest
+
+import gym
+from gym.envs.box2d import BipedalWalker
+from gym.envs.box2d.lunar_lander import demo_heuristic_lander
+from gym.envs.toy_text.frozen_lake import generate_random_map
+
+
+def test_lunar_lander_heuristics():
+    lunar_lander = gym.make("LunarLander-v2", disable_env_checker=True)
+    total_reward = demo_heuristic_lander(lunar_lander, seed=1)
+    assert total_reward > 100
+
+
+@pytest.mark.parametrize("seed", range(5))
+def test_bipedal_walker_hardcore_creation(seed: int):
+    """Test BipedalWalker hardcore creation.
+
+    BipedalWalker with `hardcore=True` should have ladders
+    stumps and pitfalls. A convenient way to identify if ladders,
+    stumps and pitfall are created is checking whether the terrain
+    has that particular terrain color.
+
+    Args:
+        seed (int): environment seed
+    """
+    HC_TERRAINS_COLOR1 = (255, 255, 255)
+    HC_TERRAINS_COLOR2 = (153, 153, 153)
+
+    env = gym.make("BipedalWalker-v3", disable_env_checker=True).unwrapped
+    hc_env = gym.make("BipedalWalkerHardcore-v3", disable_env_checker=True).unwrapped
+    assert isinstance(env, BipedalWalker) and isinstance(hc_env, BipedalWalker)
+    assert env.hardcore is False and hc_env.hardcore is True
+
+    env.reset(seed=seed)
+    hc_env.reset(seed=seed)
+
+    for terrain in env.terrain:
+        assert terrain.color1 != HC_TERRAINS_COLOR1
+        assert terrain.color2 != HC_TERRAINS_COLOR2
+
+    hc_terrains_color1_count = 0
+    hc_terrains_color2_count = 0
+    for terrain in hc_env.terrain:
+        if terrain.color1 == HC_TERRAINS_COLOR1:
+            hc_terrains_color1_count += 1
+        if terrain.color2 == HC_TERRAINS_COLOR2:
+            hc_terrains_color2_count += 1
+
+    assert hc_terrains_color1_count > 0
+    assert hc_terrains_color2_count > 0
+
+
+@pytest.mark.parametrize("map_size", [5, 10, 16])
+def test_frozenlake_dfs_map_generation(map_size: int):
+    """Frozenlake has the ability to generate random maps.
+
+    This function checks that the random maps will always be possible to solve for sizes 5, 10, 16,
+    currently only 8x8 maps can be generated.
+    """
+    new_frozenlake = generate_random_map(map_size)
+    assert len(new_frozenlake) == map_size
+    assert len(new_frozenlake[0]) == map_size
+
+    # Runs a depth first search through the map to find the path.
+    directions = [(1, 0), (0, 1), (-1, 0), (0, -1)]
+    frontier, discovered = [], set()
+    frontier.append((0, 0))
+    while frontier:
+        row, col = frontier.pop()
+        if (row, col) not in discovered:
+            discovered.add((row, col))
+
+            for row_direction, col_direction in directions:
+                new_row = row + row_direction
+                new_col = col + col_direction
+                if 0 <= new_row < map_size and 0 <= new_col < map_size:
+                    if new_frozenlake[new_row][new_col] == "G":
+                        return  # Successful, a route through the map was found
+                    if new_frozenlake[new_row][new_col] not in "#H":
+                        frontier.append((new_row, new_col))
+    raise AssertionError("No path through the frozenlake was found.")
--- a/tests/envs/test_envs.py
+++ b/tests/envs/test_envs.py
@@ -1,79 +1,84 @@
-from typing import List
-
-import numpy as np
 import pytest

-from gym import envs
-from gym.spaces import Box
+from gym.envs.registration import EnvSpec
 from gym.utils.env_checker import check_env
-from tests.envs.spec_list import spec_list, spec_list_no_mujoco_py
-
+from tests.envs.utils import all_testing_env_specs, assert_equals, gym_testing_env_specs

 # This runs a smoketest on each official registered env. We may want
 # to try also running environments which are not officially registered
 # envs.
-@pytest.mark.filterwarnings(
-    "ignore:.*We recommend you to use a symmetric and normalized Box action space.*"
-)
+
+
 @pytest.mark.parametrize(
-    "spec", spec_list_no_mujoco_py, ids=[spec.id for spec in spec_list_no_mujoco_py]
+    "env_spec", gym_testing_env_specs, ids=[spec.id for spec in gym_testing_env_specs]
 )
-def test_env(spec):
-    # Capture warnings
-    with pytest.warns(None) as warnings:
-        env = spec.make()
-
-    # Test if env adheres to Gym API
-    check_env(env, skip_render_check=True)
-
-    # Check that dtype is explicitly declared for gym.Box spaces
-    for warning_msg in warnings:
-        assert "autodetected dtype" not in str(warning_msg.message)
-
-    ob_space = env.observation_space
-    act_space = env.action_space
-    ob = env.reset()
-    assert ob_space.contains(ob), f"Reset observation: {ob!r} not in space"
-    if isinstance(ob_space, Box):
-        # Only checking dtypes for Box spaces to avoid iterating through tuple entries
-        assert (
-            ob.dtype == ob_space.dtype
-        ), f"Reset observation dtype: {ob.dtype}, expected: {ob_space.dtype}"
-
-    a = act_space.sample()
-    observation, reward, done, _info = env.step(a)
-    assert ob_space.contains(
-        observation
-    ), f"Step observation: {observation!r} not in space"
-    assert np.isscalar(reward), f"{reward} is not a scalar for {env}"
-    assert isinstance(done, bool), f"Expected {done} to be a boolean"
-    if isinstance(ob_space, Box):
-        assert (
-            observation.dtype == ob_space.dtype
-        ), f"Step observation dtype: {ob.dtype}, expected: {ob_space.dtype}"
+def test_run_env_checker(env_spec: EnvSpec):
+    """Runs the gym environment checker, which calls `reset`, `step` and `render`, on an environment made from the spec."""
+    env = env_spec.make(disable_env_checker=True)
+    check_env(env, skip_render_check=False)

    env.close()


-@pytest.mark.parametrize("spec", spec_list, ids=[spec.id for spec in spec_list])
-def test_reset_info(spec):
-
-    with pytest.warns(None):
-        env = spec.make()
-
-    ob_space = env.observation_space
-    obs = env.reset()
-    assert ob_space.contains(obs)
-    obs = env.reset(return_info=False)
-    assert ob_space.contains(obs)
-    obs, info = env.reset(return_info=True)
-    assert ob_space.contains(obs)
-    assert isinstance(info, dict)
-    env.close()
+# Note that this precludes running this test in multiple threads.
+# However, we probably already can't do multithreading due to some environments.
+SEED = 0
+NUM_STEPS = 50


 @pytest.mark.parametrize(
-    "spec", spec_list_no_mujoco_py, ids=[spec.id for spec in spec_list_no_mujoco_py]
+    "env_spec", all_testing_env_specs, ids=[env.id for env in all_testing_env_specs]
+)
+def test_env_determinism_rollout(env_spec: EnvSpec):
+    """Run a rollout with two environments and assert equality.
+
+    This test runs a rollout of NUM_STEPS steps with two environments
+    initialized with the same seed and asserts that:
+
+    - observations after the first reset are the same
+    - observations are contained in the observation space
+    - obs, rew, done and info are equal between the two envs
+      (both envs are stepped with the same action, sampled from the first env)
+    """
+    # Don't check rollout equality if it's a nondeterministic environment.
+    if env_spec.nondeterministic is True:
+        return
+
+    env_1 = env_spec.make(disable_env_checker=True)
+    env_2 = env_spec.make(disable_env_checker=True)
+
+    initial_obs_1 = env_1.reset(seed=SEED)
+    initial_obs_2 = env_2.reset(seed=SEED)
+    assert_equals(initial_obs_1, initial_obs_2)
+
+    env_1.action_space.seed(SEED)
+
+    for time_step in range(NUM_STEPS):
+        # We don't evaluate the determinism of actions
+        action = env_1.action_space.sample()
+
+        obs_1, rew_1, done_1, info_1 = env_1.step(action)
+        obs_2, rew_2, done_2, info_2 = env_2.step(action)
+
+        assert_equals(obs_1, obs_2, f"[{time_step}] ")
+        assert env_1.observation_space.contains(
+            obs_1
+        )  # obs_2 verified by previous assertion
+
+        assert rew_1 == rew_2, f"[{time_step}] reward 1={rew_1}, reward 2={rew_2}"
+        assert done_1 == done_2, f"[{time_step}] done 1={done_1}, done 2={done_2}"
+        assert_equals(info_1, info_2, f"[{time_step}] ")
+
+        if done_1:  # done_2 verified by previous assertion
+            env_1.reset(seed=SEED)
+            env_2.reset(seed=SEED)
+
+    env_1.close()
+    env_2.close()
+
+
+@pytest.mark.parametrize(
+    "spec", gym_testing_env_specs, ids=[spec.id for spec in gym_testing_env_specs]
 )
 def test_render_modes(spec):
    env = spec.make()
@@ -85,17 +90,3 @@ def test_render_modes(spec):
            new_env.reset()
            new_env.step(new_env.action_space.sample())
            new_env.render()
-
-
-def test_env_render_result_is_immutable():
-    environs = [
-        envs.make("Taxi-v3", render_mode="ansi"),
-        envs.make("FrozenLake-v1", render_mode="ansi"),
-    ]
-
-    for env in environs:
-        env.reset()
-        output = env.render()
-        assert isinstance(output, List)
-        assert isinstance(output[0], str)
-        env.close()
--- a/tests/envs/test_frozenlake_dfs.py
+++ b/tests/envs/test_frozenlake_dfs.py
@@ -1,30 +0,0 @@
-from gym.envs.toy_text.frozen_lake import generate_random_map
-
-
-# Test that FrozenLake map generation creates valid maps of various sizes.
-def test_frozenlake_dfs_map_generation():
-    def frozenlake_dfs_path_exists(res):
-        frontier, discovered = [], set()
-        frontier.append((0, 0))
-        while frontier:
-            r, c = frontier.pop()
-            if not (r, c) in discovered:
-                discovered.add((r, c))
-                directions = [(1, 0), (0, 1), (-1, 0), (0, -1)]
-                for x, y in directions:
-                    r_new = r + x
-                    c_new = c + y
-                    if r_new < 0 or r_new >= size or c_new < 0 or c_new >= size:
-                        continue
-                    if res[r_new][c_new] == "G":
-                        return True
-                    if res[r_new][c_new] not in "#H":
-                        frontier.append((r_new, c_new))
-        return False
-
-    map_sizes = [5, 10, 200]
-    for size in map_sizes:
-        new_frozenlake = generate_random_map(size)
-        assert len(new_frozenlake) == size
-        assert len(new_frozenlake[0]) == size
-        assert frozenlake_dfs_path_exists(new_frozenlake)
--- a/tests/envs/test_lunar_lander.py
+++ b/tests/envs/test_lunar_lander.py
@@ -1,24 +0,0 @@
-import pytest
-
-try:
-    import Box2D
-
-    from gym.envs.box2d.lunar_lander import LunarLander, demo_heuristic_lander
-except ImportError:
-    Box2D = None
-
-
-@pytest.mark.skipif(Box2D is None, reason="Box2D not installed")
-def test_lunar_lander():
-    _test_lander(LunarLander(), seed=0)
-
-
-@pytest.mark.skipif(Box2D is None, reason="Box2D not installed")
-def test_lunar_lander_continuous():
-    _test_lander(LunarLander(continuous=True), seed=0)
-
-
-@pytest.mark.skipif(Box2D is None, reason="Box2D not installed")
-def _test_lander(env, seed=None, render=False):
-    total_reward = demo_heuristic_lander(env, seed=seed, render=render)
-    assert total_reward > 100
--- a/tests/envs/test_mujoco.py
+++ b/tests/envs/test_mujoco.py
@@ -0,0 +1,57 @@
+import numpy as np
+import pytest
+
+import gym
+from gym import envs
+from tests.envs.utils import mujoco_testing_env_specs
+
+EPS = 1e-6
+
+
+def verify_environments_match(
+    old_env_id: str, new_env_id: str, seed: int = 1, num_actions: int = 1000
+):
+    """Verifies that the environments for two ids (old and new) are identical in obs, reward and done
+    (except info, where all old info must be contained in new info)."""
+    old_env = envs.make(old_env_id, disable_env_checker=True)
+    new_env = envs.make(new_env_id, disable_env_checker=True)
+
+    old_reset_obs = old_env.reset(seed=seed)
+    new_reset_obs = new_env.reset(seed=seed)
+
+    np.testing.assert_allclose(old_reset_obs, new_reset_obs)
+
+    for i in range(num_actions):
+        action = old_env.action_space.sample()
+        old_obs, old_reward, old_done, old_info = old_env.step(action)
+        new_obs, new_reward, new_done, new_info = new_env.step(action)
+
+        np.testing.assert_allclose(old_obs, new_obs, atol=EPS)
+        np.testing.assert_allclose(old_reward, new_reward, atol=EPS)
+        np.testing.assert_equal(old_done, new_done)
+
+        for key in old_info:
+            np.testing.assert_allclose(old_info[key], new_info[key], atol=EPS)
+
+        if old_done:
+            break
+
+
+MUJOCO_V2_V3_ENVS = [
+    spec.name
+    for spec in mujoco_testing_env_specs
+    if spec.version == 2 and f"{spec.name}-v3" in gym.envs.registry
+]
+
+
+@pytest.mark.parametrize("env_name", MUJOCO_V2_V3_ENVS)
+def test_mujoco_v2_to_v3_conversion(env_name: str):
+    """Checks that all v2 mujoco environments are the same as v3 environments."""
+    verify_environments_match(f"{env_name}-v2", f"{env_name}-v3")
+
+
+@pytest.mark.parametrize("env_name", MUJOCO_V2_V3_ENVS)
+def test_mujoco_incompatible_v3_to_v2(env_name: str):
+    """Checks that the v3 environments are slightly different from v2 (v3 has additional info keys that v2 does not)."""
+    with pytest.raises(KeyError):
+        verify_environments_match(f"{env_name}-v3", f"{env_name}-v2")
--- a/tests/envs/test_mujoco_v2_to_v3_conversion.py
+++ b/tests/envs/test_mujoco_v2_to_v3_conversion.py
@@ -1,63 +0,0 @@
-import unittest
-
-import numpy as np
-
-from gym import envs
-from tests.envs.spec_list import SKIP_MUJOCO_V3_WARNING_MESSAGE, skip_mujoco_v3
-
-
-def verify_environments_match(
-    old_environment_id, new_environment_id, seed=1, num_actions=1000
-):
-    old_environment = envs.make(old_environment_id)
-    new_environment = envs.make(new_environment_id)
-
-    old_reset_observation = old_environment.reset(seed=seed)
-    new_reset_observation = new_environment.reset(seed=seed)
-
-    np.testing.assert_allclose(old_reset_observation, new_reset_observation)
-
-    for i in range(num_actions):
-        action = old_environment.action_space.sample()
-        old_observation, old_reward, old_done, old_info = old_environment.step(action)
-        new_observation, new_reward, new_done, new_info = new_environment.step(action)
-
-        eps = 1e-6
-        np.testing.assert_allclose(old_observation, new_observation, atol=eps)
-        np.testing.assert_allclose(old_reward, new_reward, atol=eps)
-        np.testing.assert_allclose(old_done, new_done, atol=eps)
-
-        for key in old_info:
-            np.testing.assert_allclose(old_info[key], new_info[key], atol=eps)
-
-
-@unittest.skipIf(skip_mujoco_v3, SKIP_MUJOCO_V3_WARNING_MESSAGE)
-class Mujocov2Tov3ConversionTest(unittest.TestCase):
-    def test_environments_match(self):
-        test_cases = (
-            {"old_id": "Swimmer-v2", "new_id": "Swimmer-v3"},
-            {"old_id": "Hopper-v2", "new_id": "Hopper-v3"},
-            {"old_id": "Walker2d-v2", "new_id": "Walker2d-v3"},
-            {"old_id": "HalfCheetah-v2", "new_id": "HalfCheetah-v3"},
-            {"old_id": "Ant-v2", "new_id": "Ant-v3"},
-            {"old_id": "Humanoid-v2", "new_id": "Humanoid-v3"},
-        )
-
-        for test_case in test_cases:
-            verify_environments_match(test_case["old_id"], test_case["new_id"])
-
-        # Raises KeyError because the new envs have extra info
-        with self.assertRaises(KeyError):
-            verify_environments_match("Swimmer-v3", "Swimmer-v2")
-
-        # Raises KeyError because the new envs have extra info
-        with self.assertRaises(KeyError):
-            verify_environments_match("Humanoid-v3", "Humanoid-v2")
-
-        # Raises KeyError because the new envs have extra info
-        with self.assertRaises(KeyError):
-            verify_environments_match("Swimmer-v3", "Swimmer-v2")
-
-
-if __name__ == "__main__":
-    unittest.main()
--- a/tests/envs/test_registration.py
+++ b/tests/envs/test_registration.py
@@ -62,7 +62,7 @@ def register_some_envs():


 def test_make():
-    env = envs.make("CartPole-v1")
+    env = envs.make("CartPole-v1", disable_env_checker=True)
    assert env.spec.id == "CartPole-v1"
    assert isinstance(env.unwrapped, cartpole.CartPoleEnv)

@@ -128,7 +128,7 @@ def test_env_suggestions(register_some_envs, env_id_input, env_id_suggested):
    with pytest.raises(
        error.UnregisteredEnv, match=f"Did you mean: `{env_id_suggested}` ?"
    ):
-        envs.make(env_id_input)
+        gym.make(env_id_input, disable_env_checker=True)


@pytest.mark.parametrize(
@@ -151,14 +151,14 @@ def test_env_version_suggestions(
            error.DeprecatedEnv,
            match=match_str,
        ):
-            envs.make(env_id_input)
+            envs.make(env_id_input, disable_env_checker=True)
    else:
        match_str = f"versioned environments: \\[ {suggested_versions} \\]"
        with pytest.raises(
            error.UnregisteredEnv,
            match=match_str,
        ):
-            envs.make(env_id_input)
+            envs.make(env_id_input, disable_env_checker=True)


 def test_make_with_kwargs():
@@ -166,6 +166,7 @@ def test_make_with_kwargs():
        "test.ArgumentEnv-v0",
        arg2="override_arg2",
        arg3="override_arg3",
+        disable_env_checker=True,
    )
    assert env.spec.id == "test.ArgumentEnv-v0"
    assert isinstance(env.unwrapped, ArgumentEnv)
@@ -194,7 +195,10 @@ def test_spec():

 def test_spec_with_kwargs():
    map_name_value = "8x8"
-    env = gym.make("FrozenLake-v1", map_name=map_name_value)
+    env = gym.make(
+        "FrozenLake-v1",
+        map_name=map_name_value,
+    )
    assert env.spec.kwargs["map_name"] == map_name_value


@@ -277,7 +281,9 @@ def test_register_versioned_unversioned():

 def test_return_latest_versioned_env(register_some_envs):
    with pytest.warns(UserWarning):
-        env = envs.make("MyAwesomeNamespace/MyAwesomeVersionedEnv")
+        env = envs.make(
+            "MyAwesomeNamespace/MyAwesomeVersionedEnv", disable_env_checker=True
+        )
    assert env.spec.id == "MyAwesomeNamespace/MyAwesomeVersionedEnv-v5"


@@ -295,4 +301,7 @@ def test_namespace():

 def test_import_module_during_make():
    # Test custom environment which is registered at make
-    gym.make("tests.envs.register_during_make_env:RegisterDuringMakeEnv-v0")
+    gym.make(
+        "tests.envs.register_during_make_env:RegisterDuringMakeEnv-v0",
+        disable_env_checker=True,
+    )
--- a/tests/envs/utils.py
+++ b/tests/envs/utils.py
@@ -0,0 +1,78 @@
+"""Finds all the specs that we can test with"""
+from typing import Optional
+
+import numpy as np
+
+import gym
+from gym import logger
+from gym.envs.registration import EnvSpec
+
+
+def try_make_env(env_spec: EnvSpec) -> Optional[gym.Env]:
+    """Return the unwrapped env for `env_spec` (so no TimeLimit/OrderEnforcing wrappers), or None if a dependency is missing."""
+    try:
+        return env_spec.make(disable_env_checker=True).unwrapped
+    except (ImportError, gym.error.DependencyNotInstalled) as e:
+        logger.warn(f"Not testing {env_spec.id} due to error: {e}")
+        return None
+
+
+# Try to make every registered environment; those that cannot be built are dropped
+all_testing_initialised_envs = list(
+    filter(None, [try_make_env(env_spec) for env_spec in gym.envs.registry.values()])
+)
+
+# All testing, mujoco and gym environment specs
+all_testing_env_specs = [env.spec for env in all_testing_initialised_envs]
+mujoco_testing_env_specs = [
+    env_spec
+    for env_spec in all_testing_env_specs
+    if "gym.envs.mujoco" in env_spec.entry_point
+]
+gym_testing_env_specs = [
+    env_spec
+    for env_spec in all_testing_env_specs
+    if any(
+        f"gym.{ep}" in env_spec.entry_point
+        for ep in ["box2d", "classic_control", "toy_text"]
+    )
+]
+# TODO, add minimum testing env spec in testing
+minimum_testing_env_specs = [
+    env_spec
+    for env_spec in all_testing_env_specs
+    if env_spec.id in [  # match on id: the list above holds spec objects, not strings
+        "CartPole-v1",
+        "MountainCarContinuous-v0",
+        "LunarLander-v2",
+        "LunarLanderContinuous-v2",
+        "CarRacing-v1",
+        "Blackjack-v1",
+        "Reacher-v4",
+    ]
+]
+
+
+def assert_equals(a, b, prefix=None):
+    """Recursively assert that `a` and `b` have the same type, structure and values.
+
+    Args:
+        a: first data structure (dict, tuple, np.ndarray or any `==`-comparable value)
+        b: second data structure
+        prefix: optional text prepended to every failure message, including nested ones
+    """
+    prefix = "" if prefix is None else prefix  # keep a literal "None" out of messages
+    assert type(a) == type(b), f"{prefix}Differing types: {a} and {b}"
+    if isinstance(a, dict):
+        assert list(a.keys()) == list(b.keys()), f"{prefix}Key sets differ: {a} and {b}"
+        for k in a:
+            assert_equals(a[k], b[k], prefix)
+    elif isinstance(a, np.ndarray):
+        np.testing.assert_array_equal(a, b)
+    elif isinstance(a, tuple):
+        # zip() stops at the shorter tuple, so a length mismatch must be checked first
+        assert len(a) == len(b), f"{prefix}Differing lengths: {a} and {b}"
+        for elem_from_a, elem_from_b in zip(a, b):
+            assert_equals(elem_from_a, elem_from_b, prefix)
+    else:
+        assert a == b, f"{prefix}Differing values: {a} and {b}"
--- a/tests/utils/test_play.py
+++ b/tests/utils/test_play.py
@@ -168,7 +168,7 @@ def test_play_loop_real_env():

        return obs_t, obs_tp1, action, rew, done, info

-    env = gym.make(ENV)
+    env = gym.make(ENV, disable_env_checker=True)
    env.reset(seed=SEED)
    keys_to_action = dummy_keys_to_action()

@@ -179,7 +179,7 @@ def test_play_loop_real_env():
        action = keys_to_action[(e.key,)]
        obs, _, _, _ = env.step(action)

-    env_play = gym.make(ENV)
+    env_play = gym.make(ENV, disable_env_checker=True)
    status = PlayStatus(callback)
    play(env_play, callback=status.callback, keys_to_action=keys_to_action, seed=SEED)

--- a/tests/vector/test_sync_vector_env.py
+++ b/tests/vector/test_sync_vector_env.py
@@ -4,7 +4,7 @@ import pytest
 from gym.envs.registration import EnvSpec
 from gym.spaces import Box, Discrete, MultiDiscrete, Tuple
 from gym.vector.sync_vector_env import SyncVectorEnv
-from tests.envs.spec_list import spec_list
+from tests.envs.utils import all_testing_env_specs
 from tests.vector.utils import (
    CustomSpace,
    assert_rng_equal,
@@ -188,7 +188,9 @@ def test_sync_vector_env_seed():
        assert np.all(env_action == vector_action)


-@pytest.mark.parametrize("spec", spec_list, ids=[spec.id for spec in spec_list])
+@pytest.mark.parametrize(
+    "spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs]
+)
 def test_sync_vector_determinism(spec: EnvSpec, seed: int = 123, n: int = 3):
    """Check that for all environments, the sync vector envs produce the same action samples using the same seeds"""
    env_1 = SyncVectorEnv([make_env(spec.id, seed=seed) for _ in range(n)])
--- a/tests/vector/test_vector_env_info.py
+++ b/tests/vector/test_vector_env_info.py
@@ -13,7 +13,9 @@ SEED = 42

@pytest.mark.parametrize("asynchronous", [True, False])
 def test_vector_env_info(asynchronous):
-    env = gym.vector.make(ENV_ID, num_envs=NUM_ENVS, asynchronous=asynchronous)
+    env = gym.vector.make(
+        ENV_ID, num_envs=NUM_ENVS, asynchronous=asynchronous, disable_env_checker=True
+    )
    env.reset(seed=SEED)
    for _ in range(ENV_STEPS):
        env.action_space.seed(SEED)
--- a/tests/vector/utils.py
+++ b/tests/vector/utils.py
@@ -109,7 +109,7 @@ class CustomSpaceEnv(gym.Env):

 def make_env(env_name, seed, **kwargs):
    def _make():
-        env = gym.make(env_name, **kwargs)
+        env = gym.make(env_name, disable_env_checker=True, **kwargs)
        env.action_space.seed(seed)
        env.reset(seed=seed)
        return env
--- a/tests/wrappers/test_atari_preprocessing.py
+++ b/tests/wrappers/test_atari_preprocessing.py
@@ -9,7 +9,7 @@ pytest.importorskip("gym.envs.atari")

@pytest.fixture(scope="module")
 def env_fn():
-    return lambda: gym.make("PongNoFrameskip-v4")
+    return lambda: gym.make("PongNoFrameskip-v4", disable_env_checker=True)


 def test_atari_preprocessing_grayscale(env_fn):
--- a/tests/wrappers/test_autoreset.py
+++ b/tests/wrappers/test_autoreset.py
@@ -8,7 +8,7 @@ import pytest

 import gym
 from gym.wrappers import AutoResetWrapper
-from tests.envs.spec_list import spec_list
+from tests.envs.utils import all_testing_env_specs


 class DummyResetEnv(gym.Env):
@@ -61,7 +61,9 @@ def unwrap_env(env) -> Generator[gym.Wrapper, None, None]:
        env = env.env


-@pytest.mark.parametrize("spec", spec_list, ids=[spec.id for spec in spec_list])
+@pytest.mark.parametrize(
+    "spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs]
+)
 def test_make_autoreset_true(spec):
    """Tests gym.make with `autoreset=True`, and check that the reset actually happens.

@@ -71,7 +73,7 @@ def test_make_autoreset_true(spec):
     amount of time with random actions, which is true as of the time of adding this test.
    """
    with pytest.warns(None):
-        env = gym.make(spec.id, autoreset=True)
+        env = gym.make(spec.id, autoreset=True, disable_env_checker=True)
    assert AutoResetWrapper in unwrap_env(env)

    env.reset(seed=0)
@@ -85,21 +87,23 @@ def test_make_autoreset_true(spec):
    env.close()


-@pytest.mark.parametrize("spec", spec_list, ids=[spec.id for spec in spec_list])
+@pytest.mark.parametrize(
+    "spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs]
+)
 def test_gym_make_autoreset(spec):
    """Tests that `gym.make` autoreset wrapper is applied only when `gym.make(..., autoreset=True)`."""
    with pytest.warns(None):
-        env = gym.make(spec.id)
+        env = gym.make(spec.id, disable_env_checker=True)
    assert AutoResetWrapper not in unwrap_env(env)
    env.close()

    with pytest.warns(None):
-        env = gym.make(spec.id, autoreset=False)
+        env = gym.make(spec.id, autoreset=False, disable_env_checker=True)
    assert AutoResetWrapper not in unwrap_env(env)
    env.close()

    with pytest.warns(None):
-        env = gym.make(spec.id, autoreset=True)
+        env = gym.make(spec.id, autoreset=True, disable_env_checker=True)
    assert AutoResetWrapper in unwrap_env(env)
    env.close()

--- a/tests/wrappers/test_clip_action.py
+++ b/tests/wrappers/test_clip_action.py
@@ -6,8 +6,10 @@ from gym.wrappers import ClipAction

 def test_clip_action():
    # mountaincar: action-based rewards
-    env = gym.make("MountainCarContinuous-v0")
-    wrapped_env = ClipAction(gym.make("MountainCarContinuous-v0"))
+    env = gym.make("MountainCarContinuous-v0", disable_env_checker=True)
+    wrapped_env = ClipAction(
+        gym.make("MountainCarContinuous-v0", disable_env_checker=True)
+    )

    seed = 0

--- a/tests/wrappers/test_flatten_observation.py
+++ b/tests/wrappers/test_flatten_observation.py
@@ -8,7 +8,7 @@ from gym.wrappers import FlattenObservation

@pytest.mark.parametrize("env_id", ["Blackjack-v1"])
 def test_flatten_observation(env_id):
-    env = gym.make(env_id)
+    env = gym.make(env_id, disable_env_checker=True)
    wrapped_env = FlattenObservation(env)

    obs = env.reset()
--- a/tests/wrappers/test_frame_stack.py
+++ b/tests/wrappers/test_frame_stack.py
@@ -28,13 +28,13 @@ pytest.importorskip("gym.envs.atari")
    ],
 )
 def test_frame_stack(env_id, num_stack, lz4_compress):
-    env = gym.make(env_id)
+    env = gym.make(env_id, disable_env_checker=True)
    shape = env.observation_space.shape
    env = FrameStack(env, num_stack, lz4_compress)
    assert env.observation_space.shape == (num_stack,) + shape
    assert env.observation_space.dtype == env.env.observation_space.dtype

-    dup = gym.make(env_id)
+    dup = gym.make(env_id, disable_env_checker=True)

    obs = env.reset(seed=0)
    dup_obs = dup.reset(seed=0)
--- a/tests/wrappers/test_gray_scale_observation.py
+++ b/tests/wrappers/test_gray_scale_observation.py
@@ -13,8 +13,12 @@ pytest.importorskip("cv2")
 )
@pytest.mark.parametrize("keep_dim", [True, False])
 def test_gray_scale_observation(env_id, keep_dim):
-    gray_env = AtariPreprocessing(gym.make(env_id), screen_size=84, grayscale_obs=True)
-    rgb_env = AtariPreprocessing(gym.make(env_id), screen_size=84, grayscale_obs=False)
+    gray_env = AtariPreprocessing(
+        gym.make(env_id, disable_env_checker=True), screen_size=84, grayscale_obs=True
+    )
+    rgb_env = AtariPreprocessing(
+        gym.make(env_id, disable_env_checker=True), screen_size=84, grayscale_obs=False
+    )
    wrapped_env = GrayScaleObservation(rgb_env, keep_dim=keep_dim)
    assert rgb_env.observation_space.shape[-1] == 3

--- a/tests/wrappers/test_order_enforcing.py
+++ b/tests/wrappers/test_order_enforcing.py
@@ -4,14 +4,16 @@ import gym
 from gym.envs.classic_control import CartPoleEnv
 from gym.error import ResetNeeded
 from gym.wrappers import OrderEnforcing
-from tests.envs.spec_list import spec_list
+from tests.envs.utils import all_testing_env_specs
 from tests.wrappers.utils import has_wrapper


-@pytest.mark.parametrize("spec", spec_list, ids=[spec.id for spec in spec_list])
+@pytest.mark.parametrize(
+    "spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs]
+)
 def test_gym_make_order_enforcing(spec):
    """Checks that gym.make wrappers the environment with the OrderEnforcing wrapper."""
-    env = gym.make(spec.id)
+    env = gym.make(spec.id, disable_env_checker=True)

    assert has_wrapper(env, OrderEnforcing)

--- a/tests/wrappers/test_record_episode_statistics.py
+++ b/tests/wrappers/test_record_episode_statistics.py
@@ -9,7 +9,7 @@ from gym.wrappers.record_episode_statistics import add_vector_episode_statistics
@pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v1"])
@pytest.mark.parametrize("deque_size", [2, 5])
 def test_record_episode_statistics(env_id, deque_size):
-    env = gym.make(env_id)
+    env = gym.make(env_id, disable_env_checker=True)
    env = RecordEpisodeStatistics(env, deque_size)

    for n in range(5):
@@ -27,7 +27,7 @@ def test_record_episode_statistics(env_id, deque_size):


 def test_record_episode_statistics_reset_info():
-    env = gym.make("CartPole-v1")
+    env = gym.make("CartPole-v1", disable_env_checker=True)
    env = RecordEpisodeStatistics(env)
    ob_space = env.observation_space
    obs = env.reset()
@@ -43,7 +43,11 @@ def test_record_episode_statistics_reset_info():
 )
 def test_record_episode_statistics_with_vectorenv(num_envs, asynchronous):
    envs = gym.vector.make(
-        "CartPole-v1", render_mode=None, num_envs=num_envs, asynchronous=asynchronous
+        "CartPole-v1",
+        render_mode=None,
+        num_envs=num_envs,
+        asynchronous=asynchronous,
+        disable_env_checker=True,
    )
    envs = RecordEpisodeStatistics(envs)
    max_episode_step = (
@@ -66,7 +70,7 @@ def test_record_episode_statistics_with_vectorenv(num_envs, asynchronous):


 def test_wrong_wrapping_order():
-    envs = gym.vector.make("CartPole-v1", num_envs=3)
+    envs = gym.vector.make("CartPole-v1", num_envs=3, disable_env_checker=True)
    wrapped_env = RecordEpisodeStatistics(VectorListInfo(envs))
    wrapped_env.reset()

--- a/tests/wrappers/test_record_video.py
+++ b/tests/wrappers/test_record_video.py
@@ -6,8 +6,7 @@ from gym.wrappers import capped_cubic_video_schedule


 def test_record_video_using_default_trigger():
-
-    env = gym.make("CartPole-v1", render_mode="rgb_array")
+    env = gym.make("CartPole-v1", render_mode="rgb_array", disable_env_checker=True)
    env = gym.wrappers.RecordVideo(env, "videos")
    env.reset()
    for _ in range(199):
@@ -25,7 +24,7 @@ def test_record_video_using_default_trigger():


 def test_record_video_reset_return_info():
-    env = gym.make("CartPole-v1", render_mode="rgb_array")
+    env = gym.make("CartPole-v1", render_mode="rgb_array", disable_env_checker=True)
    env = gym.wrappers.RecordVideo(env, "videos", step_trigger=lambda x: x % 100 == 0)
    ob_space = env.observation_space
    obs, info = env.reset(return_info=True)
@@ -35,7 +34,7 @@ def test_record_video_reset_return_info():
    assert ob_space.contains(obs)
    assert isinstance(info, dict)

-    env = gym.make("CartPole-v1", render_mode="rgb_array")
+    env = gym.make("CartPole-v1", render_mode="rgb_array", disable_env_checker=True)
    env = gym.wrappers.RecordVideo(env, "videos", step_trigger=lambda x: x % 100 == 0)
    ob_space = env.observation_space
    obs = env.reset(return_info=False)
@@ -44,7 +43,7 @@ def test_record_video_reset_return_info():
    shutil.rmtree("videos")
    assert ob_space.contains(obs)

-    env = gym.make("CartPole-v1", render_mode="rgb_array")
+    env = gym.make("CartPole-v1", render_mode="rgb_array", disable_env_checker=True)
    env = gym.wrappers.RecordVideo(env, "videos", step_trigger=lambda x: x % 100 == 0)
    ob_space = env.observation_space
    obs = env.reset()
@@ -55,7 +54,7 @@ def test_record_video_reset_return_info():


 def test_record_video_step_trigger():
-    env = gym.make("CartPole-v1", render_mode="rgb_array")
+    env = gym.make("CartPole-v1", render_mode="rgb_array", disable_env_checker=True)
    env._max_episode_steps = 20
    env = gym.wrappers.RecordVideo(env, "videos", step_trigger=lambda x: x % 100 == 0)
    env.reset()
@@ -73,7 +72,7 @@ def test_record_video_step_trigger():

 def make_env(gym_id, seed, **kwargs):
    def thunk():
-        env = gym.make(gym_id, **kwargs)
+        env = gym.make(gym_id, disable_env_checker=True, **kwargs)
        env._max_episode_steps = 20
        if seed == 1:
            env = gym.wrappers.RecordVideo(
--- a/tests/wrappers/test_rescale_action.py
+++ b/tests/wrappers/test_rescale_action.py
@@ -6,13 +6,15 @@ from gym.wrappers import RescaleAction


 def test_rescale_action():
-    env = gym.make("CartPole-v1")
+    env = gym.make("CartPole-v1", disable_env_checker=True)
    with pytest.raises(AssertionError):
        env = RescaleAction(env, -1, 1)
    del env

-    env = gym.make("Pendulum-v1")
-    wrapped_env = RescaleAction(gym.make("Pendulum-v1"), -1, 1)
+    env = gym.make("Pendulum-v1", disable_env_checker=True)
+    wrapped_env = RescaleAction(
+        gym.make("Pendulum-v1", disable_env_checker=True), -1, 1
+    )

    seed = 0

--- a/tests/wrappers/test_resize_observation.py
+++ b/tests/wrappers/test_resize_observation.py
@@ -11,7 +11,7 @@ pytest.importorskip("gym.envs.atari")
 )
@pytest.mark.parametrize("shape", [16, 32, (8, 5), [10, 7]])
 def test_resize_observation(env_id, shape):
-    env = gym.make(env_id)
+    env = gym.make(env_id, disable_env_checker=True)
    env = ResizeObservation(env, shape)

    assert env.observation_space.shape[-1] == 3
--- a/tests/wrappers/test_time_aware_observation.py
+++ b/tests/wrappers/test_time_aware_observation.py
@@ -6,7 +6,7 @@ from gym.wrappers import TimeAwareObservation

@pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v1"])
 def test_time_aware_observation(env_id):
-    env = gym.make(env_id)
+    env = gym.make(env_id, disable_env_checker=True)
    wrapped_env = TimeAwareObservation(env)

    assert wrapped_env.observation_space.shape[0] == env.observation_space.shape[0] + 1
--- a/tests/wrappers/test_time_limit.py
+++ b/tests/wrappers/test_time_limit.py
@@ -6,7 +6,7 @@ from gym.wrappers import TimeLimit


 def test_time_limit_reset_info():
-    env = gym.make("CartPole-v1")
+    env = gym.make("CartPole-v1", disable_env_checker=True)
    env = TimeLimit(env)
    ob_space = env.observation_space
    obs = env.reset()
--- a/tests/wrappers/test_transform_observation.py
+++ b/tests/wrappers/test_transform_observation.py
@@ -10,9 +10,9 @@ def test_transform_observation(env_id):
    def affine_transform(x):
        return 3 * x + 2

-    env = gym.make(env_id)
+    env = gym.make(env_id, disable_env_checker=True)
    wrapped_env = TransformObservation(
-        gym.make(env_id), lambda obs: affine_transform(obs)
+        gym.make(env_id, disable_env_checker=True), lambda obs: affine_transform(obs)
    )

    obs = env.reset(seed=0)
--- a/tests/wrappers/test_transform_reward.py
+++ b/tests/wrappers/test_transform_reward.py
@@ -10,8 +10,10 @@ def test_transform_reward(env_id):
    # use case #1: scale
    scales = [0.1, 200]
    for scale in scales:
-        env = gym.make(env_id)
-        wrapped_env = TransformReward(gym.make(env_id), lambda r: scale * r)
+        env = gym.make(env_id, disable_env_checker=True)
+        wrapped_env = TransformReward(
+            gym.make(env_id, disable_env_checker=True), lambda r: scale * r
+        )
        action = env.action_space.sample()

        env.reset(seed=0)
@@ -26,8 +28,10 @@ def test_transform_reward(env_id):
    # use case #2: clip
    min_r = -0.0005
    max_r = 0.0002
-    env = gym.make(env_id)
-    wrapped_env = TransformReward(gym.make(env_id), lambda r: np.clip(r, min_r, max_r))
+    env = gym.make(env_id, disable_env_checker=True)
+    wrapped_env = TransformReward(
+        gym.make(env_id, disable_env_checker=True), lambda r: np.clip(r, min_r, max_r)
+    )
    action = env.action_space.sample()

    env.reset(seed=0)
@@ -41,8 +45,10 @@ def test_transform_reward(env_id):
    del env, wrapped_env

    # use case #3: sign
-    env = gym.make(env_id)
-    wrapped_env = TransformReward(gym.make(env_id), lambda r: np.sign(r))
+    env = gym.make(env_id, disable_env_checker=True)
+    wrapped_env = TransformReward(
+        gym.make(env_id, disable_env_checker=True), lambda r: np.sign(r)
+    )

    env.reset(seed=0)
    wrapped_env.reset(seed=0)
--- a/tests/wrappers/test_vector_list_info.py
+++ b/tests/wrappers/test_vector_list_info.py
@@ -10,8 +10,8 @@ SEED = 42


 def test_usage_in_vector_env():
-    env = gym.make(ENV_ID)
-    vector_env = gym.vector.make(ENV_ID, num_envs=NUM_ENVS)
+    env = gym.make(ENV_ID, disable_env_checker=True)
+    vector_env = gym.vector.make(ENV_ID, num_envs=NUM_ENVS, disable_env_checker=True)

    VectorListInfo(vector_env)

@@ -20,7 +20,7 @@ def test_usage_in_vector_env():


 def test_info_to_list():
-    env_to_wrap = gym.vector.make(ENV_ID, num_envs=NUM_ENVS)
+    env_to_wrap = gym.vector.make(ENV_ID, num_envs=NUM_ENVS, disable_env_checker=True)
    wrapped_env = VectorListInfo(env_to_wrap)
    wrapped_env.action_space.seed(SEED)
    _, info = wrapped_env.reset(seed=SEED, return_info=True)
@@ -38,7 +38,7 @@ def test_info_to_list():


 def test_info_to_list_statistics():
-    env_to_wrap = gym.vector.make(ENV_ID, num_envs=NUM_ENVS)
+    env_to_wrap = gym.vector.make(ENV_ID, num_envs=NUM_ENVS, disable_env_checker=True)
    wrapped_env = VectorListInfo(RecordEpisodeStatistics(env_to_wrap))
    _, info = wrapped_env.reset(seed=SEED, return_info=True)
    wrapped_env.action_space.seed(SEED)
--- a/tests/wrappers/test_video_recorder.py
+++ b/tests/wrappers/test_video_recorder.py
@@ -29,7 +29,7 @@ class UnrecordableEnv:


 def test_record_simple():
-    env = gym.make("CartPole-v1", render_mode="rgb_array")
+    env = gym.make("CartPole-v1", render_mode="rgb_array", disable_env_checker=True)
    rec = VideoRecorder(env)
    env.reset()
    rec.capture_frame()
@@ -49,7 +49,7 @@ def test_record_simple():

 def test_autoclose():
    def record():
-        env = gym.make("CartPole-v1", render_mode="rgb_array")
+        env = gym.make("CartPole-v1", render_mode="rgb_array", disable_env_checker=True)
        rec = VideoRecorder(env)
        env.reset()
        rec.capture_frame()
@@ -102,7 +102,7 @@ def test_record_breaking_render_method():


 def test_text_envs():
-    env = gym.make("FrozenLake-v1", render_mode="rgb_array")
+    env = gym.make("FrozenLake-v1", render_mode="rgb_array", disable_env_checker=True)
    video = VideoRecorder(env)
    try:
        env.reset()