# Files
# Gymnasium/tests/envs/mujoco/test_mujoco_v5.py
# 2023-09-08 11:51:50 +01:00
#
# 677 lines
# 22 KiB
# Python

import collections
import warnings
import mujoco
import numpy as np
import pytest
import gymnasium as gym
from gymnasium.envs.mujoco.mujoco_env import BaseMujocoEnv, MujocoEnv
from gymnasium.error import Error
from gymnasium.utils.env_checker import check_env
from gymnasium.utils.env_match import check_environments_match
# Base names (without the `-vN` version suffix) of the MuJoCo environments that
# the parametrized tests in this file combine with a version string.
ALL_MUJOCO_ENVS = [
    "Ant",
    "HalfCheetah",
    "Hopper",
    "Humanoid",
    "HumanoidStandup",
    "InvertedDoublePendulum",
    "InvertedPendulum",
    "Pusher",
    "Reacher",
    "Swimmer",
    "Walker2d",
]
# Note: "HumanoidStandup-v4" does not have `info`
# Note: "Humanoid-v4/3" & "Ant-v4/3" fail this test
@pytest.mark.parametrize(
    "env_id",
    [
        "Ant-v5",
        "HalfCheetah-v5",
        "HalfCheetah-v4",
        "HalfCheetah-v3",
        "Hopper-v5",
        "Hopper-v4",
        "Hopper-v3",
        "Humanoid-v5",
        "HumanoidStandup-v5",
        "Swimmer-v5",
        "Swimmer-v4",
        "Swimmer-v3",
        "Walker2d-v5",
        "Walker2d-v4",
        "Walker2d-v3",
    ],
)
def test_verify_info_x_position(env_id: str):
    """Check that the first observation entry equals `info["x_position"]`.

    The environment is created with
    `exclude_current_positions_from_observation=False` and stepped once with a
    randomly sampled action before the comparison is made.
    """
    env = gym.make(env_id, exclude_current_positions_from_observation=False)
    _reset_obs, _reset_info = env.reset()

    action = env.action_space.sample()
    observation, _reward, _terminated, _truncated, step_info = env.step(action)

    assert observation[0] == step_info["x_position"]
# Note: "HumanoidStandup-v4" does not have `info`
# Note: "Humanoid-v4/3" & "Ant-v4/3" fail this test
@pytest.mark.parametrize(
    "env_id",
    [
        "Ant-v5",
        "Humanoid-v5",
        "HumanoidStandup-v5",
        "Swimmer-v5",
        "Swimmer-v4",
        "Swimmer-v3",
    ],
)
def test_verify_info_y_position(env_id: str):
    """Check that the second observation entry equals `info["y_position"]`.

    The environment is created with
    `exclude_current_positions_from_observation=False` and stepped once with a
    randomly sampled action before the comparison is made.
    """
    env = gym.make(env_id, exclude_current_positions_from_observation=False)
    _reset_obs, _reset_info = env.reset()

    action = env.action_space.sample()
    observation, _reward, _terminated, _truncated, step_info = env.step(action)

    assert observation[1] == step_info["y_position"]
# Note: "HumanoidStandup-v4" does not have `info`
@pytest.mark.parametrize("env_name", ["HalfCheetah", "Hopper", "Swimmer", "Walker2d"])
@pytest.mark.parametrize("version", ["v5", "v4", "v3"])
def test_verify_info_x_velocity(env_name: str, version: str):
    """Check `info["x_velocity"]` against the finite difference of `qpos[0]`.

    The expected value is `(qpos[0] after step - qpos[0] before step) / env.dt`
    and must match the reported velocity exactly.
    """
    env = gym.make(f"{env_name}-{version}").unwrapped
    assert isinstance(env, BaseMujocoEnv)
    env.reset()

    x_before = env.data.qpos[0]
    action = env.action_space.sample()
    _obs, _reward, _terminated, _truncated, step_info = env.step(action)
    x_after = env.data.qpos[0]

    expected_velocity = (x_after - x_before) / env.dt
    assert expected_velocity == step_info["x_velocity"]
# Note: "HumanoidStandup-v4" does not have `info`
@pytest.mark.parametrize("env_id", ["Swimmer-v5", "Swimmer-v4", "Swimmer-v3"])
def test_verify_info_y_velocity(env_id: str):
    """Check `info["y_velocity"]` against the finite difference of `qpos[1]`.

    The expected value is `(qpos[1] after step - qpos[1] before step) / env.dt`
    and must match the reported velocity exactly.
    """
    env = gym.make(env_id).unwrapped
    assert isinstance(env, BaseMujocoEnv)
    env.reset()

    y_before = env.data.qpos[1]
    action = env.action_space.sample()
    _obs, _reward, _terminated, _truncated, step_info = env.step(action)
    y_after = env.data.qpos[1]

    expected_velocity = (y_after - y_before) / env.dt
    assert expected_velocity == step_info["y_velocity"]
@pytest.mark.parametrize("env_id", ["Ant-v5", "Ant-v4", "Ant-v3"])
def test_verify_info_xy_velocity_xpos(env_id: str):
    """Check the Ant's `info["x_velocity"]` / `info["y_velocity"]`.

    The expected velocities are the change in the first two components of
    `env.get_body_com("torso")` over one step, divided by `env.dt`.
    """
    env = gym.make(env_id).unwrapped
    assert isinstance(env, BaseMujocoEnv)
    env.reset()

    # Copy each sample so the "before" value is kept separate from the "after" one.
    torso_xy_before = env.get_body_com("torso")[:2].copy()
    action = env.action_space.sample()
    _obs, _reward, _terminated, _truncated, step_info = env.step(action)
    torso_xy_after = env.get_body_com("torso")[:2].copy()

    displacement = torso_xy_after - torso_xy_before
    expected_vx, expected_vy = displacement / env.dt

    assert expected_vx == step_info["x_velocity"]
    assert expected_vy == step_info["y_velocity"]
@pytest.mark.parametrize("env_id", ["Humanoid-v5", "Humanoid-v4", "Humanoid-v3"])
def test_verify_info_xy_velocity_com(env_id: str):
    """Check the Humanoid's `info["x_velocity"]` / `info["y_velocity"]`.

    The expected velocities are the change of the mass-weighted mean of
    `data.xipos` (first two components) over one step, divided by `env.dt`.
    """

    def mass_center(model, data):
        """Return the x/y components of the mass-weighted mean of `data.xipos`."""
        body_masses = np.expand_dims(model.body_mass, axis=1)
        body_positions = data.xipos
        weighted_mean = np.sum(body_masses * body_positions, axis=0) / np.sum(
            body_masses
        )
        return weighted_mean[0:2].copy()

    env = gym.make(env_id).unwrapped
    assert isinstance(env, BaseMujocoEnv)
    env.reset()

    com_xy_before = mass_center(env.model, env.data)
    action = env.action_space.sample()
    _obs, _reward, _terminated, _truncated, step_info = env.step(action)
    com_xy_after = mass_center(env.model, env.data)

    displacement = com_xy_after - com_xy_before
    expected_vx, expected_vy = displacement / env.dt

    assert expected_vx == step_info["x_velocity"]
    assert expected_vy == step_info["y_velocity"]
# Note: Hopper-v4/3/2 does not have `info['reward_survive']`, but it is still affected
# Note: Walker2d-v4/3/2 does not have `info['reward_survive']`, but it is still affected
# Note: Inverted(Double)Pendulum-v4/2 does not have `info['reward_survive']`, but it is still affected
# Note: all `v4/v3/v2` environments with a healthy reward fail this test
@pytest.mark.parametrize(
    "env_name",
    [
        "Ant",
        "Hopper",
        "Humanoid",
        "InvertedDoublePendulum",
        "InvertedPendulum",
        "Walker2d",
    ],
)
@pytest.mark.parametrize("version", ["v5"])
def test_verify_reward_survive(env_name: str, version: str):
    """Check `info["reward_survive"]` is 0 on the terminal step and non-zero before it.

    The environment is seeded and stepped with seeded random actions for at
    most 80 steps; the test is only meaningful if termination is reached, so
    that is asserted at the end.
    """
    env = gym.make(f"{env_name}-{version}", reset_noise_scale=0).unwrapped
    assert isinstance(env, BaseMujocoEnv)
    env.reset(seed=0)
    env.action_space.seed(1)

    terminated = False
    for _ in range(80):
        action = env.action_space.sample()
        _obs, _reward, terminated, _truncated, step_info = env.step(action)

        if not terminated:
            assert step_info["reward_survive"] != 0
            continue

        assert step_info["reward_survive"] == 0
        break

    assert (
        terminated
    ), "The environment, should have terminated, if not the test is not valid."
# Warning messages that `test_frame_skip` tolerates when running `check_env`.
# Each message is wrapped in the escape sequences `\x1b[33m` ... `\x1b[0m`, so it
# is compared against the full text of the recorded warning.
CHECK_ENV_IGNORE_WARNINGS = [
    f"\x1b[33mWARN: {message}\x1b[0m"
    for message in [
        "A Box observation space minimum value is -infinity. This is probably too low.",
        "A Box observation space maximum value is infinity. This is probably too high.",
        "For Box action spaces, we recommend using a symmetric and normalized space (range=[-1, 1] or [0, 1]). See https://stable-baselines3.readthedocs.io/en/master/guide/rl_tips.html for more information.",
    ]
]
@pytest.mark.parametrize("env_name", ALL_MUJOCO_ENVS)
@pytest.mark.parametrize("version", ["v5"])
@pytest.mark.parametrize("frame_skip", [1, 2, 3, 4, 5])
def test_frame_skip(env_name: str, version: str, frame_skip: int):
    """Run `check_env` on every MuJoCo env for several `frame_skip` values.

    Raises:
        Error: if a recorded warning is not listed in `CHECK_ENV_IGNORE_WARNINGS`.
    """
    env = gym.make(f"{env_name}-{version}", frame_skip=frame_skip)

    # Record the warnings emitted while checking (and closing) the environment.
    with warnings.catch_warnings(record=True) as caught_warnings:
        check_env(env.unwrapped, skip_render_check=True)
        env.close()

    for caught in caught_warnings:
        message = caught.message
        if message.args[0] in CHECK_ENV_IGNORE_WARNINGS:
            continue
        raise Error(f"Unexpected warning: {message}")
# Dev Note: This can not be version env parametrized because each env has its own reward function
@pytest.mark.parametrize("version", ["v5"])
def test_reward_sum(version: str):
    """Check that each env's reward equals the combination of its `info` reward terms.

    Comparisons are exact, so the grouping and order of the terms in every
    formula below is significant (floating-point addition is order sensitive)
    and must not be rearranged.
    """
    NUM_STEPS = 100

    # Environment name -> function rebuilding the reward from the step `info`.
    reward_formulas = {
        "Ant": lambda info: (info["reward_survive"] + info["reward_forward"])
        - (-info["reward_ctrl"] + -info["reward_contact"]),
        "HalfCheetah": lambda info: info["reward_forward"] + info["reward_ctrl"],
        "Hopper": lambda info: info["reward_forward"]
        + info["reward_survive"]
        + info["reward_ctrl"],
        "Humanoid": lambda info: (info["reward_forward"] + info["reward_survive"])
        + (info["reward_ctrl"] + info["reward_contact"]),
        "HumanoidStandup": lambda info: info["reward_linup"]
        + info["reward_quadctrl"]
        + info["reward_impact"]
        + 1,
        "InvertedDoublePendulum": lambda info: info["reward_survive"]
        + info["distance_penalty"]
        + info["velocity_penalty"],
        "InvertedPendulum": lambda info: info["reward_survive"],
        "Pusher": lambda info: info["reward_dist"]
        + info["reward_ctrl"]
        + info["reward_near"],
        "Reacher": lambda info: info["reward_dist"] + info["reward_ctrl"],
        "Swimmer": lambda info: info["reward_forward"] + info["reward_ctrl"],
        "Walker2d": lambda info: info["reward_forward"]
        + info["reward_survive"]
        + info["reward_ctrl"],
    }

    for env_name, rebuild_reward in reward_formulas.items():
        env = gym.make(f"{env_name}-{version}")
        env.reset()
        for _ in range(NUM_STEPS):
            action = env.action_space.sample()
            _obs, reward, _terminated, _truncated, step_info = env.step(action)
            assert reward == rebuild_reward(step_info)
# One comparison configuration per environment:
#   env_name       - base environment name (without version suffix)
#   obs, rew, term - passed as `skip_obs` / `skip_rew` / `skip_terminal`
#   info           - passed as `info_comparison`
env_conf = collections.namedtuple(
    "env_conf", ["env_name", "obs", "rew", "term", "info"]
)


# Note: the environments "HalfCheetah", "Pusher", "Swimmer", are identical between `v4` & `v5` (excluding `info`)
@pytest.mark.parametrize(
    "env_conf",
    [
        env_conf("Ant", True, True, False, "skip"),
        env_conf("HalfCheetah", False, False, False, "skip"),
        env_conf("Hopper", False, True, False, "superset"),
        # skipping humanoid, everything has changed
        env_conf("HumanoidStandup", True, False, False, "superset"),
        env_conf("InvertedDoublePendulum", True, True, False, "superset"),
        env_conf("InvertedPendulum", False, True, False, "superset"),
        env_conf("Pusher", False, False, False, "keys-superset"),
        env_conf("Reacher", True, False, False, "keys-equivalence"),
        env_conf("Swimmer", False, False, False, "skip"),
        env_conf("Walker2d", True, True, True, "keys-superset"),
    ],
)
def test_identical_behaviour_v45(env_conf):
    """Compare the `v4` and `v5` versions of an environment over 100 steps.

    Which of observation / reward / termination are skipped, and how `info` is
    compared, is taken from the given `env_conf` entry.
    """
    NUM_STEPS = 100
    name = env_conf.env_name
    old_env = gym.make(f"{name}-v4")
    new_env = gym.make(f"{name}-v5")

    check_environments_match(
        old_env,
        new_env,
        NUM_STEPS,
        skip_obs=env_conf.obs,
        skip_rew=env_conf.rew,
        skip_terminal=env_conf.term,
        info_comparison=env_conf.info,
    )
@pytest.mark.parametrize("version", ["v5", "v4"])
def test_ant_com(version: str):
    """Check that the Ant's `qpos[0]` equals the torso body's `xpos[0]`.

    The equality is asserted once right after `reset()` and once after a
    single step followed by `mujoco.mj_kinematics`.
    """
    # `env` contains `data : MjData` and `model : MjModel`
    env = gym.make(f"Ant-{version}").unwrapped
    assert isinstance(env, BaseMujocoEnv)

    # Randomly initializes `data.qpos` and `data.qvel`,
    # calls mujoco.mj_forward(env.model, env.data)
    env.reset()

    qpos_x_initial = env.data.qpos[0]
    torso_x_initial = env.data.body("torso").xpos[0]
    assert qpos_x_initial == torso_x_initial, "before failed"  # This succeeds

    action = env.action_space.sample()
    # This calls mujoco.mj_step(env.model, env.data, nstep=env.frame_skip)
    _obs, _reward, _terminated, _truncated, _step_info = env.step(action)
    mujoco.mj_kinematics(env.model, env.data)

    qpos_x_final = env.data.qpos[0]
    torso_x_final = env.data.body("torso").xpos[0]
    assert qpos_x_final == torso_x_final, "after failed"  # This succeeds
@pytest.mark.parametrize("version", ["v5", "v4", "v3", "v2"])
def test_set_state(version: str):
    """Check that `set_state()` on a Hopper writes the given `qpos` and `qvel`.

    After the call, `env.data.qpos` and `env.data.qvel` must equal the supplied
    arrays element for element.
    """
    env = gym.make(f"Hopper-{version}").unwrapped
    assert isinstance(env, BaseMujocoEnv)
    env.reset()

    target_qpos = np.array(
        [0.00136962, 1.24769787, -0.00459026, -0.00483472, 0.0031327, 0.00412756]
    )
    target_qvel = np.array(
        [0.00106636, 0.00229497, 0.00043625, 0.00435072, 0.00315854, -0.00497261]
    )
    env.set_state(target_qpos, target_qvel)

    qpos_matches = env.data.qpos == target_qpos
    qvel_matches = env.data.qvel == target_qvel
    assert qpos_matches.all()
    assert qvel_matches.all()
# Note: HumanoidStandup-v4/v3 does not have `info`
# Note: Ant-v4/v3 fails this test
# Note: Humanoid-v4/v3 fails this test
# Note: v2 does not have `info`
@pytest.mark.parametrize(
    "env_id", ["Ant-v5", "Humanoid-v5", "Swimmer-v5", "Swimmer-v4", "Swimmer-v3"]
)
def test_distance_from_origin_info(env_id: str):
    """Check that `info["distance_from_origin"]` is correct after one step.

    The expected value is the Euclidean norm of the difference between the
    first two entries of `env.data.qpos` and of `env.init_qpos`.
    """
    env = gym.make(env_id).unwrapped
    assert isinstance(env, BaseMujocoEnv)
    env.reset()

    action = env.action_space.sample()
    _obs, _reward, _terminated, _truncated, step_info = env.step(action)

    xy_offset = env.data.qpos[0:2] - env.init_qpos[0:2]
    assert step_info["distance_from_origin"] == np.linalg.norm(xy_offset)
@pytest.mark.parametrize("env_name", ["Hopper", "HumanoidStandup", "Walker2d"])
@pytest.mark.parametrize("version", ["v5"])
def test_z_distance_from_origin_info(env_name: str, version: str):
    """Check that `info["z_distance_from_origin"]` is correct after one step.

    The expected value is `qpos[i] - init_qpos[i]`, where `i` is read from
    `env.observation_structure["skipped_qpos"]`.
    """
    env = gym.make(f"{env_name}-{version}").unwrapped
    assert isinstance(env, MujocoEnv)
    env.reset()

    action = env.action_space.sample()
    _obs, _reward, _terminated, _truncated, step_info = env.step(action)
    mujoco.mj_kinematics(env.model, env.data)

    # NOTE(review): the "skipped_qpos" entry is used as the index of the z
    # coordinate in `qpos` - presumably the count of skipped leading entries.
    z_idx = env.observation_structure["skipped_qpos"]
    expected_offset = env.data.qpos[z_idx] - env.init_qpos[z_idx]

    assert step_info["z_distance_from_origin"] == expected_offset
@pytest.mark.parametrize("env_name", ALL_MUJOCO_ENVS)
@pytest.mark.parametrize("version", ["v5"])
def test_observation_structure(env_name: str, version: str):
    """Check `env.observation_structure` against the sizes of the loaded model.

    Environments without an `observation_structure` attribute are accepted
    without further checks. Optional entries are only verified when present
    and truthy.
    """
    env = gym.make(f"{env_name}-{version}").unwrapped
    assert isinstance(env, MujocoEnv)

    if not hasattr(env, "observation_structure"):
        return

    structure = env.observation_structure
    model = env.model

    assert model.nq == structure.get("skipped_qpos", 0) + structure["qpos"]
    assert model.nv == structure["qvel"]

    bodies_checked = model.nbody - 1
    # Optional entry -> size it must have when it is part of the observation.
    # NOTE(review): the "ten_lenght" spelling is kept as-is because it is a
    # runtime key; presumably it mirrors the key used by the environments.
    expected_optional_sizes = {
        "cinert": bodies_checked * 10,
        "cvel": bodies_checked * 6,
        "qfrc_actuator": model.nv - 6,
        "cfrc_ext": bodies_checked * 6,
        "ten_lenght": model.ntendon,
        "ten_velocity": model.ntendon,
    }
    for key, expected_size in expected_optional_sizes.items():
        if structure.get(key, False):
            assert expected_size == structure[key]
@pytest.mark.parametrize(
    "env_name",
    [
        "Ant",
        "HalfCheetah",
        "Hopper",
        "Humanoid",
        "HumanoidStandup",
        # "InvertedDoublePendulum",
        # "InvertedPendulum",
        # "Pusher",
        # "Reacher",
        "Swimmer",
        "Walker2d",
    ],
)
@pytest.mark.parametrize("version", ["v5"])
def test_reset_info(env_name: str, version: str):
    """Check that `reset()` returns a non-empty `info` for the listed environments."""
    env = gym.make(f"{env_name}-{version}")
    _obs, reset_info = env.reset()

    entry_count = len(reset_info)
    assert entry_count > 0
# Note: the max height used to be wrong in the documentation. (1.196m instead of 1.2m)
@pytest.mark.parametrize("version", ["v5"])
def test_inverted_double_pendulum_max_height(version: str):
    """Check that, after a noise-free reset, the first site of the Inverted Double Pendulum is at height 1.2."""
    env = gym.make(f"InvertedDoublePendulum-{version}", reset_noise_scale=0).unwrapped
    assert isinstance(env, BaseMujocoEnv)
    env.reset()

    # Third component of the first entry of `site_xpos`.
    site_height = env.data.site_xpos[0][2]
    assert site_height == 1.2
@pytest.mark.parametrize("version", ["v4"])
def test_inverted_double_pendulum_max_height_old(version: str):
    """Check the Inverted Double Pendulum height of 1.2 on `v4`.

    `v4` does not have the `reset_noise_scale` argument, so the initial state
    is applied directly with `set_state(init_qpos, init_qvel)` instead of
    calling `reset()`.
    """
    env = gym.make(f"InvertedDoublePendulum-{version}").unwrapped
    assert isinstance(env, BaseMujocoEnv)
    env.set_state(env.init_qpos, env.init_qvel)

    # Third component of the first entry of `site_xpos`.
    site_height = env.data.site_xpos[0][2]
    assert site_height == 1.2
# note: fails with `brax==0.9.0`
@pytest.mark.parametrize("version", ["v5", "v4"])
def test_model_object_count(version: str):
    """Check that every environment's model reports the expected object counts.

    Mostly useful when a non-MuJoCo simulator is used to load the models.
    """
    model_fields = ("nq", "nv", "nu", "nbody", "nbvh", "njnt", "ngeom", "ntendon")

    # Environment name -> expected values, in the order of `model_fields`.
    expected_counts = {
        "Ant": (15, 14, 8, 14, 14, 9, 14, 0),
        "HalfCheetah": (9, 9, 6, 8, 10, 9, 9, 0),
        "Hopper": (6, 6, 3, 5, 5, 6, 5, 0),
        "Humanoid": (24, 23, 17, 14, 22, 18, 18, 2),
        "HumanoidStandup": (24, 23, 17, 14, 22, 18, 18, 2),
        "InvertedDoublePendulum": (3, 3, 1, 4, 6, 3, 5, 0),
        "InvertedPendulum": (2, 2, 1, 3, 3, 2, 3, 0),
        "Pusher": (11, 11, 7, 13, 18, 11, 21, 0),
        "Reacher": (4, 4, 2, 5, 5, 4, 10, 0),
        "Swimmer": (5, 5, 2, 4, 4, 5, 4, 0),
        "Walker2d": (9, 9, 6, 8, 8, 9, 8, 0),
    }

    for env_name, counts in expected_counts.items():
        env = gym.make(f"{env_name}-{version}").unwrapped
        assert isinstance(env, BaseMujocoEnv)
        for field_name, expected in zip(model_fields, counts):
            assert getattr(env.model, field_name) == expected
def test_dt():
    """Check that `env.dt` follows the model timestep and `frame_skip`.

    A default Ant-v5 is compared with an Ant-v5 created with `frame_skip=1`
    whose model timestep is overwritten with 0.05; both must report the same
    `dt`.
    """
    shared_kwargs = {"include_cfrc_ext_in_observation": False}
    default_env = gym.make("Ant-v5", **shared_kwargs).unwrapped
    single_frame_env = gym.make("Ant-v5", frame_skip=1, **shared_kwargs).unwrapped
    assert isinstance(default_env, BaseMujocoEnv)
    assert isinstance(single_frame_env, BaseMujocoEnv)

    # NOTE(review): presumably 0.05 equals the default env's
    # frame_skip * timestep - confirm against the Ant-v5 defaults.
    single_frame_env.model.opt.timestep = 0.05

    assert default_env.dt == single_frame_env.dt
    # check_environments_match(env_a, env_b, num_steps=100)  # This Fails as expected
@pytest.mark.parametrize(
    "env_id",
    [
        "Ant-v5",
        "Ant-v4",
        "Ant-v3",
        "HalfCheetah-v5",
        "HalfCheetah-v4",
        "HalfCheetah-v3",
        "Hopper-v5",
        "Hopper-v4",
        "Hopper-v3",
        "Humanoid-v5",
        "Humanoid-v4",
        "Humanoid-v3",
        "HumanoidStandup-v5",
        "InvertedDoublePendulum-v5",
        "InvertedPendulum-v5",
        "Swimmer-v5",
        "Swimmer-v4",
        "Swimmer-v3",
        "Walker2d-v5",
        "Walker2d-v4",
        "Walker2d-v3",
    ],
)
def test_reset_noise_scale(env_id: str):
    """Check that `reset_noise_scale=0` gives a deterministic initial state.

    After `reset()`, `env.data.qpos` and `env.data.qvel` must equal
    `env.init_qpos` and `env.init_qvel` element for element.
    """
    env = gym.make(env_id, reset_noise_scale=0).unwrapped
    # Verify the type at runtime, like the other tests in this file, instead of
    # only declaring it through an unchecked annotation.
    assert isinstance(env, BaseMujocoEnv)
    env.reset()

    assert np.all(env.data.qpos == env.init_qpos)
    assert np.all(env.data.qvel == env.init_qvel)