Migrate the mujoco-py based MuJoCo v2 and v3 environments to Gymnasium-Robotics (#1392)

Kallinteris Andreas
2025-06-07 17:26:04 +03:00
committed by GitHub
parent de2c10ef1a
commit f20e3f4845
26 changed files with 42 additions and 2401 deletions

View File

@@ -11,22 +11,10 @@ RUN apt-get -y update \
 xvfb unzip patchelf ffmpeg cmake swig \
 && apt-get autoremove -y \
 && apt-get clean \
-&& rm -rf /var/lib/apt/lists/* \
-# Download mujoco
-&& mkdir /root/.mujoco \
-&& cd /root/.mujoco \
-&& wget -qO- 'https://github.com/deepmind/mujoco/releases/download/2.1.0/mujoco210-linux-x86_64.tar.gz' | tar -xzvf -
-ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/root/.mujoco/mujoco210/bin"
+&& rm -rf /var/lib/apt/lists/*
 RUN pip install uv
-# Build mujoco-py from source. Pypi installs wheel packages and Cython won't recompile old file versions in the Github Actions CI.
-# Thus generating the following error https://github.com/cython/cython/pull/4428
-RUN git clone https://github.com/openai/mujoco-py.git\
-&& cd mujoco-py \
-&& uv pip install --system -e .
 COPY . /usr/local/gymnasium/
 WORKDIR /usr/local/gymnasium/

View File

@@ -38,7 +38,12 @@ As of October 2021, DeepMind has acquired MuJoCo and has open-sourced it in 2022
 Using MuJoCo with Gymnasium requires the framework `mujoco` be installed (this dependency is installed with the above command).
 Instructions for installing the MuJoCo engine can be found on their [website](https://mujoco.org) and [GitHub repository](https://github.com/deepmind/mujoco).
-For MuJoCo `v3` environments and older the `mujoco-py` framework is required (`pip install gymnasium[mujoco-py]`) which can be found in the [GitHub repository](https://github.com/openai/mujoco-py/tree/master/mujoco_py).
+MuJoCo `v3` environments and older, which relied on the `mujoco-py` framework, were migrated to the `gymnasium-robotics` package starting with `gymnasium` v1.2. For information on using these older versions, please refer to the `gymnasium-robotics` documentation. The original `mujoco-py` framework can be found in its [GitHub repository](https://github.com/openai/mujoco-py/tree/master/mujoco_py).
+The dependencies for the old environment versions can be installed via:
+````bash
+pip install gymnasium_robotics[mujoco-py]
+````
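
After installing these dependencies, the old environments are expected to be created through the `gymnasium-robotics` plugin rather than from `gymnasium` itself. The snippet below is a minimal sketch of that workflow; it assumes the legacy `v2`/`v3` IDs (e.g. `HalfCheetah-v3`) are re-registered under their original names by `gymnasium_robotics` — confirm the exact IDs in the `gymnasium-robotics` documentation.

````python
# Minimal sketch, assuming `pip install gymnasium_robotics[mujoco-py]` has been run
# and that gymnasium_robotics re-registers the legacy v2/v3 environment IDs.
import gymnasium as gym
import gymnasium_robotics

gym.register_envs(gymnasium_robotics)  # make the plugin registration explicit

env = gym.make("HalfCheetah-v3")        # legacy mujoco-py based environment (assumed ID)
observation, info = env.reset(seed=0)
for _ in range(100):
    action = env.action_space.sample()  # random policy, for illustration only
    observation, reward, terminated, truncated, info = env.step(action)
    if terminated or truncated:
        observation, info = env.reset()
env.close()
````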
There are eleven MuJoCo environments (in roughly increasing complexity):
@@ -76,12 +81,12 @@ Environments can be configured by changing the `xml_file` argument and/or by twe
 ## Versions
 Gymnasium includes the following versions of the environments:
-| Version | Simulator | Notes |
-|---------|-----------------|--------------------------------------------------------|
-| `v5` | `mujoco=>2.3.3` | Recommended (most features, the least bugs) |
-| `v4` | `mujoco=>2.1.3` | Maintained for reproducibility |
-| `v3` | `mujoco-py` | Deprecated, Kept for reproducibility (limited support) |
-| `v2` | `mujoco-py` | Deprecated, Kept for reproducibility (limited support) |
+| Version | Simulator | Notes |
+|---------|-----------------|---------------------------------------------------------------------------------------------------|
+| `v5` | `mujoco>=2.3.3` | Recommended (most features, fewest bugs) |
+| `v4` | `mujoco>=2.1.3` | Maintained for reproducibility |
+| `v3` | `mujoco-py` | Migrated to `gymnasium-robotics` (as of `gymnasium` v1.2). Deprecated, kept for reproducibility. |
+| `v2` | `mujoco-py` | Migrated to `gymnasium-robotics` (as of `gymnasium` v1.2). Deprecated, kept for reproducibility. |
 For more information, see the section "Version History" for each environment.
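
As a point of reference for the recommended `v5` row above, these environments are created directly from `gymnasium` with the maintained `mujoco` bindings (installed via `pip install "gymnasium[mujoco]"`); a short usage sketch:

````python
# Short sketch using the recommended v5 environments (maintained `mujoco` bindings).
import gymnasium as gym

env = gym.make("Ant-v5")
observation, info = env.reset(seed=42)
action = env.action_space.sample()
observation, reward, terminated, truncated, info = env.step(action)
print(observation.shape, float(reward), terminated, truncated)
env.close()
````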

View File

@@ -186,13 +186,6 @@ register(
# manipulation
register(
id="Reacher-v2",
entry_point="gymnasium.envs.mujoco.reacher:ReacherEnv",
max_episode_steps=50,
reward_threshold=-3.75,
)
register(
id="Reacher-v4",
entry_point="gymnasium.envs.mujoco.reacher_v4:ReacherEnv",
@@ -207,13 +200,6 @@ register(
reward_threshold=-3.75,
)
register(
id="Pusher-v2",
entry_point="gymnasium.envs.mujoco.pusher:PusherEnv",
max_episode_steps=100,
reward_threshold=0.0,
)
register(
id="Pusher-v4",
entry_point="gymnasium.envs.mujoco.pusher_v4:PusherEnv",
@@ -230,13 +216,6 @@ register(
# balance
register(
id="InvertedPendulum-v2",
entry_point="gymnasium.envs.mujoco.inverted_pendulum:InvertedPendulumEnv",
max_episode_steps=1000,
reward_threshold=950.0,
)
register(
id="InvertedPendulum-v4",
entry_point="gymnasium.envs.mujoco.inverted_pendulum_v4:InvertedPendulumEnv",
@@ -251,13 +230,6 @@ register(
reward_threshold=950.0,
)
register(
id="InvertedDoublePendulum-v2",
entry_point="gymnasium.envs.mujoco.inverted_double_pendulum:InvertedDoublePendulumEnv",
max_episode_steps=1000,
reward_threshold=9100.0,
)
register(
id="InvertedDoublePendulum-v4",
entry_point="gymnasium.envs.mujoco.inverted_double_pendulum_v4:InvertedDoublePendulumEnv",
@@ -274,20 +246,6 @@ register(
# runners
register(
id="HalfCheetah-v2",
entry_point="gymnasium.envs.mujoco.half_cheetah:HalfCheetahEnv",
max_episode_steps=1000,
reward_threshold=4800.0,
)
register(
id="HalfCheetah-v3",
entry_point="gymnasium.envs.mujoco.half_cheetah_v3:HalfCheetahEnv",
max_episode_steps=1000,
reward_threshold=4800.0,
)
register(
id="HalfCheetah-v4",
entry_point="gymnasium.envs.mujoco.half_cheetah_v4:HalfCheetahEnv",
@@ -302,20 +260,6 @@ register(
reward_threshold=4800.0,
)
register(
id="Hopper-v2",
entry_point="gymnasium.envs.mujoco.hopper:HopperEnv",
max_episode_steps=1000,
reward_threshold=3800.0,
)
register(
id="Hopper-v3",
entry_point="gymnasium.envs.mujoco.hopper_v3:HopperEnv",
max_episode_steps=1000,
reward_threshold=3800.0,
)
register(
id="Hopper-v4",
entry_point="gymnasium.envs.mujoco.hopper_v4:HopperEnv",
@@ -330,20 +274,6 @@ register(
reward_threshold=3800.0,
)
register(
id="Swimmer-v2",
entry_point="gymnasium.envs.mujoco.swimmer:SwimmerEnv",
max_episode_steps=1000,
reward_threshold=360.0,
)
register(
id="Swimmer-v3",
entry_point="gymnasium.envs.mujoco.swimmer_v3:SwimmerEnv",
max_episode_steps=1000,
reward_threshold=360.0,
)
register(
id="Swimmer-v4",
entry_point="gymnasium.envs.mujoco.swimmer_v4:SwimmerEnv",
@@ -358,18 +288,6 @@ register(
reward_threshold=360.0,
)
register(
id="Walker2d-v2",
max_episode_steps=1000,
entry_point="gymnasium.envs.mujoco.walker2d:Walker2dEnv",
)
register(
id="Walker2d-v3",
max_episode_steps=1000,
entry_point="gymnasium.envs.mujoco.walker2d_v3:Walker2dEnv",
)
register(
id="Walker2d-v4",
max_episode_steps=1000,
@@ -382,20 +300,6 @@ register(
entry_point="gymnasium.envs.mujoco.walker2d_v5:Walker2dEnv",
)
register(
id="Ant-v2",
entry_point="gymnasium.envs.mujoco.ant:AntEnv",
max_episode_steps=1000,
reward_threshold=6000.0,
)
register(
id="Ant-v3",
entry_point="gymnasium.envs.mujoco.ant_v3:AntEnv",
max_episode_steps=1000,
reward_threshold=6000.0,
)
register(
id="Ant-v4",
entry_point="gymnasium.envs.mujoco.ant_v4:AntEnv",
@@ -410,18 +314,6 @@ register(
reward_threshold=6000.0,
)
register(
id="Humanoid-v2",
entry_point="gymnasium.envs.mujoco.humanoid:HumanoidEnv",
max_episode_steps=1000,
)
register(
id="Humanoid-v3",
entry_point="gymnasium.envs.mujoco.humanoid_v3:HumanoidEnv",
max_episode_steps=1000,
)
register(
id="Humanoid-v4",
entry_point="gymnasium.envs.mujoco.humanoid_v4:HumanoidEnv",
@@ -434,12 +326,6 @@ register(
max_episode_steps=1000,
)
register(
id="HumanoidStandup-v2",
entry_point="gymnasium.envs.mujoco.humanoidstandup:HumanoidStandupEnv",
max_episode_steps=1000,
)
register(
id="HumanoidStandup-v4",
entry_point="gymnasium.envs.mujoco.humanoidstandup_v4:HumanoidStandupEnv",

View File

@@ -1,81 +0,0 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv
from gymnasium.spaces import Box
class AntEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 20,
}
def __init__(self, **kwargs):
observation_space = Box(
low=-np.inf, high=np.inf, shape=(111,), dtype=np.float64
)
MuJocoPyEnv.__init__(
self, "ant.xml", 5, observation_space=observation_space, **kwargs
)
utils.EzPickle.__init__(self, **kwargs)
def step(self, a):
xposbefore = self.get_body_com("torso")[0]
self.do_simulation(a, self.frame_skip)
xposafter = self.get_body_com("torso")[0]
forward_reward = (xposafter - xposbefore) / self.dt
ctrl_cost = 0.5 * np.square(a).sum()
contact_cost = (
0.5 * 1e-3 * np.sum(np.square(np.clip(self.sim.data.cfrc_ext, -1, 1)))
)
survive_reward = 1.0
reward = forward_reward - ctrl_cost - contact_cost + survive_reward
state = self.state_vector()
not_terminated = (
np.isfinite(state).all() and state[2] >= 0.2 and state[2] <= 1.0
)
terminated = not not_terminated
ob = self._get_obs()
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return (
ob,
reward,
terminated,
False,
dict(
reward_forward=forward_reward,
reward_ctrl=-ctrl_cost,
reward_contact=-contact_cost,
reward_survive=survive_reward,
),
)
def _get_obs(self):
return np.concatenate(
[
self.sim.data.qpos.flat[2:],
self.sim.data.qvel.flat,
np.clip(self.sim.data.cfrc_ext, -1, 1).flat,
]
)
def reset_model(self):
qpos = self.init_qpos + self.np_random.uniform(
size=self.model.nq, low=-0.1, high=0.1
)
qvel = self.init_qvel + self.np_random.standard_normal(self.model.nv) * 0.1
self.set_state(qpos, qvel)
return self._get_obs()
def viewer_setup(self):
assert self.viewer is not None
self.viewer.cam.distance = self.model.stat.extent * 0.5

View File

@@ -1,188 +0,0 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv
from gymnasium.spaces import Box
DEFAULT_CAMERA_CONFIG = {
"distance": 4.0,
}
class AntEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 20,
}
def __init__(
self,
xml_file="ant.xml",
ctrl_cost_weight=0.5,
contact_cost_weight=5e-4,
healthy_reward=1.0,
terminate_when_unhealthy=True,
healthy_z_range=(0.2, 1.0),
contact_force_range=(-1.0, 1.0),
reset_noise_scale=0.1,
exclude_current_positions_from_observation=True,
**kwargs,
):
utils.EzPickle.__init__(
self,
xml_file,
ctrl_cost_weight,
contact_cost_weight,
healthy_reward,
terminate_when_unhealthy,
healthy_z_range,
contact_force_range,
reset_noise_scale,
exclude_current_positions_from_observation,
**kwargs,
)
self._ctrl_cost_weight = ctrl_cost_weight
self._contact_cost_weight = contact_cost_weight
self._healthy_reward = healthy_reward
self._terminate_when_unhealthy = terminate_when_unhealthy
self._healthy_z_range = healthy_z_range
self._contact_force_range = contact_force_range
self._reset_noise_scale = reset_noise_scale
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation
)
if exclude_current_positions_from_observation:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(111,), dtype=np.float64
)
else:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(113,), dtype=np.float64
)
MuJocoPyEnv.__init__(
self, xml_file, 5, observation_space=observation_space, **kwargs
)
@property
def healthy_reward(self):
return (
float(self.is_healthy or self._terminate_when_unhealthy)
* self._healthy_reward
)
def control_cost(self, action):
control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
return control_cost
@property
def contact_forces(self):
raw_contact_forces = self.sim.data.cfrc_ext
min_value, max_value = self._contact_force_range
contact_forces = np.clip(raw_contact_forces, min_value, max_value)
return contact_forces
@property
def contact_cost(self):
contact_cost = self._contact_cost_weight * np.sum(
np.square(self.contact_forces)
)
return contact_cost
@property
def is_healthy(self):
state = self.state_vector()
min_z, max_z = self._healthy_z_range
is_healthy = np.isfinite(state).all() and min_z <= state[2] <= max_z
return is_healthy
@property
def terminated(self):
terminated = not self.is_healthy if self._terminate_when_unhealthy else False
return terminated
def step(self, action):
xy_position_before = self.get_body_com("torso")[:2].copy()
self.do_simulation(action, self.frame_skip)
xy_position_after = self.get_body_com("torso")[:2].copy()
xy_velocity = (xy_position_after - xy_position_before) / self.dt
x_velocity, y_velocity = xy_velocity
ctrl_cost = self.control_cost(action)
contact_cost = self.contact_cost
forward_reward = x_velocity
healthy_reward = self.healthy_reward
rewards = forward_reward + healthy_reward
costs = ctrl_cost + contact_cost
reward = rewards - costs
terminated = self.terminated
observation = self._get_obs()
info = {
"reward_forward": forward_reward,
"reward_ctrl": -ctrl_cost,
"reward_contact": -contact_cost,
"reward_survive": healthy_reward,
"x_position": xy_position_after[0],
"y_position": xy_position_after[1],
"distance_from_origin": np.linalg.norm(xy_position_after, ord=2),
"x_velocity": x_velocity,
"y_velocity": y_velocity,
"forward_reward": forward_reward,
}
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, terminated, False, info
def _get_obs(self):
position = self.sim.data.qpos.flat.copy()
velocity = self.sim.data.qvel.flat.copy()
contact_force = self.contact_forces.flat.copy()
if self._exclude_current_positions_from_observation:
position = position[2:]
observations = np.concatenate((position, velocity, contact_force))
return observations
def reset_model(self):
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = (
self.init_qvel
+ self._reset_noise_scale * self.np_random.standard_normal(self.model.nv)
)
self.set_state(qpos, qvel)
observation = self._get_obs()
return observation
def viewer_setup(self):
assert self.viewer is not None
for key, value in DEFAULT_CAMERA_CONFIG.items():
if isinstance(value, np.ndarray):
getattr(self.viewer.cam, key)[:] = value
else:
setattr(self.viewer.cam, key, value)

View File

@@ -1,65 +0,0 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv
from gymnasium.spaces import Box
class HalfCheetahEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 20,
}
def __init__(self, **kwargs):
observation_space = Box(low=-np.inf, high=np.inf, shape=(17,), dtype=np.float64)
MuJocoPyEnv.__init__(
self, "half_cheetah.xml", 5, observation_space=observation_space, **kwargs
)
utils.EzPickle.__init__(self, **kwargs)
def step(self, action):
xposbefore = self.sim.data.qpos[0]
self.do_simulation(action, self.frame_skip)
xposafter = self.sim.data.qpos[0]
ob = self._get_obs()
reward_ctrl = -0.1 * np.square(action).sum()
reward_run = (xposafter - xposbefore) / self.dt
reward = reward_ctrl + reward_run
terminated = False
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return (
ob,
reward,
terminated,
False,
dict(reward_run=reward_run, reward_ctrl=reward_ctrl),
)
def _get_obs(self):
return np.concatenate(
[
self.sim.data.qpos.flat[1:],
self.sim.data.qvel.flat,
]
)
def reset_model(self):
qpos = self.init_qpos + self.np_random.uniform(
low=-0.1, high=0.1, size=self.model.nq
)
qvel = self.init_qvel + self.np_random.standard_normal(self.model.nv) * 0.1
self.set_state(qpos, qvel)
return self._get_obs()
def viewer_setup(self):
assert self.viewer is not None
self.viewer.cam.distance = self.model.stat.extent * 0.5

View File

@@ -1,129 +0,0 @@
__credits__ = ["Rushiv Arora"]
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv
from gymnasium.spaces import Box
DEFAULT_CAMERA_CONFIG = {
"distance": 4.0,
}
class HalfCheetahEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 20,
}
def __init__(
self,
xml_file="half_cheetah.xml",
forward_reward_weight=1.0,
ctrl_cost_weight=0.1,
reset_noise_scale=0.1,
exclude_current_positions_from_observation=True,
**kwargs,
):
utils.EzPickle.__init__(
self,
xml_file,
forward_reward_weight,
ctrl_cost_weight,
reset_noise_scale,
exclude_current_positions_from_observation,
**kwargs,
)
self._forward_reward_weight = forward_reward_weight
self._ctrl_cost_weight = ctrl_cost_weight
self._reset_noise_scale = reset_noise_scale
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation
)
if exclude_current_positions_from_observation:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(17,), dtype=np.float64
)
else:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(18,), dtype=np.float64
)
MuJocoPyEnv.__init__(
self, xml_file, 5, observation_space=observation_space, **kwargs
)
def control_cost(self, action):
control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
return control_cost
def step(self, action):
x_position_before = self.sim.data.qpos[0]
self.do_simulation(action, self.frame_skip)
x_position_after = self.sim.data.qpos[0]
x_velocity = (x_position_after - x_position_before) / self.dt
ctrl_cost = self.control_cost(action)
forward_reward = self._forward_reward_weight * x_velocity
observation = self._get_obs()
reward = forward_reward - ctrl_cost
terminated = False
info = {
"x_position": x_position_after,
"x_velocity": x_velocity,
"reward_run": forward_reward,
"reward_ctrl": -ctrl_cost,
}
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, terminated, False, info
def _get_obs(self):
position = self.sim.data.qpos.flat.copy()
velocity = self.sim.data.qvel.flat.copy()
if self._exclude_current_positions_from_observation:
position = position[1:]
observation = np.concatenate((position, velocity)).ravel()
return observation
def reset_model(self):
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = (
self.init_qvel
+ self._reset_noise_scale * self.np_random.standard_normal(self.model.nv)
)
self.set_state(qpos, qvel)
observation = self._get_obs()
return observation
def viewer_setup(self):
assert self.viewer is not None
for key, value in DEFAULT_CAMERA_CONFIG.items():
if isinstance(value, np.ndarray):
getattr(self.viewer.cam, key)[:] = value
else:
setattr(self.viewer.cam, key, value)

View File

@@ -1,68 +0,0 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv
from gymnasium.spaces import Box
class HopperEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 125,
}
def __init__(self, **kwargs):
observation_space = Box(low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64)
MuJocoPyEnv.__init__(
self, "hopper.xml", 4, observation_space=observation_space, **kwargs
)
utils.EzPickle.__init__(self, **kwargs)
def step(self, a):
posbefore = self.sim.data.qpos[0]
self.do_simulation(a, self.frame_skip)
posafter, height, ang = self.sim.data.qpos[0:3]
alive_bonus = 1.0
reward = (posafter - posbefore) / self.dt
reward += alive_bonus
reward -= 1e-3 * np.square(a).sum()
s = self.state_vector()
terminated = not (
np.isfinite(s).all()
and (np.abs(s[2:]) < 100).all()
and (height > 0.7)
and (abs(ang) < 0.2)
)
ob = self._get_obs()
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return ob, reward, terminated, False, {}
def _get_obs(self):
return np.concatenate(
[self.sim.data.qpos.flat[1:], np.clip(self.sim.data.qvel.flat, -10, 10)]
)
def reset_model(self):
qpos = self.init_qpos + self.np_random.uniform(
low=-0.005, high=0.005, size=self.model.nq
)
qvel = self.init_qvel + self.np_random.uniform(
low=-0.005, high=0.005, size=self.model.nv
)
self.set_state(qpos, qvel)
return self._get_obs()
def viewer_setup(self):
assert self.viewer is not None
self.viewer.cam.trackbodyid = 2
self.viewer.cam.distance = self.model.stat.extent * 0.75
self.viewer.cam.lookat[2] = 1.15
self.viewer.cam.elevation = -20

View File

@@ -1,179 +0,0 @@
__credits__ = ["Rushiv Arora"]
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv
from gymnasium.spaces import Box
DEFAULT_CAMERA_CONFIG = {
"trackbodyid": 2,
"distance": 3.0,
"lookat": np.array((0.0, 0.0, 1.15)),
"elevation": -20.0,
}
class HopperEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 125,
}
def __init__(
self,
xml_file="hopper.xml",
forward_reward_weight=1.0,
ctrl_cost_weight=1e-3,
healthy_reward=1.0,
terminate_when_unhealthy=True,
healthy_state_range=(-100.0, 100.0),
healthy_z_range=(0.7, float("inf")),
healthy_angle_range=(-0.2, 0.2),
reset_noise_scale=5e-3,
exclude_current_positions_from_observation=True,
**kwargs,
):
utils.EzPickle.__init__(
self,
xml_file,
forward_reward_weight,
ctrl_cost_weight,
healthy_reward,
terminate_when_unhealthy,
healthy_state_range,
healthy_z_range,
healthy_angle_range,
reset_noise_scale,
exclude_current_positions_from_observation,
**kwargs,
)
self._forward_reward_weight = forward_reward_weight
self._ctrl_cost_weight = ctrl_cost_weight
self._healthy_reward = healthy_reward
self._terminate_when_unhealthy = terminate_when_unhealthy
self._healthy_state_range = healthy_state_range
self._healthy_z_range = healthy_z_range
self._healthy_angle_range = healthy_angle_range
self._reset_noise_scale = reset_noise_scale
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation
)
if exclude_current_positions_from_observation:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64
)
else:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(12,), dtype=np.float64
)
MuJocoPyEnv.__init__(
self, xml_file, 4, observation_space=observation_space, **kwargs
)
@property
def healthy_reward(self):
return (
float(self.is_healthy or self._terminate_when_unhealthy)
* self._healthy_reward
)
def control_cost(self, action):
control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
return control_cost
@property
def is_healthy(self):
z, angle = self.sim.data.qpos[1:3]
state = self.state_vector()[2:]
min_state, max_state = self._healthy_state_range
min_z, max_z = self._healthy_z_range
min_angle, max_angle = self._healthy_angle_range
healthy_state = np.all(np.logical_and(min_state < state, state < max_state))
healthy_z = min_z < z < max_z
healthy_angle = min_angle < angle < max_angle
is_healthy = all((healthy_state, healthy_z, healthy_angle))
return is_healthy
@property
def terminated(self):
terminated = not self.is_healthy if self._terminate_when_unhealthy else False
return terminated
def _get_obs(self):
position = self.sim.data.qpos.flat.copy()
velocity = np.clip(self.sim.data.qvel.flat.copy(), -10, 10)
if self._exclude_current_positions_from_observation:
position = position[1:]
observation = np.concatenate((position, velocity)).ravel()
return observation
def step(self, action):
x_position_before = self.sim.data.qpos[0]
self.do_simulation(action, self.frame_skip)
x_position_after = self.sim.data.qpos[0]
x_velocity = (x_position_after - x_position_before) / self.dt
ctrl_cost = self.control_cost(action)
forward_reward = self._forward_reward_weight * x_velocity
healthy_reward = self.healthy_reward
rewards = forward_reward + healthy_reward
costs = ctrl_cost
observation = self._get_obs()
reward = rewards - costs
terminated = self.terminated
info = {
"x_position": x_position_after,
"x_velocity": x_velocity,
}
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, terminated, False, info
def reset_model(self):
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = self.init_qvel + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nv
)
self.set_state(qpos, qvel)
observation = self._get_obs()
return observation
def viewer_setup(self):
assert self.viewer is not None
for key, value in DEFAULT_CAMERA_CONFIG.items():
if isinstance(value, np.ndarray):
getattr(self.viewer.cam, key)[:] = value
else:
setattr(self.viewer.cam, key, value)

View File

@@ -1,95 +0,0 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv
from gymnasium.spaces import Box
def mass_center(model, sim):
mass = np.expand_dims(model.body_mass, 1)
xpos = sim.data.xipos
return (np.sum(mass * xpos, 0) / np.sum(mass))[0]
class HumanoidEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 67,
}
def __init__(self, **kwargs):
observation_space = Box(
low=-np.inf, high=np.inf, shape=(376,), dtype=np.float64
)
MuJocoPyEnv.__init__(
self, "humanoid.xml", 5, observation_space=observation_space, **kwargs
)
utils.EzPickle.__init__(self, **kwargs)
def _get_obs(self):
data = self.sim.data
return np.concatenate(
[
data.qpos.flat[2:],
data.qvel.flat,
data.cinert.flat,
data.cvel.flat,
data.qfrc_actuator.flat,
data.cfrc_ext.flat,
]
)
def step(self, a):
pos_before = mass_center(self.model, self.sim)
self.do_simulation(a, self.frame_skip)
pos_after = mass_center(self.model, self.sim)
alive_bonus = 5.0
data = self.sim.data
lin_vel_cost = 1.25 * (pos_after - pos_before) / self.dt
quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
quad_impact_cost = 0.5e-6 * np.square(data.cfrc_ext).sum()
quad_impact_cost = min(quad_impact_cost, 10)
reward = lin_vel_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus
qpos = self.sim.data.qpos
terminated = bool((qpos[2] < 1.0) or (qpos[2] > 2.0))
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return (
self._get_obs(),
reward,
terminated,
False,
dict(
reward_linvel=lin_vel_cost,
reward_quadctrl=-quad_ctrl_cost,
reward_alive=alive_bonus,
reward_impact=-quad_impact_cost,
),
)
def reset_model(self):
c = 0.01
self.set_state(
self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
self.init_qvel
+ self.np_random.uniform(
low=-c,
high=c,
size=self.model.nv,
),
)
return self._get_obs()
def viewer_setup(self):
assert self.viewer is not None
self.viewer.cam.trackbodyid = 1
self.viewer.cam.distance = self.model.stat.extent * 1.0
self.viewer.cam.lookat[2] = 2.0
self.viewer.cam.elevation = -20

View File

@@ -1,201 +0,0 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv
from gymnasium.spaces import Box
DEFAULT_CAMERA_CONFIG = {
"trackbodyid": 1,
"distance": 4.0,
"lookat": np.array((0.0, 0.0, 2.0)),
"elevation": -20.0,
}
def mass_center(model, sim):
mass = np.expand_dims(model.body_mass, axis=1)
xpos = sim.data.xipos
return (np.sum(mass * xpos, axis=0) / np.sum(mass))[0:2].copy()
class HumanoidEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 67,
}
def __init__(
self,
xml_file="humanoid.xml",
forward_reward_weight=1.25,
ctrl_cost_weight=0.1,
contact_cost_weight=5e-7,
contact_cost_range=(-np.inf, 10.0),
healthy_reward=5.0,
terminate_when_unhealthy=True,
healthy_z_range=(1.0, 2.0),
reset_noise_scale=1e-2,
exclude_current_positions_from_observation=True,
**kwargs,
):
utils.EzPickle.__init__(
self,
xml_file,
forward_reward_weight,
ctrl_cost_weight,
contact_cost_weight,
contact_cost_range,
healthy_reward,
terminate_when_unhealthy,
healthy_z_range,
reset_noise_scale,
exclude_current_positions_from_observation,
**kwargs,
)
self._forward_reward_weight = forward_reward_weight
self._ctrl_cost_weight = ctrl_cost_weight
self._contact_cost_weight = contact_cost_weight
self._contact_cost_range = contact_cost_range
self._healthy_reward = healthy_reward
self._terminate_when_unhealthy = terminate_when_unhealthy
self._healthy_z_range = healthy_z_range
self._reset_noise_scale = reset_noise_scale
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation
)
if exclude_current_positions_from_observation:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(376,), dtype=np.float64
)
else:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(378,), dtype=np.float64
)
MuJocoPyEnv.__init__(
self, xml_file, 5, observation_space=observation_space, **kwargs
)
@property
def healthy_reward(self):
return (
float(self.is_healthy or self._terminate_when_unhealthy)
* self._healthy_reward
)
def control_cost(self, action):
control_cost = self._ctrl_cost_weight * np.sum(np.square(self.sim.data.ctrl))
return control_cost
@property
def contact_cost(self):
contact_forces = self.sim.data.cfrc_ext
contact_cost = self._contact_cost_weight * np.sum(np.square(contact_forces))
min_cost, max_cost = self._contact_cost_range
contact_cost = np.clip(contact_cost, min_cost, max_cost)
return contact_cost
@property
def is_healthy(self):
min_z, max_z = self._healthy_z_range
is_healthy = min_z < self.sim.data.qpos[2] < max_z
return is_healthy
@property
def terminated(self):
terminated = (not self.is_healthy) if self._terminate_when_unhealthy else False
return terminated
def _get_obs(self):
position = self.sim.data.qpos.flat.copy()
velocity = self.sim.data.qvel.flat.copy()
com_inertia = self.sim.data.cinert.flat.copy()
com_velocity = self.sim.data.cvel.flat.copy()
actuator_forces = self.sim.data.qfrc_actuator.flat.copy()
external_contact_forces = self.sim.data.cfrc_ext.flat.copy()
if self._exclude_current_positions_from_observation:
position = position[2:]
return np.concatenate(
(
position,
velocity,
com_inertia,
com_velocity,
actuator_forces,
external_contact_forces,
)
)
def step(self, action):
xy_position_before = mass_center(self.model, self.sim)
self.do_simulation(action, self.frame_skip)
xy_position_after = mass_center(self.model, self.sim)
xy_velocity = (xy_position_after - xy_position_before) / self.dt
x_velocity, y_velocity = xy_velocity
ctrl_cost = self.control_cost(action)
contact_cost = self.contact_cost
forward_reward = self._forward_reward_weight * x_velocity
healthy_reward = self.healthy_reward
rewards = forward_reward + healthy_reward
costs = ctrl_cost + contact_cost
observation = self._get_obs()
reward = rewards - costs
terminated = self.terminated
info = {
"reward_linvel": forward_reward,
"reward_quadctrl": -ctrl_cost,
"reward_alive": healthy_reward,
"reward_impact": -contact_cost,
"x_position": xy_position_after[0],
"y_position": xy_position_after[1],
"distance_from_origin": np.linalg.norm(xy_position_after, ord=2),
"x_velocity": x_velocity,
"y_velocity": y_velocity,
"forward_reward": forward_reward,
}
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, terminated, False, info
def reset_model(self):
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = self.init_qvel + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nv
)
self.set_state(qpos, qvel)
observation = self._get_obs()
return observation
def viewer_setup(self):
assert self.viewer is not None
for key, value in DEFAULT_CAMERA_CONFIG.items():
if isinstance(value, np.ndarray):
getattr(self.viewer.cam, key)[:] = value
else:
setattr(self.viewer.cam, key, value)

View File

@@ -1,88 +0,0 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv
from gymnasium.spaces import Box
class HumanoidStandupEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 67,
}
def __init__(self, **kwargs):
observation_space = Box(
low=-np.inf, high=np.inf, shape=(376,), dtype=np.float64
)
MuJocoPyEnv.__init__(
self,
"humanoidstandup.xml",
5,
observation_space=observation_space,
**kwargs,
)
utils.EzPickle.__init__(self, **kwargs)
def _get_obs(self):
data = self.sim.data
return np.concatenate(
[
data.qpos.flat[2:],
data.qvel.flat,
data.cinert.flat,
data.cvel.flat,
data.qfrc_actuator.flat,
data.cfrc_ext.flat,
]
)
def step(self, a):
self.do_simulation(a, self.frame_skip)
pos_after = self.sim.data.qpos[2]
data = self.sim.data
uph_cost = (pos_after - 0) / self.model.opt.timestep
quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
quad_impact_cost = 0.5e-6 * np.square(data.cfrc_ext).sum()
quad_impact_cost = min(quad_impact_cost, 10)
reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return (
self._get_obs(),
reward,
False,
False,
dict(
reward_linup=uph_cost,
reward_quadctrl=-quad_ctrl_cost,
reward_impact=-quad_impact_cost,
),
)
def reset_model(self):
c = 0.01
self.set_state(
self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
self.init_qvel
+ self.np_random.uniform(
low=-c,
high=c,
size=self.model.nv,
),
)
return self._get_obs()
def viewer_setup(self):
assert self.viewer is not None
self.viewer.cam.trackbodyid = 1
self.viewer.cam.distance = self.model.stat.extent * 1.0
self.viewer.cam.lookat[2] = 0.8925
self.viewer.cam.elevation = -20

View File

@@ -1,70 +0,0 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv
from gymnasium.spaces import Box
class InvertedDoublePendulumEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 20,
}
def __init__(self, **kwargs):
observation_space = Box(low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64)
MuJocoPyEnv.__init__(
self,
"inverted_double_pendulum.xml",
5,
observation_space=observation_space,
**kwargs,
)
utils.EzPickle.__init__(self, **kwargs)
def step(self, action):
self.do_simulation(action, self.frame_skip)
ob = self._get_obs()
x, _, y = self.sim.data.site_xpos[0]
dist_penalty = 0.01 * x**2 + (y - 2) ** 2
v1, v2 = self.sim.data.qvel[1:3]
vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2
alive_bonus = 10
r = alive_bonus - dist_penalty - vel_penalty
terminated = bool(y <= 1)
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return ob, r, terminated, False, {}
def _get_obs(self):
return np.concatenate(
[
self.sim.data.qpos[:1], # cart x pos
np.sin(self.sim.data.qpos[1:]), # link angles
np.cos(self.sim.data.qpos[1:]),
np.clip(self.sim.data.qvel, -10, 10),
np.clip(self.sim.data.qfrc_constraint, -10, 10),
]
).ravel()
def reset_model(self):
self.set_state(
self.init_qpos
+ self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq),
self.init_qvel + self.np_random.standard_normal(self.model.nv) * 0.1,
)
return self._get_obs()
def viewer_setup(self):
assert self.viewer is not None
v = self.viewer
v.cam.trackbodyid = 0
v.cam.distance = self.model.stat.extent * 0.5
v.cam.lookat[2] = 0.12250000000000005 # v.model.stat.center[2]

View File

@@ -1,57 +0,0 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv
from gymnasium.spaces import Box
class InvertedPendulumEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 25,
}
def __init__(self, **kwargs):
utils.EzPickle.__init__(self, **kwargs)
observation_space = Box(low=-np.inf, high=np.inf, shape=(4,), dtype=np.float64)
MuJocoPyEnv.__init__(
self,
"inverted_pendulum.xml",
2,
observation_space=observation_space,
**kwargs,
)
def step(self, a):
reward = 1.0
self.do_simulation(a, self.frame_skip)
ob = self._get_obs()
terminated = bool(not np.isfinite(ob).all() or (np.abs(ob[1]) > 0.2))
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return ob, reward, terminated, False, {}
def reset_model(self):
qpos = self.init_qpos + self.np_random.uniform(
size=self.model.nq, low=-0.01, high=0.01
)
qvel = self.init_qvel + self.np_random.uniform(
size=self.model.nv, low=-0.01, high=0.01
)
self.set_state(qpos, qvel)
return self._get_obs()
def _get_obs(self):
return np.concatenate([self.sim.data.qpos, self.sim.data.qvel]).ravel()
def viewer_setup(self):
assert self.viewer is not None
self.viewer.cam.trackbodyid = 0
self.viewer.cam.distance = self.model.stat.extent

View File

@@ -1,350 +0,0 @@
# type: ignore
from os import path
from typing import Any, Dict, Optional, Tuple, Union
import numpy as np
from numpy.typing import NDArray
import gymnasium as gym
from gymnasium import error, logger, spaces
from gymnasium.spaces import Space
try:
import mujoco_py
except ImportError as e:
MUJOCO_PY_IMPORT_ERROR = e
else:
MUJOCO_PY_IMPORT_ERROR = None
# NOTE: duplication of analogous code in mujoco_env.py
# Support for mujoco-py based envs is deprecated, so this module will no longer be maintained
# The code is duplicated instead of imported for it to be working in standalone and independent
# of the further development of the maintained mujoco_env.py
# noinspection DuplicatedCode
DEFAULT_SIZE = 480
# noinspection DuplicatedCode
def expand_model_path(model_path: str) -> str:
"""Expands the `model_path` to a full path if it starts with '~' or '.' or '/'."""
if model_path.startswith(".") or model_path.startswith("/"):
fullpath = model_path
elif model_path.startswith("~"):
fullpath = path.expanduser(model_path)
else:
fullpath = path.join(path.dirname(__file__), "assets", model_path)
if not path.exists(fullpath):
raise OSError(f"File {fullpath} does not exist")
return fullpath
# noinspection DuplicatedCode
class BaseMujocoPyEnv(gym.Env[NDArray[np.float64], NDArray[np.float32]]):
"""Superclass for all MuJoCo environments."""
def __init__(
self,
model_path,
frame_skip,
observation_space: Optional[Space],
render_mode: Optional[str] = None,
width: int = DEFAULT_SIZE,
height: int = DEFAULT_SIZE,
camera_id: Optional[int] = None,
camera_name: Optional[str] = None,
):
"""Base abstract class for mujoco based environments.
Args:
model_path: Path to the MuJoCo Model.
frame_skip: Number of MuJoCo simulation steps per gym `step()`.
observation_space: The observation space of the environment.
render_mode: The `render_mode` used.
width: The width of the render window.
height: The height of the render window.
camera_id: The camera ID used.
camera_name: The name of the camera used (can not be used in conjunction with `camera_id`).
Raises:
OSError: when the `model_path` does not exist.
error.DependencyNotInstalled: When `mujoco` is not installed.
"""
self.fullpath = expand_model_path(model_path)
self.width = width
self.height = height
# may use width and height
self.model, self.data = self._initialize_simulation()
self.init_qpos = self.data.qpos.ravel().copy()
self.init_qvel = self.data.qvel.ravel().copy()
self.frame_skip = frame_skip
assert self.metadata["render_modes"] == [
"human",
"rgb_array",
"depth_array",
], self.metadata["render_modes"]
if "render_fps" in self.metadata:
assert (
int(np.round(1.0 / self.dt)) == self.metadata["render_fps"]
), f'Expected value: {int(np.round(1.0 / self.dt))}, Actual value: {self.metadata["render_fps"]}'
if observation_space is not None:
self.observation_space = observation_space
self._set_action_space()
self.render_mode = render_mode
self.camera_name = camera_name
self.camera_id = camera_id
def _set_action_space(self):
bounds = self.model.actuator_ctrlrange.copy().astype(np.float32)
low, high = bounds.T
self.action_space = spaces.Box(low=low, high=high, dtype=np.float32)
return self.action_space
# methods to override:
# ----------------------------
def step(
self, action: NDArray[np.float32]
) -> Tuple[NDArray[np.float64], np.float64, bool, bool, Dict[str, np.float64]]:
raise NotImplementedError
def reset_model(self) -> NDArray[np.float64]:
"""
Reset the robot degrees of freedom (qpos and qvel).
Implement this in each subclass.
"""
raise NotImplementedError
def _initialize_simulation(self) -> Tuple[Any, Any]:
"""
Initialize MuJoCo simulation data structures mjModel and mjData.
"""
raise NotImplementedError
def _reset_simulation(self) -> None:
"""
Reset MuJoCo simulation data structures, mjModel and mjData.
"""
raise NotImplementedError
def _step_mujoco_simulation(self, ctrl, n_frames) -> None:
"""
Step over the MuJoCo simulation.
"""
raise NotImplementedError
def render(self) -> Union[NDArray[np.float64], None]:
"""
Render a frame from the MuJoCo simulation as specified by the render_mode.
"""
raise NotImplementedError
# -----------------------------
def _get_reset_info(self) -> Dict[str, float]:
"""Function that generates the `info` that is returned during a `reset()`."""
return {}
def reset(
self,
*,
seed: Optional[int] = None,
options: Optional[dict] = None,
):
super().reset(seed=seed)
self._reset_simulation()
ob = self.reset_model()
info = self._get_reset_info()
if self.render_mode == "human":
self.render()
return ob, info
def set_state(self, qpos, qvel) -> None:
"""
Set the joints position qpos and velocity qvel of the model. Override this method depending on the MuJoCo bindings used.
"""
assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,)
@property
def dt(self) -> float:
return self.model.opt.timestep * self.frame_skip
def do_simulation(self, ctrl, n_frames) -> None:
"""
Step the simulation n number of frames and applying a control action.
"""
# Check control input is contained in the action space
if np.array(ctrl).shape != (self.model.nu,):
raise ValueError(
f"Action dimension mismatch. Expected {(self.model.nu,)}, found {np.array(ctrl).shape}"
)
self._step_mujoco_simulation(ctrl, n_frames)
def close(self):
"""Close all processes like rendering contexts"""
raise NotImplementedError
def get_body_com(self, body_name) -> NDArray[np.float64]:
"""Return the cartesian position of a body frame"""
raise NotImplementedError
def state_vector(self) -> NDArray[np.float64]:
"""Return the position and velocity joint states of the model"""
return np.concatenate([self.data.qpos.flat, self.data.qvel.flat])
class MuJocoPyEnv(BaseMujocoPyEnv):
def __init__(
self,
model_path: str,
frame_skip: int,
observation_space: Space,
render_mode: Optional[str] = None,
width: int = DEFAULT_SIZE,
height: int = DEFAULT_SIZE,
camera_id: Optional[int] = None,
camera_name: Optional[str] = None,
):
if MUJOCO_PY_IMPORT_ERROR is not None:
raise error.DependencyNotInstalled(
f"{MUJOCO_PY_IMPORT_ERROR}. "
"Could not import mujoco_py, which is needed for MuJoCo environments older than V4",
"You could either use a newer version of the environments, or install the (deprecated) mujoco-py package"
"following the instructions on their GitHub page.",
)
logger.deprecation(
"This version of the mujoco environments depends "
"on the mujoco-py bindings, which are no longer maintained "
"and may stop working. Please upgrade to the v5 or v4 versions of "
"the environments (which depend on the mujoco python bindings instead), unless "
"you are trying to precisely replicate previous works)."
)
self.viewer = None
self._viewers = {}
super().__init__(
model_path,
frame_skip,
observation_space,
render_mode,
width,
height,
camera_id,
camera_name,
)
def _initialize_simulation(self):
model = mujoco_py.load_model_from_path(self.fullpath)
self.sim = mujoco_py.MjSim(model)
data = self.sim.data
return model, data
def _reset_simulation(self):
self.sim.reset()
def set_state(self, qpos, qvel):
super().set_state(qpos, qvel)
state = self.sim.get_state()
state = mujoco_py.MjSimState(state.time, qpos, qvel, state.act, state.udd_state)
self.sim.set_state(state)
self.sim.forward()
def get_body_com(self, body_name):
return self.data.get_body_xpos(body_name)
def _step_mujoco_simulation(self, ctrl, n_frames):
self.sim.data.ctrl[:] = ctrl
for _ in range(n_frames):
self.sim.step()
def render(self):
if self.render_mode is None:
assert self.spec is not None
gym.logger.warn(
"You are calling render method without specifying any render mode. "
"You can specify the render_mode at initialization, "
f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
)
return
width, height = self.width, self.height
camera_name, camera_id = self.camera_name, self.camera_id
if self.render_mode in {"rgb_array", "depth_array"}:
if camera_id is not None and camera_name is not None:
raise ValueError(
"Both `camera_id` and `camera_name` cannot be"
" specified at the same time."
)
no_camera_specified = camera_name is None and camera_id is None
if no_camera_specified:
camera_name = "track"
if camera_id is None and camera_name in self.model._camera_name2id:
if camera_name in self.model._camera_name2id:
camera_id = self.model.camera_name2id(camera_name)
self._get_viewer(self.render_mode).render(
width, height, camera_id=camera_id
)
if self.render_mode == "rgb_array":
data = self._get_viewer(self.render_mode).read_pixels(
width, height, depth=False
)
# original image is upside-down, so flip it
return data[::-1, :, :]
elif self.render_mode == "depth_array":
self._get_viewer(self.render_mode).render(width, height)
# Extract depth part of the read_pixels() tuple
data = self._get_viewer(self.render_mode).read_pixels(
width, height, depth=True
)[1]
# original image is upside-down, so flip it
return data[::-1, :]
elif self.render_mode == "human":
self._get_viewer(self.render_mode).render()
def _get_viewer(
self, mode
) -> Union["mujoco_py.MjViewer", "mujoco_py.MjRenderContextOffscreen"]:
self.viewer = self._viewers.get(mode)
if self.viewer is None:
if mode == "human":
self.viewer = mujoco_py.MjViewer(self.sim)
elif mode in {"rgb_array", "depth_array"}:
self.viewer = mujoco_py.MjRenderContextOffscreen(self.sim, -1)
else:
raise AttributeError(
f"Unknown mode: {mode}, expected modes: {self.metadata['render_modes']}"
)
self.viewer_setup()
self._viewers[mode] = self.viewer
return self.viewer
def close(self):
if self.viewer is not None:
self.viewer = None
self._viewers = {}
def viewer_setup(self):
"""
This method is called when the viewer is initialized.
Optionally implement this method, if you need to tinker with camera position and so forth.
"""
raise NotImplementedError

View File

@@ -1,85 +0,0 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv
from gymnasium.spaces import Box
class PusherEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 20,
}
def __init__(self, **kwargs):
utils.EzPickle.__init__(self, **kwargs)
observation_space = Box(low=-np.inf, high=np.inf, shape=(23,), dtype=np.float64)
MuJocoPyEnv.__init__(
self, "pusher.xml", 5, observation_space=observation_space, **kwargs
)
def step(self, a):
vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm")
vec_2 = self.get_body_com("object") - self.get_body_com("goal")
reward_near = -np.linalg.norm(vec_1)
reward_dist = -np.linalg.norm(vec_2)
reward_ctrl = -np.square(a).sum()
reward = reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near
self.do_simulation(a, self.frame_skip)
if self.render_mode == "human":
self.render()
ob = self._get_obs()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return (
ob,
reward,
False,
False,
dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl),
)
def viewer_setup(self):
assert self.viewer is not None
self.viewer.cam.trackbodyid = -1
self.viewer.cam.distance = 4.0
def reset_model(self):
qpos = self.init_qpos
self.goal_pos = np.asarray([0, 0])
while True:
self.cylinder_pos = np.concatenate(
[
self.np_random.uniform(low=-0.3, high=0, size=1),
self.np_random.uniform(low=-0.2, high=0.2, size=1),
]
)
if np.linalg.norm(self.cylinder_pos - self.goal_pos) > 0.17:
break
qpos[-4:-2] = self.cylinder_pos
qpos[-2:] = self.goal_pos
qvel = self.init_qvel + self.np_random.uniform(
low=-0.005, high=0.005, size=self.model.nv
)
qvel[-4:] = 0
self.set_state(qpos, qvel)
return self._get_obs()
def _get_obs(self):
return np.concatenate(
[
self.sim.data.qpos.flat[:7],
self.sim.data.qvel.flat[:7],
self.get_body_com("tips_arm"),
self.get_body_com("object"),
self.get_body_com("goal"),
]
)

View File

@@ -1,76 +0,0 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv
from gymnasium.spaces import Box
class ReacherEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 50,
}
def __init__(self, **kwargs):
utils.EzPickle.__init__(self, **kwargs)
observation_space = Box(low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64)
MuJocoPyEnv.__init__(
self, "reacher.xml", 2, observation_space=observation_space, **kwargs
)
def step(self, a):
vec = self.get_body_com("fingertip") - self.get_body_com("target")
reward_dist = -np.linalg.norm(vec)
reward_ctrl = -np.square(a).sum()
reward = reward_dist + reward_ctrl
self.do_simulation(a, self.frame_skip)
if self.render_mode == "human":
self.render()
ob = self._get_obs()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return (
ob,
reward,
False,
False,
dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl),
)
def viewer_setup(self):
assert self.viewer is not None
self.viewer.cam.trackbodyid = 0
def reset_model(self):
qpos = (
self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq)
+ self.init_qpos
)
while True:
self.goal = self.np_random.uniform(low=-0.2, high=0.2, size=2)
if np.linalg.norm(self.goal) < 0.2:
break
qpos[-2:] = self.goal
qvel = self.init_qvel + self.np_random.uniform(
low=-0.005, high=0.005, size=self.model.nv
)
qvel[-2:] = 0
self.set_state(qpos, qvel)
return self._get_obs()
def _get_obs(self):
theta = self.sim.data.qpos.flat[:2]
return np.concatenate(
[
np.cos(theta),
np.sin(theta),
self.sim.data.qpos.flat[2:],
self.sim.data.qvel.flat[:2],
self.get_body_com("fingertip") - self.get_body_com("target"),
]
)

View File

@@ -1,60 +0,0 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv
from gymnasium.spaces import Box
class SwimmerEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 25,
}
def __init__(self, **kwargs):
observation_space = Box(low=-np.inf, high=np.inf, shape=(8,), dtype=np.float64)
MuJocoPyEnv.__init__(
self, "swimmer.xml", 4, observation_space=observation_space, **kwargs
)
utils.EzPickle.__init__(self, **kwargs)
def step(self, a):
ctrl_cost_coeff = 0.0001
xposbefore = self.sim.data.qpos[0]
self.do_simulation(a, self.frame_skip)
xposafter = self.sim.data.qpos[0]
reward_fwd = (xposafter - xposbefore) / self.dt
reward_ctrl = -ctrl_cost_coeff * np.square(a).sum()
reward = reward_fwd + reward_ctrl
ob = self._get_obs()
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return (
ob,
reward,
False,
False,
dict(reward_fwd=reward_fwd, reward_ctrl=reward_ctrl),
)
def _get_obs(self):
qpos = self.sim.data.qpos
qvel = self.sim.data.qvel
return np.concatenate([qpos.flat[2:], qvel.flat])
def reset_model(self):
self.set_state(
self.init_qpos
+ self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq),
self.init_qvel
+ self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nv),
)
return self._get_obs()

View File

@@ -1,129 +0,0 @@
__credits__ = ["Rushiv Arora"]
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv
from gymnasium.spaces import Box
DEFAULT_CAMERA_CONFIG = {}
class SwimmerEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 25,
}
def __init__(
self,
xml_file="swimmer.xml",
forward_reward_weight=1.0,
ctrl_cost_weight=1e-4,
reset_noise_scale=0.1,
exclude_current_positions_from_observation=True,
**kwargs,
):
utils.EzPickle.__init__(
self,
xml_file,
forward_reward_weight,
ctrl_cost_weight,
reset_noise_scale,
exclude_current_positions_from_observation,
**kwargs,
)
self._forward_reward_weight = forward_reward_weight
self._ctrl_cost_weight = ctrl_cost_weight
self._reset_noise_scale = reset_noise_scale
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation
)
if exclude_current_positions_from_observation:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(8,), dtype=np.float64
)
else:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(10,), dtype=np.float64
)
MuJocoPyEnv.__init__(
self, xml_file, 4, observation_space=observation_space, **kwargs
)
def control_cost(self, action):
control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
return control_cost
def step(self, action):
xy_position_before = self.sim.data.qpos[0:2].copy()
self.do_simulation(action, self.frame_skip)
xy_position_after = self.sim.data.qpos[0:2].copy()
xy_velocity = (xy_position_after - xy_position_before) / self.dt
x_velocity, y_velocity = xy_velocity
forward_reward = self._forward_reward_weight * x_velocity
ctrl_cost = self.control_cost(action)
observation = self._get_obs()
reward = forward_reward - ctrl_cost
info = {
"reward_fwd": forward_reward,
"reward_ctrl": -ctrl_cost,
"x_position": xy_position_after[0],
"y_position": xy_position_after[1],
"distance_from_origin": np.linalg.norm(xy_position_after, ord=2),
"x_velocity": x_velocity,
"y_velocity": y_velocity,
"forward_reward": forward_reward,
}
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, False, False, info
def _get_obs(self):
position = self.sim.data.qpos.flat.copy()
velocity = self.sim.data.qvel.flat.copy()
if self._exclude_current_positions_from_observation:
position = position[2:]
observation = np.concatenate([position, velocity]).ravel()
return observation
def reset_model(self):
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = self.init_qvel + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nv
)
self.set_state(qpos, qvel)
observation = self._get_obs()
return observation
def viewer_setup(self):
assert self.viewer is not None
for key, value in DEFAULT_CAMERA_CONFIG.items():
if isinstance(value, np.ndarray):
getattr(self.viewer.cam, key)[:] = value
else:
setattr(self.viewer.cam, key, value)

View File

@@ -1,61 +0,0 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv
from gymnasium.spaces import Box
class Walker2dEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 125,
}
def __init__(self, **kwargs):
observation_space = Box(low=-np.inf, high=np.inf, shape=(17,), dtype=np.float64)
MuJocoPyEnv.__init__(
self, "walker2d.xml", 4, observation_space=observation_space, **kwargs
)
utils.EzPickle.__init__(self, **kwargs)
def step(self, a):
posbefore = self.sim.data.qpos[0]
self.do_simulation(a, self.frame_skip)
posafter, height, ang = self.sim.data.qpos[0:3]
alive_bonus = 1.0
reward = (posafter - posbefore) / self.dt
reward += alive_bonus
reward -= 1e-3 * np.square(a).sum()
terminated = not (height > 0.8 and height < 2.0 and ang > -1.0 and ang < 1.0)
ob = self._get_obs()
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return ob, reward, terminated, False, {}
def _get_obs(self):
qpos = self.sim.data.qpos
qvel = self.sim.data.qvel
return np.concatenate([qpos[1:], np.clip(qvel, -10, 10)]).ravel()
def reset_model(self):
self.set_state(
self.init_qpos
+ self.np_random.uniform(low=-0.005, high=0.005, size=self.model.nq),
self.init_qvel
+ self.np_random.uniform(low=-0.005, high=0.005, size=self.model.nv),
)
return self._get_obs()
def viewer_setup(self):
assert self.viewer is not None
self.viewer.cam.trackbodyid = 2
self.viewer.cam.distance = self.model.stat.extent * 0.5
self.viewer.cam.lookat[2] = 1.15
self.viewer.cam.elevation = -20

View File

@@ -1,168 +0,0 @@
import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv
from gymnasium.spaces import Box
DEFAULT_CAMERA_CONFIG = {
"trackbodyid": 2,
"distance": 4.0,
"lookat": np.array((0.0, 0.0, 1.15)),
"elevation": -20.0,
}
class Walker2dEnv(MuJocoPyEnv, utils.EzPickle):
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 125,
}
def __init__(
self,
xml_file="walker2d.xml",
forward_reward_weight=1.0,
ctrl_cost_weight=1e-3,
healthy_reward=1.0,
terminate_when_unhealthy=True,
healthy_z_range=(0.8, 2.0),
healthy_angle_range=(-1.0, 1.0),
reset_noise_scale=5e-3,
exclude_current_positions_from_observation=True,
**kwargs,
):
utils.EzPickle.__init__(
self,
xml_file,
forward_reward_weight,
ctrl_cost_weight,
healthy_reward,
terminate_when_unhealthy,
healthy_z_range,
healthy_angle_range,
reset_noise_scale,
exclude_current_positions_from_observation,
**kwargs,
)
self._forward_reward_weight = forward_reward_weight
self._ctrl_cost_weight = ctrl_cost_weight
self._healthy_reward = healthy_reward
self._terminate_when_unhealthy = terminate_when_unhealthy
self._healthy_z_range = healthy_z_range
self._healthy_angle_range = healthy_angle_range
self._reset_noise_scale = reset_noise_scale
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation
)
if exclude_current_positions_from_observation:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(17,), dtype=np.float64
)
else:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(18,), dtype=np.float64
)
MuJocoPyEnv.__init__(
self, xml_file, 4, observation_space=observation_space, **kwargs
)
@property
def healthy_reward(self):
return (
float(self.is_healthy or self._terminate_when_unhealthy)
* self._healthy_reward
)
def control_cost(self, action):
control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
return control_cost
@property
def is_healthy(self):
z, angle = self.sim.data.qpos[1:3]
min_z, max_z = self._healthy_z_range
min_angle, max_angle = self._healthy_angle_range
healthy_z = min_z < z < max_z
healthy_angle = min_angle < angle < max_angle
is_healthy = healthy_z and healthy_angle
return is_healthy
@property
def terminated(self):
terminated = not self.is_healthy if self._terminate_when_unhealthy else False
return terminated
def _get_obs(self):
position = self.sim.data.qpos.flat.copy()
velocity = np.clip(self.sim.data.qvel.flat.copy(), -10, 10)
if self._exclude_current_positions_from_observation:
position = position[1:]
observation = np.concatenate((position, velocity)).ravel()
return observation
def step(self, action):
x_position_before = self.sim.data.qpos[0]
self.do_simulation(action, self.frame_skip)
x_position_after = self.sim.data.qpos[0]
x_velocity = (x_position_after - x_position_before) / self.dt
ctrl_cost = self.control_cost(action)
forward_reward = self._forward_reward_weight * x_velocity
healthy_reward = self.healthy_reward
rewards = forward_reward + healthy_reward
costs = ctrl_cost
observation = self._get_obs()
reward = rewards - costs
terminated = self.terminated
info = {
"x_position": x_position_after,
"x_velocity": x_velocity,
}
if self.render_mode == "human":
self.render()
# truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
return observation, reward, terminated, False, info
def reset_model(self):
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = self.init_qvel + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nv
)
self.set_state(qpos, qvel)
observation = self._get_obs()
return observation
def viewer_setup(self):
assert self.viewer is not None
for key, value in DEFAULT_CAMERA_CONFIG.items():
if isinstance(value, np.ndarray):
getattr(self.viewer.cam, key)[:] = value
else:
setattr(self.viewer.cam, key, value)

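With this file deleted, the mujoco-py based Walker2d versions can no longer be created from `gymnasium` itself. A hedged sketch of instantiating them through the `gymnasium-robotics` package instead; this assumes that importing `gymnasium_robotics` registers the legacy IDs under the same names, which should be confirmed against that package's documentation:

````python
# Sketch: creating a legacy mujoco-py environment after the migration.
# Assumption: importing gymnasium_robotics registers the old v2/v3 IDs;
# the authoritative environment names are defined by gymnasium-robotics.
import gymnasium as gym
import gymnasium_robotics  # noqa: F401  (side effect: registers environments)

env = gym.make("Walker2d-v3")
obs, info = env.reset(seed=0)
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
env.close()
````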
View File

@@ -36,11 +36,6 @@ atari = ["ale_py >=0.9"]
box2d = ["box2d-py ==2.3.5", "pygame >=2.1.3", "swig ==4.*"]
classic-control = ["pygame >=2.1.3"]
classic_control = ["pygame >=2.1.3"] # kept for backward compatibility
mujoco-py = ["mujoco-py >=2.1,<2.2", "cython<3"]
mujoco_py = [
"mujoco-py >=2.1,<2.2",
"cython<3",
] # kept for backward compatibility
mujoco = ["mujoco >=2.1.5", "imageio >=2.14.1", "packaging >=23.0"]
toy-text = ["pygame >=2.1.3"]
toy_text = ["pygame >=2.1.3"] # kept for backward compatibility
@@ -70,9 +65,6 @@ all = [
"swig ==4.*",
# classic-control
"pygame >=2.1.3",
# mujoco-py
"mujoco-py >=2.1,<2.2",
"cython <3",
# mujoco
"mujoco >=2.1.5",
"imageio >=2.14.1",

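With the `mujoco-py`/`mujoco_py` extras removed here, the only MuJoCo extra left in `gymnasium` is `mujoco`, which covers the maintained v4/v5 environments. A minimal install sketch:

````bash
# Installs gymnasium together with the maintained MuJoCo bindings (mujoco >=2.1.5).
pip install "gymnasium[mujoco]"
````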
View File

@@ -1,49 +1,11 @@
import numpy as np
import pytest
import gymnasium as gym
from gymnasium import envs
from gymnasium.envs.registration import EnvSpec
from tests.envs.utils import mujoco_testing_env_specs
EPS = 1e-6
def verify_environments_match(
old_env_id: str, new_env_id: str, seed: int = 1, num_actions: int = 1000
):
"""Verifies with two environment ids (old and new) are identical in obs, reward and done
(except info where all old info must be contained in new info)."""
old_env = envs.make(old_env_id)
new_env = envs.make(new_env_id)
old_reset_obs, old_info = old_env.reset(seed=seed)
new_reset_obs, new_info = new_env.reset(seed=seed)
np.testing.assert_allclose(old_reset_obs, new_reset_obs)
for i in range(num_actions):
action = old_env.action_space.sample()
old_obs, old_reward, old_terminated, old_truncated, old_info = old_env.step(
action
)
new_obs, new_reward, new_terminated, new_truncated, new_info = new_env.step(
action
)
np.testing.assert_allclose(old_obs, new_obs, atol=EPS)
np.testing.assert_allclose(old_reward, new_reward, atol=EPS)
np.testing.assert_equal(old_terminated, new_terminated)
np.testing.assert_equal(old_truncated, new_truncated)
for key in old_info:
np.testing.assert_allclose(old_info[key], new_info[key], atol=EPS)
if old_terminated or old_truncated:
break
EXCLUDE_POS_FROM_OBS = [
"Ant",
"HalfCheetah",
@@ -101,23 +63,3 @@ def test_obs_space_mujoco_environments(env_spec: EnvSpec):
assert env.observation_space.contains(
step_obs
), f"Observation returned by step(action) of {env_spec.id} is not contained in the default observation space {env.observation_space} when using contact forces."
MUJOCO_V2_V3_ENVS = [
spec.name
for spec in mujoco_testing_env_specs
if spec.version == 2 and f"{spec.name}-v3" in gym.envs.registry
]
@pytest.mark.parametrize("env_name", MUJOCO_V2_V3_ENVS)
def test_mujoco_v2_to_v3_conversion(env_name: str):
"""Checks that all v2 mujoco environments are the same as v3 environments."""
verify_environments_match(f"{env_name}-v2", f"{env_name}-v3")
@pytest.mark.parametrize("env_name", MUJOCO_V2_V3_ENVS)
def test_mujoco_incompatible_v3_to_v2(env_name: str):
"""Checks that the v3 environment are slightly different from v2, (v3 has additional info keys that v2 does not)."""
with pytest.raises(KeyError):
verify_environments_match(f"{env_name}-v3", f"{env_name}-v2")

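For context, the `verify_environments_match` helper removed above was only exercised by the parity tests deleted in the same hunk; its usage reduced to the call below (the defaults `seed=1` and `num_actions=1000` come from its signature):

````python
# Reconstructed usage of the removed helper defined above: each v2 environment
# was compared step-by-step against its v3 counterpart under a fixed seed.
verify_environments_match("Hopper-v2", "Hopper-v3", seed=1, num_actions=1000)
````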
View File

@@ -7,7 +7,6 @@ import pytest
import gymnasium as gym
from gymnasium.envs.mujoco.mujoco_env import MujocoEnv
from gymnasium.envs.mujoco.mujoco_py_env import BaseMujocoPyEnv
from gymnasium.envs.mujoco.utils import check_mujoco_reset_state
from gymnasium.error import Error
from gymnasium.utils.env_checker import check_env
@@ -37,18 +36,14 @@ ALL_MUJOCO_ENVS = [
"Ant-v5",
"HalfCheetah-v5",
"HalfCheetah-v4",
"HalfCheetah-v3",
"Hopper-v5",
"Hopper-v4",
"Hopper-v3",
"Humanoid-v5",
"HumanoidStandup-v5",
"Swimmer-v5",
"Swimmer-v4",
"Swimmer-v3",
"Walker2d-v5",
"Walker2d-v4",
"Walker2d-v3",
],
)
def test_verify_info_x_position(env_id: str):
@@ -71,7 +66,6 @@ def test_verify_info_x_position(env_id: str):
"HumanoidStandup-v5",
"Swimmer-v5",
"Swimmer-v4",
"Swimmer-v3",
],
)
def test_verify_info_y_position(env_id: str):
@@ -86,11 +80,11 @@ def test_verify_info_y_position(env_id: str):
# Note: "HumnanoidStandup-v4" does not have `info`
@pytest.mark.parametrize("env_name", ["HalfCheetah", "Hopper", "Swimmer", "Walker2d"])
@pytest.mark.parametrize("version", ["v5", "v4", "v3"])
@pytest.mark.parametrize("version", ["v5", "v4"])
def test_verify_info_x_velocity(env_name: str, version: str):
"""Asserts that the environment `info['x_velocity']` is properly assigned."""
env = gym.make(f"{env_name}-{version}").unwrapped
assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
assert isinstance(env, (MujocoEnv))
env.reset()
old_x = env.data.qpos[0]
@@ -103,11 +97,11 @@ def test_verify_info_x_velocity(env_name: str, version: str):
# Note: "HumnanoidStandup-v4" does not have `info`
@pytest.mark.parametrize("env_id", ["Swimmer-v5", "Swimmer-v4", "Swimmer-v3"])
@pytest.mark.parametrize("env_id", ["Swimmer-v5", "Swimmer-v4"])
def test_verify_info_y_velocity(env_id: str):
"""Asserts that the environment `info['y_velocity']` is properly assigned."""
env = gym.make(env_id).unwrapped
assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
assert isinstance(env, (MujocoEnv))
env.reset()
old_y = env.data.qpos[1]
@@ -119,11 +113,11 @@ def test_verify_info_y_velocity(env_id: str):
assert vel_y == info["y_velocity"]
@pytest.mark.parametrize("env_id", ["Ant-v5", "Ant-v4", "Ant-v3"])
@pytest.mark.parametrize("env_id", ["Ant-v5", "Ant-v4"])
def test_verify_info_xy_velocity_xpos(env_id: str):
"""Asserts that the environment `info['x/y_velocity']` is properly assigned, for the ant environment which uses kinmatics for the velocity."""
env = gym.make(env_id).unwrapped
assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
assert isinstance(env, (MujocoEnv))
env.reset()
old_xy = env.get_body_com("torso")[:2].copy()
@@ -136,7 +130,7 @@ def test_verify_info_xy_velocity_xpos(env_id: str):
assert vel_y == info["y_velocity"]
@pytest.mark.parametrize("env_id", ["Humanoid-v5", "Humanoid-v4", "Humanoid-v3"])
@pytest.mark.parametrize("env_id", ["Humanoid-v5", "Humanoid-v4"])
def test_verify_info_xy_velocity_com(env_id: str):
"""Asserts that the environment `info['x/y_velocity']` is properly assigned, for the humanoid environment which uses kinmatics of Center Of Mass for the velocity."""
@@ -146,7 +140,7 @@ def test_verify_info_xy_velocity_com(env_id: str):
return (np.sum(mass * xpos, axis=0) / np.sum(mass))[0:2].copy()
env = gym.make(env_id).unwrapped
assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
assert isinstance(env, (MujocoEnv))
env.reset()
old_xy = mass_center(env.model, env.data)
@@ -178,7 +172,7 @@ def test_verify_info_xy_velocity_com(env_id: str):
def test_verify_reward_survive(env_name: str, version: str):
"""Assert that `reward_survive` is 0 on `terminal` states and not 0 on non-`terminal` states."""
env = gym.make(f"{env_name}-{version}", reset_noise_scale=0).unwrapped
assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
assert isinstance(env, (MujocoEnv))
env.reset(seed=0)
env.action_space.seed(1)
@@ -361,7 +355,7 @@ def test_ant_com(version: str):
"""Verify the kinmatic behaviour of the ant."""
# `env` contains `data : MjData` and `model : MjModel`
env = gym.make(f"Ant-{version}").unwrapped
assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
assert isinstance(env, (MujocoEnv))
env.reset() # randomly initializes the `data.qpos` and `data.qvel`, calls mujoco.mj_forward(env.model, env.data)
x_position_before = env.data.qpos[0]
@@ -378,11 +372,11 @@ def test_ant_com(version: str):
assert x_position_after == x_position_after_com, "after failed" # This succeeds
@pytest.mark.parametrize("version", ["v5", "v4", "v3", "v2"])
@pytest.mark.parametrize("version", ["v5", "v4"])
def test_set_state(version: str):
"""Simple Test to verify that `mujocoEnv.set_state()` works correctly."""
env = gym.make(f"Hopper-{version}").unwrapped
assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
assert isinstance(env, (MujocoEnv))
env.reset()
new_qpos = np.array(
[0.00136962, 1.24769787, -0.00459026, -0.00483472, 0.0031327, 0.00412756]
@@ -400,12 +394,12 @@ def test_set_state(version: str):
# Note: Humanoid-v4/v3 fails this test
# Note: v2 does not have `info`
@pytest.mark.parametrize(
"env_id", ["Ant-v5", "Humanoid-v5", "Swimmer-v5", "Swimmer-v4", "Swimmer-v3"]
"env_id", ["Ant-v5", "Humanoid-v5", "Swimmer-v5", "Swimmer-v4"]
)
def test_distance_from_origin_info(env_id: str):
"""Verify that `info"distance_from_origin"` is correct."""
env = gym.make(env_id).unwrapped
assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
assert isinstance(env, (MujocoEnv))
env.reset()
_, _, _, _, info = env.step(env.action_space.sample())
@@ -487,7 +481,7 @@ def test_reset_info(env_name: str, version: str):
def test_inverted_double_pendulum_max_height(version: str):
"""Verify the max height of Inverted Double Pendulum."""
env = gym.make(f"InvertedDoublePendulum-{version}", reset_noise_scale=0).unwrapped
assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
assert isinstance(env, (MujocoEnv))
env.reset()
y = env.data.site_xpos[0][2]
@@ -498,7 +492,7 @@ def test_inverted_double_pendulum_max_height(version: str):
def test_inverted_double_pendulum_max_height_old(version: str):
"""Verify the max height of Inverted Double Pendulum (v4 does not have `reset_noise_scale` argument)."""
env = gym.make(f"InvertedDoublePendulum-{version}").unwrapped
assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
assert isinstance(env, (MujocoEnv))
env.set_state(env.init_qpos, env.init_qvel)
y = env.data.site_xpos[0][2]
@@ -510,7 +504,7 @@ def test_inverted_double_pendulum_max_height_old(version: str):
def test_model_object_count(version: str):
"""Verify that all the objects of the model are loaded, mostly useful for using non-mujoco simulator."""
env = gym.make(f"Ant-{version}").unwrapped
assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
assert isinstance(env, (MujocoEnv))
assert env.model.nq == 15
assert env.model.nv == 14
assert env.model.nu == 8
@@ -521,7 +515,7 @@ def test_model_object_count(version: str):
assert env.model.ntendon == 0
env = gym.make(f"HalfCheetah-{version}").unwrapped
assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
assert isinstance(env, (MujocoEnv))
assert env.model.nq == 9
assert env.model.nv == 9
assert env.model.nu == 6
@@ -532,7 +526,7 @@ def test_model_object_count(version: str):
assert env.model.ntendon == 0
env = gym.make(f"Hopper-{version}").unwrapped
assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
assert isinstance(env, (MujocoEnv))
assert env.model.nq == 6
assert env.model.nv == 6
assert env.model.nu == 3
@@ -543,7 +537,7 @@ def test_model_object_count(version: str):
assert env.model.ntendon == 0
env = gym.make(f"Humanoid-{version}").unwrapped
assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
assert isinstance(env, (MujocoEnv))
assert env.model.nq == 24
assert env.model.nv == 23
assert env.model.nu == 17
@@ -554,7 +548,7 @@ def test_model_object_count(version: str):
assert env.model.ntendon == 2
env = gym.make(f"HumanoidStandup-{version}").unwrapped
assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
assert isinstance(env, (MujocoEnv))
assert env.model.nq == 24
assert env.model.nv == 23
assert env.model.nu == 17
@@ -565,7 +559,7 @@ def test_model_object_count(version: str):
assert env.model.ntendon == 2
env = gym.make(f"InvertedDoublePendulum-{version}").unwrapped
assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
assert isinstance(env, (MujocoEnv))
assert env.model.nq == 3
assert env.model.nv == 3
assert env.model.nu == 1
@@ -576,7 +570,7 @@ def test_model_object_count(version: str):
assert env.model.ntendon == 0
env = gym.make(f"InvertedPendulum-{version}").unwrapped
assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
assert isinstance(env, (MujocoEnv))
assert env.model.nq == 2
assert env.model.nv == 2
assert env.model.nu == 1
@@ -588,7 +582,7 @@ def test_model_object_count(version: str):
if not (version == "v4" and mujoco.__version__ >= "3.0.0"):
env = gym.make(f"Pusher-{version}").unwrapped
assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
assert isinstance(env, (MujocoEnv))
assert env.model.nq == 11
assert env.model.nv == 11
assert env.model.nu == 7
@@ -607,7 +601,7 @@ def test_model_object_count(version: str):
assert env.model.ntendon == 0
env = gym.make(f"Reacher-{version}").unwrapped
assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
assert isinstance(env, (MujocoEnv))
assert env.model.nq == 4
assert env.model.nv == 4
assert env.model.nu == 2
@@ -619,7 +613,7 @@ def test_model_object_count(version: str):
assert env.model.ntendon == 0
env = gym.make(f"Swimmer-{version}").unwrapped
assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
assert isinstance(env, (MujocoEnv))
assert env.model.nq == 5
assert env.model.nv == 5
assert env.model.nu == 2
@@ -631,7 +625,7 @@ def test_model_object_count(version: str):
assert env.model.ntendon == 0
env = gym.make(f"Walker2d-{version}").unwrapped
assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
assert isinstance(env, MujocoEnv)
assert env.model.nq == 9
assert env.model.nv == 9
assert env.model.nu == 6
@@ -643,7 +637,7 @@ def test_model_object_count(version: str):
# note: fails with `mujoco-mjx==3.0.1`
@pytest.mark.parametrize("version", ["v5", "v4", "v3", "v2"])
@pytest.mark.parametrize("version", ["v5", "v4"])
def test_model_sensors(version: str):
"""Verify that all the sensors of the model are loaded."""
env = gym.make(f"Ant-{version}").unwrapped
@@ -682,25 +676,19 @@ def test_dt():
[
"Ant-v5",
"Ant-v4",
"Ant-v3",
"HalfCheetah-v5",
"HalfCheetah-v4",
"HalfCheetah-v3",
"Hopper-v5",
"Hopper-v4",
"Hopper-v3",
"Humanoid-v5",
"Humanoid-v4",
"Humanoid-v3",
"HumanoidStandup-v5",
"InvertedDoublePendulum-v5",
"InvertedPendulum-v5",
"Swimmer-v5",
"Swimmer-v4",
"Swimmer-v3",
"Walker2d-v5",
"Walker2d-v4",
"Walker2d-v3",
],
)
def test_reset_noise_scale(env_id):

View File

@@ -13,7 +13,6 @@ from tests.envs.utils import all_testing_env_specs, all_testing_initialised_envs
# This runs a smoketest on each officially registered env. We may want
# to also try running environments which are not officially registered.
PASSIVE_CHECK_IGNORE_WARNING = [
r"\x1b\[33mWARN: This version of the mujoco environments depends on the mujoco-py bindings, which are no longer maintained and may stop working\. Please upgrade to the v4 versions of the environments \(which depend on the mujoco python bindings instead\), unless you are trying to precisely replicate previous works\)\.\x1b\[0m",
r"\x1b\[33mWARN: The environment (.*?) is out of date\. You should consider upgrading to version `v(\d)`\.\x1b\[0m",
]
@@ -21,7 +20,6 @@ PASSIVE_CHECK_IGNORE_WARNING = [
CHECK_ENV_IGNORE_WARNINGS = [
f"\x1b[33mWARN: {message}\x1b[0m"
for message in [
"This version of the mujoco environments depends on the mujoco-py bindings, which are no longer maintained and may stop working. Please upgrade to the v4 versions of the environments (which depend on the mujoco python bindings instead), unless you are trying to precisely replicate previous works).",
"A Box observation space minimum value is -infinity. This is probably too low.",
"A Box observation space maximum value is infinity. This is probably too high.",
"For Box action spaces, we recommend using a symmetric and normalized space (range=[-1, 1] or [0, 1]). See https://stable-baselines3.readthedocs.io/en/master/guide/rl_tips.html for more information.",

View File

@@ -43,21 +43,13 @@ def check_rendered(rendered_frame, mode: str):
# We do not check render_mode for some mujoco envs and any old Gym environment wrapped by `GymEnvironment`
render_mode_env_specs = [
spec
for spec in all_testing_env_specs
if "mujoco" not in spec.entry_point or spec.version >= 4
]
render_mode_env_specs = [spec for spec in all_testing_env_specs]
@pytest.mark.parametrize(
"spec", render_mode_env_specs, ids=[spec.id for spec in render_mode_env_specs]
)
def test_render_modes(spec):
"""There is a known issue where rendering a mujoco environment then mujoco-py will cause an error on non-mac based systems.
Therefore, we are only testing with mujoco environments.
"""
env = spec.make()
assert "rgb_array" in env.metadata["render_modes"]