Migrating the MuJoCo environments v2 and v3 to Gymnasium-Robotics (mujoco-py based) (#1392)
parent de2c10ef1a
commit f20e3f4845
@@ -11,22 +11,10 @@ RUN apt-get -y update \
    xvfb unzip patchelf ffmpeg cmake swig \
    && apt-get autoremove -y \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* \
    # Download mujoco
    && mkdir /root/.mujoco \
    && cd /root/.mujoco \
    && wget -qO- 'https://github.com/deepmind/mujoco/releases/download/2.1.0/mujoco210-linux-x86_64.tar.gz' | tar -xzvf -

ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/root/.mujoco/mujoco210/bin"
    && rm -rf /var/lib/apt/lists/*

RUN pip install uv

# Build mujoco-py from source. PyPI installs wheel packages, and Cython won't recompile old file versions in the GitHub Actions CI,
# thus generating the following error: https://github.com/cython/cython/pull/4428
RUN git clone https://github.com/openai/mujoco-py.git \
    && cd mujoco-py \
    && uv pip install --system -e .

COPY . /usr/local/gymnasium/
WORKDIR /usr/local/gymnasium/
@@ -38,7 +38,12 @@ As of October 2021, DeepMind has acquired MuJoCo and has open-sourced it in 2022
Using MuJoCo with Gymnasium requires that the `mujoco` framework be installed (this dependency is installed with the above command).
Instructions for installing the MuJoCo engine can be found on their [website](https://mujoco.org) and [GitHub repository](https://github.com/deepmind/mujoco).

For MuJoCo `v3` environments and older, the `mujoco-py` framework is required (`pip install gymnasium[mujoco-py]`); it can be found in the [GitHub repository](https://github.com/openai/mujoco-py/tree/master/mujoco_py).
MuJoCo `v3` environments and older, which relied on the `mujoco-py` framework, were migrated to the `gymnasium-robotics` package starting with `gymnasium` v1.2. For information on using these older versions, please refer to the `gymnasium-robotics` documentation. The original `mujoco-py` framework can be found in its [GitHub repository](https://github.com/openai/mujoco-py/tree/master/mujoco_py).

The dependencies for the old environment versions can be installed via:
````bash
pip install gymnasium_robotics[mujoco-py]
````
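
A minimal usage sketch, assuming the migrated environments become available after importing `gymnasium_robotics` and registering its environments; the exact environment IDs exposed after the migration should be checked against the `gymnasium-robotics` documentation:

````python
# Minimal sketch, assuming gymnasium-robotics exposes the migrated
# mujoco-py based environments; the exact IDs depend on the release.
import gymnasium as gym
import gymnasium_robotics

gym.register_envs(gymnasium_robotics)  # make the package's environments visible to gym.make

env = gym.make("Hopper-v3")  # hypothetical ID; check the gymnasium-robotics docs
observation, info = env.reset(seed=42)
for _ in range(1000):
    action = env.action_space.sample()
    observation, reward, terminated, truncated, info = env.step(action)
    if terminated or truncated:
        observation, info = env.reset()
env.close()
````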

There are eleven MuJoCo environments (in roughly increasing complexity):

@@ -76,12 +81,12 @@ Environments can be configured by changing the `xml_file` argument and/or by twe
## Versions
Gymnasium includes the following versions of the environments:

| Version | Simulator       | Notes                                                   |
|---------|-----------------|---------------------------------------------------------|
| `v5`    | `mujoco>=2.3.3` | Recommended (most features, the least bugs)             |
| `v4`    | `mujoco>=2.1.3` | Maintained for reproducibility                          |
| `v3`    | `mujoco-py`     | Deprecated, kept for reproducibility (limited support)  |
| `v2`    | `mujoco-py`     | Deprecated, kept for reproducibility (limited support)  |
| Version | Simulator       | Notes                                                                                           |
|---------|-----------------|-------------------------------------------------------------------------------------------------|
| `v5`    | `mujoco>=2.3.3` | Recommended (most features, the least bugs)                                                     |
| `v4`    | `mujoco>=2.1.3` | Maintained for reproducibility                                                                  |
| `v3`    | `mujoco-py`     | Migrated to `gymnasium-robotics` (from `gymnasium` v1.2). Deprecated, kept for reproducibility. |
| `v2`    | `mujoco-py`     | Migrated to `gymnasium-robotics` (from `gymnasium` v1.2). Deprecated, kept for reproducibility. |

For more information, see the section "Version History" for each environment.
@@ -186,13 +186,6 @@ register(

# manipulation

register(
    id="Reacher-v2",
    entry_point="gymnasium.envs.mujoco.reacher:ReacherEnv",
    max_episode_steps=50,
    reward_threshold=-3.75,
)

register(
    id="Reacher-v4",
    entry_point="gymnasium.envs.mujoco.reacher_v4:ReacherEnv",
@@ -207,13 +200,6 @@ register(
    reward_threshold=-3.75,
)

register(
    id="Pusher-v2",
    entry_point="gymnasium.envs.mujoco.pusher:PusherEnv",
    max_episode_steps=100,
    reward_threshold=0.0,
)

register(
    id="Pusher-v4",
    entry_point="gymnasium.envs.mujoco.pusher_v4:PusherEnv",
@@ -230,13 +216,6 @@ register(

# balance

register(
    id="InvertedPendulum-v2",
    entry_point="gymnasium.envs.mujoco.inverted_pendulum:InvertedPendulumEnv",
    max_episode_steps=1000,
    reward_threshold=950.0,
)

register(
    id="InvertedPendulum-v4",
    entry_point="gymnasium.envs.mujoco.inverted_pendulum_v4:InvertedPendulumEnv",
@@ -251,13 +230,6 @@ register(
    reward_threshold=950.0,
)

register(
    id="InvertedDoublePendulum-v2",
    entry_point="gymnasium.envs.mujoco.inverted_double_pendulum:InvertedDoublePendulumEnv",
    max_episode_steps=1000,
    reward_threshold=9100.0,
)

register(
    id="InvertedDoublePendulum-v4",
    entry_point="gymnasium.envs.mujoco.inverted_double_pendulum_v4:InvertedDoublePendulumEnv",
@@ -274,20 +246,6 @@ register(

# runners

register(
    id="HalfCheetah-v2",
    entry_point="gymnasium.envs.mujoco.half_cheetah:HalfCheetahEnv",
    max_episode_steps=1000,
    reward_threshold=4800.0,
)

register(
    id="HalfCheetah-v3",
    entry_point="gymnasium.envs.mujoco.half_cheetah_v3:HalfCheetahEnv",
    max_episode_steps=1000,
    reward_threshold=4800.0,
)

register(
    id="HalfCheetah-v4",
    entry_point="gymnasium.envs.mujoco.half_cheetah_v4:HalfCheetahEnv",
@@ -302,20 +260,6 @@ register(
    reward_threshold=4800.0,
)

register(
    id="Hopper-v2",
    entry_point="gymnasium.envs.mujoco.hopper:HopperEnv",
    max_episode_steps=1000,
    reward_threshold=3800.0,
)

register(
    id="Hopper-v3",
    entry_point="gymnasium.envs.mujoco.hopper_v3:HopperEnv",
    max_episode_steps=1000,
    reward_threshold=3800.0,
)

register(
    id="Hopper-v4",
    entry_point="gymnasium.envs.mujoco.hopper_v4:HopperEnv",
@@ -330,20 +274,6 @@ register(
    reward_threshold=3800.0,
)

register(
    id="Swimmer-v2",
    entry_point="gymnasium.envs.mujoco.swimmer:SwimmerEnv",
    max_episode_steps=1000,
    reward_threshold=360.0,
)

register(
    id="Swimmer-v3",
    entry_point="gymnasium.envs.mujoco.swimmer_v3:SwimmerEnv",
    max_episode_steps=1000,
    reward_threshold=360.0,
)

register(
    id="Swimmer-v4",
    entry_point="gymnasium.envs.mujoco.swimmer_v4:SwimmerEnv",
@@ -358,18 +288,6 @@ register(
    reward_threshold=360.0,
)

register(
    id="Walker2d-v2",
    max_episode_steps=1000,
    entry_point="gymnasium.envs.mujoco.walker2d:Walker2dEnv",
)

register(
    id="Walker2d-v3",
    max_episode_steps=1000,
    entry_point="gymnasium.envs.mujoco.walker2d_v3:Walker2dEnv",
)

register(
    id="Walker2d-v4",
    max_episode_steps=1000,
@@ -382,20 +300,6 @@ register(
    entry_point="gymnasium.envs.mujoco.walker2d_v5:Walker2dEnv",
)

register(
    id="Ant-v2",
    entry_point="gymnasium.envs.mujoco.ant:AntEnv",
    max_episode_steps=1000,
    reward_threshold=6000.0,
)

register(
    id="Ant-v3",
    entry_point="gymnasium.envs.mujoco.ant_v3:AntEnv",
    max_episode_steps=1000,
    reward_threshold=6000.0,
)

register(
    id="Ant-v4",
    entry_point="gymnasium.envs.mujoco.ant_v4:AntEnv",
@@ -410,18 +314,6 @@ register(
    reward_threshold=6000.0,
)

register(
    id="Humanoid-v2",
    entry_point="gymnasium.envs.mujoco.humanoid:HumanoidEnv",
    max_episode_steps=1000,
)

register(
    id="Humanoid-v3",
    entry_point="gymnasium.envs.mujoco.humanoid_v3:HumanoidEnv",
    max_episode_steps=1000,
)

register(
    id="Humanoid-v4",
    entry_point="gymnasium.envs.mujoco.humanoid_v4:HumanoidEnv",
@@ -434,12 +326,6 @@ register(
    max_episode_steps=1000,
)

register(
    id="HumanoidStandup-v2",
    entry_point="gymnasium.envs.mujoco.humanoidstandup:HumanoidStandupEnv",
    max_episode_steps=1000,
)

register(
    id="HumanoidStandup-v4",
    entry_point="gymnasium.envs.mujoco.humanoidstandup_v4:HumanoidStandupEnv",
@@ -1,81 +0,0 @@
import numpy as np

from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv
from gymnasium.spaces import Box


class AntEnv(MuJocoPyEnv, utils.EzPickle):
    metadata = {
        "render_modes": [
            "human",
            "rgb_array",
            "depth_array",
        ],
        "render_fps": 20,
    }

    def __init__(self, **kwargs):
        observation_space = Box(
            low=-np.inf, high=np.inf, shape=(111,), dtype=np.float64
        )
        MuJocoPyEnv.__init__(
            self, "ant.xml", 5, observation_space=observation_space, **kwargs
        )
        utils.EzPickle.__init__(self, **kwargs)

    def step(self, a):
        xposbefore = self.get_body_com("torso")[0]
        self.do_simulation(a, self.frame_skip)
        xposafter = self.get_body_com("torso")[0]

        forward_reward = (xposafter - xposbefore) / self.dt
        ctrl_cost = 0.5 * np.square(a).sum()
        contact_cost = (
            0.5 * 1e-3 * np.sum(np.square(np.clip(self.sim.data.cfrc_ext, -1, 1)))
        )
        survive_reward = 1.0
        reward = forward_reward - ctrl_cost - contact_cost + survive_reward
        state = self.state_vector()
        not_terminated = (
            np.isfinite(state).all() and state[2] >= 0.2 and state[2] <= 1.0
        )
        terminated = not not_terminated
        ob = self._get_obs()

        if self.render_mode == "human":
            self.render()
        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return (
            ob,
            reward,
            terminated,
            False,
            dict(
                reward_forward=forward_reward,
                reward_ctrl=-ctrl_cost,
                reward_contact=-contact_cost,
                reward_survive=survive_reward,
            ),
        )

    def _get_obs(self):
        return np.concatenate(
            [
                self.sim.data.qpos.flat[2:],
                self.sim.data.qvel.flat,
                np.clip(self.sim.data.cfrc_ext, -1, 1).flat,
            ]
        )

    def reset_model(self):
        qpos = self.init_qpos + self.np_random.uniform(
            size=self.model.nq, low=-0.1, high=0.1
        )
        qvel = self.init_qvel + self.np_random.standard_normal(self.model.nv) * 0.1
        self.set_state(qpos, qvel)
        return self._get_obs()

    def viewer_setup(self):
        assert self.viewer is not None
        self.viewer.cam.distance = self.model.stat.extent * 0.5
@@ -1,188 +0,0 @@
import numpy as np

from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv
from gymnasium.spaces import Box


DEFAULT_CAMERA_CONFIG = {
    "distance": 4.0,
}


class AntEnv(MuJocoPyEnv, utils.EzPickle):
    metadata = {
        "render_modes": [
            "human",
            "rgb_array",
            "depth_array",
        ],
        "render_fps": 20,
    }

    def __init__(
        self,
        xml_file="ant.xml",
        ctrl_cost_weight=0.5,
        contact_cost_weight=5e-4,
        healthy_reward=1.0,
        terminate_when_unhealthy=True,
        healthy_z_range=(0.2, 1.0),
        contact_force_range=(-1.0, 1.0),
        reset_noise_scale=0.1,
        exclude_current_positions_from_observation=True,
        **kwargs,
    ):
        utils.EzPickle.__init__(
            self,
            xml_file,
            ctrl_cost_weight,
            contact_cost_weight,
            healthy_reward,
            terminate_when_unhealthy,
            healthy_z_range,
            contact_force_range,
            reset_noise_scale,
            exclude_current_positions_from_observation,
            **kwargs,
        )

        self._ctrl_cost_weight = ctrl_cost_weight
        self._contact_cost_weight = contact_cost_weight

        self._healthy_reward = healthy_reward
        self._terminate_when_unhealthy = terminate_when_unhealthy
        self._healthy_z_range = healthy_z_range

        self._contact_force_range = contact_force_range

        self._reset_noise_scale = reset_noise_scale

        self._exclude_current_positions_from_observation = (
            exclude_current_positions_from_observation
        )

        if exclude_current_positions_from_observation:
            observation_space = Box(
                low=-np.inf, high=np.inf, shape=(111,), dtype=np.float64
            )
        else:
            observation_space = Box(
                low=-np.inf, high=np.inf, shape=(113,), dtype=np.float64
            )

        MuJocoPyEnv.__init__(
            self, xml_file, 5, observation_space=observation_space, **kwargs
        )

    @property
    def healthy_reward(self):
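        # Note: with terminate_when_unhealthy=True the bonus is paid on every
        # step (an unhealthy state ends the episode anyway); with False it is
        # only paid while the ant is healthy.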
        return (
            float(self.is_healthy or self._terminate_when_unhealthy)
            * self._healthy_reward
        )

    def control_cost(self, action):
        control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
        return control_cost

    @property
    def contact_forces(self):
        raw_contact_forces = self.sim.data.cfrc_ext
        min_value, max_value = self._contact_force_range
        contact_forces = np.clip(raw_contact_forces, min_value, max_value)
        return contact_forces

    @property
    def contact_cost(self):
        contact_cost = self._contact_cost_weight * np.sum(
            np.square(self.contact_forces)
        )
        return contact_cost

    @property
    def is_healthy(self):
        state = self.state_vector()
        min_z, max_z = self._healthy_z_range
        is_healthy = np.isfinite(state).all() and min_z <= state[2] <= max_z
        return is_healthy

    @property
    def terminated(self):
        terminated = not self.is_healthy if self._terminate_when_unhealthy else False
        return terminated

    def step(self, action):
        xy_position_before = self.get_body_com("torso")[:2].copy()
        self.do_simulation(action, self.frame_skip)
        xy_position_after = self.get_body_com("torso")[:2].copy()

        xy_velocity = (xy_position_after - xy_position_before) / self.dt
        x_velocity, y_velocity = xy_velocity

        ctrl_cost = self.control_cost(action)
        contact_cost = self.contact_cost

        forward_reward = x_velocity
        healthy_reward = self.healthy_reward

        rewards = forward_reward + healthy_reward
        costs = ctrl_cost + contact_cost

        reward = rewards - costs
        terminated = self.terminated
        observation = self._get_obs()
        info = {
            "reward_forward": forward_reward,
            "reward_ctrl": -ctrl_cost,
            "reward_contact": -contact_cost,
            "reward_survive": healthy_reward,
            "x_position": xy_position_after[0],
            "y_position": xy_position_after[1],
            "distance_from_origin": np.linalg.norm(xy_position_after, ord=2),
            "x_velocity": x_velocity,
            "y_velocity": y_velocity,
            "forward_reward": forward_reward,
        }

        if self.render_mode == "human":
            self.render()
        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return observation, reward, terminated, False, info

    def _get_obs(self):
        position = self.sim.data.qpos.flat.copy()
        velocity = self.sim.data.qvel.flat.copy()
        contact_force = self.contact_forces.flat.copy()

        if self._exclude_current_positions_from_observation:
            position = position[2:]

        observations = np.concatenate((position, velocity, contact_force))

        return observations

    def reset_model(self):
        noise_low = -self._reset_noise_scale
        noise_high = self._reset_noise_scale

        qpos = self.init_qpos + self.np_random.uniform(
            low=noise_low, high=noise_high, size=self.model.nq
        )
        qvel = (
            self.init_qvel
            + self._reset_noise_scale * self.np_random.standard_normal(self.model.nv)
        )
        self.set_state(qpos, qvel)

        observation = self._get_obs()

        return observation

    def viewer_setup(self):
        assert self.viewer is not None
        for key, value in DEFAULT_CAMERA_CONFIG.items():
            if isinstance(value, np.ndarray):
                getattr(self.viewer.cam, key)[:] = value
            else:
                setattr(self.viewer.cam, key, value)
@@ -1,65 +0,0 @@
import numpy as np

from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv
from gymnasium.spaces import Box


class HalfCheetahEnv(MuJocoPyEnv, utils.EzPickle):
    metadata = {
        "render_modes": [
            "human",
            "rgb_array",
            "depth_array",
        ],
        "render_fps": 20,
    }

    def __init__(self, **kwargs):
        observation_space = Box(low=-np.inf, high=np.inf, shape=(17,), dtype=np.float64)
        MuJocoPyEnv.__init__(
            self, "half_cheetah.xml", 5, observation_space=observation_space, **kwargs
        )
        utils.EzPickle.__init__(self, **kwargs)

    def step(self, action):
        xposbefore = self.sim.data.qpos[0]
        self.do_simulation(action, self.frame_skip)
        xposafter = self.sim.data.qpos[0]

        ob = self._get_obs()
        reward_ctrl = -0.1 * np.square(action).sum()
        reward_run = (xposafter - xposbefore) / self.dt
        reward = reward_ctrl + reward_run
        terminated = False

        if self.render_mode == "human":
            self.render()
        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return (
            ob,
            reward,
            terminated,
            False,
            dict(reward_run=reward_run, reward_ctrl=reward_ctrl),
        )

    def _get_obs(self):
        return np.concatenate(
            [
                self.sim.data.qpos.flat[1:],
                self.sim.data.qvel.flat,
            ]
        )

    def reset_model(self):
        qpos = self.init_qpos + self.np_random.uniform(
            low=-0.1, high=0.1, size=self.model.nq
        )
        qvel = self.init_qvel + self.np_random.standard_normal(self.model.nv) * 0.1
        self.set_state(qpos, qvel)
        return self._get_obs()

    def viewer_setup(self):
        assert self.viewer is not None
        self.viewer.cam.distance = self.model.stat.extent * 0.5
@@ -1,129 +0,0 @@
__credits__ = ["Rushiv Arora"]

import numpy as np

from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv
from gymnasium.spaces import Box


DEFAULT_CAMERA_CONFIG = {
    "distance": 4.0,
}


class HalfCheetahEnv(MuJocoPyEnv, utils.EzPickle):
    metadata = {
        "render_modes": [
            "human",
            "rgb_array",
            "depth_array",
        ],
        "render_fps": 20,
    }

    def __init__(
        self,
        xml_file="half_cheetah.xml",
        forward_reward_weight=1.0,
        ctrl_cost_weight=0.1,
        reset_noise_scale=0.1,
        exclude_current_positions_from_observation=True,
        **kwargs,
    ):
        utils.EzPickle.__init__(
            self,
            xml_file,
            forward_reward_weight,
            ctrl_cost_weight,
            reset_noise_scale,
            exclude_current_positions_from_observation,
            **kwargs,
        )

        self._forward_reward_weight = forward_reward_weight

        self._ctrl_cost_weight = ctrl_cost_weight

        self._reset_noise_scale = reset_noise_scale

        self._exclude_current_positions_from_observation = (
            exclude_current_positions_from_observation
        )

        if exclude_current_positions_from_observation:
            observation_space = Box(
                low=-np.inf, high=np.inf, shape=(17,), dtype=np.float64
            )
        else:
            observation_space = Box(
                low=-np.inf, high=np.inf, shape=(18,), dtype=np.float64
            )

        MuJocoPyEnv.__init__(
            self, xml_file, 5, observation_space=observation_space, **kwargs
        )

    def control_cost(self, action):
        control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
        return control_cost

    def step(self, action):
        x_position_before = self.sim.data.qpos[0]
        self.do_simulation(action, self.frame_skip)
        x_position_after = self.sim.data.qpos[0]
        x_velocity = (x_position_after - x_position_before) / self.dt

        ctrl_cost = self.control_cost(action)

        forward_reward = self._forward_reward_weight * x_velocity

        observation = self._get_obs()
        reward = forward_reward - ctrl_cost
        terminated = False
        info = {
            "x_position": x_position_after,
            "x_velocity": x_velocity,
            "reward_run": forward_reward,
            "reward_ctrl": -ctrl_cost,
        }

        if self.render_mode == "human":
            self.render()
        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return observation, reward, terminated, False, info

    def _get_obs(self):
        position = self.sim.data.qpos.flat.copy()
        velocity = self.sim.data.qvel.flat.copy()

        if self._exclude_current_positions_from_observation:
            position = position[1:]

        observation = np.concatenate((position, velocity)).ravel()
        return observation

    def reset_model(self):
        noise_low = -self._reset_noise_scale
        noise_high = self._reset_noise_scale

        qpos = self.init_qpos + self.np_random.uniform(
            low=noise_low, high=noise_high, size=self.model.nq
        )
        qvel = (
            self.init_qvel
            + self._reset_noise_scale * self.np_random.standard_normal(self.model.nv)
        )

        self.set_state(qpos, qvel)

        observation = self._get_obs()
        return observation

    def viewer_setup(self):
        assert self.viewer is not None
        for key, value in DEFAULT_CAMERA_CONFIG.items():
            if isinstance(value, np.ndarray):
                getattr(self.viewer.cam, key)[:] = value
            else:
                setattr(self.viewer.cam, key, value)
@@ -1,68 +0,0 @@
import numpy as np

from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv
from gymnasium.spaces import Box


class HopperEnv(MuJocoPyEnv, utils.EzPickle):
    metadata = {
        "render_modes": [
            "human",
            "rgb_array",
            "depth_array",
        ],
        "render_fps": 125,
    }

    def __init__(self, **kwargs):
        observation_space = Box(low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64)
        MuJocoPyEnv.__init__(
            self, "hopper.xml", 4, observation_space=observation_space, **kwargs
        )
        utils.EzPickle.__init__(self, **kwargs)

    def step(self, a):
        posbefore = self.sim.data.qpos[0]
        self.do_simulation(a, self.frame_skip)
        posafter, height, ang = self.sim.data.qpos[0:3]

        alive_bonus = 1.0
        reward = (posafter - posbefore) / self.dt
        reward += alive_bonus
        reward -= 1e-3 * np.square(a).sum()
        s = self.state_vector()
        terminated = not (
            np.isfinite(s).all()
            and (np.abs(s[2:]) < 100).all()
            and (height > 0.7)
            and (abs(ang) < 0.2)
        )
        ob = self._get_obs()

        if self.render_mode == "human":
            self.render()
        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return ob, reward, terminated, False, {}

    def _get_obs(self):
        return np.concatenate(
            [self.sim.data.qpos.flat[1:], np.clip(self.sim.data.qvel.flat, -10, 10)]
        )

    def reset_model(self):
        qpos = self.init_qpos + self.np_random.uniform(
            low=-0.005, high=0.005, size=self.model.nq
        )
        qvel = self.init_qvel + self.np_random.uniform(
            low=-0.005, high=0.005, size=self.model.nv
        )
        self.set_state(qpos, qvel)
        return self._get_obs()

    def viewer_setup(self):
        assert self.viewer is not None
        self.viewer.cam.trackbodyid = 2
        self.viewer.cam.distance = self.model.stat.extent * 0.75
        self.viewer.cam.lookat[2] = 1.15
        self.viewer.cam.elevation = -20
@@ -1,179 +0,0 @@
__credits__ = ["Rushiv Arora"]

import numpy as np

from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv
from gymnasium.spaces import Box


DEFAULT_CAMERA_CONFIG = {
    "trackbodyid": 2,
    "distance": 3.0,
    "lookat": np.array((0.0, 0.0, 1.15)),
    "elevation": -20.0,
}


class HopperEnv(MuJocoPyEnv, utils.EzPickle):
    metadata = {
        "render_modes": [
            "human",
            "rgb_array",
            "depth_array",
        ],
        "render_fps": 125,
    }

    def __init__(
        self,
        xml_file="hopper.xml",
        forward_reward_weight=1.0,
        ctrl_cost_weight=1e-3,
        healthy_reward=1.0,
        terminate_when_unhealthy=True,
        healthy_state_range=(-100.0, 100.0),
        healthy_z_range=(0.7, float("inf")),
        healthy_angle_range=(-0.2, 0.2),
        reset_noise_scale=5e-3,
        exclude_current_positions_from_observation=True,
        **kwargs,
    ):
        utils.EzPickle.__init__(
            self,
            xml_file,
            forward_reward_weight,
            ctrl_cost_weight,
            healthy_reward,
            terminate_when_unhealthy,
            healthy_state_range,
            healthy_z_range,
            healthy_angle_range,
            reset_noise_scale,
            exclude_current_positions_from_observation,
            **kwargs,
        )

        self._forward_reward_weight = forward_reward_weight

        self._ctrl_cost_weight = ctrl_cost_weight

        self._healthy_reward = healthy_reward
        self._terminate_when_unhealthy = terminate_when_unhealthy

        self._healthy_state_range = healthy_state_range
        self._healthy_z_range = healthy_z_range
        self._healthy_angle_range = healthy_angle_range

        self._reset_noise_scale = reset_noise_scale

        self._exclude_current_positions_from_observation = (
            exclude_current_positions_from_observation
        )

        if exclude_current_positions_from_observation:
            observation_space = Box(
                low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64
            )
        else:
            observation_space = Box(
                low=-np.inf, high=np.inf, shape=(12,), dtype=np.float64
            )

        MuJocoPyEnv.__init__(
            self, xml_file, 4, observation_space=observation_space, **kwargs
        )

    @property
    def healthy_reward(self):
        return (
            float(self.is_healthy or self._terminate_when_unhealthy)
            * self._healthy_reward
        )

    def control_cost(self, action):
        control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
        return control_cost

    @property
    def is_healthy(self):
        z, angle = self.sim.data.qpos[1:3]
        state = self.state_vector()[2:]

        min_state, max_state = self._healthy_state_range
        min_z, max_z = self._healthy_z_range
        min_angle, max_angle = self._healthy_angle_range

        healthy_state = np.all(np.logical_and(min_state < state, state < max_state))
        healthy_z = min_z < z < max_z
        healthy_angle = min_angle < angle < max_angle

        is_healthy = all((healthy_state, healthy_z, healthy_angle))

        return is_healthy

    @property
    def terminated(self):
        terminated = not self.is_healthy if self._terminate_when_unhealthy else False
        return terminated

    def _get_obs(self):
        position = self.sim.data.qpos.flat.copy()
        velocity = np.clip(self.sim.data.qvel.flat.copy(), -10, 10)

        if self._exclude_current_positions_from_observation:
            position = position[1:]

        observation = np.concatenate((position, velocity)).ravel()
        return observation

    def step(self, action):
        x_position_before = self.sim.data.qpos[0]
        self.do_simulation(action, self.frame_skip)
        x_position_after = self.sim.data.qpos[0]
        x_velocity = (x_position_after - x_position_before) / self.dt

        ctrl_cost = self.control_cost(action)

        forward_reward = self._forward_reward_weight * x_velocity
        healthy_reward = self.healthy_reward

        rewards = forward_reward + healthy_reward
        costs = ctrl_cost

        observation = self._get_obs()
        reward = rewards - costs
        terminated = self.terminated
        info = {
            "x_position": x_position_after,
            "x_velocity": x_velocity,
        }

        if self.render_mode == "human":
            self.render()
        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return observation, reward, terminated, False, info

    def reset_model(self):
        noise_low = -self._reset_noise_scale
        noise_high = self._reset_noise_scale

        qpos = self.init_qpos + self.np_random.uniform(
            low=noise_low, high=noise_high, size=self.model.nq
        )
        qvel = self.init_qvel + self.np_random.uniform(
            low=noise_low, high=noise_high, size=self.model.nv
        )

        self.set_state(qpos, qvel)

        observation = self._get_obs()
        return observation

    def viewer_setup(self):
        assert self.viewer is not None
        for key, value in DEFAULT_CAMERA_CONFIG.items():
            if isinstance(value, np.ndarray):
                getattr(self.viewer.cam, key)[:] = value
            else:
                setattr(self.viewer.cam, key, value)
@@ -1,95 +0,0 @@
import numpy as np

from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv
from gymnasium.spaces import Box


def mass_center(model, sim):
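    # Mass-weighted average of the per-body center-of-mass positions (xipos);
    # only the x coordinate is returned.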
    mass = np.expand_dims(model.body_mass, 1)
    xpos = sim.data.xipos
    return (np.sum(mass * xpos, 0) / np.sum(mass))[0]


class HumanoidEnv(MuJocoPyEnv, utils.EzPickle):
    metadata = {
        "render_modes": [
            "human",
            "rgb_array",
            "depth_array",
        ],
        "render_fps": 67,
    }

    def __init__(self, **kwargs):
        observation_space = Box(
            low=-np.inf, high=np.inf, shape=(376,), dtype=np.float64
        )
        MuJocoPyEnv.__init__(
            self, "humanoid.xml", 5, observation_space=observation_space, **kwargs
        )
        utils.EzPickle.__init__(self, **kwargs)

    def _get_obs(self):
        data = self.sim.data
        return np.concatenate(
            [
                data.qpos.flat[2:],
                data.qvel.flat,
                data.cinert.flat,
                data.cvel.flat,
                data.qfrc_actuator.flat,
                data.cfrc_ext.flat,
            ]
        )

    def step(self, a):
        pos_before = mass_center(self.model, self.sim)
        self.do_simulation(a, self.frame_skip)
        pos_after = mass_center(self.model, self.sim)

        alive_bonus = 5.0
        data = self.sim.data
        lin_vel_cost = 1.25 * (pos_after - pos_before) / self.dt
        quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
        quad_impact_cost = 0.5e-6 * np.square(data.cfrc_ext).sum()
        quad_impact_cost = min(quad_impact_cost, 10)
        reward = lin_vel_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus
        qpos = self.sim.data.qpos
        terminated = bool((qpos[2] < 1.0) or (qpos[2] > 2.0))

        if self.render_mode == "human":
            self.render()
        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return (
            self._get_obs(),
            reward,
            terminated,
            False,
            dict(
                reward_linvel=lin_vel_cost,
                reward_quadctrl=-quad_ctrl_cost,
                reward_alive=alive_bonus,
                reward_impact=-quad_impact_cost,
            ),
        )

    def reset_model(self):
        c = 0.01
        self.set_state(
            self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
            self.init_qvel
            + self.np_random.uniform(
                low=-c,
                high=c,
                size=self.model.nv,
            ),
        )
        return self._get_obs()

    def viewer_setup(self):
        assert self.viewer is not None
        self.viewer.cam.trackbodyid = 1
        self.viewer.cam.distance = self.model.stat.extent * 1.0
        self.viewer.cam.lookat[2] = 2.0
        self.viewer.cam.elevation = -20
@@ -1,201 +0,0 @@
import numpy as np

from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv
from gymnasium.spaces import Box


DEFAULT_CAMERA_CONFIG = {
    "trackbodyid": 1,
    "distance": 4.0,
    "lookat": np.array((0.0, 0.0, 2.0)),
    "elevation": -20.0,
}


def mass_center(model, sim):
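    # Same mass-weighted average as in the v2 helper, but returns a copy of
    # the (x, y) coordinates so callers can measure planar displacement.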
    mass = np.expand_dims(model.body_mass, axis=1)
    xpos = sim.data.xipos
    return (np.sum(mass * xpos, axis=0) / np.sum(mass))[0:2].copy()


class HumanoidEnv(MuJocoPyEnv, utils.EzPickle):
    metadata = {
        "render_modes": [
            "human",
            "rgb_array",
            "depth_array",
        ],
        "render_fps": 67,
    }

    def __init__(
        self,
        xml_file="humanoid.xml",
        forward_reward_weight=1.25,
        ctrl_cost_weight=0.1,
        contact_cost_weight=5e-7,
        contact_cost_range=(-np.inf, 10.0),
        healthy_reward=5.0,
        terminate_when_unhealthy=True,
        healthy_z_range=(1.0, 2.0),
        reset_noise_scale=1e-2,
        exclude_current_positions_from_observation=True,
        **kwargs,
    ):
        utils.EzPickle.__init__(
            self,
            xml_file,
            forward_reward_weight,
            ctrl_cost_weight,
            contact_cost_weight,
            contact_cost_range,
            healthy_reward,
            terminate_when_unhealthy,
            healthy_z_range,
            reset_noise_scale,
            exclude_current_positions_from_observation,
            **kwargs,
        )

        self._forward_reward_weight = forward_reward_weight
        self._ctrl_cost_weight = ctrl_cost_weight
        self._contact_cost_weight = contact_cost_weight
        self._contact_cost_range = contact_cost_range
        self._healthy_reward = healthy_reward
        self._terminate_when_unhealthy = terminate_when_unhealthy
        self._healthy_z_range = healthy_z_range

        self._reset_noise_scale = reset_noise_scale

        self._exclude_current_positions_from_observation = (
            exclude_current_positions_from_observation
        )
        if exclude_current_positions_from_observation:
            observation_space = Box(
                low=-np.inf, high=np.inf, shape=(376,), dtype=np.float64
            )
        else:
            observation_space = Box(
                low=-np.inf, high=np.inf, shape=(378,), dtype=np.float64
            )

        MuJocoPyEnv.__init__(
            self, xml_file, 5, observation_space=observation_space, **kwargs
        )

    @property
    def healthy_reward(self):
        return (
            float(self.is_healthy or self._terminate_when_unhealthy)
            * self._healthy_reward
        )

    def control_cost(self, action):
        control_cost = self._ctrl_cost_weight * np.sum(np.square(self.sim.data.ctrl))
        return control_cost

    @property
    def contact_cost(self):
        contact_forces = self.sim.data.cfrc_ext
        contact_cost = self._contact_cost_weight * np.sum(np.square(contact_forces))
        min_cost, max_cost = self._contact_cost_range
        contact_cost = np.clip(contact_cost, min_cost, max_cost)
        return contact_cost

    @property
    def is_healthy(self):
        min_z, max_z = self._healthy_z_range
        is_healthy = min_z < self.sim.data.qpos[2] < max_z

        return is_healthy

    @property
    def terminated(self):
        terminated = (not self.is_healthy) if self._terminate_when_unhealthy else False
        return terminated

    def _get_obs(self):
        position = self.sim.data.qpos.flat.copy()
        velocity = self.sim.data.qvel.flat.copy()

        com_inertia = self.sim.data.cinert.flat.copy()
        com_velocity = self.sim.data.cvel.flat.copy()

        actuator_forces = self.sim.data.qfrc_actuator.flat.copy()
        external_contact_forces = self.sim.data.cfrc_ext.flat.copy()

        if self._exclude_current_positions_from_observation:
            position = position[2:]

        return np.concatenate(
            (
                position,
                velocity,
                com_inertia,
                com_velocity,
                actuator_forces,
                external_contact_forces,
            )
        )

    def step(self, action):
        xy_position_before = mass_center(self.model, self.sim)
        self.do_simulation(action, self.frame_skip)
        xy_position_after = mass_center(self.model, self.sim)

        xy_velocity = (xy_position_after - xy_position_before) / self.dt
        x_velocity, y_velocity = xy_velocity

        ctrl_cost = self.control_cost(action)
        contact_cost = self.contact_cost

        forward_reward = self._forward_reward_weight * x_velocity
        healthy_reward = self.healthy_reward

        rewards = forward_reward + healthy_reward
        costs = ctrl_cost + contact_cost

        observation = self._get_obs()
        reward = rewards - costs
        terminated = self.terminated
        info = {
            "reward_linvel": forward_reward,
            "reward_quadctrl": -ctrl_cost,
            "reward_alive": healthy_reward,
            "reward_impact": -contact_cost,
            "x_position": xy_position_after[0],
            "y_position": xy_position_after[1],
            "distance_from_origin": np.linalg.norm(xy_position_after, ord=2),
            "x_velocity": x_velocity,
            "y_velocity": y_velocity,
            "forward_reward": forward_reward,
        }

        if self.render_mode == "human":
            self.render()
        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return observation, reward, terminated, False, info

    def reset_model(self):
        noise_low = -self._reset_noise_scale
        noise_high = self._reset_noise_scale

        qpos = self.init_qpos + self.np_random.uniform(
            low=noise_low, high=noise_high, size=self.model.nq
        )
        qvel = self.init_qvel + self.np_random.uniform(
            low=noise_low, high=noise_high, size=self.model.nv
        )
        self.set_state(qpos, qvel)

        observation = self._get_obs()
        return observation

    def viewer_setup(self):
        assert self.viewer is not None
        for key, value in DEFAULT_CAMERA_CONFIG.items():
            if isinstance(value, np.ndarray):
                getattr(self.viewer.cam, key)[:] = value
            else:
                setattr(self.viewer.cam, key, value)
@@ -1,88 +0,0 @@
import numpy as np

from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv
from gymnasium.spaces import Box


class HumanoidStandupEnv(MuJocoPyEnv, utils.EzPickle):
    metadata = {
        "render_modes": [
            "human",
            "rgb_array",
            "depth_array",
        ],
        "render_fps": 67,
    }

    def __init__(self, **kwargs):
        observation_space = Box(
            low=-np.inf, high=np.inf, shape=(376,), dtype=np.float64
        )
        MuJocoPyEnv.__init__(
            self,
            "humanoidstandup.xml",
            5,
            observation_space=observation_space,
            **kwargs,
        )
        utils.EzPickle.__init__(self, **kwargs)

    def _get_obs(self):
        data = self.sim.data
        return np.concatenate(
            [
                data.qpos.flat[2:],
                data.qvel.flat,
                data.cinert.flat,
                data.cvel.flat,
                data.qfrc_actuator.flat,
                data.cfrc_ext.flat,
            ]
        )

    def step(self, a):
        self.do_simulation(a, self.frame_skip)
        pos_after = self.sim.data.qpos[2]
        data = self.sim.data
        uph_cost = (pos_after - 0) / self.model.opt.timestep

        quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
        quad_impact_cost = 0.5e-6 * np.square(data.cfrc_ext).sum()
        quad_impact_cost = min(quad_impact_cost, 10)
        reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1

        if self.render_mode == "human":
            self.render()
        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return (
            self._get_obs(),
            reward,
            False,
            False,
            dict(
                reward_linup=uph_cost,
                reward_quadctrl=-quad_ctrl_cost,
                reward_impact=-quad_impact_cost,
            ),
        )

    def reset_model(self):
        c = 0.01
        self.set_state(
            self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
            self.init_qvel
            + self.np_random.uniform(
                low=-c,
                high=c,
                size=self.model.nv,
            ),
        )
        return self._get_obs()

    def viewer_setup(self):
        assert self.viewer is not None
        self.viewer.cam.trackbodyid = 1
        self.viewer.cam.distance = self.model.stat.extent * 1.0
        self.viewer.cam.lookat[2] = 0.8925
        self.viewer.cam.elevation = -20
@@ -1,70 +0,0 @@
import numpy as np

from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv
from gymnasium.spaces import Box


class InvertedDoublePendulumEnv(MuJocoPyEnv, utils.EzPickle):
    metadata = {
        "render_modes": [
            "human",
            "rgb_array",
            "depth_array",
        ],
        "render_fps": 20,
    }

    def __init__(self, **kwargs):
        observation_space = Box(low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64)
        MuJocoPyEnv.__init__(
            self,
            "inverted_double_pendulum.xml",
            5,
            observation_space=observation_space,
            **kwargs,
        )
        utils.EzPickle.__init__(self, **kwargs)

    def step(self, action):
        self.do_simulation(action, self.frame_skip)

        ob = self._get_obs()
        x, _, y = self.sim.data.site_xpos[0]
        dist_penalty = 0.01 * x**2 + (y - 2) ** 2
        v1, v2 = self.sim.data.qvel[1:3]
        vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2
        alive_bonus = 10
        r = alive_bonus - dist_penalty - vel_penalty
        terminated = bool(y <= 1)

        if self.render_mode == "human":
            self.render()
        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return ob, r, terminated, False, {}

    def _get_obs(self):
        return np.concatenate(
            [
                self.sim.data.qpos[:1],  # cart x pos
                np.sin(self.sim.data.qpos[1:]),  # link angles
                np.cos(self.sim.data.qpos[1:]),
                np.clip(self.sim.data.qvel, -10, 10),
                np.clip(self.sim.data.qfrc_constraint, -10, 10),
            ]
        ).ravel()

    def reset_model(self):
        self.set_state(
            self.init_qpos
            + self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq),
            self.init_qvel + self.np_random.standard_normal(self.model.nv) * 0.1,
        )
        return self._get_obs()

    def viewer_setup(self):
        assert self.viewer is not None
        v = self.viewer
        v.cam.trackbodyid = 0
        v.cam.distance = self.model.stat.extent * 0.5
        v.cam.lookat[2] = 0.12250000000000005  # v.model.stat.center[2]
@@ -1,57 +0,0 @@
import numpy as np

from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv
from gymnasium.spaces import Box


class InvertedPendulumEnv(MuJocoPyEnv, utils.EzPickle):
    metadata = {
        "render_modes": [
            "human",
            "rgb_array",
            "depth_array",
        ],
        "render_fps": 25,
    }

    def __init__(self, **kwargs):
        utils.EzPickle.__init__(self, **kwargs)
        observation_space = Box(low=-np.inf, high=np.inf, shape=(4,), dtype=np.float64)
        MuJocoPyEnv.__init__(
            self,
            "inverted_pendulum.xml",
            2,
            observation_space=observation_space,
            **kwargs,
        )

    def step(self, a):
        reward = 1.0
        self.do_simulation(a, self.frame_skip)

        ob = self._get_obs()
        terminated = bool(not np.isfinite(ob).all() or (np.abs(ob[1]) > 0.2))

        if self.render_mode == "human":
            self.render()
        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return ob, reward, terminated, False, {}

    def reset_model(self):
        qpos = self.init_qpos + self.np_random.uniform(
            size=self.model.nq, low=-0.01, high=0.01
        )
        qvel = self.init_qvel + self.np_random.uniform(
            size=self.model.nv, low=-0.01, high=0.01
        )
        self.set_state(qpos, qvel)
        return self._get_obs()

    def _get_obs(self):
        return np.concatenate([self.sim.data.qpos, self.sim.data.qvel]).ravel()

    def viewer_setup(self):
        assert self.viewer is not None
        self.viewer.cam.trackbodyid = 0
        self.viewer.cam.distance = self.model.stat.extent
@@ -1,350 +0,0 @@
|
||||
# type: ignore
|
||||
|
||||
from os import path
|
||||
from typing import Any, Dict, Optional, Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
from numpy.typing import NDArray
|
||||
|
||||
import gymnasium as gym
|
||||
from gymnasium import error, logger, spaces
|
||||
from gymnasium.spaces import Space
|
||||
|
||||
|
||||
try:
|
||||
import mujoco_py
|
||||
except ImportError as e:
|
||||
MUJOCO_PY_IMPORT_ERROR = e
|
||||
else:
|
||||
MUJOCO_PY_IMPORT_ERROR = None
|
||||
|
||||
|
||||
# NOTE: duplication of analogous code in mujoco_env.py
|
||||
# Support for mujoco-py based envs is deprecated, so this module will no longer be maintained
|
||||
# The code is duplicated instead of imported for it to be working in standalone and independent
|
||||
# of the further development of the maintained mujoco_env.py
|
||||
# noinspection DuplicatedCode
|
||||
DEFAULT_SIZE = 480
|
||||
|
||||
|
||||
# noinspection DuplicatedCode
|
||||
def expand_model_path(model_path: str) -> str:
|
||||
"""Expands the `model_path` to a full path if it starts with '~' or '.' or '/'."""
|
||||
if model_path.startswith(".") or model_path.startswith("/"):
|
||||
fullpath = model_path
|
||||
elif model_path.startswith("~"):
|
||||
fullpath = path.expanduser(model_path)
|
||||
else:
|
||||
fullpath = path.join(path.dirname(__file__), "assets", model_path)
|
||||
if not path.exists(fullpath):
|
||||
raise OSError(f"File {fullpath} does not exist")
|
||||
|
||||
return fullpath
|
||||
|
||||
|
||||
# noinspection DuplicatedCode
|
||||
class BaseMujocoPyEnv(gym.Env[NDArray[np.float64], NDArray[np.float32]]):
|
||||
"""Superclass for all MuJoCo environments."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model_path,
|
||||
frame_skip,
|
||||
observation_space: Optional[Space],
|
||||
render_mode: Optional[str] = None,
|
||||
width: int = DEFAULT_SIZE,
|
||||
height: int = DEFAULT_SIZE,
|
||||
camera_id: Optional[int] = None,
|
||||
camera_name: Optional[str] = None,
|
||||
):
|
||||
"""Base abstract class for mujoco based environments.
|
||||
|
||||
Args:
|
||||
model_path: Path to the MuJoCo Model.
|
||||
frame_skip: Number of MuJoCo simulation steps per gym `step()`.
|
||||
observation_space: The observation space of the environment.
|
||||
render_mode: The `render_mode` used.
|
||||
width: The width of the render window.
|
||||
height: The height of the render window.
|
||||
camera_id: The camera ID used.
|
||||
camera_name: The name of the camera used (can not be used in conjunction with `camera_id`).
|
||||
|
||||
Raises:
|
||||
OSError: when the `model_path` does not exist.
|
||||
error.DependencyNotInstalled: When `mujoco` is not installed.
|
||||
"""
|
||||
self.fullpath = expand_model_path(model_path)
|
||||
|
||||
self.width = width
|
||||
self.height = height
|
||||
# may use width and height
|
||||
self.model, self.data = self._initialize_simulation()
|
||||
|
||||
self.init_qpos = self.data.qpos.ravel().copy()
|
||||
self.init_qvel = self.data.qvel.ravel().copy()
|
||||
|
||||
self.frame_skip = frame_skip
|
||||
|
||||
assert self.metadata["render_modes"] == [
|
||||
"human",
|
||||
"rgb_array",
|
||||
"depth_array",
|
||||
], self.metadata["render_modes"]
|
||||
if "render_fps" in self.metadata:
|
||||
assert (
|
||||
int(np.round(1.0 / self.dt)) == self.metadata["render_fps"]
|
||||
), f'Expected value: {int(np.round(1.0 / self.dt))}, Actual value: {self.metadata["render_fps"]}'
|
||||
if observation_space is not None:
|
||||
self.observation_space = observation_space
|
||||
self._set_action_space()
|
||||
|
||||
self.render_mode = render_mode
|
||||
self.camera_name = camera_name
|
||||
self.camera_id = camera_id
|
||||
|
||||
def _set_action_space(self):
|
||||
bounds = self.model.actuator_ctrlrange.copy().astype(np.float32)
|
||||
low, high = bounds.T
|
||||
self.action_space = spaces.Box(low=low, high=high, dtype=np.float32)
|
||||
return self.action_space
|
||||
|
||||
# methods to override:
|
||||
# ----------------------------
|
||||
def step(
|
||||
self, action: NDArray[np.float32]
|
||||
) -> Tuple[NDArray[np.float64], np.float64, bool, bool, Dict[str, np.float64]]:
|
||||
raise NotImplementedError
|
||||
|
||||
def reset_model(self) -> NDArray[np.float64]:
|
||||
"""
|
||||
Reset the robot degrees of freedom (qpos and qvel).
|
||||
Implement this in each subclass.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def _initialize_simulation(self) -> Tuple[Any, Any]:
|
||||
"""
|
||||
Initialize MuJoCo simulation data structures mjModel and mjData.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def _reset_simulation(self) -> None:
|
||||
"""
|
||||
Reset MuJoCo simulation data structures, mjModel and mjData.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def _step_mujoco_simulation(self, ctrl, n_frames) -> None:
|
||||
"""
|
||||
Step over the MuJoCo simulation.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def render(self) -> Union[NDArray[np.float64], None]:
|
||||
"""
|
||||
Render a frame from the MuJoCo simulation as specified by the render_mode.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
# -----------------------------
|
||||
def _get_reset_info(self) -> Dict[str, float]:
|
||||
"""Function that generates the `info` that is returned during a `reset()`."""
|
||||
return {}
|
||||
|
||||
def reset(
|
||||
self,
|
||||
*,
|
||||
seed: Optional[int] = None,
|
||||
options: Optional[dict] = None,
|
||||
):
|
||||
super().reset(seed=seed)
|
||||
|
||||
self._reset_simulation()
|
||||
|
||||
ob = self.reset_model()
|
||||
info = self._get_reset_info()
|
||||
|
||||
if self.render_mode == "human":
|
||||
self.render()
|
||||
return ob, info
|
||||
|
||||
def set_state(self, qpos, qvel) -> None:
|
||||
"""
|
||||
Set the joints position qpos and velocity qvel of the model. Override this method depending on the MuJoCo bindings used.
|
||||
"""
|
||||
assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,)
|
||||
|
||||
@property
|
||||
def dt(self) -> float:
|
||||
return self.model.opt.timestep * self.frame_skip
|
||||
|
||||
def do_simulation(self, ctrl, n_frames) -> None:
|
||||
"""
|
||||
Step the simulation n number of frames and applying a control action.
|
||||
"""
|
||||
# Check control input is contained in the action space
|
||||
if np.array(ctrl).shape != (self.model.nu,):
|
||||
raise ValueError(
|
||||
f"Action dimension mismatch. Expected {(self.model.nu,)}, found {np.array(ctrl).shape}"
|
||||
)
|
||||
self._step_mujoco_simulation(ctrl, n_frames)
|
||||
|
||||
def close(self):
|
||||
"""Close all processes like rendering contexts"""
|
||||
raise NotImplementedError
|
||||
|
||||
def get_body_com(self, body_name) -> NDArray[np.float64]:
|
||||
"""Return the cartesian position of a body frame"""
|
||||
raise NotImplementedError
|
||||
|
||||
def state_vector(self) -> NDArray[np.float64]:
|
||||
"""Return the position and velocity joint states of the model"""
|
||||
return np.concatenate([self.data.qpos.flat, self.data.qvel.flat])
|
||||
|
class MuJocoPyEnv(BaseMujocoPyEnv):
    def __init__(
        self,
        model_path: str,
        frame_skip: int,
        observation_space: Space,
        render_mode: Optional[str] = None,
        width: int = DEFAULT_SIZE,
        height: int = DEFAULT_SIZE,
        camera_id: Optional[int] = None,
        camera_name: Optional[str] = None,
    ):
        if MUJOCO_PY_IMPORT_ERROR is not None:
            raise error.DependencyNotInstalled(
                f"{MUJOCO_PY_IMPORT_ERROR}. "
                "Could not import mujoco_py, which is needed for MuJoCo environments older than V4. "
                "You could either use a newer version of the environments, or install the (deprecated) mujoco-py package "
                "following the instructions on their GitHub page."
            )

        logger.deprecation(
            "This version of the mujoco environments depends "
            "on the mujoco-py bindings, which are no longer maintained "
            "and may stop working. Please upgrade to the v5 or v4 versions of "
            "the environments (which depend on the mujoco python bindings instead), unless "
            "you are trying to precisely replicate previous works."
        )

        self.viewer = None
        self._viewers = {}

        super().__init__(
            model_path,
            frame_skip,
            observation_space,
            render_mode,
            width,
            height,
            camera_id,
            camera_name,
        )

    def _initialize_simulation(self):
        model = mujoco_py.load_model_from_path(self.fullpath)
        self.sim = mujoco_py.MjSim(model)
        data = self.sim.data
        return model, data

    def _reset_simulation(self):
        self.sim.reset()

    def set_state(self, qpos, qvel):
        super().set_state(qpos, qvel)
        state = self.sim.get_state()
        state = mujoco_py.MjSimState(state.time, qpos, qvel, state.act, state.udd_state)
        self.sim.set_state(state)
        self.sim.forward()

    def get_body_com(self, body_name):
        return self.data.get_body_xpos(body_name)

    def _step_mujoco_simulation(self, ctrl, n_frames):
        self.sim.data.ctrl[:] = ctrl

        for _ in range(n_frames):
            self.sim.step()

    def render(self):
        if self.render_mode is None:
            assert self.spec is not None
            gym.logger.warn(
                "You are calling render method without specifying any render mode. "
                "You can specify the render_mode at initialization, "
                f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
            )
            return

        width, height = self.width, self.height
        camera_name, camera_id = self.camera_name, self.camera_id
        if self.render_mode in {"rgb_array", "depth_array"}:
            if camera_id is not None and camera_name is not None:
                raise ValueError(
                    "Both `camera_id` and `camera_name` cannot be"
                    " specified at the same time."
                )

            no_camera_specified = camera_name is None and camera_id is None
            if no_camera_specified:
                camera_name = "track"

            if camera_id is None and camera_name in self.model._camera_name2id:
                camera_id = self.model.camera_name2id(camera_name)

            self._get_viewer(self.render_mode).render(
                width, height, camera_id=camera_id
            )

        if self.render_mode == "rgb_array":
            data = self._get_viewer(self.render_mode).read_pixels(
                width, height, depth=False
            )
            # original image is upside-down, so flip it
            return data[::-1, :, :]
        elif self.render_mode == "depth_array":
            self._get_viewer(self.render_mode).render(width, height)
            # Extract depth part of the read_pixels() tuple
            data = self._get_viewer(self.render_mode).read_pixels(
                width, height, depth=True
            )[1]
            # original image is upside-down, so flip it
            return data[::-1, :]
        elif self.render_mode == "human":
            self._get_viewer(self.render_mode).render()

    def _get_viewer(
        self, mode
    ) -> Union["mujoco_py.MjViewer", "mujoco_py.MjRenderContextOffscreen"]:
        self.viewer = self._viewers.get(mode)
        if self.viewer is None:
            if mode == "human":
                self.viewer = mujoco_py.MjViewer(self.sim)

            elif mode in {"rgb_array", "depth_array"}:
                self.viewer = mujoco_py.MjRenderContextOffscreen(self.sim, -1)
            else:
                raise AttributeError(
                    f"Unknown mode: {mode}, expected modes: {self.metadata['render_modes']}"
                )

            self.viewer_setup()
            self._viewers[mode] = self.viewer

        return self.viewer

    def close(self):
        if self.viewer is not None:
            self.viewer = None
            self._viewers = {}

    def viewer_setup(self):
        """
        This method is called when the viewer is initialized.
        Optionally implement this method, if you need to tinker with camera position and so forth.
        """
        raise NotImplementedError
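`MuJocoPyEnv` leaves `viewer_setup` (and the task specifics such as `step` and `reset_model`) to subclasses; every environment below follows the same pattern. A hypothetical minimal subclass, for illustration only (`my_task.xml`, the observation shape, and the frame skip are made up):

````python
import numpy as np
from gymnasium.spaces import Box


class MyTaskEnv(MuJocoPyEnv):
    # Hypothetical subclass sketch; assumes MuJocoPyEnv (above) is importable.
    metadata = {
        "render_modes": ["human", "rgb_array", "depth_array"],
        "render_fps": 25,
    }

    def __init__(self, **kwargs):
        observation_space = Box(low=-np.inf, high=np.inf, shape=(8,), dtype=np.float64)
        super().__init__(
            "my_task.xml", 4, observation_space=observation_space, **kwargs
        )

    def viewer_setup(self):
        # Called once when the viewer is created; adjust the camera here.
        assert self.viewer is not None
        self.viewer.cam.distance = self.model.stat.extent * 0.5
````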
@@ -1,85 +0,0 @@
import numpy as np

from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv
from gymnasium.spaces import Box


class PusherEnv(MuJocoPyEnv, utils.EzPickle):
    metadata = {
        "render_modes": [
            "human",
            "rgb_array",
            "depth_array",
        ],
        "render_fps": 20,
    }

    def __init__(self, **kwargs):
        utils.EzPickle.__init__(self, **kwargs)
        observation_space = Box(low=-np.inf, high=np.inf, shape=(23,), dtype=np.float64)
        MuJocoPyEnv.__init__(
            self, "pusher.xml", 5, observation_space=observation_space, **kwargs
        )

    def step(self, a):
        vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm")
        vec_2 = self.get_body_com("object") - self.get_body_com("goal")

        reward_near = -np.linalg.norm(vec_1)
        reward_dist = -np.linalg.norm(vec_2)
        reward_ctrl = -np.square(a).sum()
        reward = reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near

        self.do_simulation(a, self.frame_skip)
        if self.render_mode == "human":
            self.render()

        ob = self._get_obs()
        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return (
            ob,
            reward,
            False,
            False,
            dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl),
        )

    def viewer_setup(self):
        assert self.viewer is not None
        self.viewer.cam.trackbodyid = -1
        self.viewer.cam.distance = 4.0

    def reset_model(self):
        qpos = self.init_qpos

        self.goal_pos = np.asarray([0, 0])
        while True:
            self.cylinder_pos = np.concatenate(
                [
                    self.np_random.uniform(low=-0.3, high=0, size=1),
                    self.np_random.uniform(low=-0.2, high=0.2, size=1),
                ]
            )
            if np.linalg.norm(self.cylinder_pos - self.goal_pos) > 0.17:
                break

        qpos[-4:-2] = self.cylinder_pos
        qpos[-2:] = self.goal_pos
        qvel = self.init_qvel + self.np_random.uniform(
            low=-0.005, high=0.005, size=self.model.nv
        )
        qvel[-4:] = 0
        self.set_state(qpos, qvel)
        return self._get_obs()

    def _get_obs(self):
        return np.concatenate(
            [
                self.sim.data.qpos.flat[:7],
                self.sim.data.qvel.flat[:7],
                self.get_body_com("tips_arm"),
                self.get_body_com("object"),
                self.get_body_com("goal"),
            ]
        )
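The Pusher reward mixes three terms with fixed weights: full weight on the object-to-goal distance, half weight on the arm-to-object distance, and a tenth on the control penalty. The same weighting in isolation, with made-up vectors (Pusher has 7 actuators, `model.nu == 7`):

````python
import numpy as np

vec_1 = np.array([0.1, 0.0, 0.0])   # object - tips_arm (hypothetical values)
vec_2 = np.array([0.3, 0.1, 0.0])   # object - goal     (hypothetical values)
action = np.zeros(7)                # a zero action incurs no control cost

reward_near = -np.linalg.norm(vec_1)
reward_dist = -np.linalg.norm(vec_2)
reward_ctrl = -np.square(action).sum()
reward = reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near
````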
@@ -1,76 +0,0 @@
import numpy as np

from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv
from gymnasium.spaces import Box


class ReacherEnv(MuJocoPyEnv, utils.EzPickle):
    metadata = {
        "render_modes": [
            "human",
            "rgb_array",
            "depth_array",
        ],
        "render_fps": 50,
    }

    def __init__(self, **kwargs):
        utils.EzPickle.__init__(self, **kwargs)
        observation_space = Box(low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64)
        MuJocoPyEnv.__init__(
            self, "reacher.xml", 2, observation_space=observation_space, **kwargs
        )

    def step(self, a):
        vec = self.get_body_com("fingertip") - self.get_body_com("target")
        reward_dist = -np.linalg.norm(vec)
        reward_ctrl = -np.square(a).sum()
        reward = reward_dist + reward_ctrl

        self.do_simulation(a, self.frame_skip)
        if self.render_mode == "human":
            self.render()

        ob = self._get_obs()
        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return (
            ob,
            reward,
            False,
            False,
            dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl),
        )

    def viewer_setup(self):
        assert self.viewer is not None
        self.viewer.cam.trackbodyid = 0

    def reset_model(self):
        qpos = (
            self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq)
            + self.init_qpos
        )
        while True:
            self.goal = self.np_random.uniform(low=-0.2, high=0.2, size=2)
            if np.linalg.norm(self.goal) < 0.2:
                break
        qpos[-2:] = self.goal
        qvel = self.init_qvel + self.np_random.uniform(
            low=-0.005, high=0.005, size=self.model.nv
        )
        qvel[-2:] = 0
        self.set_state(qpos, qvel)
        return self._get_obs()

    def _get_obs(self):
        theta = self.sim.data.qpos.flat[:2]
        return np.concatenate(
            [
                np.cos(theta),
                np.sin(theta),
                self.sim.data.qpos.flat[2:],
                self.sim.data.qvel.flat[:2],
                self.get_body_com("fingertip") - self.get_body_com("target"),
            ]
        )
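`reset_model` rejection-samples the goal so it lies strictly inside the disc of radius 0.2; samples from the enclosing square that fall outside the disc are discarded. The sampling loop in isolation, with a hypothetical seeded generator standing in for `self.np_random`:

````python
import numpy as np

rng = np.random.default_rng(0)  # hypothetical stand-in for self.np_random
while True:
    goal = rng.uniform(low=-0.2, high=0.2, size=2)
    if np.linalg.norm(goal) < 0.2:  # reject the square's corners outside the disc
        break
````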
@@ -1,60 +0,0 @@
import numpy as np

from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv
from gymnasium.spaces import Box


class SwimmerEnv(MuJocoPyEnv, utils.EzPickle):
    metadata = {
        "render_modes": [
            "human",
            "rgb_array",
            "depth_array",
        ],
        "render_fps": 25,
    }

    def __init__(self, **kwargs):
        observation_space = Box(low=-np.inf, high=np.inf, shape=(8,), dtype=np.float64)
        MuJocoPyEnv.__init__(
            self, "swimmer.xml", 4, observation_space=observation_space, **kwargs
        )
        utils.EzPickle.__init__(self, **kwargs)

    def step(self, a):
        ctrl_cost_coeff = 0.0001
        xposbefore = self.sim.data.qpos[0]
        self.do_simulation(a, self.frame_skip)
        xposafter = self.sim.data.qpos[0]

        reward_fwd = (xposafter - xposbefore) / self.dt
        reward_ctrl = -ctrl_cost_coeff * np.square(a).sum()
        reward = reward_fwd + reward_ctrl
        ob = self._get_obs()

        if self.render_mode == "human":
            self.render()

        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return (
            ob,
            reward,
            False,
            False,
            dict(reward_fwd=reward_fwd, reward_ctrl=reward_ctrl),
        )

    def _get_obs(self):
        qpos = self.sim.data.qpos
        qvel = self.sim.data.qvel
        return np.concatenate([qpos.flat[2:], qvel.flat])

    def reset_model(self):
        self.set_state(
            self.init_qpos
            + self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq),
            self.init_qvel
            + self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nv),
        )
        return self._get_obs()
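The reward above is a finite-difference estimate of the forward velocity over one control step, minus a small control penalty. The same computation as a free function, for illustration (names hypothetical):

````python
import numpy as np


def swimmer_reward(x_before: float, x_after: float, dt: float, action: np.ndarray) -> float:
    # Mirrors step() above: velocity estimate over one control step minus control cost.
    reward_fwd = (x_after - x_before) / dt
    reward_ctrl = -0.0001 * np.square(action).sum()
    return reward_fwd + reward_ctrl
````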
@@ -1,129 +0,0 @@
__credits__ = ["Rushiv Arora"]

import numpy as np

from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv
from gymnasium.spaces import Box


DEFAULT_CAMERA_CONFIG = {}


class SwimmerEnv(MuJocoPyEnv, utils.EzPickle):
    metadata = {
        "render_modes": [
            "human",
            "rgb_array",
            "depth_array",
        ],
        "render_fps": 25,
    }

    def __init__(
        self,
        xml_file="swimmer.xml",
        forward_reward_weight=1.0,
        ctrl_cost_weight=1e-4,
        reset_noise_scale=0.1,
        exclude_current_positions_from_observation=True,
        **kwargs,
    ):
        utils.EzPickle.__init__(
            self,
            xml_file,
            forward_reward_weight,
            ctrl_cost_weight,
            reset_noise_scale,
            exclude_current_positions_from_observation,
            **kwargs,
        )

        self._forward_reward_weight = forward_reward_weight
        self._ctrl_cost_weight = ctrl_cost_weight

        self._reset_noise_scale = reset_noise_scale

        self._exclude_current_positions_from_observation = (
            exclude_current_positions_from_observation
        )

        if exclude_current_positions_from_observation:
            observation_space = Box(
                low=-np.inf, high=np.inf, shape=(8,), dtype=np.float64
            )
        else:
            observation_space = Box(
                low=-np.inf, high=np.inf, shape=(10,), dtype=np.float64
            )

        MuJocoPyEnv.__init__(
            self, xml_file, 4, observation_space=observation_space, **kwargs
        )

    def control_cost(self, action):
        control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
        return control_cost

    def step(self, action):
        xy_position_before = self.sim.data.qpos[0:2].copy()
        self.do_simulation(action, self.frame_skip)
        xy_position_after = self.sim.data.qpos[0:2].copy()

        xy_velocity = (xy_position_after - xy_position_before) / self.dt
        x_velocity, y_velocity = xy_velocity

        forward_reward = self._forward_reward_weight * x_velocity
        ctrl_cost = self.control_cost(action)

        observation = self._get_obs()
        reward = forward_reward - ctrl_cost
        info = {
            "reward_fwd": forward_reward,
            "reward_ctrl": -ctrl_cost,
            "x_position": xy_position_after[0],
            "y_position": xy_position_after[1],
            "distance_from_origin": np.linalg.norm(xy_position_after, ord=2),
            "x_velocity": x_velocity,
            "y_velocity": y_velocity,
            "forward_reward": forward_reward,
        }

        if self.render_mode == "human":
            self.render()
        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return observation, reward, False, False, info

    def _get_obs(self):
        position = self.sim.data.qpos.flat.copy()
        velocity = self.sim.data.qvel.flat.copy()

        if self._exclude_current_positions_from_observation:
            position = position[2:]

        observation = np.concatenate([position, velocity]).ravel()
        return observation

    def reset_model(self):
        noise_low = -self._reset_noise_scale
        noise_high = self._reset_noise_scale

        qpos = self.init_qpos + self.np_random.uniform(
            low=noise_low, high=noise_high, size=self.model.nq
        )
        qvel = self.init_qvel + self.np_random.uniform(
            low=noise_low, high=noise_high, size=self.model.nv
        )

        self.set_state(qpos, qvel)

        observation = self._get_obs()
        return observation

    def viewer_setup(self):
        assert self.viewer is not None
        for key, value in DEFAULT_CAMERA_CONFIG.items():
            if isinstance(value, np.ndarray):
                getattr(self.viewer.cam, key)[:] = value
            else:
                setattr(self.viewer.cam, key, value)
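`viewer_setup` copies `DEFAULT_CAMERA_CONFIG` entries onto the camera: ndarray values are written element-wise in place, scalars via `setattr`. The config is empty for the Swimmer; a hypothetical non-empty one in the same format (values borrowed from the Walker2d config below):

````python
import numpy as np

# Hypothetical config in the format viewer_setup() expects:
camera_config = {
    "trackbodyid": 2,                      # scalar  -> setattr(cam, key, value)
    "lookat": np.array((0.0, 0.0, 1.15)),  # ndarray -> cam.lookat[:] = value
}
````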
@@ -1,61 +0,0 @@
import numpy as np

from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv
from gymnasium.spaces import Box


class Walker2dEnv(MuJocoPyEnv, utils.EzPickle):
    metadata = {
        "render_modes": [
            "human",
            "rgb_array",
            "depth_array",
        ],
        "render_fps": 125,
    }

    def __init__(self, **kwargs):
        observation_space = Box(low=-np.inf, high=np.inf, shape=(17,), dtype=np.float64)
        MuJocoPyEnv.__init__(
            self, "walker2d.xml", 4, observation_space=observation_space, **kwargs
        )
        utils.EzPickle.__init__(self, **kwargs)

    def step(self, a):
        posbefore = self.sim.data.qpos[0]
        self.do_simulation(a, self.frame_skip)
        posafter, height, ang = self.sim.data.qpos[0:3]

        alive_bonus = 1.0
        reward = (posafter - posbefore) / self.dt
        reward += alive_bonus
        reward -= 1e-3 * np.square(a).sum()
        terminated = not (height > 0.8 and height < 2.0 and ang > -1.0 and ang < 1.0)
        ob = self._get_obs()

        if self.render_mode == "human":
            self.render()
        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return ob, reward, terminated, False, {}

    def _get_obs(self):
        qpos = self.sim.data.qpos
        qvel = self.sim.data.qvel
        return np.concatenate([qpos[1:], np.clip(qvel, -10, 10)]).ravel()

    def reset_model(self):
        self.set_state(
            self.init_qpos
            + self.np_random.uniform(low=-0.005, high=0.005, size=self.model.nq),
            self.init_qvel
            + self.np_random.uniform(low=-0.005, high=0.005, size=self.model.nv),
        )
        return self._get_obs()

    def viewer_setup(self):
        assert self.viewer is not None
        self.viewer.cam.trackbodyid = 2
        self.viewer.cam.distance = self.model.stat.extent * 0.5
        self.viewer.cam.lookat[2] = 1.15
        self.viewer.cam.elevation = -20
@@ -1,168 +0,0 @@
import numpy as np

from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_py_env import MuJocoPyEnv
from gymnasium.spaces import Box


DEFAULT_CAMERA_CONFIG = {
    "trackbodyid": 2,
    "distance": 4.0,
    "lookat": np.array((0.0, 0.0, 1.15)),
    "elevation": -20.0,
}


class Walker2dEnv(MuJocoPyEnv, utils.EzPickle):
    metadata = {
        "render_modes": [
            "human",
            "rgb_array",
            "depth_array",
        ],
        "render_fps": 125,
    }

    def __init__(
        self,
        xml_file="walker2d.xml",
        forward_reward_weight=1.0,
        ctrl_cost_weight=1e-3,
        healthy_reward=1.0,
        terminate_when_unhealthy=True,
        healthy_z_range=(0.8, 2.0),
        healthy_angle_range=(-1.0, 1.0),
        reset_noise_scale=5e-3,
        exclude_current_positions_from_observation=True,
        **kwargs,
    ):
        utils.EzPickle.__init__(
            self,
            xml_file,
            forward_reward_weight,
            ctrl_cost_weight,
            healthy_reward,
            terminate_when_unhealthy,
            healthy_z_range,
            healthy_angle_range,
            reset_noise_scale,
            exclude_current_positions_from_observation,
            **kwargs,
        )

        self._forward_reward_weight = forward_reward_weight
        self._ctrl_cost_weight = ctrl_cost_weight

        self._healthy_reward = healthy_reward
        self._terminate_when_unhealthy = terminate_when_unhealthy

        self._healthy_z_range = healthy_z_range
        self._healthy_angle_range = healthy_angle_range

        self._reset_noise_scale = reset_noise_scale

        self._exclude_current_positions_from_observation = (
            exclude_current_positions_from_observation
        )

        if exclude_current_positions_from_observation:
            observation_space = Box(
                low=-np.inf, high=np.inf, shape=(17,), dtype=np.float64
            )
        else:
            observation_space = Box(
                low=-np.inf, high=np.inf, shape=(18,), dtype=np.float64
            )

        MuJocoPyEnv.__init__(
            self, xml_file, 4, observation_space=observation_space, **kwargs
        )

    @property
    def healthy_reward(self):
        return (
            float(self.is_healthy or self._terminate_when_unhealthy)
            * self._healthy_reward
        )

    def control_cost(self, action):
        control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
        return control_cost

    @property
    def is_healthy(self):
        z, angle = self.sim.data.qpos[1:3]

        min_z, max_z = self._healthy_z_range
        min_angle, max_angle = self._healthy_angle_range

        healthy_z = min_z < z < max_z
        healthy_angle = min_angle < angle < max_angle
        is_healthy = healthy_z and healthy_angle

        return is_healthy

    @property
    def terminated(self):
        terminated = not self.is_healthy if self._terminate_when_unhealthy else False
        return terminated

    def _get_obs(self):
        position = self.sim.data.qpos.flat.copy()
        velocity = np.clip(self.sim.data.qvel.flat.copy(), -10, 10)

        if self._exclude_current_positions_from_observation:
            position = position[1:]

        observation = np.concatenate((position, velocity)).ravel()
        return observation

    def step(self, action):
        x_position_before = self.sim.data.qpos[0]
        self.do_simulation(action, self.frame_skip)
        x_position_after = self.sim.data.qpos[0]
        x_velocity = (x_position_after - x_position_before) / self.dt

        ctrl_cost = self.control_cost(action)
        forward_reward = self._forward_reward_weight * x_velocity
        healthy_reward = self.healthy_reward

        rewards = forward_reward + healthy_reward
        costs = ctrl_cost

        observation = self._get_obs()
        reward = rewards - costs
        terminated = self.terminated
        info = {
            "x_position": x_position_after,
            "x_velocity": x_velocity,
        }

        if self.render_mode == "human":
            self.render()
        # truncation=False as the time limit is handled by the `TimeLimit` wrapper added during `make`
        return observation, reward, terminated, False, info

    def reset_model(self):
        noise_low = -self._reset_noise_scale
        noise_high = self._reset_noise_scale

        qpos = self.init_qpos + self.np_random.uniform(
            low=noise_low, high=noise_high, size=self.model.nq
        )
        qvel = self.init_qvel + self.np_random.uniform(
            low=noise_low, high=noise_high, size=self.model.nv
        )

        self.set_state(qpos, qvel)

        observation = self._get_obs()
        return observation

    def viewer_setup(self):
        assert self.viewer is not None
        for key, value in DEFAULT_CAMERA_CONFIG.items():
            if isinstance(value, np.ndarray):
                getattr(self.viewer.cam, key)[:] = value
            else:
                setattr(self.viewer.cam, key, value)
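The `healthy_reward` property pays the survival bonus whenever the walker is healthy; with `terminate_when_unhealthy` set, the factor is always 1, so the single unhealthy step that ends the episode is paid as well. The same logic as a standalone function, for illustration:

````python
def survival_bonus(is_healthy: bool, terminate_when_unhealthy: bool, bonus: float = 1.0) -> float:
    # float(True or _) == 1.0, so with termination enabled the bonus is always paid,
    # including on the terminal (unhealthy) step.
    return float(is_healthy or terminate_when_unhealthy) * bonus
````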
@@ -36,11 +36,6 @@ atari = ["ale_py >=0.9"]
box2d = ["box2d-py ==2.3.5", "pygame >=2.1.3", "swig ==4.*"]
classic-control = ["pygame >=2.1.3"]
classic_control = ["pygame >=2.1.3"] # kept for backward compatibility
mujoco-py = ["mujoco-py >=2.1,<2.2", "cython<3"]
mujoco_py = [
    "mujoco-py >=2.1,<2.2",
    "cython<3",
] # kept for backward compatibility
mujoco = ["mujoco >=2.1.5", "imageio >=2.14.1", "packaging >=23.0"]
toy-text = ["pygame >=2.1.3"]
toy_text = ["pygame >=2.1.3"] # kept for backward compatibility
@@ -70,9 +65,6 @@ all = [
    "swig ==4.*",
    # classic-control
    "pygame >=2.1.3",
    # mujoco-py
    "mujoco-py >=2.1,<2.2",
    "cython <3",
    # mujoco
    "mujoco >=2.1.5",
    "imageio >=2.14.1",
@@ -1,49 +1,11 @@
import numpy as np
import pytest

import gymnasium as gym
from gymnasium import envs
from gymnasium.envs.registration import EnvSpec
from tests.envs.utils import mujoco_testing_env_specs


EPS = 1e-6


def verify_environments_match(
    old_env_id: str, new_env_id: str, seed: int = 1, num_actions: int = 1000
):
    """Verifies that two environment ids (old and new) are identical in obs, reward and done
    (except info, where all old info must be contained in new info)."""
    old_env = envs.make(old_env_id)
    new_env = envs.make(new_env_id)

    old_reset_obs, old_info = old_env.reset(seed=seed)
    new_reset_obs, new_info = new_env.reset(seed=seed)

    np.testing.assert_allclose(old_reset_obs, new_reset_obs)

    for i in range(num_actions):
        action = old_env.action_space.sample()
        old_obs, old_reward, old_terminated, old_truncated, old_info = old_env.step(
            action
        )
        new_obs, new_reward, new_terminated, new_truncated, new_info = new_env.step(
            action
        )

        np.testing.assert_allclose(old_obs, new_obs, atol=EPS)
        np.testing.assert_allclose(old_reward, new_reward, atol=EPS)
        np.testing.assert_equal(old_terminated, new_terminated)
        np.testing.assert_equal(old_truncated, new_truncated)

        for key in old_info:
            np.testing.assert_allclose(old_info[key], new_info[key], atol=EPS)

        if old_terminated or old_truncated:
            break
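A typical invocation, matching how the v2/v3 tests below call it (both ids must be registered; `num_actions` is shortened here for illustration):

````python
verify_environments_match("Swimmer-v2", "Swimmer-v3", seed=1, num_actions=100)
````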
EXCLUDE_POS_FROM_OBS = [
    "Ant",
    "HalfCheetah",
@@ -101,23 +63,3 @@ def test_obs_space_mujoco_environments(env_spec: EnvSpec):
    assert env.observation_space.contains(
        step_obs
    ), f"Observation returned by step(action) of {env_spec.id} is not contained in the default observation space {env.observation_space} when using contact forces."


MUJOCO_V2_V3_ENVS = [
    spec.name
    for spec in mujoco_testing_env_specs
    if spec.version == 2 and f"{spec.name}-v3" in gym.envs.registry
]


@pytest.mark.parametrize("env_name", MUJOCO_V2_V3_ENVS)
def test_mujoco_v2_to_v3_conversion(env_name: str):
    """Checks that all v2 mujoco environments are the same as v3 environments."""
    verify_environments_match(f"{env_name}-v2", f"{env_name}-v3")


@pytest.mark.parametrize("env_name", MUJOCO_V2_V3_ENVS)
def test_mujoco_incompatible_v3_to_v2(env_name: str):
    """Checks that the v3 environments are slightly different from the v2 ones (v3 has additional info keys that v2 does not)."""
    with pytest.raises(KeyError):
        verify_environments_match(f"{env_name}-v3", f"{env_name}-v2")
@@ -7,7 +7,6 @@ import pytest

import gymnasium as gym
from gymnasium.envs.mujoco.mujoco_env import MujocoEnv
from gymnasium.envs.mujoco.mujoco_py_env import BaseMujocoPyEnv
from gymnasium.envs.mujoco.utils import check_mujoco_reset_state
from gymnasium.error import Error
from gymnasium.utils.env_checker import check_env
@@ -37,18 +36,14 @@ ALL_MUJOCO_ENVS = [
        "Ant-v5",
        "HalfCheetah-v5",
        "HalfCheetah-v4",
        "HalfCheetah-v3",
        "Hopper-v5",
        "Hopper-v4",
        "Hopper-v3",
        "Humanoid-v5",
        "HumanoidStandup-v5",
        "Swimmer-v5",
        "Swimmer-v4",
        "Swimmer-v3",
        "Walker2d-v5",
        "Walker2d-v4",
        "Walker2d-v3",
    ],
)
def test_verify_info_x_position(env_id: str):
@@ -71,7 +66,6 @@ def test_verify_info_x_position(env_id: str):
        "HumanoidStandup-v5",
        "Swimmer-v5",
        "Swimmer-v4",
        "Swimmer-v3",
    ],
)
def test_verify_info_y_position(env_id: str):
@@ -86,11 +80,11 @@ def test_verify_info_y_position(env_id: str):

# Note: "HumanoidStandup-v4" does not have `info`
@pytest.mark.parametrize("env_name", ["HalfCheetah", "Hopper", "Swimmer", "Walker2d"])
@pytest.mark.parametrize("version", ["v5", "v4", "v3"])
@pytest.mark.parametrize("version", ["v5", "v4"])
def test_verify_info_x_velocity(env_name: str, version: str):
    """Asserts that the environment `info['x_velocity']` is properly assigned."""
    env = gym.make(f"{env_name}-{version}").unwrapped
    assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
    assert isinstance(env, MujocoEnv)
    env.reset()

    old_x = env.data.qpos[0]
@@ -103,11 +97,11 @@ def test_verify_info_x_velocity(env_name: str, version: str):


# Note: "HumanoidStandup-v4" does not have `info`
@pytest.mark.parametrize("env_id", ["Swimmer-v5", "Swimmer-v4", "Swimmer-v3"])
@pytest.mark.parametrize("env_id", ["Swimmer-v5", "Swimmer-v4"])
def test_verify_info_y_velocity(env_id: str):
    """Asserts that the environment `info['y_velocity']` is properly assigned."""
    env = gym.make(env_id).unwrapped
    assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
    assert isinstance(env, MujocoEnv)
    env.reset()

    old_y = env.data.qpos[1]
@@ -119,11 +113,11 @@ def test_verify_info_y_velocity(env_id: str):
    assert vel_y == info["y_velocity"]


@pytest.mark.parametrize("env_id", ["Ant-v5", "Ant-v4", "Ant-v3"])
@pytest.mark.parametrize("env_id", ["Ant-v5", "Ant-v4"])
def test_verify_info_xy_velocity_xpos(env_id: str):
    """Asserts that the environment `info['x/y_velocity']` is properly assigned, for the ant environment which uses kinematics for the velocity."""
    env = gym.make(env_id).unwrapped
    assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
    assert isinstance(env, MujocoEnv)
    env.reset()

    old_xy = env.get_body_com("torso")[:2].copy()
@@ -136,7 +130,7 @@ def test_verify_info_xy_velocity_xpos(env_id: str):
    assert vel_y == info["y_velocity"]


@pytest.mark.parametrize("env_id", ["Humanoid-v5", "Humanoid-v4", "Humanoid-v3"])
@pytest.mark.parametrize("env_id", ["Humanoid-v5", "Humanoid-v4"])
def test_verify_info_xy_velocity_com(env_id: str):
    """Asserts that the environment `info['x/y_velocity']` is properly assigned, for the humanoid environment which uses kinematics of the Center Of Mass for the velocity."""

@@ -146,7 +140,7 @@ def test_verify_info_xy_velocity_com(env_id: str):
        return (np.sum(mass * xpos, axis=0) / np.sum(mass))[0:2].copy()

    env = gym.make(env_id).unwrapped
    assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
    assert isinstance(env, MujocoEnv)
    env.reset()

    old_xy = mass_center(env.model, env.data)
@@ -178,7 +172,7 @@ def test_verify_info_xy_velocity_com(env_id: str):
def test_verify_reward_survive(env_name: str, version: str):
    """Assert that `reward_survive` is 0 on `terminal` states and not 0 on non-`terminal` states."""
    env = gym.make(f"{env_name}-{version}", reset_noise_scale=0).unwrapped
    assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
    assert isinstance(env, MujocoEnv)
    env.reset(seed=0)
    env.action_space.seed(1)

@@ -361,7 +355,7 @@ def test_ant_com(version: str):
    """Verify the kinematic behaviour of the ant."""
    # `env` contains `data : MjData` and `model : MjModel`
    env = gym.make(f"Ant-{version}").unwrapped
    assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
    assert isinstance(env, MujocoEnv)
    env.reset()  # randomly initializes the `data.qpos` and `data.qvel`, calls mujoco.mj_forward(env.model, env.data)

    x_position_before = env.data.qpos[0]
@@ -378,11 +372,11 @@ def test_ant_com(version: str):
    assert x_position_after == x_position_after_com, "after failed"  # This succeeds


@pytest.mark.parametrize("version", ["v5", "v4", "v3", "v2"])
@pytest.mark.parametrize("version", ["v5", "v4"])
def test_set_state(version: str):
    """Simple test to verify that `mujocoEnv.set_state()` works correctly."""
    env = gym.make(f"Hopper-{version}").unwrapped
    assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
    assert isinstance(env, MujocoEnv)
    env.reset()
    new_qpos = np.array(
        [0.00136962, 1.24769787, -0.00459026, -0.00483472, 0.0031327, 0.00412756]
@@ -400,12 +394,12 @@ def test_set_state(version: str):
# Note: Humanoid-v4/v3 fails this test
# Note: v2 does not have `info`
@pytest.mark.parametrize(
    "env_id", ["Ant-v5", "Humanoid-v5", "Swimmer-v5", "Swimmer-v4", "Swimmer-v3"]
    "env_id", ["Ant-v5", "Humanoid-v5", "Swimmer-v5", "Swimmer-v4"]
)
def test_distance_from_origin_info(env_id: str):
    """Verify that `info["distance_from_origin"]` is correct."""
    env = gym.make(env_id).unwrapped
    assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
    assert isinstance(env, MujocoEnv)
    env.reset()

    _, _, _, _, info = env.step(env.action_space.sample())
@@ -487,7 +481,7 @@ def test_reset_info(env_name: str, version: str):
def test_inverted_double_pendulum_max_height(version: str):
    """Verify the max height of Inverted Double Pendulum."""
    env = gym.make(f"InvertedDoublePendulum-{version}", reset_noise_scale=0).unwrapped
    assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
    assert isinstance(env, MujocoEnv)
    env.reset()

    y = env.data.site_xpos[0][2]
@@ -498,7 +492,7 @@ def test_inverted_double_pendulum_max_height(version: str):
def test_inverted_double_pendulum_max_height_old(version: str):
    """Verify the max height of Inverted Double Pendulum (v4 does not have `reset_noise_scale` argument)."""
    env = gym.make(f"InvertedDoublePendulum-{version}").unwrapped
    assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
    assert isinstance(env, MujocoEnv)
    env.set_state(env.init_qpos, env.init_qvel)

    y = env.data.site_xpos[0][2]
@@ -510,7 +504,7 @@ def test_inverted_double_pendulum_max_height_old(version: str):
def test_model_object_count(version: str):
    """Verify that all the objects of the model are loaded, mostly useful for using a non-mujoco simulator."""
    env = gym.make(f"Ant-{version}").unwrapped
    assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
    assert isinstance(env, MujocoEnv)
    assert env.model.nq == 15
    assert env.model.nv == 14
    assert env.model.nu == 8
@@ -521,7 +515,7 @@ def test_model_object_count(version: str):
    assert env.model.ntendon == 0

    env = gym.make(f"HalfCheetah-{version}").unwrapped
    assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
    assert isinstance(env, MujocoEnv)
    assert env.model.nq == 9
    assert env.model.nv == 9
    assert env.model.nu == 6
@@ -532,7 +526,7 @@ def test_model_object_count(version: str):
    assert env.model.ntendon == 0

    env = gym.make(f"Hopper-{version}").unwrapped
    assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
    assert isinstance(env, MujocoEnv)
    assert env.model.nq == 6
    assert env.model.nv == 6
    assert env.model.nu == 3
@@ -543,7 +537,7 @@ def test_model_object_count(version: str):
    assert env.model.ntendon == 0

    env = gym.make(f"Humanoid-{version}").unwrapped
    assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
    assert isinstance(env, MujocoEnv)
    assert env.model.nq == 24
    assert env.model.nv == 23
    assert env.model.nu == 17
@@ -554,7 +548,7 @@ def test_model_object_count(version: str):
    assert env.model.ntendon == 2

    env = gym.make(f"HumanoidStandup-{version}").unwrapped
    assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
    assert isinstance(env, MujocoEnv)
    assert env.model.nq == 24
    assert env.model.nv == 23
    assert env.model.nu == 17
@@ -565,7 +559,7 @@ def test_model_object_count(version: str):
    assert env.model.ntendon == 2

    env = gym.make(f"InvertedDoublePendulum-{version}").unwrapped
    assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
    assert isinstance(env, MujocoEnv)
    assert env.model.nq == 3
    assert env.model.nv == 3
    assert env.model.nu == 1
@@ -576,7 +570,7 @@ def test_model_object_count(version: str):
    assert env.model.ntendon == 0

    env = gym.make(f"InvertedPendulum-{version}").unwrapped
    assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
    assert isinstance(env, MujocoEnv)
    assert env.model.nq == 2
    assert env.model.nv == 2
    assert env.model.nu == 1
@@ -588,7 +582,7 @@ def test_model_object_count(version: str):

    if not (version == "v4" and mujoco.__version__ >= "3.0.0"):
        env = gym.make(f"Pusher-{version}").unwrapped
        assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
        assert isinstance(env, MujocoEnv)
        assert env.model.nq == 11
        assert env.model.nv == 11
        assert env.model.nu == 7
@@ -607,7 +601,7 @@ def test_model_object_count(version: str):
    assert env.model.ntendon == 0

    env = gym.make(f"Reacher-{version}").unwrapped
    assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
    assert isinstance(env, MujocoEnv)
    assert env.model.nq == 4
    assert env.model.nv == 4
    assert env.model.nu == 2
@@ -619,7 +613,7 @@ def test_model_object_count(version: str):
    assert env.model.ntendon == 0

    env = gym.make(f"Swimmer-{version}").unwrapped
    assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
    assert isinstance(env, MujocoEnv)
    assert env.model.nq == 5
    assert env.model.nv == 5
    assert env.model.nu == 2
@@ -631,7 +625,7 @@ def test_model_object_count(version: str):
    assert env.model.ntendon == 0

    env = gym.make(f"Walker2d-{version}").unwrapped
    assert isinstance(env, (MujocoEnv, BaseMujocoPyEnv))
    assert isinstance(env, MujocoEnv)
    assert env.model.nq == 9
    assert env.model.nv == 9
    assert env.model.nu == 6
@@ -643,7 +637,7 @@ def test_model_object_count(version: str):


# note: fails with `mujoco-mjx==3.0.1`
@pytest.mark.parametrize("version", ["v5", "v4", "v3", "v2"])
@pytest.mark.parametrize("version", ["v5", "v4"])
def test_model_sensors(version: str):
    """Verify that all the sensors of the model are loaded."""
    env = gym.make(f"Ant-{version}").unwrapped
@@ -682,25 +676,19 @@ def test_dt():
    [
        "Ant-v5",
        "Ant-v4",
        "Ant-v3",
        "HalfCheetah-v5",
        "HalfCheetah-v4",
        "HalfCheetah-v3",
        "Hopper-v5",
        "Hopper-v4",
        "Hopper-v3",
        "Humanoid-v5",
        "Humanoid-v4",
        "Humanoid-v3",
        "HumanoidStandup-v5",
        "InvertedDoublePendulum-v5",
        "InvertedPendulum-v5",
        "Swimmer-v5",
        "Swimmer-v4",
        "Swimmer-v3",
        "Walker2d-v5",
        "Walker2d-v4",
        "Walker2d-v3",
    ],
)
def test_reset_noise_scale(env_id):
@@ -13,7 +13,6 @@ from tests.envs.utils import all_testing_env_specs, all_testing_initialised_envs
# This runs a smoketest on each official registered env. We may want
# to try also running environments which are not officially registered envs.
PASSIVE_CHECK_IGNORE_WARNING = [
    r"\x1b\[33mWARN: This version of the mujoco environments depends on the mujoco-py bindings, which are no longer maintained and may stop working\. Please upgrade to the v4 versions of the environments \(which depend on the mujoco python bindings instead\), unless you are trying to precisely replicate previous works\.\x1b\[0m",
    r"\x1b\[33mWARN: The environment (.*?) is out of date\. You should consider upgrading to version `v(\d)`\.\x1b\[0m",
]

@@ -21,7 +20,6 @@ PASSIVE_CHECK_IGNORE_WARNING = [
CHECK_ENV_IGNORE_WARNINGS = [
    f"\x1b[33mWARN: {message}\x1b[0m"
    for message in [
        "This version of the mujoco environments depends on the mujoco-py bindings, which are no longer maintained and may stop working. Please upgrade to the v4 versions of the environments (which depend on the mujoco python bindings instead), unless you are trying to precisely replicate previous works.",
        "A Box observation space minimum value is -infinity. This is probably too low.",
        "A Box observation space maximum value is infinity. This is probably too high.",
        "For Box action spaces, we recommend using a symmetric and normalized space (range=[-1, 1] or [0, 1]). See https://stable-baselines3.readthedocs.io/en/master/guide/rl_tips.html for more information.",
@@ -43,21 +43,13 @@ def check_rendered(rendered_frame, mode: str):


# We do not check render_mode for some mujoco envs and any old Gym environment wrapped by `GymEnvironment`
render_mode_env_specs = [
    spec
    for spec in all_testing_env_specs
    if "mujoco" not in spec.entry_point or spec.version >= 4
]
render_mode_env_specs = [spec for spec in all_testing_env_specs]


@pytest.mark.parametrize(
    "spec", render_mode_env_specs, ids=[spec.id for spec in render_mode_env_specs]
)
def test_render_modes(spec):
    """There is a known issue where rendering a mujoco environment and then a mujoco-py environment causes an error on non-mac based systems.

    Therefore, we are only testing with mujoco environments.
    """
    env = spec.make()

    assert "rgb_array" in env.metadata["render_modes"]