Gymnasium/gym/envs/mujoco/ant_v3.py

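"""Ant-v3: MuJoCo quadruped locomotion environment (mujoco-py backend)."""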

import numpy as np
from gym import utils
from gym.envs.mujoco import MuJocoPyEnv
from gym.spaces import Box
DEFAULT_CAMERA_CONFIG = {
"distance": 4.0,
}
class AntEnv(MuJocoPyEnv, utils.EzPickle):
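    """3D quadruped ("ant") tasked with moving forward along the x-axis.

    Reward per step: forward x-velocity + healthy bonus - control cost - contact cost.
    The episode terminates when the torso height leaves `healthy_z_range`
    (unless `terminate_when_unhealthy` is False).
    """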
metadata = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 20,
}
def __init__(
self,
xml_file="ant.xml",
ctrl_cost_weight=0.5,
contact_cost_weight=5e-4,
healthy_reward=1.0,
terminate_when_unhealthy=True,
healthy_z_range=(0.2, 1.0),
contact_force_range=(-1.0, 1.0),
reset_noise_scale=0.1,
exclude_current_positions_from_observation=True,
**kwargs
):
utils.EzPickle.__init__(
self,
xml_file,
ctrl_cost_weight,
contact_cost_weight,
healthy_reward,
terminate_when_unhealthy,
healthy_z_range,
contact_force_range,
reset_noise_scale,
exclude_current_positions_from_observation,
**kwargs
)
self._ctrl_cost_weight = ctrl_cost_weight
self._contact_cost_weight = contact_cost_weight
self._healthy_reward = healthy_reward
self._terminate_when_unhealthy = terminate_when_unhealthy
self._healthy_z_range = healthy_z_range
self._contact_force_range = contact_force_range
self._reset_noise_scale = reset_noise_scale
self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation
)
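        # Observation layout: qpos (15, or 13 with the torso x/y excluded)
        # + qvel (14) + clipped external contact forces (14 bodies * 6 = 84),
        # giving 111 or 113 dimensions.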
if exclude_current_positions_from_observation:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(111,), dtype=np.float64
)
else:
observation_space = Box(
low=-np.inf, high=np.inf, shape=(113,), dtype=np.float64
)
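        # frame_skip=5: each call to step() advances the MuJoCo simulation by 5 timesteps.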
MuJocoPyEnv.__init__(
self, xml_file, 5, observation_space=observation_space, **kwargs
)
@property
def healthy_reward(self):
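        """Survival bonus: equal to `healthy_reward` on every step when
        `terminate_when_unhealthy` is True, and only while healthy otherwise."""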
return (
float(self.is_healthy or self._terminate_when_unhealthy)
* self._healthy_reward
)
def control_cost(self, action):
control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
return control_cost
@property
def contact_forces(self):
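        """External contact forces reported by MuJoCo (`cfrc_ext`), clipped
        elementwise to `contact_force_range`."""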
raw_contact_forces = self.sim.data.cfrc_ext
min_value, max_value = self._contact_force_range
contact_forces = np.clip(raw_contact_forces, min_value, max_value)
return contact_forces
@property
def contact_cost(self):
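        """Penalty proportional to the sum of squared clipped contact forces."""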
contact_cost = self._contact_cost_weight * np.sum(
np.square(self.contact_forces)
)
return contact_cost
@property
def is_healthy(self):
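        """True while every state value is finite and the torso height (z)
        stays within `healthy_z_range`."""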
state = self.state_vector()
min_z, max_z = self._healthy_z_range
is_healthy = np.isfinite(state).all() and min_z <= state[2] <= max_z
return is_healthy
@property
def terminated(self):
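        """Whether the episode should end: unhealthy while
        `terminate_when_unhealthy` is True."""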
terminated = not self.is_healthy if self._terminate_when_unhealthy else False
return terminated
def step(self, action):
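        """Advance the simulation by `frame_skip` steps and score the transition.

        reward = forward (x) velocity + healthy_reward - control cost - contact cost
        """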
xy_position_before = self.get_body_com("torso")[:2].copy()
self.do_simulation(action, self.frame_skip)
xy_position_after = self.get_body_com("torso")[:2].copy()
xy_velocity = (xy_position_after - xy_position_before) / self.dt
x_velocity, y_velocity = xy_velocity
ctrl_cost = self.control_cost(action)
contact_cost = self.contact_cost
forward_reward = x_velocity
healthy_reward = self.healthy_reward
rewards = forward_reward + healthy_reward
costs = ctrl_cost + contact_cost
reward = rewards - costs
terminated = self.terminated
observation = self._get_obs()
info = {
"reward_forward": forward_reward,
"reward_ctrl": -ctrl_cost,
"reward_contact": -contact_cost,
"reward_survive": healthy_reward,
"x_position": xy_position_after[0],
"y_position": xy_position_after[1],
"distance_from_origin": np.linalg.norm(xy_position_after, ord=2),
"x_velocity": x_velocity,
"y_velocity": y_velocity,
"forward_reward": forward_reward,
}
return observation, reward, terminated, False, info
def _get_obs(self):
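        """Concatenate joint positions (optionally without the torso x/y),
        joint velocities, and clipped contact forces."""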
position = self.sim.data.qpos.flat.copy()
velocity = self.sim.data.qvel.flat.copy()
contact_force = self.contact_forces.flat.copy()
if self._exclude_current_positions_from_observation:
position = position[2:]
observations = np.concatenate((position, velocity, contact_force))
return observations
def reset_model(self):
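        """Reset to the initial state with uniform noise on positions and
        scaled Gaussian noise on velocities."""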
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
qpos = self.init_qpos + self.np_random.uniform(
low=noise_low, high=noise_high, size=self.model.nq
)
qvel = (
self.init_qvel
+ self._reset_noise_scale * self.np_random.standard_normal(self.model.nv)
)
self.set_state(qpos, qvel)
observation = self._get_obs()
return observation
def viewer_setup(self):
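        """Apply DEFAULT_CAMERA_CONFIG to the viewer camera."""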
assert self.viewer is not None
for key, value in DEFAULT_CAMERA_CONFIG.items():
if isinstance(value, np.ndarray):
getattr(self.viewer.cam, key)[:] = value
else:
setattr(self.viewer.cam, key, value)
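

# --- Usage sketch (illustrative, not part of the upstream module) ---
# A minimal random-policy rollout, assuming this class is registered under the
# "Ant-v3" id and that the mujoco-py backend is installed. It relies on the
# 5-tuple step API returned above (obs, reward, terminated, truncated, info).
if __name__ == "__main__":
    import gym

    env = gym.make("Ant-v3")
    observation, info = env.reset(seed=0)
    for _ in range(1000):
        action = env.action_space.sample()  # random actions as a placeholder policy
        observation, reward, terminated, truncated, info = env.step(action)
        if terminated or truncated:
            observation, info = env.reset()
    env.close()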