Files
Gymnasium/gym/envs/mujoco/mujoco_env.py

197 lines
6.2 KiB
Python
Raw Normal View History

from collections import OrderedDict
2016-04-30 22:47:51 -07:00
import os
2016-04-27 08:00:58 -07:00
2016-04-30 22:47:51 -07:00
from gym import error, spaces
[WIP] add support for seeding environments (#135) * Make environments seedable * Fix monitor bugs - Set monitor_id before setting the infix. This was a bug that would yield incorrect results with multiple monitors. - Remove extra pid from stats recorder filename. This should be purely cosmetic. * Start uploading seeds in episode_batch * Fix _bigint_from_bytes for python3 * Set seed explicitly in random_agent * Pass through seed argument * Also pass through random state to spaces * Pass random state into the observation/action spaces * Make all _seed methods return the list of used seeds * Switch over to np.random where possible * Start hashing seeds, and also seed doom engine * Fixup seeding determinism in many cases * Seed before loading the ROM * Make seeding more Python3 friendly * Make the MuJoCo skipping a bit more forgiving * Remove debugging PDB calls * Make setInt argument into raw bytes * Validate and upload seeds * Skip box2d * Make seeds smaller, and change representation of seeds in upload * Handle long seeds * Fix RandomAgent example to be deterministic * Handle integer types correctly in Python2 and Python3 * Try caching pip * Try adding swap * Add df and free calls * Bump swap * Bump swap size * Try setting overcommit * Try other sysctls * Try fixing overcommit * Try just setting overcommit_memory=1 * Add explanatory comment * Add what's new section to readme * BUG: Mark ElevatorAction-ram-v0 as non-deterministic for now * Document seed * Move nondetermistic check into spec
2016-05-29 09:07:09 -07:00
from gym.utils import seeding
2016-04-27 08:00:58 -07:00
import numpy as np
2016-04-30 22:47:51 -07:00
from os import path
2016-04-27 08:00:58 -07:00
import gym
try:
import mujoco_py
except ImportError as e:
2021-07-29 02:26:34 +02:00
raise error.DependencyNotInstalled(
"{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(
e
)
)
2016-04-27 08:00:58 -07:00
2018-05-30 01:39:49 -07:00
DEFAULT_SIZE = 500
def convert_observation_to_space(observation):
if isinstance(observation, dict):
2021-07-29 15:39:42 -04:00
space = spaces.Dict(
OrderedDict(
[
(key, convert_observation_to_space(value))
for key, value in observation.items()
]
)
)
elif isinstance(observation, np.ndarray):
2021-07-29 02:26:34 +02:00
low = np.full(observation.shape, -float("inf"), dtype=np.float32)
high = np.full(observation.shape, float("inf"), dtype=np.float32)
space = spaces.Box(low, high, dtype=observation.dtype)
else:
raise NotImplementedError(type(observation), observation)
return space
2016-04-27 08:00:58 -07:00
class MujocoEnv(gym.Env):
2021-07-29 02:26:34 +02:00
"""Superclass for all MuJoCo environments."""
2016-04-30 22:47:51 -07:00
def __init__(self, model_path, frame_skip):
2016-04-27 08:00:58 -07:00
if model_path.startswith("/"):
fullpath = model_path
else:
fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path)
2016-04-30 22:47:51 -07:00
if not path.exists(fullpath):
raise OSError(f"File {fullpath} does not exist")
self.frame_skip = frame_skip
self.model = mujoco_py.load_model_from_path(fullpath)
self.sim = mujoco_py.MjSim(self.model)
self.data = self.sim.data
2016-04-27 08:00:58 -07:00
self.viewer = None
2018-05-30 01:39:49 -07:00
self._viewers = {}
2016-04-27 08:00:58 -07:00
self.metadata = {
2021-07-29 02:26:34 +02:00
"render.modes": ["human", "rgb_array", "depth_array"],
"video.frames_per_second": int(np.round(1.0 / self.dt)),
2016-04-27 08:00:58 -07:00
}
self.init_qpos = self.sim.data.qpos.ravel().copy()
self.init_qvel = self.sim.data.qvel.ravel().copy()
self._set_action_space()
action = self.action_space.sample()
observation, _reward, done, _info = self.step(action)
2016-04-30 22:47:51 -07:00
assert not done
self._set_observation_space(observation)
self.seed()
def _set_action_space(self):
bounds = self.model.actuator_ctrlrange.copy().astype(np.float32)
low, high = bounds.T
self.action_space = spaces.Box(low=low, high=high, dtype=np.float32)
return self.action_space
2016-04-30 22:47:51 -07:00
def _set_observation_space(self, observation):
self.observation_space = convert_observation_to_space(observation)
return self.observation_space
def seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
[WIP] add support for seeding environments (#135) * Make environments seedable * Fix monitor bugs - Set monitor_id before setting the infix. This was a bug that would yield incorrect results with multiple monitors. - Remove extra pid from stats recorder filename. This should be purely cosmetic. * Start uploading seeds in episode_batch * Fix _bigint_from_bytes for python3 * Set seed explicitly in random_agent * Pass through seed argument * Also pass through random state to spaces * Pass random state into the observation/action spaces * Make all _seed methods return the list of used seeds * Switch over to np.random where possible * Start hashing seeds, and also seed doom engine * Fixup seeding determinism in many cases * Seed before loading the ROM * Make seeding more Python3 friendly * Make the MuJoCo skipping a bit more forgiving * Remove debugging PDB calls * Make setInt argument into raw bytes * Validate and upload seeds * Skip box2d * Make seeds smaller, and change representation of seeds in upload * Handle long seeds * Fix RandomAgent example to be deterministic * Handle integer types correctly in Python2 and Python3 * Try caching pip * Try adding swap * Add df and free calls * Bump swap * Bump swap size * Try setting overcommit * Try other sysctls * Try fixing overcommit * Try just setting overcommit_memory=1 * Add explanatory comment * Add what's new section to readme * BUG: Mark ElevatorAction-ram-v0 as non-deterministic for now * Document seed * Move nondetermistic check into spec
2016-05-29 09:07:09 -07:00
return [seed]
2016-04-30 22:47:51 -07:00
# methods to override:
# ----------------------------
def reset_model(self):
"""
Reset the robot degrees of freedom (qpos and qvel).
Implement this in each subclass.
"""
raise NotImplementedError
def viewer_setup(self):
"""
This method is called when the viewer is initialized.
2016-04-30 22:47:51 -07:00
Optionally implement this method, if you need to tinker with camera position
and so forth.
"""
pass
# -----------------------------
def reset(self):
self.sim.reset()
2016-04-30 22:47:51 -07:00
ob = self.reset_model()
return ob
def set_state(self, qpos, qvel):
assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,)
old_state = self.sim.get_state()
2021-07-29 15:39:42 -04:00
new_state = mujoco_py.MjSimState(
old_state.time, qpos, qvel, old_state.act, old_state.udd_state
)
self.sim.set_state(new_state)
self.sim.forward()
2016-04-30 22:47:51 -07:00
2016-04-27 08:00:58 -07:00
@property
def dt(self):
return self.model.opt.timestep * self.frame_skip
def do_simulation(self, ctrl, n_frames):
self.sim.data.ctrl[:] = ctrl
2016-04-27 08:00:58 -07:00
for _ in range(n_frames):
self.sim.step()
2016-04-27 08:00:58 -07:00
2021-07-29 02:26:34 +02:00
def render(
self,
mode="human",
width=DEFAULT_SIZE,
height=DEFAULT_SIZE,
camera_id=None,
camera_name=None,
):
if mode == "rgb_array" or mode == "depth_array":
if camera_id is not None and camera_name is not None:
2021-07-29 15:39:42 -04:00
raise ValueError(
"Both `camera_id` and `camera_name` cannot be"
" specified at the same time."
)
no_camera_specified = camera_name is None and camera_id is None
if no_camera_specified:
2021-07-29 02:26:34 +02:00
camera_name = "track"
2019-08-23 15:39:03 -07:00
if camera_id is None and camera_name in self.model._camera_name2id:
camera_id = self.model.camera_name2id(camera_name)
self._get_viewer(mode).render(width, height, camera_id=camera_id)
2021-07-29 02:26:34 +02:00
if mode == "rgb_array":
# window size used for old mujoco-py:
2018-05-30 01:39:49 -07:00
data = self._get_viewer(mode).read_pixels(width, height, depth=False)
# original image is upside-down, so flip it
return data[::-1, :, :]
2021-07-29 02:26:34 +02:00
elif mode == "depth_array":
self._get_viewer(mode).render(width, height)
# window size used for old mujoco-py:
# Extract depth part of the read_pixels() tuple
data = self._get_viewer(mode).read_pixels(width, height, depth=True)[1]
# original image is upside-down, so flip it
return data[::-1, :]
2021-07-29 02:26:34 +02:00
elif mode == "human":
2018-05-30 01:39:49 -07:00
self._get_viewer(mode).render()
2016-04-27 08:00:58 -07:00
def close(self):
if self.viewer is not None:
# self.viewer.finish()
self.viewer = None
2018-05-30 01:39:49 -07:00
self._viewers = {}
2018-05-30 01:39:49 -07:00
def _get_viewer(self, mode):
self.viewer = self._viewers.get(mode)
2016-04-27 08:00:58 -07:00
if self.viewer is None:
2021-07-29 02:26:34 +02:00
if mode == "human":
2018-05-30 01:39:49 -07:00
self.viewer = mujoco_py.MjViewer(self.sim)
2021-07-29 02:26:34 +02:00
elif mode == "rgb_array" or mode == "depth_array":
self.viewer = mujoco_py.MjRenderContextOffscreen(self.sim, -1)
2016-04-27 08:00:58 -07:00
self.viewer_setup()
2018-05-30 01:39:49 -07:00
self._viewers[mode] = self.viewer
2016-04-27 08:00:58 -07:00
return self.viewer
def get_body_com(self, body_name):
return self.data.get_body_xpos(body_name)
2016-04-27 08:00:58 -07:00
2016-04-30 22:47:51 -07:00
def state_vector(self):
2021-07-29 02:26:34 +02:00
return np.concatenate([self.sim.data.qpos.flat, self.sim.data.qvel.flat])