Files
Gymnasium/gym/envs/robotics/robot_env.py

180 lines
6.1 KiB
Python
Raw Normal View History

2018-02-26 17:35:07 +01:00
import os
import copy
Seeding update (#2422) * Ditch most of the seeding.py and replace np_random with the numpy default_rng. Let's see if tests pass * Updated a bunch of RNG calls from the RandomState API to Generator API * black; didn't expect that, did ya? * Undo a typo * blaaack * More typo fixes * Fixed setting/getting state in multidiscrete spaces * Fix typo, fix a test to work with the new sampling * Correctly (?) pass the randomly generated seed if np_random is called with None as seed * Convert the Discrete sample to a python int (as opposed to np.int64) * Remove some redundant imports * First version of the compatibility layer for old-style RNG. Mainly to trigger tests. * Removed redundant f-strings * Style fixes, removing unused imports * Try to make tests pass by removing atari from the dockerfile * Try to make tests pass by removing atari from the setup * Try to make tests pass by removing atari from the setup * Try to make tests pass by removing atari from the setup * First attempt at deprecating `env.seed` and supporting `env.reset(seed=seed)` instead. Tests should hopefully pass but throw up a million warnings. * black; didn't expect that, didya? * Rename the reset parameter in VecEnvs back to `seed` * Updated tests to use the new seeding method * Removed a bunch of old `seed` calls. Fixed a bug in AsyncVectorEnv * Stop Discrete envs from doing part of the setup (and using the randomness) in init (as opposed to reset) * Add explicit seed to wrappers reset * Remove an accidental return * Re-add some legacy functions with a warning. * Use deprecation instead of regular warnings for the newly deprecated methods/functions
2021-12-08 22:14:15 +01:00
from typing import Optional
2018-02-26 17:35:07 +01:00
import numpy as np
import gym
from gym import error, spaces
from gym.utils import seeding
try:
    import mujoco_py
except ImportError as e:
    # Re-raise as gym's own dependency error so users get the install hint,
    # and chain the original ImportError explicitly so the underlying cause
    # (missing package, missing shared library, ...) stays in the traceback.
    raise error.DependencyNotInstalled(
        f"{e}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)"
    ) from e
2018-02-26 17:35:07 +01:00
# Default value for both the ``width`` and ``height`` arguments of
# ``RobotEnv.render``.
DEFAULT_SIZE = 500
2018-02-26 17:35:07 +01:00
2021-07-29 02:26:34 +02:00
2018-02-26 17:35:07 +01:00
class RobotEnv(gym.GoalEnv):
    """Base class for goal-based robot environments backed by a mujoco_py simulation.

    The constructor loads a MuJoCo model, creates the simulator, and derives the
    action and observation spaces. Subclasses supply the task-specific pieces by
    overriding the methods in the "Extension methods" section: ``_get_obs``,
    ``_set_action``, ``_is_success`` and ``_sample_goal`` are mandatory, the
    remaining hooks are optional.
    """

    def __init__(self, model_path, initial_qpos, n_actions, n_substeps):
        """Load the model, create the simulator and define the spaces.

        Args:
            model_path: Path to the MuJoCo model file. An absolute path is used
                as-is; anything else is resolved relative to the ``assets``
                directory located next to this module.
            initial_qpos: Forwarded unchanged to ``_env_setup``.
            n_actions: Length of the action vector; the action space is a
                ``Box`` of that shape bounded to ``[-1, 1]``.
            n_substeps: Passed to ``mujoco_py.MjSim`` as ``nsubsteps``.

        Raises:
            OSError: If the resolved model file does not exist.
        """
        # Accept os.PathLike as well as str, and detect absolute paths in a
        # platform-independent way instead of testing for a leading "/".
        model_path = os.fspath(model_path)
        if os.path.isabs(model_path):
            fullpath = model_path
        else:
            fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path)
        if not os.path.exists(fullpath):
            raise OSError(f"File {fullpath} does not exist")

        model = mujoco_py.load_model_from_path(fullpath)
        self.sim = mujoco_py.MjSim(model, nsubsteps=n_substeps)
        self.viewer = None
        # One cached viewer per render mode; see ``_get_viewer``.
        self._viewers = {}

        # ``self.dt`` reads from ``self.sim``, so this must come after the
        # simulator has been created.
        self.metadata = {
            "render.modes": ["human", "rgb_array"],
            "video.frames_per_second": int(np.round(1.0 / self.dt)),
        }

        # Let the subclass configure the simulation first, then snapshot the
        # resulting state: ``_reset_sim`` restores exactly this snapshot.
        self._env_setup(initial_qpos=initial_qpos)
        self.initial_state = copy.deepcopy(self.sim.get_state())

        # A goal and one observation are needed up front because the
        # observation space is sized from the shapes of a sample observation.
        self.goal = self._sample_goal()
        obs = self._get_obs()

        self.action_space = spaces.Box(-1.0, 1.0, shape=(n_actions,), dtype="float32")
        # NOTE(review): the desired-goal space is sized from the *achieved*
        # goal; presumably both always share a shape -- confirm for subclasses.
        self.observation_space = spaces.Dict(
            dict(
                desired_goal=spaces.Box(
                    -np.inf, np.inf, shape=obs["achieved_goal"].shape, dtype="float32"
                ),
                achieved_goal=spaces.Box(
                    -np.inf, np.inf, shape=obs["achieved_goal"].shape, dtype="float32"
                ),
                observation=spaces.Box(
                    -np.inf, np.inf, shape=obs["observation"].shape, dtype="float32"
                ),
            )
        )

    @property
    def dt(self):
        """The model's timestep multiplied by the simulator's substep count."""
        return self.sim.model.opt.timestep * self.sim.nsubsteps

    # Env methods
    # ----------------------------

    def step(self, action):
        """Apply ``action``, step the simulation once and report the outcome.

        Args:
            action: Array-like whose shape must equal ``self.action_space.shape``.
                Values are clipped to the action-space bounds before use.

        Returns:
            A tuple ``(obs, reward, done, info)``. ``done`` is always ``False``
            here, and ``info`` holds the single key ``"is_success"``.

        Raises:
            ValueError: If the shape of ``action`` does not match the action space.
        """
        if np.array(action).shape != self.action_space.shape:
            raise ValueError("Action dimension mismatch")

        action = np.clip(action, self.action_space.low, self.action_space.high)
        self._set_action(action)
        self.sim.step()
        self._step_callback()
        obs = self._get_obs()

        done = False
        info = {
            "is_success": self._is_success(obs["achieved_goal"], self.goal),
        }
        reward = self.compute_reward(obs["achieved_goal"], self.goal, info)
        return obs, reward, done, info

    def reset(self, seed: Optional[int] = None):
        """Reset the simulation, sample a new goal and return the first observation.

        Args:
            seed: Forwarded to the parent class's ``reset``.

        Returns:
            The observation produced by ``_get_obs`` after the reset.
        """
        # Attempt to reset the simulator. Since we randomize initial conditions, it
        # is possible to get into a state with numerical issues (e.g. due to
        # penetration or gimbal lock) or we may not achieve an initial condition
        # (e.g. an object is within the hand). In this case, we just keep
        # randomizing until we eventually achieve a valid initial configuration.
        super().reset(seed=seed)
        did_reset_sim = False
        while not did_reset_sim:
            did_reset_sim = self._reset_sim()
        # Store a copy so the environment's goal does not alias whatever
        # array ``_sample_goal`` returned.
        self.goal = self._sample_goal().copy()
        obs = self._get_obs()
        return obs

    def close(self):
        """Drop the current viewer and the per-mode viewer cache.

        The state is cleared unconditionally, so calling this repeatedly, or on
        an instance whose constructor failed before the viewer attributes were
        created, is harmless.
        """
        # NOTE: viewers are released only by dropping the references held
        # here; no explicit ``finish()`` call is made on them.
        self.viewer = None
        self._viewers = {}

    def render(self, mode="human", width=DEFAULT_SIZE, height=DEFAULT_SIZE):
        """Render the current simulation state.

        Args:
            mode: ``"human"`` renders through the interactive viewer;
                ``"rgb_array"`` renders offscreen and returns the pixels.
                Any other value only runs ``_render_callback``.
            width: Passed to the offscreen viewer (``"rgb_array"`` mode only).
            height: Passed to the offscreen viewer (``"rgb_array"`` mode only).

        Returns:
            In ``"rgb_array"`` mode, the pixel data read from the viewer with
            its first axis reversed; ``None`` otherwise.
        """
        self._render_callback()
        if mode == "rgb_array":
            # Look the viewer up once and reuse it for both calls.
            viewer = self._get_viewer(mode)
            viewer.render(width, height)
            # window size used for old mujoco-py:
            data = viewer.read_pixels(width, height, depth=False)
            # original image is upside-down, so flip it
            return data[::-1, :, :]
        if mode == "human":
            self._get_viewer(mode).render()
        return None

    def _get_viewer(self, mode):
        """Return the viewer cached for ``mode``, creating it on first use.

        The returned viewer is also stored in ``self.viewer``. When a viewer is
        created, ``_viewer_setup`` is called once with ``self.viewer`` already
        pointing at it. For a mode other than ``"human"`` or ``"rgb_array"``
        no viewer is created: ``_viewer_setup`` still runs with ``self.viewer``
        set to ``None``, and ``None`` is cached and returned.
        """
        self.viewer = self._viewers.get(mode)
        if self.viewer is None:
            if mode == "human":
                self.viewer = mujoco_py.MjViewer(self.sim)
            elif mode == "rgb_array":
                self.viewer = mujoco_py.MjRenderContextOffscreen(self.sim, device_id=-1)
            self._viewer_setup()
            self._viewers[mode] = self.viewer
        return self.viewer

    # Extension methods
    # ----------------------------

    def _reset_sim(self):
        """Resets a simulation and indicates whether or not it was successful.

        If a reset was unsuccessful (e.g. if a randomized state caused an error in
        the simulation), this method should indicate such a failure by returning
        False. In such a case, this method will be called again to attempt the
        reset again.

        This default implementation restores the state captured at construction
        time and always returns True.
        """
        self.sim.set_state(self.initial_state)
        self.sim.forward()
        return True

    def _get_obs(self):
        """Returns the observation."""
        raise NotImplementedError()

    def _set_action(self, action):
        """Applies the given action to the simulation."""
        raise NotImplementedError()

    def _is_success(self, achieved_goal, desired_goal):
        """Indicates whether or not the achieved goal successfully achieved the desired goal."""
        raise NotImplementedError()

    def _sample_goal(self):
        """Samples a new goal and returns it."""
        raise NotImplementedError()

    def _env_setup(self, initial_qpos):
        """Initial configuration of the environment. Can be used to configure initial state
        and extract information from the simulation.
        """

    def _viewer_setup(self):
        """Initial configuration of the viewer. Can be used to set the camera position,
        for example.
        """

    def _render_callback(self):
        """A custom callback that is called before rendering. Can be used
        to implement custom visualizations.
        """

    def _step_callback(self):
        """A custom callback that is called after stepping the simulation. Can be used
        to enforce additional constraints on the simulation state.
        """