Mirror of https://github.com/Farama-Foundation/Gymnasium.git (synced 2025-08-01 06:07:08 +00:00)
Migrate to mujoco-py 1.50 (#834)
* all envs run offscreen
* render works
* changed mujoco-py version
* Bump versions
* Update version and README
* Same versioning for all mujoco envs
* Fix typo
* Fix version
* Bump version again
* Revert "Fix version" (this reverts commit decc5779811801deb6ae9fad697dfe247d2bdd94)
Committed by GitHub. Parent: 0c91364cd4. Commit: 6af4a5b9b2.
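Most of the diff below is a mechanical consequence of the mujoco-py 1.50 API: models are loaded with load_model_from_path, mutable simulation state moves from model.data to an MjSim object (sim.data), and the affected environment ids bump to -v2. A minimal sketch of the new loading and data-access pattern (the XML path is a placeholder, not a file from this commit):

.. code:: python

    import mujoco_py

    # was: model = mujoco_py.MjModel('model.xml'); data = model.data
    model = mujoco_py.load_model_from_path('model.xml')  # placeholder path
    sim = mujoco_py.MjSim(model)

    sim.data.ctrl[:] = 0.0               # actuators now live on sim.data, not model.data
    sim.step()                           # was: model.step()
    print(sim.data.qpos, sim.data.qvel)  # observations are read from sim.data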
@@ -4,7 +4,7 @@ OpenAI Gym
 **OpenAI Gym is a toolkit for developing and comparing reinforcement learning algorithms.** This is the ``gym`` open-source library, which gives you access to a standardized set of environments.
 
 .. image:: https://travis-ci.org/openai/gym.svg?branch=master
     :target: https://travis-ci.org/openai/gym
 
 `See What's New section below <#what-s-new>`_
 
@@ -126,7 +126,7 @@ fake display. The easiest way to do this is by running under
 
 .. code:: shell
 
     xvfb-run -s "-screen 0 1400x900x24" bash
 
 Installing dependencies for specific environments
 -------------------------------------------------
@@ -261,6 +261,10 @@ We are using `pytest <http://doc.pytest.org>`_ for tests. You can run them via:
 What's new
 ==========
 
+- 2018-01-24: All continuous control environments now use mujoco_py >= 1.50.
+  Versions have been updated accordingly to -v2, e.g. HalfCheetah-v2. Performance
+  should be similar (see https://github.com/openai/gym/pull/834) but there are likely
+  some differences due to changes in MuJoCo.
 - 2017-06-16: Make env.spec into a property to fix a bug that occurs
   when you try to print out an unregistered Env.
 - 2017-05-13: BACKWARDS INCOMPATIBILITY: The Atari environments are now at
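For reference, a quick smoke test of the renamed ids (a sketch; assumes mujoco_py >= 1.50 and the MuJoCo binaries are installed):

.. code:: python

    import gym

    env = gym.make('HalfCheetah-v2')   # formerly HalfCheetah-v1
    ob = env.reset()
    ob, reward, done, info = env.step(env.action_space.sample())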
@@ -204,89 +204,89 @@ register(
 # 2D
 
 register(
-    id='Reacher-v1',
+    id='Reacher-v2',
     entry_point='gym.envs.mujoco:ReacherEnv',
     max_episode_steps=50,
     reward_threshold=-3.75,
 )
 
 register(
-    id='Pusher-v0',
+    id='Pusher-v2',
     entry_point='gym.envs.mujoco:PusherEnv',
     max_episode_steps=100,
     reward_threshold=0.0,
 )
 
 register(
-    id='Thrower-v0',
+    id='Thrower-v2',
     entry_point='gym.envs.mujoco:ThrowerEnv',
     max_episode_steps=100,
     reward_threshold=0.0,
 )
 
 register(
-    id='Striker-v0',
+    id='Striker-v2',
     entry_point='gym.envs.mujoco:StrikerEnv',
     max_episode_steps=100,
     reward_threshold=0.0,
 )
 
 register(
-    id='InvertedPendulum-v1',
+    id='InvertedPendulum-v2',
     entry_point='gym.envs.mujoco:InvertedPendulumEnv',
     max_episode_steps=1000,
     reward_threshold=950.0,
 )
 
 register(
-    id='InvertedDoublePendulum-v1',
+    id='InvertedDoublePendulum-v2',
     entry_point='gym.envs.mujoco:InvertedDoublePendulumEnv',
     max_episode_steps=1000,
     reward_threshold=9100.0,
 )
 
 register(
-    id='HalfCheetah-v1',
+    id='HalfCheetah-v2',
     entry_point='gym.envs.mujoco:HalfCheetahEnv',
     max_episode_steps=1000,
     reward_threshold=4800.0,
 )
 
 register(
-    id='Hopper-v1',
+    id='Hopper-v2',
     entry_point='gym.envs.mujoco:HopperEnv',
     max_episode_steps=1000,
     reward_threshold=3800.0,
 )
 
 register(
-    id='Swimmer-v1',
+    id='Swimmer-v2',
     entry_point='gym.envs.mujoco:SwimmerEnv',
     max_episode_steps=1000,
     reward_threshold=360.0,
 )
 
 register(
-    id='Walker2d-v1',
+    id='Walker2d-v2',
     max_episode_steps=1000,
     entry_point='gym.envs.mujoco:Walker2dEnv',
 )
 
 register(
-    id='Ant-v1',
+    id='Ant-v2',
     entry_point='gym.envs.mujoco:AntEnv',
     max_episode_steps=1000,
     reward_threshold=6000.0,
 )
 
 register(
-    id='Humanoid-v1',
+    id='Humanoid-v2',
     entry_point='gym.envs.mujoco:HumanoidEnv',
     max_episode_steps=1000,
 )
 
 register(
-    id='HumanoidStandup-v1',
+    id='HumanoidStandup-v2',
     entry_point='gym.envs.mujoco:HumanoidStandupEnv',
     max_episode_steps=1000,
 )
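The registrations above all follow the same pattern; for illustration only, a hypothetical external environment (both the id and the entry point below are made up) would be registered the same way:

.. code:: python

    from gym.envs.registration import register

    register(
        id='MyCheetah-v2',                            # hypothetical id
        entry_point='my_package.envs:MyCheetahEnv',   # hypothetical module path
        max_episode_steps=1000,
        reward_threshold=4800.0,
    )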
@@ -14,7 +14,7 @@ class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
         forward_reward = (xposafter - xposbefore)/self.dt
         ctrl_cost = .5 * np.square(a).sum()
         contact_cost = 0.5 * 1e-3 * np.sum(
-            np.square(np.clip(self.model.data.cfrc_ext, -1, 1)))
+            np.square(np.clip(self.sim.data.cfrc_ext, -1, 1)))
         survive_reward = 1.0
         reward = forward_reward - ctrl_cost - contact_cost + survive_reward
         state = self.state_vector()
@@ -30,9 +30,9 @@ class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
 
     def _get_obs(self):
         return np.concatenate([
-            self.model.data.qpos.flat[2:],
-            self.model.data.qvel.flat,
-            np.clip(self.model.data.cfrc_ext, -1, 1).flat,
+            self.sim.data.qpos.flat[2:],
+            self.sim.data.qvel.flat,
+            np.clip(self.sim.data.cfrc_ext, -1, 1).flat,
         ])
 
     def reset_model(self):
@@ -8,9 +8,9 @@ class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle):
         utils.EzPickle.__init__(self)
 
     def _step(self, action):
-        xposbefore = self.model.data.qpos[0, 0]
+        xposbefore = self.sim.data.qpos[0]
         self.do_simulation(action, self.frame_skip)
-        xposafter = self.model.data.qpos[0, 0]
+        xposafter = self.sim.data.qpos[0]
         ob = self._get_obs()
         reward_ctrl = - 0.1 * np.square(action).sum()
         reward_run = (xposafter - xposbefore)/self.dt
@@ -20,8 +20,8 @@ class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle):
 
     def _get_obs(self):
         return np.concatenate([
-            self.model.data.qpos.flat[1:],
-            self.model.data.qvel.flat,
+            self.sim.data.qpos.flat[1:],
+            self.sim.data.qvel.flat,
         ])
 
     def reset_model(self):
@@ -8,9 +8,9 @@ class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
         utils.EzPickle.__init__(self)
 
     def _step(self, a):
-        posbefore = self.model.data.qpos[0, 0]
+        posbefore = self.sim.data.qpos[0]
         self.do_simulation(a, self.frame_skip)
-        posafter, height, ang = self.model.data.qpos[0:3, 0]
+        posafter, height, ang = self.sim.data.qpos[0:3]
         alive_bonus = 1.0
         reward = (posafter - posbefore) / self.dt
         reward += alive_bonus
@@ -23,8 +23,8 @@ class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
 
     def _get_obs(self):
         return np.concatenate([
-            self.model.data.qpos.flat[1:],
-            np.clip(self.model.data.qvel.flat, -10, 10)
+            self.sim.data.qpos.flat[1:],
+            np.clip(self.sim.data.qvel.flat, -10, 10)
         ])
 
     def reset_model(self):
@@ -2,9 +2,9 @@ import numpy as np
 from gym.envs.mujoco import mujoco_env
 from gym import utils
 
-def mass_center(model):
-    mass = model.body_mass
-    xpos = model.data.xipos
+def mass_center(model, sim):
+    mass = np.expand_dims(model.body_mass, 1)
+    xpos = sim.data.xipos
     return (np.sum(mass * xpos, 0) / np.sum(mass))[0]
 
 class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
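The np.expand_dims call is needed because the new binding exposes body_mass as a 1-D array of shape (nbody,), while xipos is (nbody, 3); expanding the masses to (nbody, 1) lets the weighted sum broadcast per coordinate. A small numpy sketch with made-up values:

.. code:: python

    import numpy as np

    mass = np.array([1.0, 2.0, 3.0])      # stand-in for model.body_mass, shape (nbody,)
    xipos = np.arange(9.0).reshape(3, 3)  # stand-in for sim.data.xipos, shape (nbody, 3)
    com = np.sum(np.expand_dims(mass, 1) * xipos, 0) / np.sum(mass)
    print(com[0])                         # x coordinate, as returned by mass_center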
@@ -13,7 +13,7 @@ class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
         utils.EzPickle.__init__(self)
 
     def _get_obs(self):
-        data = self.model.data
+        data = self.sim.data
         return np.concatenate([data.qpos.flat[2:],
                                data.qvel.flat,
                                data.cinert.flat,
@@ -22,17 +22,17 @@ class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
                                data.cfrc_ext.flat])
 
     def _step(self, a):
-        pos_before = mass_center(self.model)
+        pos_before = mass_center(self.model, self.sim)
         self.do_simulation(a, self.frame_skip)
-        pos_after = mass_center(self.model)
+        pos_after = mass_center(self.model, self.sim)
         alive_bonus = 5.0
-        data = self.model.data
+        data = self.sim.data
         lin_vel_cost = 0.25 * (pos_after - pos_before) / self.model.opt.timestep
         quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
         quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum()
         quad_impact_cost = min(quad_impact_cost, 10)
         reward = lin_vel_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus
-        qpos = self.model.data.qpos
+        qpos = self.sim.data.qpos
         done = bool((qpos[2] < 1.0) or (qpos[2] > 2.0))
         return self._get_obs(), reward, done, dict(reward_linvel=lin_vel_cost, reward_quadctrl=-quad_ctrl_cost, reward_alive=alive_bonus, reward_impact=-quad_impact_cost)
 
@@ -2,18 +2,13 @@ import numpy as np
 from gym.envs.mujoco import mujoco_env
 from gym import utils
 
-def mass_center(model):
-    mass = model.body_mass
-    xpos = model.data.xipos
-    return (np.sum(mass * xpos, 0) / np.sum(mass))[0]
-
 class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
     def __init__(self):
         mujoco_env.MujocoEnv.__init__(self, 'humanoidstandup.xml', 5)
         utils.EzPickle.__init__(self)
 
     def _get_obs(self):
-        data = self.model.data
+        data = self.sim.data
         return np.concatenate([data.qpos.flat[2:],
                                data.qvel.flat,
                                data.cinert.flat,
@@ -23,8 +18,8 @@ class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
 
     def _step(self, a):
         self.do_simulation(a, self.frame_skip)
-        pos_after = self.model.data.qpos[2][0]
-        data = self.model.data
+        pos_after = self.sim.data.qpos[2]
+        data = self.sim.data
         uph_cost = (pos_after - 0) / self.model.opt.timestep
 
         quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
@@ -11,22 +11,22 @@ class InvertedDoublePendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
     def _step(self, action):
         self.do_simulation(action, self.frame_skip)
         ob = self._get_obs()
-        x, _, y = self.model.data.site_xpos[0]
+        x, _, y = self.sim.data.site_xpos[0]
         dist_penalty = 0.01 * x ** 2 + (y - 2) ** 2
-        v1, v2 = self.model.data.qvel[1:3]
+        v1, v2 = self.sim.data.qvel[1:3]
         vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2
         alive_bonus = 10
-        r = (alive_bonus - dist_penalty - vel_penalty)[0]
+        r = alive_bonus - dist_penalty - vel_penalty
         done = bool(y <= 1)
         return ob, r, done, {}
 
     def _get_obs(self):
         return np.concatenate([
-            self.model.data.qpos[:1],  # cart x pos
-            np.sin(self.model.data.qpos[1:]),  # link angles
-            np.cos(self.model.data.qpos[1:]),
-            np.clip(self.model.data.qvel, -10, 10),
-            np.clip(self.model.data.qfrc_constraint, -10, 10)
+            self.sim.data.qpos[:1],  # cart x pos
+            np.sin(self.sim.data.qpos[1:]),  # link angles
+            np.cos(self.sim.data.qpos[1:]),
+            np.clip(self.sim.data.qvel, -10, 10),
+            np.clip(self.sim.data.qfrc_constraint, -10, 10)
         ]).ravel()
 
     def reset_model(self):
@@ -22,7 +22,7 @@ class InvertedPendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
         return self._get_obs()
 
     def _get_obs(self):
-        return np.concatenate([self.model.data.qpos, self.model.data.qvel]).ravel()
+        return np.concatenate([self.sim.data.qpos, self.sim.data.qvel]).ravel()
 
     def viewer_setup(self):
         v = self.viewer
@@ -9,7 +9,6 @@ import six
 
 try:
     import mujoco_py
-    from mujoco_py.mjlib import mjlib
 except ImportError as e:
     raise error.DependencyNotInstalled("{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(e))
 
@@ -25,8 +24,9 @@ class MujocoEnv(gym.Env):
         if not path.exists(fullpath):
             raise IOError("File %s does not exist" % fullpath)
         self.frame_skip = frame_skip
-        self.model = mujoco_py.MjModel(fullpath)
-        self.data = self.model.data
+        self.model = mujoco_py.load_model_from_path(fullpath)
+        self.sim = mujoco_py.MjSim(self.model)
+        self.data = self.sim.data
         self.viewer = None
 
         self.metadata = {
@@ -34,8 +34,8 @@ class MujocoEnv(gym.Env):
             'video.frames_per_second': int(np.round(1.0 / self.dt))
         }
 
-        self.init_qpos = self.model.data.qpos.ravel().copy()
-        self.init_qvel = self.model.data.qvel.ravel().copy()
+        self.init_qpos = self.sim.data.qpos.ravel().copy()
+        self.init_qvel = self.sim.data.qvel.ravel().copy()
         observation, _reward, done, _info = self._step(np.zeros(self.model.nu))
         assert not done
         self.obs_dim = observation.size
@@ -76,33 +76,33 @@ class MujocoEnv(gym.Env):
     # -----------------------------
 
     def _reset(self):
-        mjlib.mj_resetData(self.model.ptr, self.data.ptr)
+        self.sim.reset()
         ob = self.reset_model()
         if self.viewer is not None:
-            self.viewer.autoscale()
             self.viewer_setup()
         return ob
 
     def set_state(self, qpos, qvel):
         assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,)
-        self.model.data.qpos = qpos
-        self.model.data.qvel = qvel
-        self.model._compute_subtree() # pylint: disable=W0212
-        self.model.forward()
+        old_state = self.sim.get_state()
+        new_state = mujoco_py.MjSimState(old_state.time, qpos, qvel,
+                                         old_state.act, old_state.udd_state)
+        self.sim.set_state(new_state)
+        self.sim.forward()
 
     @property
     def dt(self):
         return self.model.opt.timestep * self.frame_skip
 
     def do_simulation(self, ctrl, n_frames):
-        self.model.data.ctrl = ctrl
+        self.sim.data.ctrl[:] = ctrl
         for _ in range(n_frames):
-            self.model.step()
+            self.sim.step()
 
     def _render(self, mode='human', close=False):
         if close:
             if self.viewer is not None:
-                self._get_viewer().finish()
+                self._get_viewer()
                 self.viewer = None
             return
 
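With state now owned by MjSim, set_state() rebuilds an MjSimState instead of assigning into model.data. A minimal usage sketch (assumes a working MuJoCo install; HopperEnv is one of the envs registered above):

.. code:: python

    from gym.envs.mujoco import HopperEnv

    env = HopperEnv()
    env.reset()
    snapshot = env.sim.get_state()               # MjSimState(time, qpos, qvel, act, udd_state)
    env.step(env.action_space.sample())          # advance the simulation
    env.set_state(snapshot.qpos, snapshot.qvel)  # restore positions/velocities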
@@ -111,30 +111,19 @@ class MujocoEnv(gym.Env):
             data, width, height = self._get_viewer().get_image()
             return np.fromstring(data, dtype='uint8').reshape(height, width, 3)[::-1, :, :]
         elif mode == 'human':
-            self._get_viewer().loop_once()
+            self._get_viewer().render()
 
     def _get_viewer(self):
         if self.viewer is None:
-            self.viewer = mujoco_py.MjViewer()
-            self.viewer.start()
-            self.viewer.set_model(self.model)
+            self.viewer = mujoco_py.MjViewer(self.sim)
             self.viewer_setup()
         return self.viewer
 
     def get_body_com(self, body_name):
-        idx = self.model.body_names.index(six.b(body_name))
-        return self.model.data.com_subtree[idx]
-
-    def get_body_comvel(self, body_name):
-        idx = self.model.body_names.index(six.b(body_name))
-        return self.model.body_comvels[idx]
-
-    def get_body_xmat(self, body_name):
-        idx = self.model.body_names.index(six.b(body_name))
-        return self.model.data.xmat[idx].reshape((3, 3))
+        return self.data.get_body_xpos(body_name)
 
     def state_vector(self):
         return np.concatenate([
-            self.model.data.qpos.flat,
-            self.model.data.qvel.flat
+            self.sim.data.qpos.flat,
+            self.sim.data.qvel.flat
         ])
@@ -3,7 +3,6 @@ from gym import utils
 from gym.envs.mujoco import mujoco_env
 
 import mujoco_py
-from mujoco_py.mjlib import mjlib
 
 class PusherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
     def __init__(self):
@@ -50,8 +49,8 @@ class PusherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
 
     def _get_obs(self):
         return np.concatenate([
-            self.model.data.qpos.flat[:7],
-            self.model.data.qvel.flat[:7],
+            self.sim.data.qpos.flat[:7],
+            self.sim.data.qvel.flat[:7],
             self.get_body_com("tips_arm"),
             self.get_body_com("object"),
             self.get_body_com("goal"),
@@ -33,11 +33,11 @@ class ReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
         return self._get_obs()
 
     def _get_obs(self):
-        theta = self.model.data.qpos.flat[:2]
+        theta = self.sim.data.qpos.flat[:2]
         return np.concatenate([
             np.cos(theta),
             np.sin(theta),
-            self.model.data.qpos.flat[2:],
-            self.model.data.qvel.flat[:2],
+            self.sim.data.qpos.flat[2:],
+            self.sim.data.qvel.flat[:2],
             self.get_body_com("fingertip") - self.get_body_com("target")
         ])
@@ -67,8 +67,8 @@ class StrikerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
 
     def _get_obs(self):
         return np.concatenate([
-            self.model.data.qpos.flat[:7],
-            self.model.data.qvel.flat[:7],
+            self.sim.data.qpos.flat[:7],
+            self.sim.data.qvel.flat[:7],
             self.get_body_com("tips_arm"),
             self.get_body_com("object"),
             self.get_body_com("goal"),
@@ -9,9 +9,9 @@ class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
 
     def _step(self, a):
         ctrl_cost_coeff = 0.0001
-        xposbefore = self.model.data.qpos[0, 0]
+        xposbefore = self.sim.data.qpos[0]
         self.do_simulation(a, self.frame_skip)
-        xposafter = self.model.data.qpos[0, 0]
+        xposafter = self.sim.data.qpos[0]
         reward_fwd = (xposafter - xposbefore) / self.dt
         reward_ctrl = - ctrl_cost_coeff * np.square(a).sum()
         reward = reward_fwd + reward_ctrl
@@ -19,8 +19,8 @@ class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
         return ob, reward, False, dict(reward_fwd=reward_fwd, reward_ctrl=reward_ctrl)
 
     def _get_obs(self):
-        qpos = self.model.data.qpos
-        qvel = self.model.data.qvel
+        qpos = self.sim.data.qpos
+        qvel = self.sim.data.qvel
         return np.concatenate([qpos.flat[2:], qvel.flat])
 
     def reset_model(self):
@@ -52,8 +52,8 @@ class ThrowerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
 
     def _get_obs(self):
         return np.concatenate([
-            self.model.data.qpos.flat[:7],
-            self.model.data.qvel.flat[:7],
+            self.sim.data.qpos.flat[:7],
+            self.sim.data.qvel.flat[:7],
             self.get_body_com("r_wrist_roll_link"),
             self.get_body_com("ball"),
             self.get_body_com("goal"),
@@ -9,9 +9,9 @@ class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
         utils.EzPickle.__init__(self)
 
     def _step(self, a):
-        posbefore = self.model.data.qpos[0, 0]
+        posbefore = self.sim.data.qpos[0]
         self.do_simulation(a, self.frame_skip)
-        posafter, height, ang = self.model.data.qpos[0:3, 0]
+        posafter, height, ang = self.sim.data.qpos[0:3]
         alive_bonus = 1.0
         reward = ((posafter - posbefore) / self.dt)
         reward += alive_bonus
@@ -22,8 +22,8 @@ class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
         return ob, reward, done, {}
 
     def _get_obs(self):
-        qpos = self.model.data.qpos
-        qvel = self.model.data.qvel
+        qpos = self.sim.data.qpos
+        qvel = self.sim.data.qvel
         return np.concatenate([qpos[1:], np.clip(qvel, -10, 10)]).ravel()
 
     def reset_model(self):
@@ -1 +1 @@
-VERSION = '0.9.3'
+VERSION = '0.9.5'
setup.py (2 changed lines)
@@ -11,7 +11,7 @@ extras = {
   'board_game' : ['pachi-py>=0.0.19'],
   'box2d': ['Box2D-kengz'],
   'classic_control': ['PyOpenGL'],
-  'mujoco': ['mujoco_py<1.0.0,>=0.4.3', 'imageio'],
+  'mujoco': ['mujoco_py>=1.50', 'imageio'],
   'parameter_tuning': ['keras', 'theano'],
 }
 
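With the extra bumped, a fresh install of the MuJoCo environments pulls in the new binding, e.g. pip install 'gym[mujoco]' (or pip install -e '.[mujoco]' from a checkout); mujoco_py 1.50 additionally expects the MuJoCo binaries and license key to be set up separately, per the instructions at https://github.com/openai/mujoco-py/ referenced in the DependencyNotInstalled hint above.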