Migrate to mujoco-py 1.50 (#834)

* all envs run offscreen

* render works

* changed mujoco-py version

* Bump versions

* Update version and README

* Same versioning for all mujoco envs

* Fix typo

* Fix version

* Bump version again

* Revert "Fix version"

This reverts commit decc5779811801deb6ae9fad697dfe247d2bdd94.
Matthias Plappert, 2018-01-24 15:42:29 -08:00 (committed by GitHub)
parent 0c91364cd4
commit 6af4a5b9b2
18 changed files with 89 additions and 102 deletions

View File

@@ -4,7 +4,7 @@ OpenAI Gym
**OpenAI Gym is a toolkit for developing and comparing reinforcement learning algorithms.** This is the ``gym`` open-source library, which gives you access to a standardized set of environments.

.. image:: https://travis-ci.org/openai/gym.svg?branch=master
    :target: https://travis-ci.org/openai/gym

`See What's New section below <#what-s-new>`_
@@ -126,7 +126,7 @@ fake display. The easiest way to do this is by running under
.. code:: shell

    xvfb-run -s "-screen 0 1400x900x24" bash

Installing dependencies for specific environments
-------------------------------------------------
@@ -261,6 +261,10 @@ We are using `pytest <http://doc.pytest.org>`_ for tests. You can run them via:
What's new
==========

+- 2018-01-24: All continuous control environments now use mujoco_py >= 1.50.
+  Versions have been updated accordingly to -v2, e.g. HalfCheetah-v2. Performance
+  should be similar (see https://github.com/openai/gym/pull/834) but there are likely
+  some differences due to changes in MuJoCo.
- 2017-06-16: Make env.spec into a property to fix a bug that occurs
  when you try to print out an unregistered Env.
- 2017-05-13: BACKWARDS INCOMPATIBILITY: The Atari environments are now at
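As a sanity check of the renamed ids, the new environments drop into the usual ``gym.make`` loop unchanged (a minimal sketch, assuming mujoco-py >= 1.50 and a working MuJoCo install; any of the -v2 ids from this pull request can stand in for HalfCheetah-v2):

.. code:: python

    import gym

    env = gym.make('HalfCheetah-v2')  # formerly HalfCheetah-v1
    obs = env.reset()
    for _ in range(1000):
        # Random actions, just to confirm that the -v2 registration resolves and steps.
        obs, reward, done, info = env.step(env.action_space.sample())
        if done:
            obs = env.reset()
    env.close()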

View File

@@ -204,89 +204,89 @@ register(
# 2D

register(
-    id='Reacher-v1',
+    id='Reacher-v2',
    entry_point='gym.envs.mujoco:ReacherEnv',
    max_episode_steps=50,
    reward_threshold=-3.75,
)

register(
-    id='Pusher-v0',
+    id='Pusher-v2',
    entry_point='gym.envs.mujoco:PusherEnv',
    max_episode_steps=100,
    reward_threshold=0.0,
)

register(
-    id='Thrower-v0',
+    id='Thrower-v2',
    entry_point='gym.envs.mujoco:ThrowerEnv',
    max_episode_steps=100,
    reward_threshold=0.0,
)

register(
-    id='Striker-v0',
+    id='Striker-v2',
    entry_point='gym.envs.mujoco:StrikerEnv',
    max_episode_steps=100,
    reward_threshold=0.0,
)

register(
-    id='InvertedPendulum-v1',
+    id='InvertedPendulum-v2',
    entry_point='gym.envs.mujoco:InvertedPendulumEnv',
    max_episode_steps=1000,
    reward_threshold=950.0,
)

register(
-    id='InvertedDoublePendulum-v1',
+    id='InvertedDoublePendulum-v2',
    entry_point='gym.envs.mujoco:InvertedDoublePendulumEnv',
    max_episode_steps=1000,
    reward_threshold=9100.0,
)

register(
-    id='HalfCheetah-v1',
+    id='HalfCheetah-v2',
    entry_point='gym.envs.mujoco:HalfCheetahEnv',
    max_episode_steps=1000,
    reward_threshold=4800.0,
)

register(
-    id='Hopper-v1',
+    id='Hopper-v2',
    entry_point='gym.envs.mujoco:HopperEnv',
    max_episode_steps=1000,
    reward_threshold=3800.0,
)

register(
-    id='Swimmer-v1',
+    id='Swimmer-v2',
    entry_point='gym.envs.mujoco:SwimmerEnv',
    max_episode_steps=1000,
    reward_threshold=360.0,
)

register(
-    id='Walker2d-v1',
+    id='Walker2d-v2',
    max_episode_steps=1000,
    entry_point='gym.envs.mujoco:Walker2dEnv',
)

register(
-    id='Ant-v1',
+    id='Ant-v2',
    entry_point='gym.envs.mujoco:AntEnv',
    max_episode_steps=1000,
    reward_threshold=6000.0,
)

register(
-    id='Humanoid-v1',
+    id='Humanoid-v2',
    entry_point='gym.envs.mujoco:HumanoidEnv',
    max_episode_steps=1000,
)

register(
-    id='HumanoidStandup-v1',
+    id='HumanoidStandup-v2',
    entry_point='gym.envs.mujoco:HumanoidStandupEnv',
    max_episode_steps=1000,
)

View File

@@ -14,7 +14,7 @@ class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
        forward_reward = (xposafter - xposbefore)/self.dt
        ctrl_cost = .5 * np.square(a).sum()
        contact_cost = 0.5 * 1e-3 * np.sum(
-            np.square(np.clip(self.model.data.cfrc_ext, -1, 1)))
+            np.square(np.clip(self.sim.data.cfrc_ext, -1, 1)))
        survive_reward = 1.0
        reward = forward_reward - ctrl_cost - contact_cost + survive_reward
        state = self.state_vector()
@@ -30,9 +30,9 @@ class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def _get_obs(self):
        return np.concatenate([
-            self.model.data.qpos.flat[2:],
-            self.model.data.qvel.flat,
-            np.clip(self.model.data.cfrc_ext, -1, 1).flat,
+            self.sim.data.qpos.flat[2:],
+            self.sim.data.qvel.flat,
+            np.clip(self.sim.data.cfrc_ext, -1, 1).flat,
        ])

    def reset_model(self):

View File

@@ -8,9 +8,9 @@ class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle):
        utils.EzPickle.__init__(self)

    def _step(self, action):
-        xposbefore = self.model.data.qpos[0, 0]
+        xposbefore = self.sim.data.qpos[0]
        self.do_simulation(action, self.frame_skip)
-        xposafter = self.model.data.qpos[0, 0]
+        xposafter = self.sim.data.qpos[0]
        ob = self._get_obs()
        reward_ctrl = - 0.1 * np.square(action).sum()
        reward_run = (xposafter - xposbefore)/self.dt
@@ -20,8 +20,8 @@ class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def _get_obs(self):
        return np.concatenate([
-            self.model.data.qpos.flat[1:],
-            self.model.data.qvel.flat,
+            self.sim.data.qpos.flat[1:],
+            self.sim.data.qvel.flat,
        ])

    def reset_model(self):

View File

@@ -8,9 +8,9 @@ class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
        utils.EzPickle.__init__(self)

    def _step(self, a):
-        posbefore = self.model.data.qpos[0, 0]
+        posbefore = self.sim.data.qpos[0]
        self.do_simulation(a, self.frame_skip)
-        posafter, height, ang = self.model.data.qpos[0:3, 0]
+        posafter, height, ang = self.sim.data.qpos[0:3]
        alive_bonus = 1.0
        reward = (posafter - posbefore) / self.dt
        reward += alive_bonus
@@ -23,8 +23,8 @@ class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def _get_obs(self):
        return np.concatenate([
-            self.model.data.qpos.flat[1:],
-            np.clip(self.model.data.qvel.flat, -10, 10)
+            self.sim.data.qpos.flat[1:],
+            np.clip(self.sim.data.qvel.flat, -10, 10)
        ])

    def reset_model(self):

View File

@@ -2,9 +2,9 @@ import numpy as np
from gym.envs.mujoco import mujoco_env
from gym import utils

-def mass_center(model):
-    mass = model.body_mass
-    xpos = model.data.xipos
+def mass_center(model, sim):
+    mass = np.expand_dims(model.body_mass, 1)
+    xpos = sim.data.xipos
    return (np.sum(mass * xpos, 0) / np.sum(mass))[0]

class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
@@ -13,7 +13,7 @@ class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
        utils.EzPickle.__init__(self)

    def _get_obs(self):
-        data = self.model.data
+        data = self.sim.data
        return np.concatenate([data.qpos.flat[2:],
                               data.qvel.flat,
                               data.cinert.flat,
@@ -22,17 +22,17 @@ class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
                               data.cfrc_ext.flat])

    def _step(self, a):
-        pos_before = mass_center(self.model)
+        pos_before = mass_center(self.model, self.sim)
        self.do_simulation(a, self.frame_skip)
-        pos_after = mass_center(self.model)
+        pos_after = mass_center(self.model, self.sim)
        alive_bonus = 5.0
-        data = self.model.data
+        data = self.sim.data
        lin_vel_cost = 0.25 * (pos_after - pos_before) / self.model.opt.timestep
        quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
        quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum()
        quad_impact_cost = min(quad_impact_cost, 10)
        reward = lin_vel_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus
-        qpos = self.model.data.qpos
+        qpos = self.sim.data.qpos
        done = bool((qpos[2] < 1.0) or (qpos[2] > 2.0))
        return self._get_obs(), reward, done, dict(reward_linvel=lin_vel_cost, reward_quadctrl=-quad_ctrl_cost, reward_alive=alive_bonus, reward_impact=-quad_impact_cost)

View File

@@ -2,18 +2,13 @@ import numpy as np
from gym.envs.mujoco import mujoco_env
from gym import utils

-def mass_center(model):
-    mass = model.body_mass
-    xpos = model.data.xipos
-    return (np.sum(mass * xpos, 0) / np.sum(mass))[0]

class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(self):
        mujoco_env.MujocoEnv.__init__(self, 'humanoidstandup.xml', 5)
        utils.EzPickle.__init__(self)

    def _get_obs(self):
-        data = self.model.data
+        data = self.sim.data
        return np.concatenate([data.qpos.flat[2:],
                               data.qvel.flat,
                               data.cinert.flat,
@@ -23,8 +18,8 @@ class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def _step(self, a):
        self.do_simulation(a, self.frame_skip)
-        pos_after = self.model.data.qpos[2][0]
-        data = self.model.data
+        pos_after = self.sim.data.qpos[2]
+        data = self.sim.data
        uph_cost = (pos_after - 0) / self.model.opt.timestep
        quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()

View File

@@ -11,22 +11,22 @@ class InvertedDoublePendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def _step(self, action):
        self.do_simulation(action, self.frame_skip)
        ob = self._get_obs()
-        x, _, y = self.model.data.site_xpos[0]
+        x, _, y = self.sim.data.site_xpos[0]
        dist_penalty = 0.01 * x ** 2 + (y - 2) ** 2
-        v1, v2 = self.model.data.qvel[1:3]
+        v1, v2 = self.sim.data.qvel[1:3]
        vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2
        alive_bonus = 10
-        r = (alive_bonus - dist_penalty - vel_penalty)[0]
+        r = alive_bonus - dist_penalty - vel_penalty
        done = bool(y <= 1)
        return ob, r, done, {}

    def _get_obs(self):
        return np.concatenate([
-            self.model.data.qpos[:1],  # cart x pos
-            np.sin(self.model.data.qpos[1:]),  # link angles
-            np.cos(self.model.data.qpos[1:]),
-            np.clip(self.model.data.qvel, -10, 10),
-            np.clip(self.model.data.qfrc_constraint, -10, 10)
+            self.sim.data.qpos[:1],  # cart x pos
+            np.sin(self.sim.data.qpos[1:]),  # link angles
+            np.cos(self.sim.data.qpos[1:]),
+            np.clip(self.sim.data.qvel, -10, 10),
+            np.clip(self.sim.data.qfrc_constraint, -10, 10)
        ]).ravel()

    def reset_model(self):

View File

@@ -22,7 +22,7 @@ class InvertedPendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
        return self._get_obs()

    def _get_obs(self):
-        return np.concatenate([self.model.data.qpos, self.model.data.qvel]).ravel()
+        return np.concatenate([self.sim.data.qpos, self.sim.data.qvel]).ravel()

    def viewer_setup(self):
        v = self.viewer

View File

@@ -9,7 +9,6 @@ import six
try:
    import mujoco_py
-    from mujoco_py.mjlib import mjlib
except ImportError as e:
    raise error.DependencyNotInstalled("{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(e))
@@ -25,8 +24,9 @@ class MujocoEnv(gym.Env):
        if not path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)
        self.frame_skip = frame_skip
-        self.model = mujoco_py.MjModel(fullpath)
-        self.data = self.model.data
+        self.model = mujoco_py.load_model_from_path(fullpath)
+        self.sim = mujoco_py.MjSim(self.model)
+        self.data = self.sim.data
        self.viewer = None

        self.metadata = {
@@ -34,8 +34,8 @@ class MujocoEnv(gym.Env):
            'video.frames_per_second': int(np.round(1.0 / self.dt))
        }

-        self.init_qpos = self.model.data.qpos.ravel().copy()
-        self.init_qvel = self.model.data.qvel.ravel().copy()
+        self.init_qpos = self.sim.data.qpos.ravel().copy()
+        self.init_qvel = self.sim.data.qvel.ravel().copy()
        observation, _reward, done, _info = self._step(np.zeros(self.model.nu))
        assert not done
        self.obs_dim = observation.size
@@ -76,33 +76,33 @@ class MujocoEnv(gym.Env):
    # -----------------------------

    def _reset(self):
-        mjlib.mj_resetData(self.model.ptr, self.data.ptr)
+        self.sim.reset()
        ob = self.reset_model()
        if self.viewer is not None:
-            self.viewer.autoscale()
            self.viewer_setup()
        return ob

    def set_state(self, qpos, qvel):
        assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,)
-        self.model.data.qpos = qpos
-        self.model.data.qvel = qvel
-        self.model._compute_subtree()  # pylint: disable=W0212
-        self.model.forward()
+        old_state = self.sim.get_state()
+        new_state = mujoco_py.MjSimState(old_state.time, qpos, qvel,
+                                         old_state.act, old_state.udd_state)
+        self.sim.set_state(new_state)
+        self.sim.forward()

    @property
    def dt(self):
        return self.model.opt.timestep * self.frame_skip

    def do_simulation(self, ctrl, n_frames):
-        self.model.data.ctrl = ctrl
+        self.sim.data.ctrl[:] = ctrl
        for _ in range(n_frames):
-            self.model.step()
+            self.sim.step()

    def _render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
-                self._get_viewer().finish()
+                self._get_viewer()
                self.viewer = None
            return
@@ -111,30 +111,19 @@ class MujocoEnv(gym.Env):
            data, width, height = self._get_viewer().get_image()
            return np.fromstring(data, dtype='uint8').reshape(height, width, 3)[::-1, :, :]
        elif mode == 'human':
-            self._get_viewer().loop_once()
+            self._get_viewer().render()

    def _get_viewer(self):
        if self.viewer is None:
-            self.viewer = mujoco_py.MjViewer()
-            self.viewer.start()
-            self.viewer.set_model(self.model)
+            self.viewer = mujoco_py.MjViewer(self.sim)
            self.viewer_setup()
        return self.viewer

    def get_body_com(self, body_name):
-        idx = self.model.body_names.index(six.b(body_name))
-        return self.model.data.com_subtree[idx]
+        return self.data.get_body_xpos(body_name)
-    def get_body_comvel(self, body_name):
-        idx = self.model.body_names.index(six.b(body_name))
-        return self.model.body_comvels[idx]
-    def get_body_xmat(self, body_name):
-        idx = self.model.body_names.index(six.b(body_name))
-        return self.model.data.xmat[idx].reshape((3, 3))

    def state_vector(self):
        return np.concatenate([
-            self.model.data.qpos.flat,
-            self.model.data.qvel.flat
+            self.sim.data.qpos.flat,
+            self.sim.data.qvel.flat
        ])
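Taken together, the mujoco_env changes replace the old ``MjModel``-centric calls with the ``MjSim`` API from mujoco-py 1.50. Below is a condensed sketch of that API, restricted to the calls that appear in the diff above (the XML path is a placeholder, and rendering assumes a display is available):

.. code:: python

    import numpy as np
    import mujoco_py

    # load_model_from_path + MjSim replace MjModel; simulation state lives on sim.data.
    model = mujoco_py.load_model_from_path('/path/to/model.xml')  # placeholder path
    sim = mujoco_py.MjSim(model)

    init_qpos = sim.data.qpos.ravel().copy()
    init_qvel = sim.data.qvel.ravel().copy()

    # Writing state goes through MjSimState + set_state instead of assigning
    # to model.data.qpos/qvel and calling model.forward().
    old_state = sim.get_state()
    new_state = mujoco_py.MjSimState(old_state.time, init_qpos, init_qvel,
                                     old_state.act, old_state.udd_state)
    sim.set_state(new_state)
    sim.forward()

    # Controls are written in place; stepping and resetting happen on the sim.
    sim.data.ctrl[:] = np.zeros(model.nu)
    sim.step()
    sim.reset()

    # The viewer is constructed from the sim and rendered per frame,
    # replacing MjViewer() + start() + set_model() + loop_once().
    viewer = mujoco_py.MjViewer(sim)
    viewer.render()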

View File

@@ -3,7 +3,6 @@ from gym import utils
from gym.envs.mujoco import mujoco_env
import mujoco_py
-from mujoco_py.mjlib import mjlib

class PusherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(self):
@@ -50,8 +49,8 @@ class PusherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def _get_obs(self):
        return np.concatenate([
-            self.model.data.qpos.flat[:7],
-            self.model.data.qvel.flat[:7],
+            self.sim.data.qpos.flat[:7],
+            self.sim.data.qvel.flat[:7],
            self.get_body_com("tips_arm"),
            self.get_body_com("object"),
            self.get_body_com("goal"),

View File

@@ -33,11 +33,11 @@ class ReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
        return self._get_obs()

    def _get_obs(self):
-        theta = self.model.data.qpos.flat[:2]
+        theta = self.sim.data.qpos.flat[:2]
        return np.concatenate([
            np.cos(theta),
            np.sin(theta),
-            self.model.data.qpos.flat[2:],
-            self.model.data.qvel.flat[:2],
+            self.sim.data.qpos.flat[2:],
+            self.sim.data.qvel.flat[:2],
            self.get_body_com("fingertip") - self.get_body_com("target")
        ])

View File

@@ -67,8 +67,8 @@ class StrikerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def _get_obs(self):
        return np.concatenate([
-            self.model.data.qpos.flat[:7],
-            self.model.data.qvel.flat[:7],
+            self.sim.data.qpos.flat[:7],
+            self.sim.data.qvel.flat[:7],
            self.get_body_com("tips_arm"),
            self.get_body_com("object"),
            self.get_body_com("goal"),

View File

@@ -9,9 +9,9 @@ class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def _step(self, a):
        ctrl_cost_coeff = 0.0001
-        xposbefore = self.model.data.qpos[0, 0]
+        xposbefore = self.sim.data.qpos[0]
        self.do_simulation(a, self.frame_skip)
-        xposafter = self.model.data.qpos[0, 0]
+        xposafter = self.sim.data.qpos[0]
        reward_fwd = (xposafter - xposbefore) / self.dt
        reward_ctrl = - ctrl_cost_coeff * np.square(a).sum()
        reward = reward_fwd + reward_ctrl
@@ -19,8 +19,8 @@ class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
        return ob, reward, False, dict(reward_fwd=reward_fwd, reward_ctrl=reward_ctrl)

    def _get_obs(self):
-        qpos = self.model.data.qpos
-        qvel = self.model.data.qvel
+        qpos = self.sim.data.qpos
+        qvel = self.sim.data.qvel
        return np.concatenate([qpos.flat[2:], qvel.flat])

    def reset_model(self):

View File

@@ -52,8 +52,8 @@ class ThrowerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def _get_obs(self):
        return np.concatenate([
-            self.model.data.qpos.flat[:7],
-            self.model.data.qvel.flat[:7],
+            self.sim.data.qpos.flat[:7],
+            self.sim.data.qvel.flat[:7],
            self.get_body_com("r_wrist_roll_link"),
            self.get_body_com("ball"),
            self.get_body_com("goal"),

View File

@@ -9,9 +9,9 @@ class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
        utils.EzPickle.__init__(self)

    def _step(self, a):
-        posbefore = self.model.data.qpos[0, 0]
+        posbefore = self.sim.data.qpos[0]
        self.do_simulation(a, self.frame_skip)
-        posafter, height, ang = self.model.data.qpos[0:3, 0]
+        posafter, height, ang = self.sim.data.qpos[0:3]
        alive_bonus = 1.0
        reward = ((posafter - posbefore) / self.dt)
        reward += alive_bonus
@@ -22,8 +22,8 @@ class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
        return ob, reward, done, {}

    def _get_obs(self):
-        qpos = self.model.data.qpos
-        qvel = self.model.data.qvel
+        qpos = self.sim.data.qpos
+        qvel = self.sim.data.qvel
        return np.concatenate([qpos[1:], np.clip(qvel, -10, 10)]).ravel()

    def reset_model(self):

View File

@@ -1 +1 @@
-VERSION = '0.9.3'
+VERSION = '0.9.5'

View File

@@ -11,7 +11,7 @@ extras = {
  'board_game' : ['pachi-py>=0.0.19'],
  'box2d': ['Box2D-kengz'],
  'classic_control': ['PyOpenGL'],
-  'mujoco': ['mujoco_py<1.0.0,>=0.4.3', 'imageio'],
+  'mujoco': ['mujoco_py>=1.50', 'imageio'],
  'parameter_tuning': ['keras', 'theano'],
}
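With the extras pin updated, the MuJoCo dependencies should now be installable the usual way, e.g. ``pip install 'gym[mujoco]'`` (assuming MuJoCo itself and its license key are already set up as described in the mujoco-py instructions), which pulls in mujoco_py >= 1.50 together with imageio.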