Lint fixes to mujoco environment (#377)

This commit is contained in:
Alex Ray
2017-02-22 17:24:27 -08:00
committed by Tom Brown
parent 6d7e50e67b
commit f724a72006
12 changed files with 31 additions and 32 deletions

View File

@@ -1,4 +1,4 @@
from gym.envs.mujoco.mujoco_env import MujocoEnv
from gym.envs.mujoco.mujoco_env import MujocoEnv
# ^^^^^ so that user gets the correct error
# message if mujoco is not installed correctly
from gym.envs.mujoco.ant import AntEnv

View File

@@ -36,7 +36,7 @@ class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
])
def reset_model(self):
qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq,low=-.1,high=.1)
qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq, low=-.1, high=.1)
qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1
self.set_state(qpos, qvel)
return self._get_obs()

View File

@@ -8,15 +8,15 @@ class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle):
utils.EzPickle.__init__(self)
def _step(self, action):
xposbefore = self.model.data.qpos[0,0]
xposbefore = self.model.data.qpos[0, 0]
self.do_simulation(action, self.frame_skip)
xposafter = self.model.data.qpos[0,0]
xposafter = self.model.data.qpos[0, 0]
ob = self._get_obs()
reward_ctrl = - 0.1 * np.square(action).sum()
reward_run = (xposafter - xposbefore)/self.dt
reward = reward_ctrl + reward_run
done = False
return ob, reward, done, dict(reward_run = reward_run, reward_ctrl=reward_ctrl)
return ob, reward, done, dict(reward_run=reward_run, reward_ctrl=reward_ctrl)
def _get_obs(self):
return np.concatenate([

View File

@@ -8,9 +8,9 @@ class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
utils.EzPickle.__init__(self)
def _step(self, a):
posbefore = self.model.data.qpos[0,0]
posbefore = self.model.data.qpos[0, 0]
self.do_simulation(a, self.frame_skip)
posafter,height,ang = self.model.data.qpos[0:3,0]
posafter, height, ang = self.model.data.qpos[0:3, 0]
alive_bonus = 1.0
reward = (posafter - posbefore) / self.dt
reward += alive_bonus
@@ -24,7 +24,7 @@ class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def _get_obs(self):
return np.concatenate([
self.model.data.qpos.flat[1:],
np.clip(self.model.data.qvel.flat,-10,10)
np.clip(self.model.data.qvel.flat, -10, 10)
])
def reset_model(self):

View File

@@ -28,7 +28,7 @@ class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
alive_bonus = 5.0
data = self.model.data
lin_vel_cost = 0.25 * (pos_after - pos_before) / self.model.opt.timestep
quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum()
quad_impact_cost = min(quad_impact_cost, 10)
reward = lin_vel_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus

View File

@@ -25,9 +25,9 @@ class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
self.do_simulation(a, self.frame_skip)
pos_after = self.model.data.qpos[2][0]
data = self.model.data
uph_cost=(pos_after - 0 ) / self.model.opt.timestep
uph_cost = (pos_after - 0) / self.model.opt.timestep
quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum()
quad_impact_cost = min(quad_impact_cost, 10)
reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1

View File

@@ -38,6 +38,6 @@ class InvertedDoublePendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def viewer_setup(self):
v = self.viewer
v.cam.trackbodyid=0
v.cam.trackbodyid = 0
v.cam.distance = v.model.stat.extent * 0.5
v.cam.lookat[2] += 3#v.model.stat.center[2]
v.cam.lookat[2] += 3 # v.model.stat.center[2]

View File

@@ -26,5 +26,5 @@ class InvertedPendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def viewer_setup(self):
v = self.viewer
v.cam.trackbodyid=0
v.cam.trackbodyid = 0
v.cam.distance = v.model.stat.extent

View File

@@ -23,15 +23,15 @@ class MujocoEnv(gym.Env):
else:
fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path)
if not path.exists(fullpath):
raise IOError("File %s does not exist"%fullpath)
self.frame_skip= frame_skip
raise IOError("File %s does not exist" % fullpath)
self.frame_skip = frame_skip
self.model = mujoco_py.MjModel(fullpath)
self.data = self.model.data
self.viewer = None
self.metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second' : int(np.round(1.0 / self.dt))
'video.frames_per_second': int(np.round(1.0 / self.dt))
}
self.init_qpos = self.model.data.qpos.ravel().copy()
@@ -87,10 +87,9 @@ class MujocoEnv(gym.Env):
assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,)
self.model.data.qpos = qpos
self.model.data.qvel = qvel
self.model._compute_subtree() #pylint: disable=W0212
self.model._compute_subtree() # pylint: disable=W0212
self.model.forward()
@property
def dt(self):
return self.model.opt.timestep * self.frame_skip
@@ -110,7 +109,7 @@ class MujocoEnv(gym.Env):
if mode == 'rgb_array':
self._get_viewer().render()
data, width, height = self._get_viewer().get_image()
return np.fromstring(data, dtype='uint8').reshape(height, width, 3)[::-1,:,:]
return np.fromstring(data, dtype='uint8').reshape(height, width, 3)[::-1, :, :]
elif mode == 'human':
self._get_viewer().loop_once()

View File

@@ -18,13 +18,14 @@ class ReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl)
def viewer_setup(self):
self.viewer.cam.trackbodyid=0
self.viewer.cam.trackbodyid = 0
def reset_model(self):
qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
while True:
self.goal = self.np_random.uniform(low=-.2, high=.2, size=2)
if np.linalg.norm(self.goal) < 2: break
if np.linalg.norm(self.goal) < 2:
break
qpos[-2:] = self.goal
qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
qvel[-2:] = 0

View File

@@ -9,15 +9,14 @@ class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def _step(self, a):
ctrl_cost_coeff = 0.0001
xposbefore = self.model.data.qpos[0,0]
xposbefore = self.model.data.qpos[0, 0]
self.do_simulation(a, self.frame_skip)
xposafter = self.model.data.qpos[0,0]
xposafter = self.model.data.qpos[0, 0]
reward_fwd = (xposafter - xposbefore) / self.dt
reward_ctrl = - ctrl_cost_coeff * np.square(a).sum()
reward = reward_fwd + reward_ctrl
ob = self._get_obs()
return ob, reward, False, dict(reward_fwd = reward_fwd, reward_ctrl=reward_ctrl)
return ob, reward, False, dict(reward_fwd=reward_fwd, reward_ctrl=reward_ctrl)
def _get_obs(self):
qpos = self.model.data.qpos

View File

@@ -9,22 +9,22 @@ class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
utils.EzPickle.__init__(self)
def _step(self, a):
posbefore = self.model.data.qpos[0,0]
posbefore = self.model.data.qpos[0, 0]
self.do_simulation(a, self.frame_skip)
posafter,height,ang = self.model.data.qpos[0:3,0]
posafter, height, ang = self.model.data.qpos[0:3, 0]
alive_bonus = 1.0
reward = ((posafter - posbefore) / self.dt )
reward = ((posafter - posbefore) / self.dt)
reward += alive_bonus
reward -= 1e-3 * np.square(a).sum()
done = not (height > 0.8 and height < 2.0
and ang > -1.0 and ang < 1.0)
done = not (height > 0.8 and height < 2.0 and
ang > -1.0 and ang < 1.0)
ob = self._get_obs()
return ob, reward, done, {}
def _get_obs(self):
qpos = self.model.data.qpos
qvel = self.model.data.qvel
return np.concatenate([qpos[1:], np.clip(qvel,-10,10)]).ravel()
return np.concatenate([qpos[1:], np.clip(qvel, -10, 10)]).ravel()
def reset_model(self):
self.set_state(