mirror of
https://github.com/Farama-Foundation/Gymnasium.git
synced 2025-08-30 01:50:19 +00:00
Lint fixes to mujoco environment (#377)
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
from gym.envs.mujoco.mujoco_env import MujocoEnv
|
||||
from gym.envs.mujoco.mujoco_env import MujocoEnv
|
||||
# ^^^^^ so that user gets the correct error
|
||||
# message if mujoco is not installed correctly
|
||||
from gym.envs.mujoco.ant import AntEnv
|
||||
|
@@ -36,7 +36,7 @@ class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
|
||||
])
|
||||
|
||||
def reset_model(self):
|
||||
qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq,low=-.1,high=.1)
|
||||
qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq, low=-.1, high=.1)
|
||||
qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1
|
||||
self.set_state(qpos, qvel)
|
||||
return self._get_obs()
|
||||
|
@@ -8,15 +8,15 @@ class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle):
|
||||
utils.EzPickle.__init__(self)
|
||||
|
||||
def _step(self, action):
|
||||
xposbefore = self.model.data.qpos[0,0]
|
||||
xposbefore = self.model.data.qpos[0, 0]
|
||||
self.do_simulation(action, self.frame_skip)
|
||||
xposafter = self.model.data.qpos[0,0]
|
||||
xposafter = self.model.data.qpos[0, 0]
|
||||
ob = self._get_obs()
|
||||
reward_ctrl = - 0.1 * np.square(action).sum()
|
||||
reward_run = (xposafter - xposbefore)/self.dt
|
||||
reward = reward_ctrl + reward_run
|
||||
done = False
|
||||
return ob, reward, done, dict(reward_run = reward_run, reward_ctrl=reward_ctrl)
|
||||
return ob, reward, done, dict(reward_run=reward_run, reward_ctrl=reward_ctrl)
|
||||
|
||||
def _get_obs(self):
|
||||
return np.concatenate([
|
||||
|
@@ -8,9 +8,9 @@ class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
|
||||
utils.EzPickle.__init__(self)
|
||||
|
||||
def _step(self, a):
|
||||
posbefore = self.model.data.qpos[0,0]
|
||||
posbefore = self.model.data.qpos[0, 0]
|
||||
self.do_simulation(a, self.frame_skip)
|
||||
posafter,height,ang = self.model.data.qpos[0:3,0]
|
||||
posafter, height, ang = self.model.data.qpos[0:3, 0]
|
||||
alive_bonus = 1.0
|
||||
reward = (posafter - posbefore) / self.dt
|
||||
reward += alive_bonus
|
||||
@@ -24,7 +24,7 @@ class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
|
||||
def _get_obs(self):
|
||||
return np.concatenate([
|
||||
self.model.data.qpos.flat[1:],
|
||||
np.clip(self.model.data.qvel.flat,-10,10)
|
||||
np.clip(self.model.data.qvel.flat, -10, 10)
|
||||
])
|
||||
|
||||
def reset_model(self):
|
||||
|
@@ -28,7 +28,7 @@ class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
|
||||
alive_bonus = 5.0
|
||||
data = self.model.data
|
||||
lin_vel_cost = 0.25 * (pos_after - pos_before) / self.model.opt.timestep
|
||||
quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
|
||||
quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
|
||||
quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum()
|
||||
quad_impact_cost = min(quad_impact_cost, 10)
|
||||
reward = lin_vel_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus
|
||||
|
@@ -25,9 +25,9 @@ class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
|
||||
self.do_simulation(a, self.frame_skip)
|
||||
pos_after = self.model.data.qpos[2][0]
|
||||
data = self.model.data
|
||||
uph_cost=(pos_after - 0 ) / self.model.opt.timestep
|
||||
uph_cost = (pos_after - 0) / self.model.opt.timestep
|
||||
|
||||
quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
|
||||
quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
|
||||
quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum()
|
||||
quad_impact_cost = min(quad_impact_cost, 10)
|
||||
reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1
|
||||
|
@@ -38,6 +38,6 @@ class InvertedDoublePendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
|
||||
|
||||
def viewer_setup(self):
|
||||
v = self.viewer
|
||||
v.cam.trackbodyid=0
|
||||
v.cam.trackbodyid = 0
|
||||
v.cam.distance = v.model.stat.extent * 0.5
|
||||
v.cam.lookat[2] += 3#v.model.stat.center[2]
|
||||
v.cam.lookat[2] += 3 # v.model.stat.center[2]
|
||||
|
@@ -26,5 +26,5 @@ class InvertedPendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
|
||||
|
||||
def viewer_setup(self):
|
||||
v = self.viewer
|
||||
v.cam.trackbodyid=0
|
||||
v.cam.trackbodyid = 0
|
||||
v.cam.distance = v.model.stat.extent
|
||||
|
@@ -23,15 +23,15 @@ class MujocoEnv(gym.Env):
|
||||
else:
|
||||
fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path)
|
||||
if not path.exists(fullpath):
|
||||
raise IOError("File %s does not exist"%fullpath)
|
||||
self.frame_skip= frame_skip
|
||||
raise IOError("File %s does not exist" % fullpath)
|
||||
self.frame_skip = frame_skip
|
||||
self.model = mujoco_py.MjModel(fullpath)
|
||||
self.data = self.model.data
|
||||
self.viewer = None
|
||||
|
||||
self.metadata = {
|
||||
'render.modes': ['human', 'rgb_array'],
|
||||
'video.frames_per_second' : int(np.round(1.0 / self.dt))
|
||||
'video.frames_per_second': int(np.round(1.0 / self.dt))
|
||||
}
|
||||
|
||||
self.init_qpos = self.model.data.qpos.ravel().copy()
|
||||
@@ -87,10 +87,9 @@ class MujocoEnv(gym.Env):
|
||||
assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,)
|
||||
self.model.data.qpos = qpos
|
||||
self.model.data.qvel = qvel
|
||||
self.model._compute_subtree() #pylint: disable=W0212
|
||||
self.model._compute_subtree() # pylint: disable=W0212
|
||||
self.model.forward()
|
||||
|
||||
|
||||
@property
|
||||
def dt(self):
|
||||
return self.model.opt.timestep * self.frame_skip
|
||||
@@ -110,7 +109,7 @@ class MujocoEnv(gym.Env):
|
||||
if mode == 'rgb_array':
|
||||
self._get_viewer().render()
|
||||
data, width, height = self._get_viewer().get_image()
|
||||
return np.fromstring(data, dtype='uint8').reshape(height, width, 3)[::-1,:,:]
|
||||
return np.fromstring(data, dtype='uint8').reshape(height, width, 3)[::-1, :, :]
|
||||
elif mode == 'human':
|
||||
self._get_viewer().loop_once()
|
||||
|
||||
|
@@ -18,13 +18,14 @@ class ReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
|
||||
return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl)
|
||||
|
||||
def viewer_setup(self):
|
||||
self.viewer.cam.trackbodyid=0
|
||||
self.viewer.cam.trackbodyid = 0
|
||||
|
||||
def reset_model(self):
|
||||
qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
|
||||
while True:
|
||||
self.goal = self.np_random.uniform(low=-.2, high=.2, size=2)
|
||||
if np.linalg.norm(self.goal) < 2: break
|
||||
if np.linalg.norm(self.goal) < 2:
|
||||
break
|
||||
qpos[-2:] = self.goal
|
||||
qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
|
||||
qvel[-2:] = 0
|
||||
|
@@ -9,15 +9,14 @@ class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
|
||||
|
||||
def _step(self, a):
|
||||
ctrl_cost_coeff = 0.0001
|
||||
xposbefore = self.model.data.qpos[0,0]
|
||||
xposbefore = self.model.data.qpos[0, 0]
|
||||
self.do_simulation(a, self.frame_skip)
|
||||
xposafter = self.model.data.qpos[0,0]
|
||||
xposafter = self.model.data.qpos[0, 0]
|
||||
reward_fwd = (xposafter - xposbefore) / self.dt
|
||||
reward_ctrl = - ctrl_cost_coeff * np.square(a).sum()
|
||||
reward = reward_fwd + reward_ctrl
|
||||
ob = self._get_obs()
|
||||
return ob, reward, False, dict(reward_fwd = reward_fwd, reward_ctrl=reward_ctrl)
|
||||
|
||||
return ob, reward, False, dict(reward_fwd=reward_fwd, reward_ctrl=reward_ctrl)
|
||||
|
||||
def _get_obs(self):
|
||||
qpos = self.model.data.qpos
|
||||
|
@@ -9,22 +9,22 @@ class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
|
||||
utils.EzPickle.__init__(self)
|
||||
|
||||
def _step(self, a):
|
||||
posbefore = self.model.data.qpos[0,0]
|
||||
posbefore = self.model.data.qpos[0, 0]
|
||||
self.do_simulation(a, self.frame_skip)
|
||||
posafter,height,ang = self.model.data.qpos[0:3,0]
|
||||
posafter, height, ang = self.model.data.qpos[0:3, 0]
|
||||
alive_bonus = 1.0
|
||||
reward = ((posafter - posbefore) / self.dt )
|
||||
reward = ((posafter - posbefore) / self.dt)
|
||||
reward += alive_bonus
|
||||
reward -= 1e-3 * np.square(a).sum()
|
||||
done = not (height > 0.8 and height < 2.0
|
||||
and ang > -1.0 and ang < 1.0)
|
||||
done = not (height > 0.8 and height < 2.0 and
|
||||
ang > -1.0 and ang < 1.0)
|
||||
ob = self._get_obs()
|
||||
return ob, reward, done, {}
|
||||
|
||||
def _get_obs(self):
|
||||
qpos = self.model.data.qpos
|
||||
qvel = self.model.data.qvel
|
||||
return np.concatenate([qpos[1:], np.clip(qvel,-10,10)]).ravel()
|
||||
return np.concatenate([qpos[1:], np.clip(qvel, -10, 10)]).ravel()
|
||||
|
||||
def reset_model(self):
|
||||
self.set_state(
|
||||
|
Reference in New Issue
Block a user