fix: restore auto human rendering (#3063)

* restore auto human rendering

* add assert for minimal modes
Omar Younis authored on 2022-09-05 21:56:36 +02:00, committed by GitHub
parent a18e0a3d6c
commit 2f33096040
42 changed files with 138 additions and 1 deletion
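Every hunk below applies the same pattern: at the end of reset() and step(), each environment calls self.render() when it was created with render_mode="human", restoring the automatic on-screen rendering. A minimal sketch of that pattern on a self-contained toy environment follows; the class, its spaces, and its print-based render are illustrative only and are not part of this diff:

import numpy as np

import gym
from gym import spaces


class ToyEnv(gym.Env):
    """Illustrative only: shows where the restored auto-render calls go."""

    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 30}

    def __init__(self, render_mode=None):
        assert render_mode is None or render_mode in self.metadata["render_modes"]
        self.render_mode = render_mode
        self.observation_space = spaces.Box(-1.0, 1.0, shape=(1,), dtype=np.float32)
        self.action_space = spaces.Discrete(2)
        self._state = np.zeros(1, dtype=np.float32)

    def reset(self, *, seed=None, options=None):
        super().reset(seed=seed)
        self._state = np.zeros(1, dtype=np.float32)
        # Restored behaviour: render automatically in "human" mode on reset().
        if self.render_mode == "human":
            self.render()
        return self._state, {}

    def step(self, action):
        delta = 0.1 if action == 1 else -0.1
        self._state = np.clip(self._state + delta, -1.0, 1.0)
        # Same call at the end of step(), just before returning the transition.
        if self.render_mode == "human":
            self.render()
        return self._state, 0.0, False, False, {}

    def render(self):
        if self.render_mode == "human":
            print(f"state: {self._state[0]:.2f}")  # stand-in for a real viewer window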


@@ -510,6 +510,8 @@ class BipedalWalker(gym.Env, EzPickle):
return fraction
self.lidar = [LidarCallback() for _ in range(10)]
if self.render_mode == "human":
self.render()
return self.step(np.array([0, 0, 0, 0]))[0], {}
def step(self, action: np.ndarray):
@@ -598,6 +600,9 @@ class BipedalWalker(gym.Env, EzPickle):
terminated = True
if pos[0] > (TERRAIN_LENGTH - TERRAIN_GRASS) * TERRAIN_STEP:
terminated = True
if self.render_mode == "human":
self.render()
return np.array(state, dtype=np.float32), reward, terminated, False, {}
def render(self):


@@ -513,6 +513,8 @@ class CarRacing(gym.Env, EzPickle):
)
self.car = Car(self.world, *self.track[0][1:4])
if self.render_mode == "human":
self.render()
return self.step(None)[0], {}
def step(self, action: Union[np.ndarray, int]):
@@ -558,6 +560,8 @@ class CarRacing(gym.Env, EzPickle):
terminated = True
step_reward = -100
if self.render_mode == "human":
self.render()
return self.state, step_reward, terminated, truncated, {}
def render(self):


@@ -415,6 +415,8 @@ class LunarLander(gym.Env, EzPickle):
self.drawlist = [self.lander] + self.legs
if self.render_mode == "human":
self.render()
return self.step(np.array([0, 0]) if self.continuous else 0)[0], {}
def _create_particle(self, mass, x, y, ttl):
@@ -592,6 +594,9 @@ class LunarLander(gym.Env, EzPickle):
if not self.lander.awake:
terminated = True
reward = +100
if self.render_mode == "human":
self.render()
return np.array(state, dtype=np.float32), reward, terminated, False, {}
def render(self):


@@ -189,6 +189,8 @@ class AcrobotEnv(core.Env):
np.float32
)
if self.render_mode == "human":
self.render()
return self._get_ob(), {}
def step(self, a):
@@ -216,6 +218,8 @@ class AcrobotEnv(core.Env):
terminated = self._terminal()
reward = -1.0 if not terminated else 0.0
if self.render_mode == "human":
self.render()
return (self._get_ob(), reward, terminated, False, {})
def _get_ob(self):


@@ -183,6 +183,8 @@ class CartPoleEnv(gym.Env[np.ndarray, Union[int, np.ndarray]]):
self.steps_beyond_terminated += 1
reward = 0.0
if self.render_mode == "human":
self.render()
return np.array(self.state, dtype=np.float32), reward, terminated, False, {}
def reset(
@@ -199,6 +201,9 @@ class CartPoleEnv(gym.Env[np.ndarray, Union[int, np.ndarray]]):
) # default high
self.state = self.np_random.uniform(low=low, high=high, size=(4,))
self.steps_beyond_terminated = None
if self.render_mode == "human":
self.render()
return np.array(self.state, dtype=np.float32), {}
def render(self):


@@ -169,6 +169,9 @@ class Continuous_MountainCarEnv(gym.Env):
reward -= math.pow(action[0], 2) * 0.1
self.state = np.array([position, velocity], dtype=np.float32)
if self.render_mode == "human":
self.render()
return self.state, reward, terminated, False, {}
def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
@@ -177,6 +180,9 @@ class Continuous_MountainCarEnv(gym.Env):
# state/observations.
low, high = utils.maybe_parse_reset_bounds(options, -0.6, -0.4)
self.state = np.array([self.np_random.uniform(low=low, high=high), 0])
if self.render_mode == "human":
self.render()
return np.array(self.state, dtype=np.float32), {}
def _height(self, xs):


@@ -143,6 +143,8 @@ class MountainCarEnv(gym.Env):
reward = -1.0
self.state = (position, velocity)
if self.render_mode == "human":
self.render()
return np.array(self.state, dtype=np.float32), reward, terminated, False, {}
def reset(
@@ -156,6 +158,9 @@ class MountainCarEnv(gym.Env):
# state/observations.
low, high = utils.maybe_parse_reset_bounds(options, -0.6, -0.4)
self.state = np.array([self.np_random.uniform(low=low, high=high), 0])
if self.render_mode == "human":
self.render()
return np.array(self.state, dtype=np.float32), {}
def _height(self, xs):


@@ -133,6 +133,9 @@ class PendulumEnv(gym.Env):
newth = th + newthdot * dt
self.state = np.array([newth, newthdot])
if self.render_mode == "human":
self.render()
return self._get_obs(), -costs, False, False, {}
def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
@@ -151,6 +154,8 @@ class PendulumEnv(gym.Env):
self.state = self.np_random.uniform(low=low, high=high)
self.last_u = None
if self.render_mode == "human":
self.render()
return self._get_obs(), {}
def _get_obs(self):


@@ -42,6 +42,9 @@ class AntEnv(MuJocoPyEnv, utils.EzPickle):
)
terminated = not not_terminated
ob = self._get_obs()
if self.render_mode == "human":
self.render()
return (
ob,
reward,


@@ -144,6 +144,8 @@ class AntEnv(MuJocoPyEnv, utils.EzPickle):
"forward_reward": forward_reward,
}
if self.render_mode == "human":
self.render()
return observation, reward, terminated, False, info
def _get_obs(self):


@@ -313,6 +313,8 @@ class AntEnv(MujocoEnv, utils.EzPickle):
reward = rewards - costs
if self.render_mode == "human":
self.render()
return observation, reward, terminated, False, info
def _get_obs(self):


@@ -32,6 +32,9 @@ class HalfCheetahEnv(MuJocoPyEnv, utils.EzPickle):
reward_run = (xposafter - xposbefore) / self.dt
reward = reward_ctrl + reward_run
terminated = False
if self.render_mode == "human":
self.render()
return (
ob,
reward,


@@ -87,6 +87,8 @@ class HalfCheetahEnv(MuJocoPyEnv, utils.EzPickle):
"reward_ctrl": -ctrl_cost,
}
if self.render_mode == "human":
self.render()
return observation, reward, terminated, False, info
def _get_obs(self):


@@ -205,6 +205,8 @@ class HalfCheetahEnv(MujocoEnv, utils.EzPickle):
"reward_ctrl": -ctrl_cost,
}
if self.render_mode == "human":
self.render()
return observation, reward, terminated, False, info
def _get_obs(self):


@@ -39,6 +39,9 @@ class HopperEnv(MuJocoPyEnv, utils.EzPickle):
and (abs(ang) < 0.2)
)
ob = self._get_obs()
if self.render_mode == "human":
self.render()
return ob, reward, terminated, False, {}
def _get_obs(self):


@@ -148,6 +148,8 @@ class HopperEnv(MuJocoPyEnv, utils.EzPickle):
"x_velocity": x_velocity,
}
if self.render_mode == "human":
self.render()
return observation, reward, terminated, False, info
def reset_model(self):


@@ -269,6 +269,8 @@ class HopperEnv(MujocoEnv, utils.EzPickle):
"x_velocity": x_velocity,
}
if self.render_mode == "human":
self.render()
return observation, reward, terminated, False, info
def reset_model(self):


@@ -57,6 +57,9 @@ class HumanoidEnv(MuJocoPyEnv, utils.EzPickle):
reward = lin_vel_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus
qpos = self.sim.data.qpos
terminated = bool((qpos[2] < 1.0) or (qpos[2] > 2.0))
if self.render_mode == "human":
self.render()
return (
self._get_obs(),
reward,


@@ -171,6 +171,8 @@ class HumanoidEnv(MuJocoPyEnv, utils.EzPickle):
"forward_reward": forward_reward,
}
if self.render_mode == "human":
self.render()
return observation, reward, terminated, False, info
def reset_model(self):


@@ -346,6 +346,8 @@ class HumanoidEnv(MujocoEnv, utils.EzPickle):
"forward_reward": forward_reward,
}
if self.render_mode == "human":
self.render()
return observation, reward, terminated, False, info
def reset_model(self):


@@ -52,6 +52,8 @@ class HumanoidStandupEnv(MuJocoPyEnv, utils.EzPickle):
quad_impact_cost = min(quad_impact_cost, 10)
reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1
if self.render_mode == "human":
self.render()
return (
self._get_obs(),
reward,


@@ -224,6 +224,8 @@ class HumanoidStandupEnv(MujocoEnv, utils.EzPickle):
quad_impact_cost = min(quad_impact_cost, 10)
reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1
if self.render_mode == "human":
self.render()
return (
self._get_obs(),
reward,


@@ -37,6 +37,9 @@ class InvertedDoublePendulumEnv(MuJocoPyEnv, utils.EzPickle):
alive_bonus = 10
r = alive_bonus - dist_penalty - vel_penalty
terminated = bool(y <= 1)
if self.render_mode == "human":
self.render()
return ob, r, terminated, False, {}
def _get_obs(self):


@@ -142,6 +142,8 @@ class InvertedDoublePendulumEnv(MujocoEnv, utils.EzPickle):
alive_bonus = 10
r = alive_bonus - dist_penalty - vel_penalty
terminated = bool(y <= 1)
if self.render_mode == "human":
self.render()
return ob, r, terminated, False, {}
def _get_obs(self):


@@ -32,6 +32,9 @@ class InvertedPendulumEnv(MuJocoPyEnv, utils.EzPickle):
ob = self._get_obs()
terminated = bool(not np.isfinite(ob).all() or (np.abs(ob[1]) > 0.2))
if self.render_mode == "human":
self.render()
return ob, reward, terminated, False, {}
def reset_model(self):


@@ -108,6 +108,8 @@ class InvertedPendulumEnv(MujocoEnv, utils.EzPickle):
self.do_simulation(a, self.frame_skip)
ob = self._get_obs()
terminated = bool(not np.isfinite(ob).all() or (np.abs(ob[1]) > 0.2))
if self.render_mode == "human":
self.render()
return ob, reward, terminated, False, {}
def reset_model(self):


@@ -134,6 +134,8 @@ class BaseMujocoEnv(gym.Env):
self._reset_simulation()
ob = self.reset_model()
if self.render_mode == "human":
self.render()
return ob, {}
def set_state(self, qpos, qvel):


@@ -32,6 +32,8 @@ class PusherEnv(MuJocoPyEnv, utils.EzPickle):
reward = reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near
self.do_simulation(a, self.frame_skip)
if self.render_mode == "human":
self.render()
ob = self._get_obs()
return (


@@ -154,6 +154,9 @@ class PusherEnv(MujocoEnv, utils.EzPickle):
reward = reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near
self.do_simulation(a, self.frame_skip)
if self.render_mode == "human":
self.render()
ob = self._get_obs()
return (
ob,


@@ -27,7 +27,10 @@ class ReacherEnv(MuJocoPyEnv, utils.EzPickle):
reward_dist = -np.linalg.norm(vec)
reward_ctrl = -np.square(a).sum()
reward = reward_dist + reward_ctrl
self.do_simulation(a, self.frame_skip)
if self.render_mode == "human":
self.render()
ob = self._get_obs()
return (


@@ -139,7 +139,11 @@ class ReacherEnv(MujocoEnv, utils.EzPickle):
reward_dist = -np.linalg.norm(vec)
reward_ctrl = -np.square(a).sum()
reward = reward_dist + reward_ctrl
self.do_simulation(a, self.frame_skip)
if self.render_mode == "human":
self.render()
ob = self._get_obs()
return (
ob,


@@ -32,6 +32,10 @@ class SwimmerEnv(MuJocoPyEnv, utils.EzPickle):
reward_ctrl = -ctrl_cost_coeff * np.square(a).sum()
reward = reward_fwd + reward_ctrl
ob = self._get_obs()
if self.render_mode == "human":
self.render()
return (
ob,
reward,


@@ -88,6 +88,9 @@ class SwimmerEnv(MuJocoPyEnv, utils.EzPickle):
"forward_reward": forward_reward,
}
if self.render_mode == "human":
self.render()
return observation, reward, False, False, info
def _get_obs(self):


@@ -199,6 +199,9 @@ class SwimmerEnv(MujocoEnv, utils.EzPickle):
"forward_reward": forward_reward,
}
if self.render_mode == "human":
self.render()
return observation, reward, False, False, info
def _get_obs(self):


@@ -33,6 +33,10 @@ class Walker2dEnv(MuJocoPyEnv, utils.EzPickle):
reward -= 1e-3 * np.square(a).sum()
terminated = not (height > 0.8 and height < 2.0 and ang > -1.0 and ang < 1.0)
ob = self._get_obs()
if self.render_mode == "human":
self.render()
return ob, reward, terminated, False, {}
def _get_obs(self):


@@ -137,6 +137,9 @@ class Walker2dEnv(MuJocoPyEnv, utils.EzPickle):
"x_velocity": x_velocity,
}
if self.render_mode == "human":
self.render()
return observation, reward, terminated, False, info
def reset_model(self):


@@ -266,6 +266,9 @@ class Walker2dEnv(MujocoEnv, utils.EzPickle):
"x_velocity": x_velocity,
}
if self.render_mode == "human":
self.render()
return observation, reward, terminated, False, info
def reset_model(self):


@@ -156,6 +156,9 @@ class BlackjackEnv(gym.Env):
):
# Natural gives extra points, but doesn't autowin. Legacy implementation
reward = 1.5
if self.render_mode == "human":
self.render()
return self._get_obs(), reward, terminated, False, {}
def _get_obs(self):
@@ -182,6 +185,8 @@ class BlackjackEnv(gym.Env):
else:
self.dealer_top_card_value_str = str(dealer_card_value)
if self.render_mode == "human":
self.render()
return self._get_obs(), {}
def render(self):


@@ -148,6 +148,9 @@ class CliffWalkingEnv(Env):
p, s, r, t = transitions[i]
self.s = s
self.lastaction = a
if self.render_mode == "human":
self.render()
return (int(s), r, t, False, {"prob": p})
def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
@@ -155,6 +158,8 @@ class CliffWalkingEnv(Env):
self.s = categorical_sample(self.initial_state_distrib, self.np_random)
self.lastaction = None
if self.render_mode == "human":
self.render()
return int(self.s), {"prob": 1}
def render(self):


@@ -247,6 +247,9 @@ class FrozenLakeEnv(Env):
p, s, r, t = transitions[i]
self.s = s
self.lastaction = a
if self.render_mode == "human":
self.render()
return (int(s), r, t, False, {"prob": p})
def reset(
@@ -259,6 +262,8 @@ class FrozenLakeEnv(Env):
self.s = categorical_sample(self.initial_state_distrib, self.np_random)
self.lastaction = None
if self.render_mode == "human":
self.render()
return int(self.s), {"prob": 1}
def render(self):


@@ -257,6 +257,9 @@ class TaxiEnv(Env):
p, s, r, t = transitions[i]
self.s = s
self.lastaction = a
if self.render_mode == "human":
self.render()
return (int(s), r, t, False, {"prob": p, "action_mask": self.action_mask(s)})
def reset(
@@ -270,6 +273,8 @@ class TaxiEnv(Env):
self.lastaction = None
self.taxi_orientation = 0
if self.render_mode == "human":
self.render()
return int(self.s), {"prob": 1.0, "action_mask": self.action_mask(self.s)}
def render(self):


@@ -165,7 +165,9 @@ def test_render_modes(spec):
"""
env = spec.make()
assert len(env.metadata["render_modes"]) > 0
assert "rgb_array" in env.metadata["render_modes"]
assert "human" in env.metadata["render_modes"]
for mode in env.metadata["render_modes"]:
if mode != "human":
new_env = spec.make(render_mode=mode)
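The test above also asserts the minimal set of modes ("rgb_array" and "human") that each tested environment is expected to advertise in metadata["render_modes"]. From the user side, the restored behaviour means no manual render() calls are needed in "human" mode; a short usage sketch follows (the environment id is only an example, not taken from this commit):

import gym

env = gym.make("CartPole-v1", render_mode="human")  # any env advertising "human" behaves the same

obs, info = env.reset(seed=0)
for _ in range(200):
    action = env.action_space.sample()
    # The window updates inside step()/reset(); no explicit env.render() call is needed.
    obs, reward, terminated, truncated, info = env.step(action)
    if terminated or truncated:
        obs, info = env.reset()
env.close()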