Mirror of https://github.com/Farama-Foundation/Gymnasium.git (synced 2025-08-02 06:16:32 +00:00)
fix: restore auto human rendering (#3063)
* restore auto human rendering
* add assert for minimal modes
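To illustrate the behaviour this commit restores, here is a minimal usage sketch (not part of the diff itself, assuming a standard Gymnasium install): with `render_mode="human"`, `reset()` and `step()` call `render()` internally, so a rollout loop needs no explicit render call.

```python
import gymnasium as gym

# Minimal sketch: with render_mode="human", the window updates
# automatically on every reset() and step() call.
env = gym.make("CartPole-v1", render_mode="human")
obs, info = env.reset(seed=0)
for _ in range(200):
    action = env.action_space.sample()
    obs, reward, terminated, truncated, info = env.step(action)
    if terminated or truncated:
        obs, info = env.reset()
env.close()
```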
@@ -510,6 +510,8 @@ class BipedalWalker(gym.Env, EzPickle):
return fraction

self.lidar = [LidarCallback() for _ in range(10)]
if self.render_mode == "human":
self.render()
return self.step(np.array([0, 0, 0, 0]))[0], {}

def step(self, action: np.ndarray):

@@ -598,6 +600,9 @@ class BipedalWalker(gym.Env, EzPickle):
terminated = True
if pos[0] > (TERRAIN_LENGTH - TERRAIN_GRASS) * TERRAIN_STEP:
terminated = True

if self.render_mode == "human":
self.render()
return np.array(state, dtype=np.float32), reward, terminated, False, {}

def render(self):

@@ -513,6 +513,8 @@ class CarRacing(gym.Env, EzPickle):
)
self.car = Car(self.world, *self.track[0][1:4])

if self.render_mode == "human":
self.render()
return self.step(None)[0], {}

def step(self, action: Union[np.ndarray, int]):

@@ -558,6 +560,8 @@ class CarRacing(gym.Env, EzPickle):
terminated = True
step_reward = -100

if self.render_mode == "human":
self.render()
return self.state, step_reward, terminated, truncated, {}

def render(self):

@@ -415,6 +415,8 @@ class LunarLander(gym.Env, EzPickle):

self.drawlist = [self.lander] + self.legs

if self.render_mode == "human":
self.render()
return self.step(np.array([0, 0]) if self.continuous else 0)[0], {}

def _create_particle(self, mass, x, y, ttl):

@@ -592,6 +594,9 @@ class LunarLander(gym.Env, EzPickle):
if not self.lander.awake:
terminated = True
reward = +100

if self.render_mode == "human":
self.render()
return np.array(state, dtype=np.float32), reward, terminated, False, {}

def render(self):
@@ -189,6 +189,8 @@ class AcrobotEnv(core.Env):
np.float32
)

if self.render_mode == "human":
self.render()
return self._get_ob(), {}

def step(self, a):

@@ -216,6 +218,8 @@ class AcrobotEnv(core.Env):
terminated = self._terminal()
reward = -1.0 if not terminated else 0.0

if self.render_mode == "human":
self.render()
return (self._get_ob(), reward, terminated, False, {})

def _get_ob(self):

@@ -183,6 +183,8 @@ class CartPoleEnv(gym.Env[np.ndarray, Union[int, np.ndarray]]):
self.steps_beyond_terminated += 1
reward = 0.0

if self.render_mode == "human":
self.render()
return np.array(self.state, dtype=np.float32), reward, terminated, False, {}

def reset(

@@ -199,6 +201,9 @@ class CartPoleEnv(gym.Env[np.ndarray, Union[int, np.ndarray]]):
) # default high
self.state = self.np_random.uniform(low=low, high=high, size=(4,))
self.steps_beyond_terminated = None

if self.render_mode == "human":
self.render()
return np.array(self.state, dtype=np.float32), {}

def render(self):

@@ -169,6 +169,9 @@ class Continuous_MountainCarEnv(gym.Env):
reward -= math.pow(action[0], 2) * 0.1

self.state = np.array([position, velocity], dtype=np.float32)

if self.render_mode == "human":
self.render()
return self.state, reward, terminated, False, {}

def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):

@@ -177,6 +180,9 @@ class Continuous_MountainCarEnv(gym.Env):
# state/observations.
low, high = utils.maybe_parse_reset_bounds(options, -0.6, -0.4)
self.state = np.array([self.np_random.uniform(low=low, high=high), 0])

if self.render_mode == "human":
self.render()
return np.array(self.state, dtype=np.float32), {}

def _height(self, xs):

@@ -143,6 +143,8 @@ class MountainCarEnv(gym.Env):
reward = -1.0

self.state = (position, velocity)
if self.render_mode == "human":
self.render()
return np.array(self.state, dtype=np.float32), reward, terminated, False, {}

def reset(

@@ -156,6 +158,9 @@ class MountainCarEnv(gym.Env):
# state/observations.
low, high = utils.maybe_parse_reset_bounds(options, -0.6, -0.4)
self.state = np.array([self.np_random.uniform(low=low, high=high), 0])

if self.render_mode == "human":
self.render()
return np.array(self.state, dtype=np.float32), {}

def _height(self, xs):

@@ -133,6 +133,9 @@ class PendulumEnv(gym.Env):
newth = th + newthdot * dt

self.state = np.array([newth, newthdot])

if self.render_mode == "human":
self.render()
return self._get_obs(), -costs, False, False, {}

def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):

@@ -151,6 +154,8 @@ class PendulumEnv(gym.Env):
self.state = self.np_random.uniform(low=low, high=high)
self.last_u = None

if self.render_mode == "human":
self.render()
return self._get_obs(), {}

def _get_obs(self):
@@ -42,6 +42,9 @@ class AntEnv(MuJocoPyEnv, utils.EzPickle):
)
terminated = not not_terminated
ob = self._get_obs()

if self.render_mode == "human":
self.render()
return (
ob,
reward,

@@ -144,6 +144,8 @@ class AntEnv(MuJocoPyEnv, utils.EzPickle):
"forward_reward": forward_reward,
}

if self.render_mode == "human":
self.render()
return observation, reward, terminated, False, info

def _get_obs(self):

@@ -313,6 +313,8 @@ class AntEnv(MujocoEnv, utils.EzPickle):

reward = rewards - costs

if self.render_mode == "human":
self.render()
return observation, reward, terminated, False, info

def _get_obs(self):

@@ -32,6 +32,9 @@ class HalfCheetahEnv(MuJocoPyEnv, utils.EzPickle):
reward_run = (xposafter - xposbefore) / self.dt
reward = reward_ctrl + reward_run
terminated = False

if self.render_mode == "human":
self.render()
return (
ob,
reward,

@@ -87,6 +87,8 @@ class HalfCheetahEnv(MuJocoPyEnv, utils.EzPickle):
"reward_ctrl": -ctrl_cost,
}

if self.render_mode == "human":
self.render()
return observation, reward, terminated, False, info

def _get_obs(self):

@@ -205,6 +205,8 @@ class HalfCheetahEnv(MujocoEnv, utils.EzPickle):
"reward_ctrl": -ctrl_cost,
}

if self.render_mode == "human":
self.render()
return observation, reward, terminated, False, info

def _get_obs(self):

@@ -39,6 +39,9 @@ class HopperEnv(MuJocoPyEnv, utils.EzPickle):
and (abs(ang) < 0.2)
)
ob = self._get_obs()

if self.render_mode == "human":
self.render()
return ob, reward, terminated, False, {}

def _get_obs(self):

@@ -148,6 +148,8 @@ class HopperEnv(MuJocoPyEnv, utils.EzPickle):
"x_velocity": x_velocity,
}

if self.render_mode == "human":
self.render()
return observation, reward, terminated, False, info

def reset_model(self):

@@ -269,6 +269,8 @@ class HopperEnv(MujocoEnv, utils.EzPickle):
"x_velocity": x_velocity,
}

if self.render_mode == "human":
self.render()
return observation, reward, terminated, False, info

def reset_model(self):

@@ -57,6 +57,9 @@ class HumanoidEnv(MuJocoPyEnv, utils.EzPickle):
reward = lin_vel_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus
qpos = self.sim.data.qpos
terminated = bool((qpos[2] < 1.0) or (qpos[2] > 2.0))

if self.render_mode == "human":
self.render()
return (
self._get_obs(),
reward,

@@ -171,6 +171,8 @@ class HumanoidEnv(MuJocoPyEnv, utils.EzPickle):
"forward_reward": forward_reward,
}

if self.render_mode == "human":
self.render()
return observation, reward, terminated, False, info

def reset_model(self):

@@ -346,6 +346,8 @@ class HumanoidEnv(MujocoEnv, utils.EzPickle):
"forward_reward": forward_reward,
}

if self.render_mode == "human":
self.render()
return observation, reward, terminated, False, info

def reset_model(self):

@@ -52,6 +52,8 @@ class HumanoidStandupEnv(MuJocoPyEnv, utils.EzPickle):
quad_impact_cost = min(quad_impact_cost, 10)
reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1

if self.render_mode == "human":
self.render()
return (
self._get_obs(),
reward,

@@ -224,6 +224,8 @@ class HumanoidStandupEnv(MujocoEnv, utils.EzPickle):
quad_impact_cost = min(quad_impact_cost, 10)
reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1

if self.render_mode == "human":
self.render()
return (
self._get_obs(),
reward,

@@ -37,6 +37,9 @@ class InvertedDoublePendulumEnv(MuJocoPyEnv, utils.EzPickle):
alive_bonus = 10
r = alive_bonus - dist_penalty - vel_penalty
terminated = bool(y <= 1)

if self.render_mode == "human":
self.render()
return ob, r, terminated, False, {}

def _get_obs(self):

@@ -142,6 +142,8 @@ class InvertedDoublePendulumEnv(MujocoEnv, utils.EzPickle):
alive_bonus = 10
r = alive_bonus - dist_penalty - vel_penalty
terminated = bool(y <= 1)
if self.render_mode == "human":
self.render()
return ob, r, terminated, False, {}

def _get_obs(self):

@@ -32,6 +32,9 @@ class InvertedPendulumEnv(MuJocoPyEnv, utils.EzPickle):

ob = self._get_obs()
terminated = bool(not np.isfinite(ob).all() or (np.abs(ob[1]) > 0.2))

if self.render_mode == "human":
self.render()
return ob, reward, terminated, False, {}

def reset_model(self):

@@ -108,6 +108,8 @@ class InvertedPendulumEnv(MujocoEnv, utils.EzPickle):
self.do_simulation(a, self.frame_skip)
ob = self._get_obs()
terminated = bool(not np.isfinite(ob).all() or (np.abs(ob[1]) > 0.2))
if self.render_mode == "human":
self.render()
return ob, reward, terminated, False, {}

def reset_model(self):

@@ -134,6 +134,8 @@ class BaseMujocoEnv(gym.Env):
self._reset_simulation()

ob = self.reset_model()
if self.render_mode == "human":
self.render()
return ob, {}

def set_state(self, qpos, qvel):

@@ -32,6 +32,8 @@ class PusherEnv(MuJocoPyEnv, utils.EzPickle):
reward = reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near

self.do_simulation(a, self.frame_skip)
if self.render_mode == "human":
self.render()

ob = self._get_obs()
return (

@@ -154,6 +154,9 @@ class PusherEnv(MujocoEnv, utils.EzPickle):
reward = reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near

self.do_simulation(a, self.frame_skip)
if self.render_mode == "human":
self.render()

ob = self._get_obs()
return (
ob,

@@ -27,7 +27,10 @@ class ReacherEnv(MuJocoPyEnv, utils.EzPickle):
reward_dist = -np.linalg.norm(vec)
reward_ctrl = -np.square(a).sum()
reward = reward_dist + reward_ctrl

self.do_simulation(a, self.frame_skip)
if self.render_mode == "human":
self.render()

ob = self._get_obs()
return (

@@ -139,7 +139,11 @@ class ReacherEnv(MujocoEnv, utils.EzPickle):
reward_dist = -np.linalg.norm(vec)
reward_ctrl = -np.square(a).sum()
reward = reward_dist + reward_ctrl

self.do_simulation(a, self.frame_skip)
if self.render_mode == "human":
self.render()

ob = self._get_obs()
return (
ob,

@@ -32,6 +32,10 @@ class SwimmerEnv(MuJocoPyEnv, utils.EzPickle):
reward_ctrl = -ctrl_cost_coeff * np.square(a).sum()
reward = reward_fwd + reward_ctrl
ob = self._get_obs()

if self.render_mode == "human":
self.render()

return (
ob,
reward,

@@ -88,6 +88,9 @@ class SwimmerEnv(MuJocoPyEnv, utils.EzPickle):
"forward_reward": forward_reward,
}

if self.render_mode == "human":
self.render()

return observation, reward, False, False, info

def _get_obs(self):

@@ -199,6 +199,9 @@ class SwimmerEnv(MujocoEnv, utils.EzPickle):
"forward_reward": forward_reward,
}

if self.render_mode == "human":
self.render()

return observation, reward, False, False, info

def _get_obs(self):

@@ -33,6 +33,10 @@ class Walker2dEnv(MuJocoPyEnv, utils.EzPickle):
reward -= 1e-3 * np.square(a).sum()
terminated = not (height > 0.8 and height < 2.0 and ang > -1.0 and ang < 1.0)
ob = self._get_obs()

if self.render_mode == "human":
self.render()

return ob, reward, terminated, False, {}

def _get_obs(self):

@@ -137,6 +137,9 @@ class Walker2dEnv(MuJocoPyEnv, utils.EzPickle):
"x_velocity": x_velocity,
}

if self.render_mode == "human":
self.render()

return observation, reward, terminated, False, info

def reset_model(self):

@@ -266,6 +266,9 @@ class Walker2dEnv(MujocoEnv, utils.EzPickle):
"x_velocity": x_velocity,
}

if self.render_mode == "human":
self.render()

return observation, reward, terminated, False, info

def reset_model(self):
@@ -156,6 +156,9 @@ class BlackjackEnv(gym.Env):
):
# Natural gives extra points, but doesn't autowin. Legacy implementation
reward = 1.5

if self.render_mode == "human":
self.render()
return self._get_obs(), reward, terminated, False, {}

def _get_obs(self):

@@ -182,6 +185,8 @@ class BlackjackEnv(gym.Env):
else:
self.dealer_top_card_value_str = str(dealer_card_value)

if self.render_mode == "human":
self.render()
return self._get_obs(), {}

def render(self):

@@ -148,6 +148,9 @@ class CliffWalkingEnv(Env):
p, s, r, t = transitions[i]
self.s = s
self.lastaction = a

if self.render_mode == "human":
self.render()
return (int(s), r, t, False, {"prob": p})

def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):

@@ -155,6 +158,8 @@ class CliffWalkingEnv(Env):
self.s = categorical_sample(self.initial_state_distrib, self.np_random)
self.lastaction = None

if self.render_mode == "human":
self.render()
return int(self.s), {"prob": 1}

def render(self):

@@ -247,6 +247,9 @@ class FrozenLakeEnv(Env):
p, s, r, t = transitions[i]
self.s = s
self.lastaction = a

if self.render_mode == "human":
self.render()
return (int(s), r, t, False, {"prob": p})

def reset(

@@ -259,6 +262,8 @@ class FrozenLakeEnv(Env):
self.s = categorical_sample(self.initial_state_distrib, self.np_random)
self.lastaction = None

if self.render_mode == "human":
self.render()
return int(self.s), {"prob": 1}

def render(self):

@@ -257,6 +257,9 @@ class TaxiEnv(Env):
p, s, r, t = transitions[i]
self.s = s
self.lastaction = a

if self.render_mode == "human":
self.render()
return (int(s), r, t, False, {"prob": p, "action_mask": self.action_mask(s)})

def reset(

@@ -270,6 +273,8 @@ class TaxiEnv(Env):
self.lastaction = None
self.taxi_orientation = 0

if self.render_mode == "human":
self.render()
return int(self.s), {"prob": 1.0, "action_mask": self.action_mask(self.s)}

def render(self):
@@ -165,7 +165,9 @@ def test_render_modes(spec):
"""
env = spec.make()

assert len(env.metadata["render_modes"]) > 0
assert "rgb_array" in env.metadata["render_modes"]
assert "human" in env.metadata["render_modes"]

for mode in env.metadata["render_modes"]:
if mode != "human":
new_env = spec.make(render_mode=mode)
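As a short hedged sketch of the "minimal modes" requirement that the strengthened test enforces (the environment id below is only an example), a registered environment is expected to declare at least the "rgb_array" and "human" render modes in its metadata:

```python
import gymnasium as gym

# Check the declared render modes of a built-in environment; the updated
# test applies this requirement to every registered environment spec.
env = gym.make("CartPole-v1")
assert "rgb_array" in env.metadata["render_modes"]
assert "human" in env.metadata["render_modes"]
env.close()
```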