diff --git a/gym/envs/mujoco/ant.py b/gym/envs/mujoco/ant.py index 191a8acb7..24df78b59 100644 --- a/gym/envs/mujoco/ant.py +++ b/gym/envs/mujoco/ant.py @@ -2,6 +2,7 @@ import numpy as np from gym import utils from gym.envs.mujoco import mujoco_env +from gym.spaces import Box class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle): @@ -17,8 +18,16 @@ class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle): } def __init__(self, **kwargs): + observation_space = Box( + low=-np.inf, high=np.inf, shape=(111,), dtype=np.float64 + ) mujoco_env.MujocoEnv.__init__( - self, "ant.xml", 5, mujoco_bindings="mujoco_py", **kwargs + self, + "ant.xml", + 5, + mujoco_bindings="mujoco_py", + observation_space=observation_space, + **kwargs ) utils.EzPickle.__init__(self) diff --git a/gym/envs/mujoco/ant_v3.py b/gym/envs/mujoco/ant_v3.py index de992307d..5c0246a34 100644 --- a/gym/envs/mujoco/ant_v3.py +++ b/gym/envs/mujoco/ant_v3.py @@ -2,6 +2,7 @@ import numpy as np from gym import utils from gym.envs.mujoco import mujoco_env +from gym.spaces import Box DEFAULT_CAMERA_CONFIG = { "distance": 4.0, @@ -50,8 +51,22 @@ class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle): exclude_current_positions_from_observation ) + if exclude_current_positions_from_observation: + observation_space = Box( + low=-np.inf, high=np.inf, shape=(111,), dtype=np.float64 + ) + else: + observation_space = Box( + low=-np.inf, high=np.inf, shape=(113,), dtype=np.float64 + ) + mujoco_env.MujocoEnv.__init__( - self, xml_file, 5, mujoco_bindings="mujoco_py", **kwargs + self, + xml_file, + 5, + mujoco_bindings="mujoco_py", + observation_space=observation_space, + **kwargs ) @property diff --git a/gym/envs/mujoco/ant_v4.py b/gym/envs/mujoco/ant_v4.py index 37fe4aa5a..414e9507a 100644 --- a/gym/envs/mujoco/ant_v4.py +++ b/gym/envs/mujoco/ant_v4.py @@ -2,6 +2,7 @@ import numpy as np from gym import utils from gym.envs.mujoco import mujoco_env +from gym.spaces import Box DEFAULT_CAMERA_CONFIG = { "distance": 4.0, @@ -215,7 +216,19 @@ 
class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle): exclude_current_positions_from_observation ) - mujoco_env.MujocoEnv.__init__(self, xml_file, 5, **kwargs) + obs_shape = 27 + if not exclude_current_positions_from_observation: + obs_shape += 2 + if use_contact_forces: + obs_shape += 84 + + observation_space = Box( + low=-np.inf, high=np.inf, shape=(obs_shape,), dtype=np.float64 + ) + + mujoco_env.MujocoEnv.__init__( + self, xml_file, 5, observation_space=observation_space, **kwargs + ) @property def healthy_reward(self): diff --git a/gym/envs/mujoco/half_cheetah.py b/gym/envs/mujoco/half_cheetah.py index e3bb35aac..5ef374295 100644 --- a/gym/envs/mujoco/half_cheetah.py +++ b/gym/envs/mujoco/half_cheetah.py @@ -2,6 +2,7 @@ import numpy as np from gym import utils from gym.envs.mujoco import mujoco_env +from gym.spaces import Box class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle): @@ -17,8 +18,14 @@ class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle): } def __init__(self, **kwargs): + observation_space = Box(low=-np.inf, high=np.inf, shape=(17,), dtype=np.float64) mujoco_env.MujocoEnv.__init__( - self, "half_cheetah.xml", 5, mujoco_bindings="mujoco_py", **kwargs + self, + "half_cheetah.xml", + 5, + mujoco_bindings="mujoco_py", + observation_space=observation_space, + **kwargs ) utils.EzPickle.__init__(self) diff --git a/gym/envs/mujoco/half_cheetah_v3.py b/gym/envs/mujoco/half_cheetah_v3.py index e6aef4a4a..c25c238df 100644 --- a/gym/envs/mujoco/half_cheetah_v3.py +++ b/gym/envs/mujoco/half_cheetah_v3.py @@ -4,6 +4,7 @@ import numpy as np from gym import utils from gym.envs.mujoco import mujoco_env +from gym.spaces import Box DEFAULT_CAMERA_CONFIG = { "distance": 4.0, @@ -43,8 +44,22 @@ class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle): exclude_current_positions_from_observation ) + if exclude_current_positions_from_observation: + observation_space = Box( + low=-np.inf, high=np.inf, shape=(17,), dtype=np.float64 + ) + else: + 
observation_space = Box( + low=-np.inf, high=np.inf, shape=(18,), dtype=np.float64 + ) + mujoco_env.MujocoEnv.__init__( - self, xml_file, 5, mujoco_bindings="mujoco_py", **kwargs + self, + xml_file, + 5, + mujoco_bindings="mujoco_py", + observation_space=observation_space, + **kwargs ) def control_cost(self, action): diff --git a/gym/envs/mujoco/half_cheetah_v4.py b/gym/envs/mujoco/half_cheetah_v4.py index fe7d33f34..1a56c4711 100644 --- a/gym/envs/mujoco/half_cheetah_v4.py +++ b/gym/envs/mujoco/half_cheetah_v4.py @@ -4,6 +4,7 @@ import numpy as np from gym import utils from gym.envs.mujoco import mujoco_env +from gym.spaces import Box DEFAULT_CAMERA_CONFIG = { "distance": 4.0, @@ -162,7 +163,18 @@ class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle): exclude_current_positions_from_observation ) - mujoco_env.MujocoEnv.__init__(self, "half_cheetah.xml", 5, **kwargs) + if exclude_current_positions_from_observation: + observation_space = Box( + low=-np.inf, high=np.inf, shape=(17,), dtype=np.float64 + ) + else: + observation_space = Box( + low=-np.inf, high=np.inf, shape=(18,), dtype=np.float64 + ) + + mujoco_env.MujocoEnv.__init__( + self, "half_cheetah.xml", 5, observation_space=observation_space, **kwargs + ) def control_cost(self, action): control_cost = self._ctrl_cost_weight * np.sum(np.square(action)) diff --git a/gym/envs/mujoco/hopper.py b/gym/envs/mujoco/hopper.py index ce273aa8d..b2edaafc7 100644 --- a/gym/envs/mujoco/hopper.py +++ b/gym/envs/mujoco/hopper.py @@ -2,6 +2,7 @@ import numpy as np from gym import utils from gym.envs.mujoco import mujoco_env +from gym.spaces import Box class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle): @@ -17,8 +18,14 @@ class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle): } def __init__(self, **kwargs): + observation_space = Box(low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64) mujoco_env.MujocoEnv.__init__( - self, "hopper.xml", 4, mujoco_bindings="mujoco_py", **kwargs + self, + "hopper.xml", + 4, + 
mujoco_bindings="mujoco_py", + observation_space=observation_space, + **kwargs ) utils.EzPickle.__init__(self) diff --git a/gym/envs/mujoco/hopper_v3.py b/gym/envs/mujoco/hopper_v3.py index f5fdc3db1..cf6f06e0b 100644 --- a/gym/envs/mujoco/hopper_v3.py +++ b/gym/envs/mujoco/hopper_v3.py @@ -4,6 +4,7 @@ import numpy as np from gym import utils from gym.envs.mujoco import mujoco_env +from gym.spaces import Box DEFAULT_CAMERA_CONFIG = { "trackbodyid": 2, @@ -58,8 +59,22 @@ class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle): exclude_current_positions_from_observation ) + if exclude_current_positions_from_observation: + observation_space = Box( + low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64 + ) + else: + observation_space = Box( + low=-np.inf, high=np.inf, shape=(12,), dtype=np.float64 + ) + mujoco_env.MujocoEnv.__init__( - self, xml_file, 4, mujoco_bindings="mujoco_py", **kwargs + self, + xml_file, + 4, + mujoco_bindings="mujoco_py", + observation_space=observation_space, + **kwargs ) @property diff --git a/gym/envs/mujoco/hopper_v4.py b/gym/envs/mujoco/hopper_v4.py index 00480d41e..5cf9b9f10 100644 --- a/gym/envs/mujoco/hopper_v4.py +++ b/gym/envs/mujoco/hopper_v4.py @@ -2,6 +2,7 @@ import numpy as np from gym import utils from gym.envs.mujoco import mujoco_env +from gym.spaces import Box DEFAULT_CAMERA_CONFIG = { "trackbodyid": 2, @@ -180,7 +181,18 @@ class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle): exclude_current_positions_from_observation ) - mujoco_env.MujocoEnv.__init__(self, "hopper.xml", 4, **kwargs) + if exclude_current_positions_from_observation: + observation_space = Box( + low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64 + ) + else: + observation_space = Box( + low=-np.inf, high=np.inf, shape=(12,), dtype=np.float64 + ) + + mujoco_env.MujocoEnv.__init__( + self, "hopper.xml", 4, observation_space=observation_space, **kwargs + ) @property def healthy_reward(self): diff --git a/gym/envs/mujoco/humanoid.py 
b/gym/envs/mujoco/humanoid.py index 0bcf29b23..0bbdd3e4c 100644 --- a/gym/envs/mujoco/humanoid.py +++ b/gym/envs/mujoco/humanoid.py @@ -2,6 +2,7 @@ import numpy as np from gym import utils from gym.envs.mujoco import mujoco_env +from gym.spaces import Box def mass_center(model, sim): @@ -23,8 +24,16 @@ class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle): } def __init__(self, **kwargs): + observation_space = Box( + low=-np.inf, high=np.inf, shape=(376,), dtype=np.float64 + ) mujoco_env.MujocoEnv.__init__( - self, "humanoid.xml", 5, mujoco_bindings="mujoco_py", **kwargs + self, + "humanoid.xml", + 5, + mujoco_bindings="mujoco_py", + observation_space=observation_space, + **kwargs ) utils.EzPickle.__init__(self) diff --git a/gym/envs/mujoco/humanoid_v3.py b/gym/envs/mujoco/humanoid_v3.py index 704d2bd76..f78dc852f 100644 --- a/gym/envs/mujoco/humanoid_v3.py +++ b/gym/envs/mujoco/humanoid_v3.py @@ -2,6 +2,7 @@ import numpy as np from gym import utils from gym.envs.mujoco import mujoco_env +from gym.spaces import Box DEFAULT_CAMERA_CONFIG = { "trackbodyid": 1, @@ -58,9 +59,22 @@ class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle): self._exclude_current_positions_from_observation = ( exclude_current_positions_from_observation ) + if exclude_current_positions_from_observation: + observation_space = Box( + low=-np.inf, high=np.inf, shape=(376,), dtype=np.float64 + ) + else: + observation_space = Box( + low=-np.inf, high=np.inf, shape=(378,), dtype=np.float64 + ) mujoco_env.MujocoEnv.__init__( - self, xml_file, 5, mujoco_bindings="mujoco_py", **kwargs + self, + xml_file, + 5, + mujoco_bindings="mujoco_py", + observation_space=observation_space, + **kwargs ) @property diff --git a/gym/envs/mujoco/humanoid_v4.py b/gym/envs/mujoco/humanoid_v4.py index 7259190be..43f76068b 100644 --- a/gym/envs/mujoco/humanoid_v4.py +++ b/gym/envs/mujoco/humanoid_v4.py @@ -2,6 +2,7 @@ import numpy as np from gym import utils from gym.envs.mujoco import mujoco_env +from gym.spaces import 
Box DEFAULT_CAMERA_CONFIG = { "trackbodyid": 1, @@ -248,7 +249,18 @@ class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle): exclude_current_positions_from_observation ) - mujoco_env.MujocoEnv.__init__(self, "humanoid.xml", 5, **kwargs) + if exclude_current_positions_from_observation: + observation_space = Box( + low=-np.inf, high=np.inf, shape=(376,), dtype=np.float64 + ) + else: + observation_space = Box( + low=-np.inf, high=np.inf, shape=(378,), dtype=np.float64 + ) + + mujoco_env.MujocoEnv.__init__( + self, "humanoid.xml", 5, observation_space=observation_space, **kwargs + ) @property def healthy_reward(self): diff --git a/gym/envs/mujoco/humanoidstandup.py b/gym/envs/mujoco/humanoidstandup.py index 97983d876..8e68d8bb6 100644 --- a/gym/envs/mujoco/humanoidstandup.py +++ b/gym/envs/mujoco/humanoidstandup.py @@ -2,6 +2,7 @@ import numpy as np from gym import utils from gym.envs.mujoco import mujoco_env +from gym.spaces import Box class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle): @@ -17,8 +18,16 @@ class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle): } def __init__(self, **kwargs): + observation_space = Box( + low=-np.inf, high=np.inf, shape=(376,), dtype=np.float64 + ) mujoco_env.MujocoEnv.__init__( - self, "humanoidstandup.xml", 5, mujoco_bindings="mujoco_py", **kwargs + self, + "humanoidstandup.xml", + 5, + mujoco_bindings="mujoco_py", + observation_space=observation_space, + **kwargs ) utils.EzPickle.__init__(self) diff --git a/gym/envs/mujoco/humanoidstandup_v4.py b/gym/envs/mujoco/humanoidstandup_v4.py index bea03c499..0f013164b 100644 --- a/gym/envs/mujoco/humanoidstandup_v4.py +++ b/gym/envs/mujoco/humanoidstandup_v4.py @@ -2,6 +2,7 @@ import numpy as np from gym import utils from gym.envs.mujoco import mujoco_env +from gym.spaces import Box class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle): @@ -189,7 +190,16 @@ class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle): } def __init__(self, **kwargs): - 
mujoco_env.MujocoEnv.__init__(self, "humanoidstandup.xml", 5, **kwargs) + observation_space = Box( + low=-np.inf, high=np.inf, shape=(376,), dtype=np.float64 + ) + mujoco_env.MujocoEnv.__init__( + self, + "humanoidstandup.xml", + 5, + observation_space=observation_space, + **kwargs + ) utils.EzPickle.__init__(self) def _get_obs(self): diff --git a/gym/envs/mujoco/inverted_double_pendulum.py b/gym/envs/mujoco/inverted_double_pendulum.py index 53fc06bbf..5e6137ec8 100644 --- a/gym/envs/mujoco/inverted_double_pendulum.py +++ b/gym/envs/mujoco/inverted_double_pendulum.py @@ -2,6 +2,7 @@ import numpy as np from gym import utils from gym.envs.mujoco import mujoco_env +from gym.spaces import Box class InvertedDoublePendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle): @@ -17,11 +18,13 @@ class InvertedDoublePendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle): } def __init__(self, **kwargs): + observation_space = Box(low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64) mujoco_env.MujocoEnv.__init__( self, "inverted_double_pendulum.xml", 5, mujoco_bindings="mujoco_py", + observation_space=observation_space, **kwargs ) utils.EzPickle.__init__(self) diff --git a/gym/envs/mujoco/inverted_double_pendulum_v4.py b/gym/envs/mujoco/inverted_double_pendulum_v4.py index 6e0b7f3ad..4fa7353d1 100644 --- a/gym/envs/mujoco/inverted_double_pendulum_v4.py +++ b/gym/envs/mujoco/inverted_double_pendulum_v4.py @@ -2,6 +2,7 @@ import numpy as np from gym import utils from gym.envs.mujoco import mujoco_env +from gym.spaces import Box class InvertedDoublePendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle): @@ -123,7 +124,14 @@ class InvertedDoublePendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle): } def __init__(self, **kwargs): - mujoco_env.MujocoEnv.__init__(self, "inverted_double_pendulum.xml", 5, **kwargs) + observation_space = Box(low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64) + mujoco_env.MujocoEnv.__init__( + self, + "inverted_double_pendulum.xml", + 5, + 
observation_space=observation_space, + **kwargs + ) utils.EzPickle.__init__(self) def step(self, action): diff --git a/gym/envs/mujoco/inverted_pendulum.py b/gym/envs/mujoco/inverted_pendulum.py index 6913ad697..74cc238c5 100644 --- a/gym/envs/mujoco/inverted_pendulum.py +++ b/gym/envs/mujoco/inverted_pendulum.py @@ -2,6 +2,7 @@ import numpy as np from gym import utils from gym.envs.mujoco import mujoco_env +from gym.spaces import Box class InvertedPendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle): @@ -18,8 +19,14 @@ class InvertedPendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle): def __init__(self, **kwargs): utils.EzPickle.__init__(self) + observation_space = Box(low=-np.inf, high=np.inf, shape=(4,), dtype=np.float64) mujoco_env.MujocoEnv.__init__( - self, "inverted_pendulum.xml", 2, mujoco_bindings="mujoco_py", **kwargs + self, + "inverted_pendulum.xml", + 2, + mujoco_bindings="mujoco_py", + observation_space=observation_space, + **kwargs ) def step(self, a): diff --git a/gym/envs/mujoco/inverted_pendulum_v4.py b/gym/envs/mujoco/inverted_pendulum_v4.py index 6a197bcae..d53e3d2ee 100644 --- a/gym/envs/mujoco/inverted_pendulum_v4.py +++ b/gym/envs/mujoco/inverted_pendulum_v4.py @@ -2,6 +2,7 @@ import numpy as np from gym import utils from gym.envs.mujoco import mujoco_env +from gym.spaces import Box class InvertedPendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle): @@ -95,7 +96,14 @@ class InvertedPendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle): def __init__(self, **kwargs): utils.EzPickle.__init__(self) - mujoco_env.MujocoEnv.__init__(self, "inverted_pendulum.xml", 2, **kwargs) + observation_space = Box(low=-np.inf, high=np.inf, shape=(4,), dtype=np.float64) + mujoco_env.MujocoEnv.__init__( + self, + "inverted_pendulum.xml", + 2, + observation_space=observation_space, + **kwargs + ) def step(self, a): reward = 1.0 diff --git a/gym/envs/mujoco/mujoco_env.py b/gym/envs/mujoco/mujoco_env.py index 0af733400..50bccdd19 100644 --- a/gym/envs/mujoco/mujoco_env.py +++ 
b/gym/envs/mujoco/mujoco_env.py @@ -7,6 +7,7 @@ import numpy as np import gym from gym import error, logger, spaces +from gym.spaces import Space from gym.utils.renderer import Renderer DEFAULT_SIZE = 480 @@ -39,6 +40,7 @@ class MujocoEnv(gym.Env): self, model_path, frame_skip, + observation_space: Space, render_mode: Optional[str] = None, width: int = DEFAULT_SIZE, height: int = DEFAULT_SIZE, @@ -120,11 +122,7 @@ class MujocoEnv(gym.Env): ) self.renderer = Renderer(self.render_mode, render_frame) - action = self.action_space.sample() - observation, _reward, done, _info = self.step(action) - assert not done - - self._set_observation_space(observation) + self.observation_space = observation_space def _set_action_space(self): bounds = self.model.actuator_ctrlrange.copy().astype(np.float32) @@ -132,10 +130,6 @@ class MujocoEnv(gym.Env): self.action_space = spaces.Box(low=low, high=high, dtype=np.float32) return self.action_space - def _set_observation_space(self, observation): - self.observation_space = convert_observation_to_space(observation) - return self.observation_space - # methods to override: # ---------------------------- diff --git a/gym/envs/mujoco/pusher.py b/gym/envs/mujoco/pusher.py index 1053eb57a..11e7fe4b7 100644 --- a/gym/envs/mujoco/pusher.py +++ b/gym/envs/mujoco/pusher.py @@ -2,6 +2,7 @@ import numpy as np from gym import utils from gym.envs.mujoco import mujoco_env +from gym.spaces import Box class PusherEnv(mujoco_env.MujocoEnv, utils.EzPickle): @@ -18,8 +19,14 @@ class PusherEnv(mujoco_env.MujocoEnv, utils.EzPickle): def __init__(self, **kwargs): utils.EzPickle.__init__(self) + observation_space = Box(low=-np.inf, high=np.inf, shape=(23,), dtype=np.float64) mujoco_env.MujocoEnv.__init__( - self, "pusher.xml", 5, mujoco_bindings="mujoco_py", **kwargs + self, + "pusher.xml", + 5, + mujoco_bindings="mujoco_py", + observation_space=observation_space, + **kwargs ) def step(self, a): diff --git a/gym/envs/mujoco/pusher_v4.py 
b/gym/envs/mujoco/pusher_v4.py index 8f2892c5f..02bdb6776 100644 --- a/gym/envs/mujoco/pusher_v4.py +++ b/gym/envs/mujoco/pusher_v4.py @@ -2,6 +2,7 @@ import numpy as np from gym import utils from gym.envs.mujoco import mujoco_env +from gym.spaces import Box class PusherEnv(mujoco_env.MujocoEnv, utils.EzPickle): @@ -140,7 +141,10 @@ class PusherEnv(mujoco_env.MujocoEnv, utils.EzPickle): def __init__(self, **kwargs): utils.EzPickle.__init__(self) - mujoco_env.MujocoEnv.__init__(self, "pusher.xml", 5, **kwargs) + observation_space = Box(low=-np.inf, high=np.inf, shape=(23,), dtype=np.float64) + mujoco_env.MujocoEnv.__init__( + self, "pusher.xml", 5, observation_space=observation_space, **kwargs + ) def step(self, a): vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm") diff --git a/gym/envs/mujoco/reacher.py b/gym/envs/mujoco/reacher.py index a617c9fc9..e553bfbb4 100644 --- a/gym/envs/mujoco/reacher.py +++ b/gym/envs/mujoco/reacher.py @@ -2,6 +2,7 @@ import numpy as np from gym import utils from gym.envs.mujoco import mujoco_env +from gym.spaces import Box class ReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle): @@ -18,8 +19,14 @@ class ReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle): def __init__(self, **kwargs): utils.EzPickle.__init__(self) + observation_space = Box(low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64) mujoco_env.MujocoEnv.__init__( - self, "reacher.xml", 2, mujoco_bindings="mujoco_py", **kwargs + self, + "reacher.xml", + 2, + mujoco_bindings="mujoco_py", + observation_space=observation_space, + **kwargs ) def step(self, a): diff --git a/gym/envs/mujoco/reacher_v4.py b/gym/envs/mujoco/reacher_v4.py index fb7a5d72a..d91bdd4c5 100644 --- a/gym/envs/mujoco/reacher_v4.py +++ b/gym/envs/mujoco/reacher_v4.py @@ -2,6 +2,7 @@ import numpy as np from gym import utils from gym.envs.mujoco import mujoco_env +from gym.spaces import Box class ReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle): @@ -130,7 +131,10 @@ class 
ReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle): def __init__(self, **kwargs): utils.EzPickle.__init__(self) - mujoco_env.MujocoEnv.__init__(self, "reacher.xml", 2, **kwargs) + observation_space = Box(low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64) + mujoco_env.MujocoEnv.__init__( + self, "reacher.xml", 2, observation_space=observation_space, **kwargs + ) def step(self, a): vec = self.get_body_com("fingertip") - self.get_body_com("target") diff --git a/gym/envs/mujoco/swimmer.py b/gym/envs/mujoco/swimmer.py index 5844328f5..7270d1d38 100644 --- a/gym/envs/mujoco/swimmer.py +++ b/gym/envs/mujoco/swimmer.py @@ -2,6 +2,7 @@ import numpy as np from gym import utils from gym.envs.mujoco import mujoco_env +from gym.spaces import Box class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle): @@ -17,8 +18,14 @@ class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle): } def __init__(self, **kwargs): + observation_space = Box(low=-np.inf, high=np.inf, shape=(8,), dtype=np.float64) mujoco_env.MujocoEnv.__init__( - self, "swimmer.xml", 4, mujoco_bindings="mujoco_py", **kwargs + self, + "swimmer.xml", + 4, + mujoco_bindings="mujoco_py", + observation_space=observation_space, + **kwargs ) utils.EzPickle.__init__(self) diff --git a/gym/envs/mujoco/swimmer_v3.py b/gym/envs/mujoco/swimmer_v3.py index 516266d84..53d431a64 100644 --- a/gym/envs/mujoco/swimmer_v3.py +++ b/gym/envs/mujoco/swimmer_v3.py @@ -4,6 +4,7 @@ import numpy as np from gym import utils from gym.envs.mujoco import mujoco_env +from gym.spaces import Box DEFAULT_CAMERA_CONFIG = {} @@ -40,8 +41,22 @@ class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle): exclude_current_positions_from_observation ) + if exclude_current_positions_from_observation: + observation_space = Box( + low=-np.inf, high=np.inf, shape=(8,), dtype=np.float64 + ) + else: + observation_space = Box( + low=-np.inf, high=np.inf, shape=(10,), dtype=np.float64 + ) + mujoco_env.MujocoEnv.__init__( - self, xml_file, 4, mujoco_bindings="mujoco_py", 
**kwargs + self, + xml_file, + 4, + mujoco_bindings="mujoco_py", + observation_space=observation_space, + **kwargs ) def control_cost(self, action): diff --git a/gym/envs/mujoco/swimmer_v4.py b/gym/envs/mujoco/swimmer_v4.py index c23bd079b..a1f313fba 100644 --- a/gym/envs/mujoco/swimmer_v4.py +++ b/gym/envs/mujoco/swimmer_v4.py @@ -4,6 +4,7 @@ import numpy as np from gym import utils from gym.envs.mujoco import mujoco_env +from gym.spaces import Box DEFAULT_CAMERA_CONFIG = {} @@ -152,8 +153,17 @@ class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle): self._exclude_current_positions_from_observation = ( exclude_current_positions_from_observation ) - - mujoco_env.MujocoEnv.__init__(self, "swimmer.xml", 4, **kwargs) + if exclude_current_positions_from_observation: + observation_space = Box( + low=-np.inf, high=np.inf, shape=(8,), dtype=np.float64 + ) + else: + observation_space = Box( + low=-np.inf, high=np.inf, shape=(10,), dtype=np.float64 + ) + mujoco_env.MujocoEnv.__init__( + self, "swimmer.xml", 4, observation_space=observation_space, **kwargs + ) def control_cost(self, action): control_cost = self._ctrl_cost_weight * np.sum(np.square(action)) diff --git a/gym/envs/mujoco/walker2d.py b/gym/envs/mujoco/walker2d.py index 2ef82b97d..44cbec6f0 100644 --- a/gym/envs/mujoco/walker2d.py +++ b/gym/envs/mujoco/walker2d.py @@ -2,6 +2,7 @@ import numpy as np from gym import utils from gym.envs.mujoco import mujoco_env +from gym.spaces import Box class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle): @@ -17,8 +18,14 @@ class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle): } def __init__(self, **kwargs): + observation_space = Box(low=-np.inf, high=np.inf, shape=(17,), dtype=np.float64) mujoco_env.MujocoEnv.__init__( - self, "walker2d.xml", 4, mujoco_bindings="mujoco_py", **kwargs + self, + "walker2d.xml", + 4, + mujoco_bindings="mujoco_py", + observation_space=observation_space, + **kwargs ) utils.EzPickle.__init__(self) diff --git a/gym/envs/mujoco/walker2d_v3.py 
b/gym/envs/mujoco/walker2d_v3.py index 7aac151ea..29d6202f0 100644 --- a/gym/envs/mujoco/walker2d_v3.py +++ b/gym/envs/mujoco/walker2d_v3.py @@ -2,6 +2,7 @@ import numpy as np from gym import utils from gym.envs.mujoco import mujoco_env +from gym.spaces import Box DEFAULT_CAMERA_CONFIG = { "trackbodyid": 2, @@ -53,8 +54,22 @@ class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle): exclude_current_positions_from_observation ) + if exclude_current_positions_from_observation: + observation_space = Box( + low=-np.inf, high=np.inf, shape=(17,), dtype=np.float64 + ) + else: + observation_space = Box( + low=-np.inf, high=np.inf, shape=(18,), dtype=np.float64 + ) + mujoco_env.MujocoEnv.__init__( - self, xml_file, 4, mujoco_bindings="mujoco_py", **kwargs + self, + xml_file, + 4, + mujoco_bindings="mujoco_py", + observation_space=observation_space, + **kwargs ) @property diff --git a/gym/envs/mujoco/walker2d_v4.py b/gym/envs/mujoco/walker2d_v4.py index b5811511e..48862a7e9 100644 --- a/gym/envs/mujoco/walker2d_v4.py +++ b/gym/envs/mujoco/walker2d_v4.py @@ -2,6 +2,7 @@ import numpy as np from gym import utils from gym.envs.mujoco import mujoco_env +from gym.spaces import Box DEFAULT_CAMERA_CONFIG = { "trackbodyid": 2, @@ -182,7 +183,18 @@ class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle): exclude_current_positions_from_observation ) - mujoco_env.MujocoEnv.__init__(self, "walker2d.xml", 4, **kwargs) + if exclude_current_positions_from_observation: + observation_space = Box( + low=-np.inf, high=np.inf, shape=(17,), dtype=np.float64 + ) + else: + observation_space = Box( + low=-np.inf, high=np.inf, shape=(18,), dtype=np.float64 + ) + + mujoco_env.MujocoEnv.__init__( + self, "walker2d.xml", 4, observation_space=observation_space, **kwargs + ) @property def healthy_reward(self): diff --git a/tests/envs/test_mujoco.py b/tests/envs/test_mujoco.py index 45b5d0b45..894c74566 100644 --- a/tests/envs/test_mujoco.py +++ b/tests/envs/test_mujoco.py @@ -3,6 +3,7 @@ import pytest 
import gym from gym import envs +from gym.envs.registration import EnvSpec from tests.envs.utils import mujoco_testing_env_specs EPS = 1e-6 @@ -37,6 +38,65 @@ def verify_environments_match( break +EXCLUDE_POS_FROM_OBS = [ + "Ant", + "HalfCheetah", + "Hopper", + "Humanoid", + "Swimmer", + "Walker2d", +] + + +@pytest.mark.parametrize( + "env_spec", + mujoco_testing_env_specs, + ids=[env_spec.id for env_spec in mujoco_testing_env_specs], +) +def test_obs_space_mujoco_environments(env_spec: EnvSpec): + """Check that the returned observations are contained in the observation space of the environment""" + env = env_spec.make(disable_env_checker=True) + reset_obs = env.reset() + assert env.observation_space.contains( + reset_obs + ), f"Observation returned by reset() of {env_spec.id} is not contained in the default observation space {env.observation_space}." + + action = env.action_space.sample() + step_obs, _, _, _ = env.step(action) + assert env.observation_space.contains( + step_obs + ), f"Observation returned by step(action) of {env_spec.id} is not contained in the default observation space {env.observation_space}." + + if env_spec.name in EXCLUDE_POS_FROM_OBS and ( + env_spec.version == 4 or env_spec.version == 3 + ): + env = env_spec.make( + disable_env_checker=True, exclude_current_positions_from_observation=False + ) + reset_obs = env.reset() + assert env.observation_space.contains( + reset_obs + ), f"Observation of {env_spec.id} is not contained in the default observation space {env.observation_space} when not excluding current position from observation." + + step_obs, _, _, _ = env.step(action) + assert env.observation_space.contains( + step_obs + ), f"Observation returned by step(action) of {env_spec.id} is not contained in the default observation space {env.observation_space} when not excluding current position from observation." 
+ + # Ant-v4 has the option of including contact forces in the observation space with the use_contact_forces argument + if env_spec.name == "Ant" and env_spec.version == 4: + env = env_spec.make(disable_env_checker=True, use_contact_forces=True) + reset_obs = env.reset() + assert env.observation_space.contains( + reset_obs + ), f"Observation of {env_spec.id} is not contained in the default observation space {env.observation_space} when using contact forces." + + step_obs, _, _, _ = env.step(action) + assert env.observation_space.contains( + step_obs + ), f"Observation returned by step(action) of {env_spec.id} is not contained in the default observation space {env.observation_space} when using contact forces." + + MUJOCO_V2_V3_ENVS = [ spec.name for spec in mujoco_testing_env_specs