mirror of
https://github.com/Farama-Foundation/Gymnasium.git
synced 2025-08-20 22:12:03 +00:00
update pre-commit (#344)
This commit is contained in:
@@ -24,7 +24,7 @@ repos:
|
|||||||
args:
|
args:
|
||||||
- --ignore-words-list=reacher,ure,referenc,wile
|
- --ignore-words-list=reacher,ure,referenc,wile
|
||||||
- repo: https://github.com/PyCQA/flake8
|
- repo: https://github.com/PyCQA/flake8
|
||||||
rev: 5.0.4
|
rev: 6.0.0
|
||||||
hooks:
|
hooks:
|
||||||
- id: flake8
|
- id: flake8
|
||||||
args:
|
args:
|
||||||
@@ -35,7 +35,7 @@ repos:
|
|||||||
- --show-source
|
- --show-source
|
||||||
- --statistics
|
- --statistics
|
||||||
- repo: https://github.com/asottile/pyupgrade
|
- repo: https://github.com/asottile/pyupgrade
|
||||||
rev: v3.3.0
|
rev: v3.3.1
|
||||||
hooks:
|
hooks:
|
||||||
- id: pyupgrade
|
- id: pyupgrade
|
||||||
args: ["--py37-plus"]
|
args: ["--py37-plus"]
|
||||||
@@ -44,11 +44,11 @@ repos:
|
|||||||
hooks:
|
hooks:
|
||||||
- id: isort
|
- id: isort
|
||||||
- repo: https://github.com/python/black
|
- repo: https://github.com/python/black
|
||||||
rev: 22.10.0
|
rev: 23.1.0
|
||||||
hooks:
|
hooks:
|
||||||
- id: black
|
- id: black
|
||||||
- repo: https://github.com/pycqa/pydocstyle
|
- repo: https://github.com/pycqa/pydocstyle
|
||||||
rev: 6.1.1
|
rev: 6.3.0
|
||||||
hooks:
|
hooks:
|
||||||
- id: pydocstyle
|
- id: pydocstyle
|
||||||
exclude: ^(gymnasium/envs/box2d)|(gymnasium/envs/classic_control)|(gymnasium/envs/mujoco)|(gymnasium/envs/toy_text)|(tests/envs)|(tests/spaces)|(tests/utils)|(tests/vector)|(tests/wrappers)|(docs/)
|
exclude: ^(gymnasium/envs/box2d)|(gymnasium/envs/classic_control)|(gymnasium/envs/mujoco)|(gymnasium/envs/toy_text)|(tests/envs)|(tests/spaces)|(tests/utils)|(tests/vector)|(tests/wrappers)|(docs/)
|
||||||
|
@@ -57,7 +57,6 @@ for env_spec in tqdm(gymnasium.envs.registry.values()):
|
|||||||
state, info = env.reset()
|
state, info = env.reset()
|
||||||
terminated, truncated = False, False
|
terminated, truncated = False, False
|
||||||
while not (terminated or truncated) and len(frames) <= LENGTH:
|
while not (terminated or truncated) and len(frames) <= LENGTH:
|
||||||
|
|
||||||
frame = env.render(mode="rgb_array")
|
frame = env.render(mode="rgb_array")
|
||||||
repeat = (
|
repeat = (
|
||||||
int(60 / env.metadata["render_fps"])
|
int(60 / env.metadata["render_fps"])
|
||||||
|
@@ -419,7 +419,6 @@ entropies = []
|
|||||||
|
|
||||||
# use tqdm to get a progress bar for training
|
# use tqdm to get a progress bar for training
|
||||||
for sample_phase in tqdm(range(n_updates)):
|
for sample_phase in tqdm(range(n_updates)):
|
||||||
|
|
||||||
# we don't have to reset the envs, they just continue playing
|
# we don't have to reset the envs, they just continue playing
|
||||||
# until the episode is over and then reset automatically
|
# until the episode is over and then reset automatically
|
||||||
|
|
||||||
@@ -435,7 +434,6 @@ for sample_phase in tqdm(range(n_updates)):
|
|||||||
|
|
||||||
# play n steps in our parallel environments to collect data
|
# play n steps in our parallel environments to collect data
|
||||||
for step in range(n_steps_per_update):
|
for step in range(n_steps_per_update):
|
||||||
|
|
||||||
# select an action A_{t} using S_{t} as input for the agent
|
# select an action A_{t} using S_{t} as input for the agent
|
||||||
actions, action_log_probs, state_value_preds, entropy = agent.select_action(
|
actions, action_log_probs, state_value_preds, entropy = agent.select_action(
|
||||||
states
|
states
|
||||||
@@ -674,7 +672,6 @@ for episode in range(n_showcase_episodes):
|
|||||||
# play one episode
|
# play one episode
|
||||||
done = False
|
done = False
|
||||||
while not done:
|
while not done:
|
||||||
|
|
||||||
# select an action A_{t} using S_{t} as input for the agent
|
# select an action A_{t} using S_{t} as input for the agent
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
action, _, _, _ = agent.select_action(state[None, :])
|
action, _, _, _ = agent.select_action(state[None, :])
|
||||||
|
@@ -155,7 +155,8 @@ class Car:
|
|||||||
"""control: brake
|
"""control: brake
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
b (0..1): Degree to which the brakes are applied. More than 0.9 blocks the wheels to zero rotation"""
|
b (0..1): Degree to which the brakes are applied. More than 0.9 blocks the wheels to zero rotation
|
||||||
|
"""
|
||||||
for w in self.wheels:
|
for w in self.wheels:
|
||||||
w.brake = b
|
w.brake = b
|
||||||
|
|
||||||
@@ -163,7 +164,8 @@ class Car:
|
|||||||
"""control: steer
|
"""control: steer
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
s (-1..1): target position, it takes time to rotate steering wheel from side-to-side"""
|
s (-1..1): target position, it takes time to rotate steering wheel from side-to-side
|
||||||
|
"""
|
||||||
self.wheels[0].steer = s
|
self.wheels[0].steer = s
|
||||||
self.wheels[1].steer = s
|
self.wheels[1].steer = s
|
||||||
|
|
||||||
|
@@ -790,7 +790,6 @@ def heuristic(env, s):
|
|||||||
|
|
||||||
|
|
||||||
def demo_heuristic_lander(env, seed=None, render=False):
|
def demo_heuristic_lander(env, seed=None, render=False):
|
||||||
|
|
||||||
total_reward = 0
|
total_reward = 0
|
||||||
steps = 0
|
steps = 0
|
||||||
s, info = env.reset(seed=seed)
|
s, info = env.reset(seed=seed)
|
||||||
|
@@ -343,7 +343,7 @@ class AcrobotEnv(Env):
|
|||||||
color=(0, 0, 0),
|
color=(0, 0, 0),
|
||||||
)
|
)
|
||||||
|
|
||||||
for ((x, y), th, llen) in zip(xys, thetas, link_lengths):
|
for (x, y), th, llen in zip(xys, thetas, link_lengths):
|
||||||
x = x + offset
|
x = x + offset
|
||||||
y = y + offset
|
y = y + offset
|
||||||
l, r, t, b = 0, llen, 0.1 * scale, -0.1 * scale
|
l, r, t, b = 0, llen, 0.1 * scale, -0.1 * scale
|
||||||
@@ -456,7 +456,6 @@ def rk4(derivs, y0, t):
|
|||||||
yout[0] = y0
|
yout[0] = y0
|
||||||
|
|
||||||
for i in np.arange(len(t) - 1):
|
for i in np.arange(len(t) - 1):
|
||||||
|
|
||||||
this = t[i]
|
this = t[i]
|
||||||
dt = t[i + 1] - this
|
dt = t[i + 1] - this
|
||||||
dt2 = dt / 2.0
|
dt2 = dt / 2.0
|
||||||
|
@@ -144,7 +144,6 @@ class Continuous_MountainCarEnv(gym.Env):
|
|||||||
)
|
)
|
||||||
|
|
||||||
def step(self, action: np.ndarray):
|
def step(self, action: np.ndarray):
|
||||||
|
|
||||||
position = self.state[0]
|
position = self.state[0]
|
||||||
velocity = self.state[1]
|
velocity = self.state[1]
|
||||||
force = min(max(action[0], self.min_action), self.max_action)
|
force = min(max(action[0], self.min_action), self.max_action)
|
||||||
|
@@ -31,7 +31,7 @@ class AntEnv(MuJocoPyEnv, utils.EzPickle):
|
|||||||
contact_force_range=(-1.0, 1.0),
|
contact_force_range=(-1.0, 1.0),
|
||||||
reset_noise_scale=0.1,
|
reset_noise_scale=0.1,
|
||||||
exclude_current_positions_from_observation=True,
|
exclude_current_positions_from_observation=True,
|
||||||
**kwargs
|
**kwargs,
|
||||||
):
|
):
|
||||||
utils.EzPickle.__init__(
|
utils.EzPickle.__init__(
|
||||||
self,
|
self,
|
||||||
@@ -44,7 +44,7 @@ class AntEnv(MuJocoPyEnv, utils.EzPickle):
|
|||||||
contact_force_range,
|
contact_force_range,
|
||||||
reset_noise_scale,
|
reset_noise_scale,
|
||||||
exclude_current_positions_from_observation,
|
exclude_current_positions_from_observation,
|
||||||
**kwargs
|
**kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
self._ctrl_cost_weight = ctrl_cost_weight
|
self._ctrl_cost_weight = ctrl_cost_weight
|
||||||
|
@@ -202,7 +202,7 @@ class AntEnv(MujocoEnv, utils.EzPickle):
|
|||||||
contact_force_range=(-1.0, 1.0),
|
contact_force_range=(-1.0, 1.0),
|
||||||
reset_noise_scale=0.1,
|
reset_noise_scale=0.1,
|
||||||
exclude_current_positions_from_observation=True,
|
exclude_current_positions_from_observation=True,
|
||||||
**kwargs
|
**kwargs,
|
||||||
):
|
):
|
||||||
utils.EzPickle.__init__(
|
utils.EzPickle.__init__(
|
||||||
self,
|
self,
|
||||||
@@ -216,7 +216,7 @@ class AntEnv(MujocoEnv, utils.EzPickle):
|
|||||||
contact_force_range,
|
contact_force_range,
|
||||||
reset_noise_scale,
|
reset_noise_scale,
|
||||||
exclude_current_positions_from_observation,
|
exclude_current_positions_from_observation,
|
||||||
**kwargs
|
**kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
self._ctrl_cost_weight = ctrl_cost_weight
|
self._ctrl_cost_weight = ctrl_cost_weight
|
||||||
@@ -252,7 +252,7 @@ class AntEnv(MujocoEnv, utils.EzPickle):
|
|||||||
5,
|
5,
|
||||||
observation_space=observation_space,
|
observation_space=observation_space,
|
||||||
default_camera_config=DEFAULT_CAMERA_CONFIG,
|
default_camera_config=DEFAULT_CAMERA_CONFIG,
|
||||||
**kwargs
|
**kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
@@ -29,7 +29,7 @@ class HalfCheetahEnv(MuJocoPyEnv, utils.EzPickle):
|
|||||||
ctrl_cost_weight=0.1,
|
ctrl_cost_weight=0.1,
|
||||||
reset_noise_scale=0.1,
|
reset_noise_scale=0.1,
|
||||||
exclude_current_positions_from_observation=True,
|
exclude_current_positions_from_observation=True,
|
||||||
**kwargs
|
**kwargs,
|
||||||
):
|
):
|
||||||
utils.EzPickle.__init__(
|
utils.EzPickle.__init__(
|
||||||
self,
|
self,
|
||||||
@@ -38,7 +38,7 @@ class HalfCheetahEnv(MuJocoPyEnv, utils.EzPickle):
|
|||||||
ctrl_cost_weight,
|
ctrl_cost_weight,
|
||||||
reset_noise_scale,
|
reset_noise_scale,
|
||||||
exclude_current_positions_from_observation,
|
exclude_current_positions_from_observation,
|
||||||
**kwargs
|
**kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
self._forward_reward_weight = forward_reward_weight
|
self._forward_reward_weight = forward_reward_weight
|
||||||
|
@@ -150,7 +150,7 @@ class HalfCheetahEnv(MujocoEnv, utils.EzPickle):
|
|||||||
ctrl_cost_weight=0.1,
|
ctrl_cost_weight=0.1,
|
||||||
reset_noise_scale=0.1,
|
reset_noise_scale=0.1,
|
||||||
exclude_current_positions_from_observation=True,
|
exclude_current_positions_from_observation=True,
|
||||||
**kwargs
|
**kwargs,
|
||||||
):
|
):
|
||||||
utils.EzPickle.__init__(
|
utils.EzPickle.__init__(
|
||||||
self,
|
self,
|
||||||
@@ -158,7 +158,7 @@ class HalfCheetahEnv(MujocoEnv, utils.EzPickle):
|
|||||||
ctrl_cost_weight,
|
ctrl_cost_weight,
|
||||||
reset_noise_scale,
|
reset_noise_scale,
|
||||||
exclude_current_positions_from_observation,
|
exclude_current_positions_from_observation,
|
||||||
**kwargs
|
**kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
self._forward_reward_weight = forward_reward_weight
|
self._forward_reward_weight = forward_reward_weight
|
||||||
@@ -186,7 +186,7 @@ class HalfCheetahEnv(MujocoEnv, utils.EzPickle):
|
|||||||
5,
|
5,
|
||||||
observation_space=observation_space,
|
observation_space=observation_space,
|
||||||
default_camera_config=DEFAULT_CAMERA_CONFIG,
|
default_camera_config=DEFAULT_CAMERA_CONFIG,
|
||||||
**kwargs
|
**kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
def control_cost(self, action):
|
def control_cost(self, action):
|
||||||
|
@@ -37,7 +37,7 @@ class HopperEnv(MuJocoPyEnv, utils.EzPickle):
|
|||||||
healthy_angle_range=(-0.2, 0.2),
|
healthy_angle_range=(-0.2, 0.2),
|
||||||
reset_noise_scale=5e-3,
|
reset_noise_scale=5e-3,
|
||||||
exclude_current_positions_from_observation=True,
|
exclude_current_positions_from_observation=True,
|
||||||
**kwargs
|
**kwargs,
|
||||||
):
|
):
|
||||||
utils.EzPickle.__init__(
|
utils.EzPickle.__init__(
|
||||||
self,
|
self,
|
||||||
@@ -51,7 +51,7 @@ class HopperEnv(MuJocoPyEnv, utils.EzPickle):
|
|||||||
healthy_angle_range,
|
healthy_angle_range,
|
||||||
reset_noise_scale,
|
reset_noise_scale,
|
||||||
exclude_current_positions_from_observation,
|
exclude_current_positions_from_observation,
|
||||||
**kwargs
|
**kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
self._forward_reward_weight = forward_reward_weight
|
self._forward_reward_weight = forward_reward_weight
|
||||||
|
@@ -161,7 +161,7 @@ class HopperEnv(MujocoEnv, utils.EzPickle):
|
|||||||
healthy_angle_range=(-0.2, 0.2),
|
healthy_angle_range=(-0.2, 0.2),
|
||||||
reset_noise_scale=5e-3,
|
reset_noise_scale=5e-3,
|
||||||
exclude_current_positions_from_observation=True,
|
exclude_current_positions_from_observation=True,
|
||||||
**kwargs
|
**kwargs,
|
||||||
):
|
):
|
||||||
utils.EzPickle.__init__(
|
utils.EzPickle.__init__(
|
||||||
self,
|
self,
|
||||||
@@ -174,7 +174,7 @@ class HopperEnv(MujocoEnv, utils.EzPickle):
|
|||||||
healthy_angle_range,
|
healthy_angle_range,
|
||||||
reset_noise_scale,
|
reset_noise_scale,
|
||||||
exclude_current_positions_from_observation,
|
exclude_current_positions_from_observation,
|
||||||
**kwargs
|
**kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
self._forward_reward_weight = forward_reward_weight
|
self._forward_reward_weight = forward_reward_weight
|
||||||
@@ -209,7 +209,7 @@ class HopperEnv(MujocoEnv, utils.EzPickle):
|
|||||||
4,
|
4,
|
||||||
observation_space=observation_space,
|
observation_space=observation_space,
|
||||||
default_camera_config=DEFAULT_CAMERA_CONFIG,
|
default_camera_config=DEFAULT_CAMERA_CONFIG,
|
||||||
**kwargs
|
**kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
@@ -41,7 +41,7 @@ class HumanoidEnv(MuJocoPyEnv, utils.EzPickle):
|
|||||||
healthy_z_range=(1.0, 2.0),
|
healthy_z_range=(1.0, 2.0),
|
||||||
reset_noise_scale=1e-2,
|
reset_noise_scale=1e-2,
|
||||||
exclude_current_positions_from_observation=True,
|
exclude_current_positions_from_observation=True,
|
||||||
**kwargs
|
**kwargs,
|
||||||
):
|
):
|
||||||
utils.EzPickle.__init__(
|
utils.EzPickle.__init__(
|
||||||
self,
|
self,
|
||||||
@@ -55,7 +55,7 @@ class HumanoidEnv(MuJocoPyEnv, utils.EzPickle):
|
|||||||
healthy_z_range,
|
healthy_z_range,
|
||||||
reset_noise_scale,
|
reset_noise_scale,
|
||||||
exclude_current_positions_from_observation,
|
exclude_current_positions_from_observation,
|
||||||
**kwargs
|
**kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
self._forward_reward_weight = forward_reward_weight
|
self._forward_reward_weight = forward_reward_weight
|
||||||
|
@@ -233,7 +233,7 @@ class HumanoidEnv(MujocoEnv, utils.EzPickle):
|
|||||||
healthy_z_range=(1.0, 2.0),
|
healthy_z_range=(1.0, 2.0),
|
||||||
reset_noise_scale=1e-2,
|
reset_noise_scale=1e-2,
|
||||||
exclude_current_positions_from_observation=True,
|
exclude_current_positions_from_observation=True,
|
||||||
**kwargs
|
**kwargs,
|
||||||
):
|
):
|
||||||
utils.EzPickle.__init__(
|
utils.EzPickle.__init__(
|
||||||
self,
|
self,
|
||||||
@@ -244,7 +244,7 @@ class HumanoidEnv(MujocoEnv, utils.EzPickle):
|
|||||||
healthy_z_range,
|
healthy_z_range,
|
||||||
reset_noise_scale,
|
reset_noise_scale,
|
||||||
exclude_current_positions_from_observation,
|
exclude_current_positions_from_observation,
|
||||||
**kwargs
|
**kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
self._forward_reward_weight = forward_reward_weight
|
self._forward_reward_weight = forward_reward_weight
|
||||||
@@ -274,7 +274,7 @@ class HumanoidEnv(MujocoEnv, utils.EzPickle):
|
|||||||
5,
|
5,
|
||||||
observation_space=observation_space,
|
observation_space=observation_space,
|
||||||
default_camera_config=DEFAULT_CAMERA_CONFIG,
|
default_camera_config=DEFAULT_CAMERA_CONFIG,
|
||||||
**kwargs
|
**kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
@@ -24,7 +24,7 @@ class HumanoidStandupEnv(MuJocoPyEnv, utils.EzPickle):
|
|||||||
"humanoidstandup.xml",
|
"humanoidstandup.xml",
|
||||||
5,
|
5,
|
||||||
observation_space=observation_space,
|
observation_space=observation_space,
|
||||||
**kwargs
|
**kwargs,
|
||||||
)
|
)
|
||||||
utils.EzPickle.__init__(self, **kwargs)
|
utils.EzPickle.__init__(self, **kwargs)
|
||||||
|
|
||||||
|
@@ -210,7 +210,7 @@ class HumanoidStandupEnv(MujocoEnv, utils.EzPickle):
|
|||||||
5,
|
5,
|
||||||
observation_space=observation_space,
|
observation_space=observation_space,
|
||||||
default_camera_config=DEFAULT_CAMERA_CONFIG,
|
default_camera_config=DEFAULT_CAMERA_CONFIG,
|
||||||
**kwargs
|
**kwargs,
|
||||||
)
|
)
|
||||||
utils.EzPickle.__init__(self, **kwargs)
|
utils.EzPickle.__init__(self, **kwargs)
|
||||||
|
|
||||||
|
@@ -22,7 +22,7 @@ class InvertedDoublePendulumEnv(MuJocoPyEnv, utils.EzPickle):
|
|||||||
"inverted_double_pendulum.xml",
|
"inverted_double_pendulum.xml",
|
||||||
5,
|
5,
|
||||||
observation_space=observation_space,
|
observation_space=observation_space,
|
||||||
**kwargs
|
**kwargs,
|
||||||
)
|
)
|
||||||
utils.EzPickle.__init__(self, **kwargs)
|
utils.EzPickle.__init__(self, **kwargs)
|
||||||
|
|
||||||
|
@@ -141,7 +141,7 @@ class InvertedDoublePendulumEnv(MujocoEnv, utils.EzPickle):
|
|||||||
5,
|
5,
|
||||||
observation_space=observation_space,
|
observation_space=observation_space,
|
||||||
default_camera_config=DEFAULT_CAMERA_CONFIG,
|
default_camera_config=DEFAULT_CAMERA_CONFIG,
|
||||||
**kwargs
|
**kwargs,
|
||||||
)
|
)
|
||||||
utils.EzPickle.__init__(self, **kwargs)
|
utils.EzPickle.__init__(self, **kwargs)
|
||||||
|
|
||||||
|
@@ -23,7 +23,7 @@ class InvertedPendulumEnv(MuJocoPyEnv, utils.EzPickle):
|
|||||||
"inverted_pendulum.xml",
|
"inverted_pendulum.xml",
|
||||||
2,
|
2,
|
||||||
observation_space=observation_space,
|
observation_space=observation_space,
|
||||||
**kwargs
|
**kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
def step(self, a):
|
def step(self, a):
|
||||||
|
@@ -111,7 +111,7 @@ class InvertedPendulumEnv(MujocoEnv, utils.EzPickle):
|
|||||||
2,
|
2,
|
||||||
observation_space=observation_space,
|
observation_space=observation_space,
|
||||||
default_camera_config=DEFAULT_CAMERA_CONFIG,
|
default_camera_config=DEFAULT_CAMERA_CONFIG,
|
||||||
**kwargs
|
**kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
def step(self, a):
|
def step(self, a):
|
||||||
|
@@ -153,7 +153,6 @@ class OffScreenViewer(BaseRender):
|
|||||||
self.cam.distance = self.model.stat.extent
|
self.cam.distance = self.model.stat.extent
|
||||||
|
|
||||||
def _get_opengl_backend(self, width: int, height: int):
|
def _get_opengl_backend(self, width: int, height: int):
|
||||||
|
|
||||||
self.backend = os.environ.get("MUJOCO_GL")
|
self.backend = os.environ.get("MUJOCO_GL")
|
||||||
if self.backend is not None:
|
if self.backend is not None:
|
||||||
try:
|
try:
|
||||||
@@ -197,7 +196,6 @@ class OffScreenViewer(BaseRender):
|
|||||||
camera_id: Optional[int] = None,
|
camera_id: Optional[int] = None,
|
||||||
segmentation: bool = False,
|
segmentation: bool = False,
|
||||||
):
|
):
|
||||||
|
|
||||||
if camera_id is not None:
|
if camera_id is not None:
|
||||||
if camera_id == -1:
|
if camera_id == -1:
|
||||||
self.cam.type = mujoco.mjtCamera.mjCAMERA_FREE
|
self.cam.type = mujoco.mjtCamera.mjCAMERA_FREE
|
||||||
@@ -348,6 +346,7 @@ class WindowViewer(BaseRender):
|
|||||||
6. Swap front and back buffer, https://www.glfw.org/docs/3.3/quick.html.
|
6. Swap front and back buffer, https://www.glfw.org/docs/3.3/quick.html.
|
||||||
7. Poll events like mouse clicks or keyboard input.
|
7. Poll events like mouse clicks or keyboard input.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# mjv_updateScene, mjr_render, mjr_overlay
|
# mjv_updateScene, mjr_render, mjr_overlay
|
||||||
def update():
|
def update():
|
||||||
# fill overlay items
|
# fill overlay items
|
||||||
|
@@ -157,7 +157,7 @@ class PusherEnv(MujocoEnv, utils.EzPickle):
|
|||||||
5,
|
5,
|
||||||
observation_space=observation_space,
|
observation_space=observation_space,
|
||||||
default_camera_config=DEFAULT_CAMERA_CONFIG,
|
default_camera_config=DEFAULT_CAMERA_CONFIG,
|
||||||
**kwargs
|
**kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
def step(self, a):
|
def step(self, a):
|
||||||
|
@@ -139,7 +139,7 @@ class ReacherEnv(MujocoEnv, utils.EzPickle):
|
|||||||
2,
|
2,
|
||||||
observation_space=observation_space,
|
observation_space=observation_space,
|
||||||
default_camera_config=DEFAULT_CAMERA_CONFIG,
|
default_camera_config=DEFAULT_CAMERA_CONFIG,
|
||||||
**kwargs
|
**kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
def step(self, a):
|
def step(self, a):
|
||||||
|
@@ -27,7 +27,7 @@ class SwimmerEnv(MuJocoPyEnv, utils.EzPickle):
|
|||||||
ctrl_cost_weight=1e-4,
|
ctrl_cost_weight=1e-4,
|
||||||
reset_noise_scale=0.1,
|
reset_noise_scale=0.1,
|
||||||
exclude_current_positions_from_observation=True,
|
exclude_current_positions_from_observation=True,
|
||||||
**kwargs
|
**kwargs,
|
||||||
):
|
):
|
||||||
utils.EzPickle.__init__(
|
utils.EzPickle.__init__(
|
||||||
self,
|
self,
|
||||||
@@ -36,7 +36,7 @@ class SwimmerEnv(MuJocoPyEnv, utils.EzPickle):
|
|||||||
ctrl_cost_weight,
|
ctrl_cost_weight,
|
||||||
reset_noise_scale,
|
reset_noise_scale,
|
||||||
exclude_current_positions_from_observation,
|
exclude_current_positions_from_observation,
|
||||||
**kwargs
|
**kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
self._forward_reward_weight = forward_reward_weight
|
self._forward_reward_weight = forward_reward_weight
|
||||||
|
@@ -139,7 +139,7 @@ class SwimmerEnv(MujocoEnv, utils.EzPickle):
|
|||||||
ctrl_cost_weight=1e-4,
|
ctrl_cost_weight=1e-4,
|
||||||
reset_noise_scale=0.1,
|
reset_noise_scale=0.1,
|
||||||
exclude_current_positions_from_observation=True,
|
exclude_current_positions_from_observation=True,
|
||||||
**kwargs
|
**kwargs,
|
||||||
):
|
):
|
||||||
utils.EzPickle.__init__(
|
utils.EzPickle.__init__(
|
||||||
self,
|
self,
|
||||||
@@ -147,7 +147,7 @@ class SwimmerEnv(MujocoEnv, utils.EzPickle):
|
|||||||
ctrl_cost_weight,
|
ctrl_cost_weight,
|
||||||
reset_noise_scale,
|
reset_noise_scale,
|
||||||
exclude_current_positions_from_observation,
|
exclude_current_positions_from_observation,
|
||||||
**kwargs
|
**kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
self._forward_reward_weight = forward_reward_weight
|
self._forward_reward_weight = forward_reward_weight
|
||||||
|
@@ -34,7 +34,7 @@ class Walker2dEnv(MuJocoPyEnv, utils.EzPickle):
|
|||||||
healthy_angle_range=(-1.0, 1.0),
|
healthy_angle_range=(-1.0, 1.0),
|
||||||
reset_noise_scale=5e-3,
|
reset_noise_scale=5e-3,
|
||||||
exclude_current_positions_from_observation=True,
|
exclude_current_positions_from_observation=True,
|
||||||
**kwargs
|
**kwargs,
|
||||||
):
|
):
|
||||||
utils.EzPickle.__init__(
|
utils.EzPickle.__init__(
|
||||||
self,
|
self,
|
||||||
@@ -47,7 +47,7 @@ class Walker2dEnv(MuJocoPyEnv, utils.EzPickle):
|
|||||||
healthy_angle_range,
|
healthy_angle_range,
|
||||||
reset_noise_scale,
|
reset_noise_scale,
|
||||||
exclude_current_positions_from_observation,
|
exclude_current_positions_from_observation,
|
||||||
**kwargs
|
**kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
self._forward_reward_weight = forward_reward_weight
|
self._forward_reward_weight = forward_reward_weight
|
||||||
|
@@ -165,7 +165,7 @@ class Walker2dEnv(MujocoEnv, utils.EzPickle):
|
|||||||
healthy_angle_range=(-1.0, 1.0),
|
healthy_angle_range=(-1.0, 1.0),
|
||||||
reset_noise_scale=5e-3,
|
reset_noise_scale=5e-3,
|
||||||
exclude_current_positions_from_observation=True,
|
exclude_current_positions_from_observation=True,
|
||||||
**kwargs
|
**kwargs,
|
||||||
):
|
):
|
||||||
utils.EzPickle.__init__(
|
utils.EzPickle.__init__(
|
||||||
self,
|
self,
|
||||||
@@ -177,7 +177,7 @@ class Walker2dEnv(MujocoEnv, utils.EzPickle):
|
|||||||
healthy_angle_range,
|
healthy_angle_range,
|
||||||
reset_noise_scale,
|
reset_noise_scale,
|
||||||
exclude_current_positions_from_observation,
|
exclude_current_positions_from_observation,
|
||||||
**kwargs
|
**kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
self._forward_reward_weight = forward_reward_weight
|
self._forward_reward_weight = forward_reward_weight
|
||||||
@@ -210,7 +210,7 @@ class Walker2dEnv(MujocoEnv, utils.EzPickle):
|
|||||||
4,
|
4,
|
||||||
observation_space=observation_space,
|
observation_space=observation_space,
|
||||||
default_camera_config=DEFAULT_CAMERA_CONFIG,
|
default_camera_config=DEFAULT_CAMERA_CONFIG,
|
||||||
**kwargs
|
**kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
@@ -215,7 +215,6 @@ class FunctionalJaxVectorEnv(gym.experimental.vector.VectorEnv):
|
|||||||
|
|
||||||
done = jnp.logical_or(terminated, truncated)
|
done = jnp.logical_or(terminated, truncated)
|
||||||
if jnp.any(done):
|
if jnp.any(done):
|
||||||
|
|
||||||
final_obs = self.func_env.observation(next_state)
|
final_obs = self.func_env.observation(next_state)
|
||||||
|
|
||||||
to_reset = jnp.where(done)[0]
|
to_reset = jnp.where(done)[0]
|
||||||
|
@@ -98,7 +98,6 @@ class SyncVectorEnv(VectorEnv):
|
|||||||
observations = []
|
observations = []
|
||||||
infos = {}
|
infos = {}
|
||||||
for i, (env, single_seed) in enumerate(zip(self.envs, seed)):
|
for i, (env, single_seed) in enumerate(zip(self.envs, seed)):
|
||||||
|
|
||||||
kwargs = {}
|
kwargs = {}
|
||||||
if single_seed is not None:
|
if single_seed is not None:
|
||||||
kwargs["seed"] = single_seed
|
kwargs["seed"] = single_seed
|
||||||
@@ -124,7 +123,6 @@ class SyncVectorEnv(VectorEnv):
|
|||||||
|
|
||||||
observations, infos = [], {}
|
observations, infos = [], {}
|
||||||
for i, (env, action) in enumerate(zip(self.envs, actions)):
|
for i, (env, action) in enumerate(zip(self.envs, actions)):
|
||||||
|
|
||||||
(
|
(
|
||||||
observation,
|
observation,
|
||||||
self._rewards[i],
|
self._rewards[i],
|
||||||
|
@@ -113,7 +113,6 @@ class SyncVectorEnv(VectorEnv):
|
|||||||
observations = []
|
observations = []
|
||||||
infos = {}
|
infos = {}
|
||||||
for i, (env, single_seed) in enumerate(zip(self.envs, seed)):
|
for i, (env, single_seed) in enumerate(zip(self.envs, seed)):
|
||||||
|
|
||||||
kwargs = {}
|
kwargs = {}
|
||||||
if single_seed is not None:
|
if single_seed is not None:
|
||||||
kwargs["seed"] = single_seed
|
kwargs["seed"] = single_seed
|
||||||
@@ -141,7 +140,6 @@ class SyncVectorEnv(VectorEnv):
|
|||||||
"""
|
"""
|
||||||
observations, infos = [], {}
|
observations, infos = [], {}
|
||||||
for i, (env, action) in enumerate(zip(self.envs, self._actions)):
|
for i, (env, action) in enumerate(zip(self.envs, self._actions)):
|
||||||
|
|
||||||
(
|
(
|
||||||
observation,
|
observation,
|
||||||
self._rewards[i],
|
self._rewards[i],
|
||||||
|
@@ -44,7 +44,6 @@ class AutoResetWrapper(gym.Wrapper):
|
|||||||
"""
|
"""
|
||||||
obs, reward, terminated, truncated, info = self.env.step(action)
|
obs, reward, terminated, truncated, info = self.env.step(action)
|
||||||
if terminated or truncated:
|
if terminated or truncated:
|
||||||
|
|
||||||
new_obs, new_info = self.env.reset()
|
new_obs, new_info = self.env.reset()
|
||||||
assert (
|
assert (
|
||||||
"final_observation" not in new_info
|
"final_observation" not in new_info
|
||||||
|
Reference in New Issue
Block a user