2019-06-08 00:56:56 +02:00
|
|
|
import numpy as np
|
|
|
|
|
2022-09-16 23:41:27 +01:00
|
|
|
import gymnasium as gym
|
2022-09-08 10:10:07 +01:00
|
|
|
from gymnasium.wrappers import ClipAction
|
2019-06-08 00:56:56 +02:00
|
|
|
|
|
|
|
|
|
|
|
def test_clip_action():
    """Check that ``ClipAction`` matches clipping the action by hand.

    Two ``MountainCarContinuous-v0`` environments are reset with the same seed
    and stepped in lockstep. The bare environment receives each action after
    it has been clipped with ``np.clip`` to the action-space bounds; the
    wrapped environment receives the raw action. Every step must produce
    matching reward, observation, terminated and truncated values.
    """
    # mountaincar: action-based rewards
    env = gym.make("MountainCarContinuous-v0", disable_env_checker=True)
    wrapped_env = ClipAction(
        gym.make("MountainCarContinuous-v0", disable_env_checker=True)
    )

    try:
        # Identical seeds so both environments start from the same state.
        seed = 0
        env.reset(seed=seed)
        wrapped_env.reset(seed=seed)

        # NOTE(review): 1.2 and -2.5 are presumably outside the action bounds
        # so that clipping actually takes effect -- confirm against the env.
        actions = [[0.4], [1.2], [-0.3], [0.0], [-2.5]]
        for action in actions:
            obs1, r1, ter1, trunc1, _ = env.step(
                np.clip(action, env.action_space.low, env.action_space.high)
            )
            obs2, r2, ter2, trunc2, _ = wrapped_env.step(action)

            assert np.allclose(r1, r2)
            assert np.allclose(obs1, obs2)
            assert ter1 == ter2
            assert trunc1 == trunc2
    finally:
        # Release environment resources even when an assertion fails.
        wrapped_env.close()
        env.close()
|