2019-06-08 00:56:56 +02:00
|
|
|
import numpy as np
|
|
|
|
|
|
|
|
import gym
|
|
|
|
from gym.wrappers import ClipAction
|
|
|
|
|
|
|
|
|
|
|
|
def test_clip_action():
    """Check that ClipAction is equivalent to manually clipping actions.

    MountainCarContinuous-v0 is used because its reward depends on the
    action taken, so an out-of-range action that was NOT clipped would
    yield a different reward than its clipped counterpart — making the
    comparison below a meaningful test of the wrapper.
    """
    # mountaincar: action-based rewards
    def make_env():
        return gym.make("MountainCarContinuous-v0")

    env = make_env()
    wrapped_env = ClipAction(make_env())

    # Seed both environments identically so their trajectories only
    # differ if the wrapper changes the effective action.
    seed = 0
    env.reset(seed=seed)
    wrapped_env.reset(seed=seed)

    # Mix of in-range and out-of-range actions (presumably the action
    # space is [-1, 1] — the out-of-range entries exercise the clipping).
    actions = [[0.4], [1.2], [-0.3], [0.0], [-2.5]]
    for action in actions:
        # Manually clip for the unwrapped env; the wrapper is expected
        # to perform the same clipping internally.
        obs1, r1, d1, _ = env.step(
            np.clip(action, env.action_space.low, env.action_space.high)
        )
        obs2, r2, d2, _ = wrapped_env.step(action)
        assert np.allclose(r1, r2)
        assert np.allclose(obs1, obs2)
        assert d1 == d2