Files
Gymnasium/tests/experimental/wrappers/test_lambda_rewards.py

111 lines
3.0 KiB
Python
Raw Normal View History

2022-11-20 00:57:10 +01:00
"""Test lambda reward wrapper."""
import numpy as np
import pytest
import gymnasium as gym
from gymnasium.error import InvalidBound
from gymnasium.experimental.wrappers import ClipRewardV0, LambdaRewardV0
2022-11-20 00:57:10 +01:00
2022-11-20 00:57:10 +01:00
# Environment id handed to gym.make / gym.vector.make by the tests in this module.
ENV_ID = "CartPole-v1"
# Action passed to env.step (repeated once per sub-environment in the vector tests).
DISCRETE_ACTION = 0
# Number of sub-environments requested from gym.vector.make.
NUM_ENVS = 3
# Seed passed to env.reset.
SEED = 0
@pytest.mark.parametrize(
    ("reward_fn", "expected_reward"),
    [
        (lambda r: 2 * r + 1, 3),
    ],
)
def test_lambda_reward(reward_fn, expected_reward):
    """Check that ``LambdaRewardV0`` applies ``reward_fn`` to the step reward.

    A single environment is wrapped, reset with the fixed seed and stepped
    once; the reward returned by that step must equal ``expected_reward``.
    """
    wrapped_env = LambdaRewardV0(gym.make(ENV_ID), reward_fn)
    wrapped_env.reset(seed=SEED)

    step_result = wrapped_env.step(DISCRETE_ACTION)
    _, reward, _, _, _ = step_result

    assert reward == expected_reward
@pytest.mark.parametrize(
    ("reward_fn", "expected_reward"),
    [(lambda r: 2 * r + 1, 3)],
)
def test_lambda_reward_within_vector(reward_fn, expected_reward):
    """Test lambda reward in a vectorized environment.

    Wraps a vector environment of ``NUM_ENVS`` sub-environments with
    ``LambdaRewardV0``, steps it once with the same action for every
    sub-environment, and checks that every returned reward equals
    ``expected_reward``.
    """
    actions = [DISCRETE_ACTION] * NUM_ENVS
    env = gym.vector.make(ENV_ID, num_envs=NUM_ENVS)
    env = LambdaRewardV0(env, reward_fn)
    env.reset(seed=SEED)

    _, rew, _, _, _ = env.step(actions)

    # np.all replaces np.alltrue, which was deprecated in NumPy 1.25 and
    # removed in NumPy 2.0.
    assert np.all(rew == expected_reward)
@pytest.mark.parametrize(
    ("lower_bound", "upper_bound", "expected_reward"),
    [
        (None, 0.5, 0.5),
        (0, None, 1),
        (0, 0.5, 0.5),
    ],
)
def test_clip_reward(lower_bound, upper_bound, expected_reward):
    """Check that ``ClipRewardV0`` clips the step reward to the given bounds.

    Each case supplies a lower bound, an upper bound (either may be ``None``)
    and the reward expected from the first step of the wrapped environment.
    """
    clipped_env = ClipRewardV0(gym.make(ENV_ID), lower_bound, upper_bound)
    clipped_env.reset(seed=SEED)

    step_result = clipped_env.step(DISCRETE_ACTION)
    _, reward, _, _, _ = step_result

    assert reward == expected_reward
@pytest.mark.parametrize(
    ("lower_bound", "upper_bound", "expected_reward"),
    [(None, 0.5, 0.5), (0, None, 1), (0, 0.5, 0.5)],
)
def test_clip_reward_within_vector(lower_bound, upper_bound, expected_reward):
    """Test reward clipping in a vectorized environment.

    Wraps a vector environment of ``NUM_ENVS`` sub-environments with
    ``ClipRewardV0``, steps it once with the same action for every
    sub-environment, and checks that every returned reward equals
    ``expected_reward``.
    """
    actions = [DISCRETE_ACTION] * NUM_ENVS
    env = gym.vector.make(ENV_ID, num_envs=NUM_ENVS)
    env = ClipRewardV0(env, lower_bound, upper_bound)
    env.reset(seed=SEED)

    _, rew, _, _, _ = env.step(actions)

    # np.all replaces np.alltrue, which was deprecated in NumPy 1.25 and
    # removed in NumPy 2.0.
    assert np.all(rew == expected_reward)
@pytest.mark.parametrize(
    ("lower_bound", "upper_bound"),
    [
        (None, None),
        (1, -1),
        (np.array([1, 1]), np.array([0, 0])),
    ],
)
def test_clip_reward_incorrect_params(lower_bound, upper_bound):
    """Check that ``ClipRewardV0`` rejects invalid bounds.

    Constructing the wrapper is expected to raise ``InvalidBound`` for each
    parametrized case: both bounds ``None``, or an upper bound that is lower
    than the lower bound (given as scalars or as arrays).
    """
    base_env = gym.make(ENV_ID)

    with pytest.raises(InvalidBound):
        ClipRewardV0(base_env, lower_bound, upper_bound)