"""Test lambda reward wrapper."""

import numpy as np
import pytest

import gymnasium as gym
from gymnasium.error import InvalidBound
from gymnasium.experimental.wrappers import ClipRewardV0, LambdaRewardV0

ENV_ID = "CartPole-v1"
DISCRETE_ACTION = 0
NUM_ENVS = 3
SEED = 0


@pytest.mark.parametrize(
    ("reward_fn", "expected_reward"),
    [(lambda r: 2 * r + 1, 3)],
)
def test_lambda_reward(reward_fn, expected_reward):
    """Check that ``LambdaRewardV0`` applies the function to the reward.

    A single environment is wrapped, stepped once, and the reward coming
    out of the wrapper is compared with the parametrized expectation.
    """
    wrapped_env = LambdaRewardV0(gym.make(ENV_ID), reward_fn)
    wrapped_env.reset(seed=SEED)

    # Only the reward slot of the step tuple matters for this test.
    _obs, reward, _terminated, _truncated, _info = wrapped_env.step(
        DISCRETE_ACTION
    )

    assert reward == expected_reward


@pytest.mark.parametrize(
    (
        "reward_fn",
        "expected_reward",
    ),
    [(lambda r: 2 * r + 1, 3)],
)
def test_lambda_reward_within_vector(reward_fn, expected_reward):
    """Test lambda reward in vectorized environment.

    Wraps a vector environment of ``NUM_ENVS`` sub-environments with
    ``LambdaRewardV0``, takes one step with the same action in every
    sub-environment, and checks that every returned reward equals the
    parametrized expectation.
    """
    actions = [DISCRETE_ACTION] * NUM_ENVS
    env = gym.vector.make(ENV_ID, num_envs=NUM_ENVS)
    env = LambdaRewardV0(env, reward_fn)

    try:
        env.reset(seed=SEED)
        _, rew, _, _, _ = env.step(actions)
    finally:
        # Always release whatever the vector env holds, even if the
        # reset/step above fails.
        env.close()

    # ``np.alltrue`` was removed in NumPy 2.0; ``np.all`` is the
    # supported equivalent.
    assert np.all(rew == expected_reward)


@pytest.mark.parametrize(
    ("lower_bound", "upper_bound", "expected_reward"),
    [(None, 0.5, 0.5), (0, None, 1), (0, 0.5, 0.5)],
)
def test_clip_reward(lower_bound, upper_bound, expected_reward):
    """Check that ``ClipRewardV0`` clips the step reward.

    A single environment is wrapped with the given bounds (either of
    which may be ``None``), stepped once, and the resulting reward is
    compared with the parametrized expectation.
    """
    clipped_env = ClipRewardV0(gym.make(ENV_ID), lower_bound, upper_bound)
    clipped_env.reset(seed=SEED)

    # Only the reward slot of the step tuple matters for this test.
    _obs, reward, _terminated, _truncated, _info = clipped_env.step(
        DISCRETE_ACTION
    )

    assert reward == expected_reward


@pytest.mark.parametrize(
    ("lower_bound", "upper_bound", "expected_reward"),
    [(None, 0.5, 0.5), (0, None, 1), (0, 0.5, 0.5)],
)
def test_clip_reward_within_vector(lower_bound, upper_bound, expected_reward):
    """Test reward clipping in vectorized environment.

    Wraps a vector environment of ``NUM_ENVS`` sub-environments with
    ``ClipRewardV0`` using the given bounds (either of which may be
    ``None``), takes one step with the same action in every
    sub-environment, and checks that every returned reward equals the
    parametrized expectation.
    """
    actions = [DISCRETE_ACTION] * NUM_ENVS

    env = gym.vector.make(ENV_ID, num_envs=NUM_ENVS)
    env = ClipRewardV0(env, lower_bound, upper_bound)

    try:
        env.reset(seed=SEED)
        _, rew, _, _, _ = env.step(actions)
    finally:
        # Always release whatever the vector env holds, even if the
        # reset/step above fails.
        env.close()

    # ``np.alltrue`` was removed in NumPy 2.0; ``np.all`` is the
    # supported equivalent.
    assert np.all(rew == expected_reward)


@pytest.mark.parametrize(
    ("lower_bound", "upper_bound"),
    [(None, None), (1, -1), (np.array([1, 1]), np.array([0, 0]))],
)
def test_clip_reward_incorrect_params(lower_bound, upper_bound):
    """Check that ``ClipRewardV0`` rejects invalid bounds.

    Constructing the wrapper is expected to raise ``InvalidBound`` for
    each parametrized case: both bounds ``None``, or a lower bound that
    is greater than the upper bound (scalar or array).
    """
    base_env = gym.make(ENV_ID)

    # Construction alone must fail; the wrapper instance is never needed.
    with pytest.raises(InvalidBound):
        ClipRewardV0(base_env, lower_bound, upper_bound)