mirror of
https://github.com/Farama-Foundation/Gymnasium.git
synced 2025-08-01 06:07:08 +00:00
Fix experimental normalize reward wrapper (#277)
Co-authored-by: raphajaner <raphael.trumpp@tum.de>
This commit is contained in:
@@ -98,6 +98,10 @@ class NormalizeRewardV0(gym.Wrapper):
|
||||
|
||||
The exponential moving average will have variance :math:`(1 - \gamma)^2`.
|
||||
|
||||
The property `_update_running_mean` allows freezing/continuing the running mean calculation of the reward
|
||||
statistics. If `True` (default), the `RunningMeanStd` will get updated every time `self.normalize()` is called.
|
||||
If False, the calculated statistics are used but not updated anymore; this may be used during evaluation.
|
||||
|
||||
Note:
|
||||
The scaling depends on past trajectories and rewards will not be scaled correctly if the wrapper was newly
|
||||
instantiated or the policy was changed recently.
|
||||
@@ -118,7 +122,7 @@ class NormalizeRewardV0(gym.Wrapper):
|
||||
"""
|
||||
super().__init__(env)
|
||||
self.rewards_running_means = RunningMeanStd(shape=())
|
||||
self.discounted_reward: float = 0.0
|
||||
self.discounted_reward: np.array = np.array([0.0])
|
||||
self.gamma = gamma
|
||||
self.epsilon = epsilon
|
||||
|
||||
|
@@ -1 +1,26 @@
|
||||
"""Test suite for NormalizeRewardV0."""
|
||||
import numpy as np
|
||||
|
||||
from gymnasium.core import ActType
|
||||
from gymnasium.experimental.wrappers import NormalizeRewardV0
|
||||
from tests.testing_env import GenericTestEnv
|
||||
|
||||
|
||||
def _make_reward_env():
    """Build a `GenericTestEnv` whose every step yields a reward of 1."""

    def _constant_reward_step(self, action: ActType):
        # Reward is always 1.0; the episode never terminates or truncates.
        obs = self.observation_space.sample()
        return obs, 1.0, False, False, {}

    return GenericTestEnv(step_func=_constant_reward_step)
|
||||
|
||||
|
||||
def test_normalize_reward_wrapper():
    """Smoke-tests that `NormalizeRewardV0` wraps and steps an env without error."""
    # TODO: Functional correctness should be tested
    base_env = _make_reward_env()
    wrapped = NormalizeRewardV0(base_env)
    wrapped.reset()
    _obs, reward, _terminated, _truncated, _info = wrapped.step(None)
    # The wrapper must hand back a scalar reward, not a 1-element array.
    assert np.ndim(reward) == 0
    base_env.close()
|
||||
|
Reference in New Issue
Block a user