Gymnasium/gym/wrappers/normalize.py

from typing import Optional

import numpy as np
import gym


# taken from https://github.com/openai/baselines/blob/master/baselines/common/vec_env/vec_normalize.py
class RunningMeanStd:
    """Running estimate of the mean and variance of a stream of batches.

    Each batch's moments are folded into the stored ones with
    ``update_mean_var_count_from_moments``. Background on the merge formula:
    https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm
    """

    def __init__(self, epsilon=1e-4, shape=()):
        """Start from zero mean and unit variance.

        Args:
            epsilon: Initial value of ``count`` (the stored sample count).
            shape: Shape of the ``mean`` and ``var`` arrays.
        """
        self.mean = np.zeros(shape, dtype="float64")
        self.var = np.ones(shape, dtype="float64")
        self.count = epsilon

    def update(self, x):
        """Fold the batch ``x`` into the statistics.

        The mean and variance are taken over axis 0 of ``x`` and
        ``x.shape[0]`` is used as the batch's sample count.
        """
        self.update_from_moments(
            np.mean(x, axis=0),
            np.var(x, axis=0),
            x.shape[0],
        )

    def update_from_moments(self, batch_mean, batch_var, batch_count):
        """Fold already-computed batch moments into the statistics."""
        merged = update_mean_var_count_from_moments(
            self.mean,
            self.var,
            self.count,
            batch_mean,
            batch_var,
            batch_count,
        )
        self.mean, self.var, self.count = merged


def update_mean_var_count_from_moments(
    mean, var, count, batch_mean, batch_var, batch_count
):
    """Merge stored moments with the moments of a new batch.

    Args:
        mean: Mean accumulated so far.
        var: Variance accumulated so far.
        count: Number of samples behind ``mean`` and ``var``.
        batch_mean: Mean of the new batch.
        batch_var: Variance of the new batch.
        batch_count: Number of samples in the new batch.

    Returns:
        A ``(new_mean, new_var, new_count)`` tuple describing the union of
        the previously seen samples and the batch.
    """
    total = count + batch_count
    shift = batch_mean - mean

    # Sums of squared deviations of each part, plus the correction for the
    # two parts having different means.
    running_m2 = var * count
    batch_m2 = batch_var * batch_count
    cross_term = np.square(shift) * count * batch_count / total
    merged_m2 = running_m2 + batch_m2 + cross_term

    return mean + shift * batch_count / total, merged_m2 / total, total


class NormalizeObservation(gym.core.Wrapper):
    """Wrapper that standardises observations with running statistics.

    Every observation returned by ``step`` and ``reset`` first updates a
    ``RunningMeanStd`` and is then centred by the running mean and divided
    by the square root of the running variance plus ``epsilon``.
    """

    def __init__(
        self,
        env,
        epsilon=1e-8,
    ):
        """Wrap ``env``.

        Args:
            env: Environment to wrap. Its ``num_envs`` and ``is_vector_env``
                attributes are read when present (defaults: 1 and False).
            epsilon: Added to the running variance before the square root.
        """
        super().__init__(env)
        self.num_envs = getattr(env, "num_envs", 1)
        self.is_vector_env = getattr(env, "is_vector_env", False)
        # Statistics are kept per observation element, so a vector env uses
        # the shape of a single sub-environment's observation space.
        if self.is_vector_env:
            stats_shape = self.single_observation_space.shape
        else:
            stats_shape = self.observation_space.shape
        self.obs_rms = RunningMeanStd(shape=stats_shape)
        self.epsilon = epsilon

    def step(self, action):
        """Step the wrapped env and normalise the returned observation."""
        obs, rews, dones, infos = self.env.step(action)
        # A single observation is wrapped into a batch of one and unwrapped
        # again afterwards.
        obs = (
            self.normalize(obs)
            if self.is_vector_env
            else self.normalize(np.array([obs]))[0]
        )
        return obs, rews, dones, infos

    def reset(
        self,
        seed: Optional[int] = None,
        return_info: bool = False,
        options: Optional[dict] = None,
    ):
        """Reset the wrapped env and normalise the initial observation.

        Returns the normalised observation, or ``(observation, info)`` when
        ``return_info`` is true.
        """
        if return_info:
            obs, info = self.env.reset(seed=seed, return_info=True, options=options)
        else:
            obs = self.env.reset(seed=seed, options=options)
            info = None

        obs = (
            self.normalize(obs)
            if self.is_vector_env
            else self.normalize(np.array([obs]))[0]
        )
        return (obs, info) if return_info else obs

    def normalize(self, obs):
        """Update the running statistics with ``obs`` and return it normalised."""
        stats = self.obs_rms
        stats.update(obs)
        centred = obs - stats.mean
        scale = np.sqrt(stats.var + self.epsilon)
        return centred / scale


class NormalizeReward(gym.core.Wrapper):
    """Wrapper that rescales rewards by the spread of discounted returns.

    A per-environment discounted return is accumulated at every step; its
    running variance (tracked by a ``RunningMeanStd``) is used to divide the
    rewards. Rewards are scaled only, not centred.
    """

    def __init__(
        self,
        env,
        gamma=0.99,
        epsilon=1e-8,
    ):
        """Wrap ``env``.

        Args:
            env: Environment to wrap. Its ``num_envs`` and ``is_vector_env``
                attributes are read when present (defaults: 1 and False).
            gamma: Discount factor applied to the accumulated returns.
            epsilon: Added to the running variance before the square root.
        """
        super().__init__(env)
        self.num_envs = getattr(env, "num_envs", 1)
        self.is_vector_env = getattr(env, "is_vector_env", False)
        self.return_rms = RunningMeanStd(shape=())
        self.returns = np.zeros(self.num_envs)
        self.gamma = gamma
        self.epsilon = epsilon

    def step(self, action):
        """Step the wrapped env and return the normalised reward(s)."""
        obs, rews, dones, infos = self.env.step(action)
        single_env = not self.is_vector_env
        if single_env:
            # Treat a lone reward as a batch of one.
            rews = np.array([rews])
        self.returns = self.returns * self.gamma + rews
        rews = self.normalize(rews)
        # Clear the accumulated return wherever `dones` is set.
        self.returns[dones] = 0.0
        return obs, (rews[0] if single_env else rews), dones, infos

    def normalize(self, rews):
        """Update the return statistics and scale ``rews`` accordingly."""
        stats = self.return_rms
        stats.update(self.returns)
        scale = np.sqrt(stats.var + self.epsilon)
        return rews / scale
Seeding update (#2422) * Ditch most of the seeding.py and replace np_random with the numpy default_rng. Let's see if tests pass * Updated a bunch of RNG calls from the RandomState API to Generator API * black; didn't expect that, did ya? * Undo a typo * blaaack * More typo fixes * Fixed setting/getting state in multidiscrete spaces * Fix typo, fix a test to work with the new sampling * Correctly (?) pass the randomly generated seed if np_random is called with None as seed * Convert the Discrete sample to a python int (as opposed to np.int64) * Remove some redundant imports * First version of the compatibility layer for old-style RNG. Mainly to trigger tests. * Removed redundant f-strings * Style fixes, removing unused imports * Try to make tests pass by removing atari from the dockerfile * Try to make tests pass by removing atari from the setup * Try to make tests pass by removing atari from the setup * Try to make tests pass by removing atari from the setup * First attempt at deprecating `env.seed` and supporting `env.reset(seed=seed)` instead. Tests should hopefully pass but throw up a million warnings. * black; didn't expect that, didya? * Rename the reset parameter in VecEnvs back to `seed` * Updated tests to use the new seeding method * Removed a bunch of old `seed` calls. Fixed a bug in AsyncVectorEnv * Stop Discrete envs from doing part of the setup (and using the randomness) in init (as opposed to reset) * Add explicit seed to wrappers reset * Remove an accidental return * Re-add some legacy functions with a warning. * Use deprecation instead of regular warnings for the newly deprecated methods/functions 2021-12-08 22:14:15 +01:00			`from typing import Optional`

Add Normalize env (#2387) * initial commit * undo black * add code * add test cases and refactor * add docs * black * documentation update * break feature apart * quick fix * quick fix * quick fix * update documentation * update documentation * Update wrapper naming * fix ci 2021-09-09 15:57:10 -04:00			`import numpy as np`
			`import gym`


			`# taken from https://github.com/openai/baselines/blob/master/baselines/common/vec_env/vec_normalize.py`
Py36+ syntax in gym/wrappers: derived by running `pyupgrade --py36-plus gym/wrappers/**.py` and `flynt gym --ll 120` (#2464) Co-authored-by: Ilya Kamen <ilya.kamenshchikov@bosch.com> 2021-11-14 01:53:06 +01:00			`class RunningMeanStd:`
Add Normalize env (#2387) * initial commit * undo black * add code * add test cases and refactor * add docs * black * documentation update * break feature apart * quick fix * quick fix * quick fix * update documentation * update documentation * Update wrapper naming * fix ci 2021-09-09 15:57:10 -04:00			`# https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm`
			`def __init__(self, epsilon=1e-4, shape=()):`
			`self.mean = np.zeros(shape, "float64")`
			`self.var = np.ones(shape, "float64")`
			`self.count = epsilon`

			`def update(self, x):`
			`batch_mean = np.mean(x, axis=0)`
			`batch_var = np.var(x, axis=0)`
			`batch_count = x.shape[0]`
			`self.update_from_moments(batch_mean, batch_var, batch_count)`

			`def update_from_moments(self, batch_mean, batch_var, batch_count):`
			`self.mean, self.var, self.count = update_mean_var_count_from_moments(`
			`self.mean, self.var, self.count, batch_mean, batch_var, batch_count`
			`)`


			`def update_mean_var_count_from_moments(`
			`mean, var, count, batch_mean, batch_var, batch_count`
			`):`
			`delta = batch_mean - mean`
			`tot_count = count + batch_count`

			`new_mean = mean + delta * batch_count / tot_count`
			`m_a = var * count`
			`m_b = batch_var * batch_count`
			`M2 = m_a + m_b + np.square(delta) * count * batch_count / tot_count`
			`new_var = M2 / tot_count`
			`new_count = tot_count`

			`return new_mean, new_var, new_count`


			`class NormalizeObservation(gym.core.Wrapper):`
			`def __init__(`
			`self,`
			`env,`
			`epsilon=1e-8,`
			`):`
Py36+ syntax in gym/wrappers: derived by running `pyupgrade --py36-plus gym/wrappers/**.py` and `flynt gym --ll 120` (#2464) Co-authored-by: Ilya Kamen <ilya.kamenshchikov@bosch.com> 2021-11-14 01:53:06 +01:00			`super().__init__(env)`
Add Normalize env (#2387) * initial commit * undo black * add code * add test cases and refactor * add docs * black * documentation update * break feature apart * quick fix * quick fix * quick fix * update documentation * update documentation * Update wrapper naming * fix ci 2021-09-09 15:57:10 -04:00			`self.num_envs = getattr(env, "num_envs", 1)`
			`self.is_vector_env = getattr(env, "is_vector_env", False)`
			`if self.is_vector_env:`
			`self.obs_rms = RunningMeanStd(shape=self.single_observation_space.shape)`
			`else:`
			`self.obs_rms = RunningMeanStd(shape=self.observation_space.shape)`
			`self.epsilon = epsilon`

			`def step(self, action):`
			`obs, rews, dones, infos = self.env.step(action)`
			`if self.is_vector_env:`
			`obs = self.normalize(obs)`
			`else:`
			`obs = self.normalize(np.array([obs]))[0]`
			`return obs, rews, dones, infos`

Adding return_info argument to reset to allow for optional info dict as a second return value (#2546) * initial draft of optional info dict in reset function, implemented for cartpole, tests seem to be passing * merged core.py * updated return type annotation for reset function in core.py * optional metadata with return_info from reset added for all first party environments, with corresponding tests. Incomplete implementation for wrappers and vector wrappers * removed Optional type for return_info arguments * added tests for return_info to normalize wrapper and sync_vector_env * autoformatted using black * added optional reset metadata tests to several wrappers * added return_info capability to async_vector_env.py and test to verify functionality * added optional return_info test for record_video.py * removed tests for mujoco environments * autoformatted * improved test coverage for optional reset return_info * re-removed unit test envs accidentally reintroduced in merge * removed unnecessary import * changes based on code-review * small fix to core wrapper typing and autoformatted record_epsisode_stats * small change to pass flake8 style 2022-02-06 17:28:27 -06:00			`def reset(`
			`self,`
			`seed: Optional[int] = None,`
			`return_info: bool = False,`
			`options: Optional[dict] = None,`
			`):`
			`obs = None`
			`info = None`
			`if not return_info:`
			`obs = self.env.reset(seed=seed, options=options)`
			`else:`
			`obs, info = self.env.reset(seed=seed, return_info=True, options=options)`
Add Normalize env (#2387) * initial commit * undo black * add code * add test cases and refactor * add docs * black * documentation update * break feature apart * quick fix * quick fix * quick fix * update documentation * update documentation * Update wrapper naming * fix ci 2021-09-09 15:57:10 -04:00			`if self.is_vector_env:`
			`obs = self.normalize(obs)`
			`else:`
			`obs = self.normalize(np.array([obs]))[0]`
Adding return_info argument to reset to allow for optional info dict as a second return value (#2546) * initial draft of optional info dict in reset function, implemented for cartpole, tests seem to be passing * merged core.py * updated return type annotation for reset function in core.py * optional metadata with return_info from reset added for all first party environments, with corresponding tests. Incomplete implementation for wrappers and vector wrappers * removed Optional type for return_info arguments * added tests for return_info to normalize wrapper and sync_vector_env * autoformatted using black * added optional reset metadata tests to several wrappers * added return_info capability to async_vector_env.py and test to verify functionality * added optional return_info test for record_video.py * removed tests for mujoco environments * autoformatted * improved test coverage for optional reset return_info * re-removed unit test envs accidentally reintroduced in merge * removed unnecessary import * changes based on code-review * small fix to core wrapper typing and autoformatted record_epsisode_stats * small change to pass flake8 style 2022-02-06 17:28:27 -06:00			`if not return_info:`
			`return obs`
			`else:`
			`return obs, info`
Add Normalize env (#2387) * initial commit * undo black * add code * add test cases and refactor * add docs * black * documentation update * break feature apart * quick fix * quick fix * quick fix * update documentation * update documentation * Update wrapper naming * fix ci 2021-09-09 15:57:10 -04:00
			`def normalize(self, obs):`
			`self.obs_rms.update(obs)`
			`return (obs - self.obs_rms.mean) / np.sqrt(self.obs_rms.var + self.epsilon)`


			`class NormalizeReward(gym.core.Wrapper):`
			`def __init__(`
			`self,`
			`env,`
			`gamma=0.99,`
			`epsilon=1e-8,`
			`):`
Py36+ syntax in gym/wrappers: derived by running `pyupgrade --py36-plus gym/wrappers/**.py` and `flynt gym --ll 120` (#2464) Co-authored-by: Ilya Kamen <ilya.kamenshchikov@bosch.com> 2021-11-14 01:53:06 +01:00			`super().__init__(env)`
Add Normalize env (#2387) * initial commit * undo black * add code * add test cases and refactor * add docs * black * documentation update * break feature apart * quick fix * quick fix * quick fix * update documentation * update documentation * Update wrapper naming * fix ci 2021-09-09 15:57:10 -04:00			`self.num_envs = getattr(env, "num_envs", 1)`
			`self.is_vector_env = getattr(env, "is_vector_env", False)`
			`self.return_rms = RunningMeanStd(shape=())`
			`self.returns = np.zeros(self.num_envs)`
			`self.gamma = gamma`
			`self.epsilon = epsilon`

			`def step(self, action):`
			`obs, rews, dones, infos = self.env.step(action)`
			`if not self.is_vector_env:`
			`rews = np.array([rews])`
			`self.returns = self.returns * self.gamma + rews`
			`rews = self.normalize(rews)`
			`self.returns[dones] = 0.0`
			`if not self.is_vector_env:`
			`rews = rews[0]`
			`return obs, rews, dones, infos`

			`def normalize(self, rews):`
			`self.return_rms.update(self.returns)`
			`return rews / np.sqrt(self.return_rms.var + self.epsilon)`