# From b05be68c55d416f7aa71f745aaf3c2999d83b83c Mon Sep 17 00:00:00 2001
# From: John Schulman
# Date: Thu, 16 Nov 2017 22:14:30 -0800
# Subject: [PATCH] add missing files, fix Issue #209
#
# Files created by this patch (4 files changed, 207 insertions(+)):
#   baselines/common/running_mean_std.py        |  28
#   baselines/common/vec_env/dummy_vec_env.py   |  25
#   baselines/common/vec_env/vec_frame_stack.py |  50
#   baselines/common/vec_env/vec_normalize.py   | 104

# ---- baselines/common/running_mean_std.py (new file) ----
import numpy as np


class RunningMeanStd(object):
    """Running mean and variance of a stream of batches.

    Batches are merged with the parallel algorithm described at
    https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm

    Attributes:
        mean: float64 array of shape ``shape``; starts at zeros.
        var: float64 array of shape ``shape``; starts at ones.
        count: number of samples seen so far, seeded with ``epsilon``.
    """

    def __init__(self, epsilon=1e-4, shape=()):
        """Create an accumulator.

        Args:
            epsilon: initial pseudo-count given to the starting mean/var.
                With the default it keeps the first merge from dividing by
                zero; ``0.0`` makes the statistics exact after one update.
            shape: shape of a single sample (the batch axis is excluded).
        """
        self.mean = np.zeros(shape, 'float64')
        self.var = np.ones(shape, 'float64')
        self.count = epsilon

    def update(self, x):
        """Fold a batch of samples into the running statistics.

        Args:
            x: array-like whose axis 0 indexes the samples of the batch.
        """
        x = np.asarray(x)
        batch_count = x.shape[0]
        if batch_count == 0:
            # The mean/var of an empty batch is NaN and would permanently
            # poison the running statistics, so an empty batch is a no-op.
            return
        self.update_from_moments(np.mean(x, axis=0), np.var(x, axis=0), batch_count)

    def update_from_moments(self, batch_mean, batch_var, batch_count):
        """Merge precomputed batch moments into the running statistics.

        Args:
            batch_mean: mean of the batch over its sample axis.
            batch_var: (population) variance of the batch over its sample axis.
            batch_count: number of samples in the batch.
        """
        delta = batch_mean - self.mean
        tot_count = self.count + batch_count

        new_mean = self.mean + delta * batch_count / tot_count
        # Sums of squared deviations of both parts, plus the correction term
        # that accounts for the two parts having different means.
        m_a = self.var * self.count
        m_b = batch_var * batch_count
        m2 = m_a + m_b + np.square(delta) * self.count * batch_count / tot_count

        self.mean = new_mean
        self.var = m2 / tot_count
        self.count = tot_count

# ---- baselines/common/vec_env/dummy_vec_env.py (new file) ----
import numpy as np
from . import VecEnv


class DummyVecEnv(VecEnv):
    """Vectorized environment that steps its sub-environments one by one.

    Every environment is created and stepped sequentially in the calling
    process. The action and observation spaces are taken from the first
    environment.
    """

    def __init__(self, env_fns):
        """Build the environments.

        Args:
            env_fns: iterable of zero-argument callables, each returning an
                environment.
        """
        self.envs = [fn() for fn in env_fns]
        env = self.envs[0]
        self.action_space = env.action_space
        self.observation_space = env.observation_space
        # Per-environment count of steps taken since the last reset.
        self.ts = np.zeros(len(self.envs), dtype='int')

    def step(self, action_n):
        """Step each environment with its own action.

        Environments that report ``done`` are reset immediately, and the
        observation returned for them is the first one of the new episode.

        Args:
            action_n: one action per environment, in environment order.

        Returns:
            Tuple ``(obs, rews, dones, infos)`` of arrays with one entry per
            environment.
        """
        results = [env.step(a) for (a, env) in zip(action_n, self.envs)]
        obs, rews, dones, infos = map(np.array, zip(*results))
        self.ts += 1
        for (i, done) in enumerate(dones):
            if done:
                obs[i] = self.envs[i].reset()
                self.ts[i] = 0
        return obs, rews, dones, infos

    def reset(self):
        """Reset every environment and return the stacked observations."""
        return np.array([env.reset() for env in self.envs])

    @property
    def num_envs(self):
        """Number of wrapped environments."""
        return len(self.envs)


# ---- baselines/common/vec_env/vec_frame_stack.py (new file) ----
from baselines.common.vec_env import VecEnv
import numpy as np
from gym import spaces


class VecFrameStack(VecEnv):
    """Wrapper that concatenates the last ``nstack`` observations.

    Frames are stacked along the last axis, oldest first, so the most recent
    observation always occupies the trailing ``obs.shape[-1]`` entries.
    """

    def __init__(self, venv, nstack):
        """Wrap ``venv``.

        Args:
            venv: vectorized environment to wrap.
            nstack: number of consecutive observations to stack.
        """
        self.venv = venv
        self.nstack = nstack
        wos = venv.observation_space  # wrapped ob space
        # Tile whole frames (not individual elements) so the bounds line up
        # with the frame-after-frame layout of the stacked observation.
        low = np.concatenate([wos.low] * self.nstack, axis=-1)
        high = np.concatenate([wos.high] * self.nstack, axis=-1)
        self.stackedobs = np.zeros((venv.num_envs,) + low.shape, low.dtype)
        self._observation_space = spaces.Box(low=low, high=high)
        self._action_space = venv.action_space

    def step(self, vac):
        """Step the wrapped environments and update the frame stacks.

        Args:
            vac: one action per environment.

        Returns:
            Tuple ``(stacked_obs, rews, news, infos)`` where ``news`` is a
            boolean vector flagging environments that just started a new
            episode; their history is zeroed before the new frame is written.
        """
        obs, rews, news, infos = self.venv.step(vac)
        frame_width = obs.shape[-1]
        # Shift by one whole frame so multi-channel frames stay aligned.
        self.stackedobs = np.roll(self.stackedobs, shift=-frame_width, axis=-1)
        for (i, new) in enumerate(news):
            if new:
                self.stackedobs[i] = 0
        self.stackedobs[..., -frame_width:] = obs
        return self.stackedobs, rews, news, infos

    def reset(self):
        """Reset all environments and start from an otherwise empty stack."""
        obs = self.venv.reset()
        self.stackedobs[...] = 0
        self.stackedobs[..., -obs.shape[-1]:] = obs
        return self.stackedobs

    @property
    def action_space(self):
        """Action space of the wrapped environment."""
        return self._action_space

    @property
    def observation_space(self):
        """Box space describing the stacked observations."""
        return self._observation_space

    def close(self):
        """Close the wrapped environment."""
        self.venv.close()

    @property
    def num_envs(self):
        """Number of environments in the wrapped environment."""
        return self.venv.num_envs


# ---- baselines/common/vec_env/vec_normalize.py (new file) ----
from baselines.common.vec_env import VecEnv
from baselines.common.running_mean_std import RunningMeanStd
import numpy as np


class VecNormalize(VecEnv):
    """Wrapper that normalizes observations and rescales rewards.

    Observations are shifted and scaled by running statistics and clipped to
    ``[-clipob, clipob]``. Rewards are divided by the running standard
    deviation of the discounted return and clipped to
    ``[-cliprew, cliprew]``.
    """

    def __init__(self, venv, ob=True, ret=True, clipob=10., cliprew=10., gamma=0.99, epsilon=1e-8):
        """Wrap ``venv``.

        Args:
            venv: vectorized environment to wrap.
            ob: whether to normalize observations.
            ret: whether to rescale rewards.
            clipob: clipping bound for normalized observations.
            cliprew: clipping bound for rescaled rewards.
            gamma: discount used for the running return.
            epsilon: added to variances before the square root.
        """
        self.venv = venv
        self._observation_space = self.venv.observation_space
        self._action_space = venv.action_space
        self.ob_rms = RunningMeanStd(shape=self._observation_space.shape) if ob else None
        self.ret_rms = RunningMeanStd(shape=()) if ret else None
        self.clipob = clipob
        self.cliprew = cliprew
        # Discounted return accumulated so far in each environment's episode.
        self.ret = np.zeros(self.num_envs)
        self.gamma = gamma
        self.epsilon = epsilon

    def step(self, vac):
        """Step the wrapped environments and normalize the results.

        Args:
            vac: one action per environment.

        Returns:
            Tuple ``(obs, rews, news, infos)`` where ``news`` is a boolean
            vector flagging environments that just started a new episode.
        """
        obs, rews, news, infos = self.venv.step(vac)
        self.ret = self.ret * self.gamma + rews
        obs = self._obfilt(obs)
        if self.ret_rms is not None:
            self.ret_rms.update(self.ret)
            rews = np.clip(rews / np.sqrt(self.ret_rms.var + self.epsilon), -self.cliprew, self.cliprew)
        # A finished episode must not leak its return into the next one.
        self.ret[np.asarray(news, dtype=bool)] = 0.
        return obs, rews, news, infos

    def _obfilt(self, obs):
        """Update the observation statistics with ``obs`` and normalize it."""
        if self.ob_rms is None:
            return obs
        self.ob_rms.update(obs)
        return np.clip((obs - self.ob_rms.mean) / np.sqrt(self.ob_rms.var + self.epsilon), -self.clipob, self.clipob)

    def reset(self):
        """Reset all environments and clear the running returns."""
        self.ret = np.zeros(self.num_envs)
        obs = self.venv.reset()
        return self._obfilt(obs)

    @property
    def action_space(self):
        """Action space of the wrapped environment."""
        return self._action_space

    @property
    def observation_space(self):
        """Observation space of the wrapped environment."""
        return self._observation_space

    def close(self):
        """Close the wrapped environment."""
        self.venv.close()

    @property
    def num_envs(self):
        """Number of environments in the wrapped environment."""
        return self.venv.num_envs


# RunningMeanStd is the class imported above from
# baselines.common.running_mean_std; this module no longer carries its own
# copy (which shadowed the import and initialised ``var`` differently).


def test_runningmeanstd():
    """Check that incremental updates match the statistics of all the data."""
    for (x1, x2, x3) in [
        (np.random.randn(3), np.random.randn(4), np.random.randn(5)),
        (np.random.randn(3, 2), np.random.randn(4, 2), np.random.randn(5, 2)),
    ]:
        # epsilon=0.0 removes the prior so the result must be exact.
        rms = RunningMeanStd(epsilon=0.0, shape=x1.shape[1:])

        x = np.concatenate([x1, x2, x3], axis=0)
        ms1 = [x.mean(axis=0), x.var(axis=0)]
        rms.update(x1)
        rms.update(x2)
        rms.update(x3)
        ms2 = [rms.mean, rms.var]

        assert np.allclose(ms1, ms2)