add missing files, fix Issue #209
This commit is contained in:
28
baselines/common/running_mean_std.py
Normal file
28
baselines/common/running_mean_std.py
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
import numpy as np
|
||||||
|
class RunningMeanStd(object):
    """
    Running mean and (population) variance, updated one batch at a time.

    Batches are merged with the parallel variance algorithm:
    https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm
    """
    def __init__(self, epsilon=1e-4, shape=()):
        """
        epsilon: initial value of the sample count (also keeps the first
                 update from dividing by zero when it is non-zero)
        shape:   shape of the tracked mean / variance arrays
        """
        self.mean = np.zeros(shape, 'float64')
        self.var = np.ones(shape, 'float64')
        self.count = epsilon

    def update(self, x):
        """
        Fold a batch of samples into the running statistics.

        x: array-like whose first axis indexes the samples of the batch.
           An empty batch leaves the statistics untouched.
        """
        # Accept lists / tuples as well as ndarrays.
        x = np.asarray(x)
        batch_count = x.shape[0]
        if batch_count == 0:
            # np.mean / np.var of an empty batch are NaN and would
            # permanently corrupt the running statistics.
            return
        self.update_from_moments(np.mean(x, axis=0), np.var(x, axis=0), batch_count)

    def update_from_moments(self, batch_mean, batch_var, batch_count):
        """
        Merge precomputed batch statistics into the running statistics.

        batch_mean:  mean of the batch
        batch_var:   population variance of the batch
        batch_count: number of samples in the batch
        """
        delta = batch_mean - self.mean
        tot_count = self.count + batch_count

        new_mean = self.mean + delta * batch_count / tot_count
        # m_a / m_b are the sums of squared deviations of each side.
        m_a = self.var * self.count
        m_b = batch_var * batch_count
        M2 = m_a + m_b + np.square(delta) * self.count * batch_count / tot_count

        self.mean = new_mean
        self.var = M2 / tot_count
        self.count = tot_count
|
25
baselines/common/vec_env/dummy_vec_env.py
Normal file
25
baselines/common/vec_env/dummy_vec_env.py
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
import numpy as np
|
||||||
|
from . import VecEnv
|
||||||
|
|
||||||
|
class DummyVecEnv(VecEnv):
    """
    Vectorized environment that steps its sub-environments one after
    another and restarts any sub-environment that reports `done`.
    """
    def __init__(self, env_fns):
        """
        env_fns: iterable of zero-argument callables, each returning one
                 environment; spaces are taken from the first environment.
        """
        self.envs = [make_env() for make_env in env_fns]
        first = self.envs[0]
        self.action_space = first.action_space
        self.observation_space = first.observation_space
        # Per-environment step counter, zeroed whenever that env is reset.
        self.ts = np.zeros(len(self.envs), dtype='int')

    def step(self, action_n):
        """
        Apply one action per environment.

        Returns (observations, rewards, dones, infos) as numpy arrays; for an
        environment that finished, the observation is the one from its reset.
        """
        transitions = []
        for act, env in zip(action_n, self.envs):
            transitions.append(env.step(act))
        obs, rews, dones, infos = (np.array(column) for column in zip(*transitions))
        self.ts += 1
        for idx, finished in enumerate(dones):
            if not finished:
                continue
            obs[idx] = self.envs[idx].reset()
            self.ts[idx] = 0
        return obs, rews, dones, infos

    def reset(self):
        """Reset every environment and return the stacked first observations."""
        first_obs = []
        for env in self.envs:
            first_obs.append(env.reset())
        return np.array(first_obs)

    @property
    def num_envs(self):
        """Number of wrapped environments."""
        return len(self.envs)
|
50
baselines/common/vec_env/vec_frame_stack.py
Normal file
50
baselines/common/vec_env/vec_frame_stack.py
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
from baselines.common.vec_env import VecEnv
|
||||||
|
import numpy as np
|
||||||
|
from gym import spaces
|
||||||
|
|
||||||
|
class VecFrameStack(VecEnv):
    """
    Vectorized environment wrapper that concatenates the last `nstack`
    observations of every sub-environment along the last axis
    (oldest frame first, newest frame last).
    """
    def __init__(self, venv, nstack):
        """
        venv:   the vectorized environment to wrap
        nstack: number of consecutive observations kept in the stack
        """
        self.venv = venv
        self.nstack = nstack
        wos = venv.observation_space  # wrapped ob space
        # The stack is frame-major along the last axis ([f0 | f1 | ...]), so
        # the bounds are tiled, not element-wise repeated; np.repeat would
        # interleave per-channel bounds and mismatch the buffer layout.
        low = np.tile(wos.low, self.nstack)
        high = np.tile(wos.high, self.nstack)
        self.stackedobs = np.zeros((venv.num_envs,)+low.shape, low.dtype)
        self._observation_space = spaces.Box(low=low, high=high)
        self._action_space = venv.action_space

    def step(self, vac):
        """
        Apply sequence of actions to sequence of environments
        actions -> (stacked observations, rewards, news, infos)

        where 'news' is a boolean vector indicating whether each element is new.
        """
        obs, rews, news, infos = self.venv.step(vac)
        frame_width = obs.shape[-1]
        # Drop the oldest frame by shifting a whole frame width; shifting by a
        # single element would interleave channels of multi-channel frames.
        self.stackedobs = np.roll(self.stackedobs, shift=-frame_width, axis=-1)
        for (i, new) in enumerate(news):
            if new:
                # A new episode must not see frames of the previous one.
                self.stackedobs[i] = 0
        self.stackedobs[..., -frame_width:] = obs
        return self.stackedobs, rews, news, infos

    def reset(self):
        """
        Reset all environments and return the stack holding only the first
        observation (older slots are zero).
        """
        obs = self.venv.reset()
        self.stackedobs[...] = 0
        self.stackedobs[..., -obs.shape[-1]:] = obs
        return self.stackedobs

    @property
    def action_space(self):
        """Action space of the wrapped environment."""
        return self._action_space

    @property
    def observation_space(self):
        """Box space covering the stacked observation."""
        return self._observation_space

    def close(self):
        """Close the wrapped environment."""
        self.venv.close()

    @property
    def num_envs(self):
        """Number of sub-environments in the wrapped environment."""
        return self.venv.num_envs
|
104
baselines/common/vec_env/vec_normalize.py
Normal file
104
baselines/common/vec_env/vec_normalize.py
Normal file
@@ -0,0 +1,104 @@
|
|||||||
|
from baselines.common.vec_env import VecEnv
|
||||||
|
from baselines.common.running_mean_std import RunningMeanStd
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
class VecNormalize(VecEnv):
    """
    Vectorized environment wrapper that normalizes observations with running
    mean / variance statistics and scales rewards by the running standard
    deviation of the discounted return.
    """
    def __init__(self, venv, ob=True, ret=True, clipob=10., cliprew=10., gamma=0.99, epsilon=1e-8):
        """
        venv:    the vectorized environment to wrap
        ob:      whether to normalize observations
        ret:     whether to scale rewards by the return statistics
        clipob:  normalized observations are clipped to [-clipob, clipob]
        cliprew: scaled rewards are clipped to [-cliprew, cliprew]
        gamma:   discount factor of the return accumulator
        epsilon: added to the variance before taking the square root
        """
        self.venv = venv
        self._observation_space = self.venv.observation_space
        self._action_space = venv.action_space
        self.ob_rms = RunningMeanStd(shape=self._observation_space.shape) if ob else None
        self.ret_rms = RunningMeanStd(shape=()) if ret else None
        self.clipob = clipob
        self.cliprew = cliprew
        # Discounted return accumulated so far in each sub-environment.
        self.ret = np.zeros(self.num_envs)
        self.gamma = gamma
        self.epsilon = epsilon

    def step(self, vac):
        """
        Apply sequence of actions to sequence of environments
        actions -> (observations, rewards, news, infos)

        where 'news' is a boolean vector indicating whether each element is new.
        """
        obs, rews, news, infos = self.venv.step(vac)
        self.ret = self.ret * self.gamma + rews
        obs = self._obfilt(obs)
        if self.ret_rms is not None:
            self.ret_rms.update(self.ret)
            rews = np.clip(rews / np.sqrt(self.ret_rms.var + self.epsilon), -self.cliprew, self.cliprew)
        # Start the accumulator from zero for environments whose episode just
        # ended, so returns do not leak across episode boundaries.
        self.ret[np.asarray(news, dtype=bool)] = 0.
        return obs, rews, news, infos

    def _obfilt(self, obs):
        """
        Update the observation statistics with `obs` and return the normalized,
        clipped observations; returns `obs` unchanged when observation
        normalization is disabled.
        """
        if self.ob_rms is None:
            return obs
        self.ob_rms.update(obs)
        obs = np.clip((obs - self.ob_rms.mean) / np.sqrt(self.ob_rms.var + self.epsilon), -self.clipob, self.clipob)
        return obs

    def reset(self):
        """
        Reset all environments
        """
        # Every episode restarts, so the return accumulators restart too.
        self.ret = np.zeros(self.num_envs)
        obs = self.venv.reset()
        return self._obfilt(obs)

    @property
    def action_space(self):
        """Action space of the wrapped environment."""
        return self._action_space

    @property
    def observation_space(self):
        """Observation space of the wrapped environment."""
        return self._observation_space

    def close(self):
        """Close the wrapped environment."""
        self.venv.close()

    @property
    def num_envs(self):
        """Number of sub-environments in the wrapped environment."""
        return self.venv.num_envs
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class RunningMeanStd(object):
    """
    Running mean and (population) variance, updated one batch at a time.

    Batches are merged with the parallel variance algorithm:
    https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm
    """
    def __init__(self, epsilon=1e-4, shape=()):
        """
        epsilon: initial value of the sample count
        shape:   shape of the tracked mean / variance arrays
        """
        self.mean = np.zeros(shape, 'float64')
        self.var = np.zeros(shape, 'float64')
        self.count = epsilon

    def update(self, x):
        """
        Fold a batch of samples into the running statistics.

        x: array-like whose first axis indexes the samples of the batch.
           An empty batch leaves the statistics untouched.
        """
        # Accept lists / tuples as well as ndarrays.
        x = np.asarray(x)
        batch_count = x.shape[0]
        if batch_count == 0:
            # np.mean / np.var of an empty batch are NaN and would
            # permanently corrupt the running statistics.
            return
        self.update_from_moments(np.mean(x, axis=0), np.var(x, axis=0), batch_count)

    def update_from_moments(self, batch_mean, batch_var, batch_count):
        """
        Merge precomputed batch statistics into the running statistics.

        batch_mean:  mean of the batch
        batch_var:   population variance of the batch
        batch_count: number of samples in the batch
        """
        delta = batch_mean - self.mean
        tot_count = self.count + batch_count

        new_mean = self.mean + delta * batch_count / tot_count
        # m_a / m_b are the sums of squared deviations of each side.
        m_a = self.var * self.count
        m_b = batch_var * batch_count
        M2 = m_a + m_b + np.square(delta) * self.count * batch_count / tot_count

        self.mean = new_mean
        self.var = M2 / tot_count
        self.count = tot_count
|
||||||
|
|
||||||
|
def test_runningmeanstd():
    """
    Feeding three batches one at a time must give the same mean and variance
    as computing them directly on the concatenated data (1-D and 2-D cases).
    """
    cases = [
        (np.random.randn(3), np.random.randn(4), np.random.randn(5)),
        (np.random.randn(3,2), np.random.randn(4,2), np.random.randn(5,2)),
    ]
    for batches in cases:
        # epsilon=0.0 so the initial pseudo-count does not bias the result.
        tracker = RunningMeanStd(epsilon=0.0, shape=batches[0].shape[1:])

        combined = np.concatenate(list(batches), axis=0)
        expected = [combined.mean(axis=0), combined.var(axis=0)]
        for batch in batches:
            tracker.update(batch)
        actual = [tracker.mean, tracker.var]

        assert np.allclose(expected, actual)
|
Reference in New Issue
Block a user