* move vec_env * cleaning up rl_common * tests are passing (but most tests are deleted as they moved to baselines) * add benchmark runner for smoke tests * removed duplicated algos * route references to rl_algs.a2c to baselines.a2c * route references to rl_algs.a2c to baselines.a2c * unify conftest.py * removing references to duplicated algs from codegen * removing references to duplicated algs from codegen * alex's changes to dummy_vec_env * fixed test_carpole[deepq] testcase by decreasing number of training steps... alex's changes seemed to have fixed the bug and made it train better, but at seed=0 there is a dip in the training curve at 30k steps that fails the test * codegen tests with atol=1e-6 seem to be unstable * rl_common.vec_env -> baselines.common.vec_env mass replace * fixed reference in trpo_mpi * a2c.util references * restored rl_algs.bench in sonic_prob * fix reference in ci/runtests.sh * simplified expression in baselines/common/cmd_util * further increased rtol to 1e-3 in codegen tests * switched vecenvs to use SimpleImageViewer from gym instead of cv2 * make run.py --play option work with num_envs > 1 * make rosenbrock test reproducible * git subrepo pull (merge) baselines subrepo: subdir: "baselines" merged: "e23524a5" upstream: origin: "git@github.com:openai/baselines.git" branch: "master" commit: "bcde04e7" git-subrepo: version: "0.4.0" origin: "git@github.com:ingydotnet/git-subrepo.git" commit: "74339e8" * updated baselines README (num-timesteps --> num_timesteps) * typo in deepq/README.md
42 lines
1.4 KiB
Python
42 lines
1.4 KiB
Python
from . import VecEnvWrapper
|
|
from baselines.common.running_mean_std import RunningMeanStd
|
|
import numpy as np
|
|
|
|
|
|
class VecNormalize(VecEnvWrapper):
    """
    A vectorized wrapper that normalizes the observations
    and returns from an environment.

    Observations are centered and scaled with the running statistics held in
    ``ob_rms`` and clipped to ``[-clipob, clipob]``.  Rewards are divided by
    the square root of the running variance of the discounted return
    (``ret_rms``) and clipped to ``[-cliprew, cliprew]``; rewards are scaled
    only, not mean-centered.

    Args:
        venv: the vectorized environment being wrapped.
        ob: if true, keep running statistics for observations and normalize
            them; otherwise observations pass through unchanged.
        ret: if true, keep running statistics for the discounted return and
            scale rewards by them; otherwise rewards pass through unchanged.
        clipob: absolute bound applied to normalized observations.
        cliprew: absolute bound applied to scaled rewards.
        gamma: discount factor used to accumulate the per-environment return.
        epsilon: added to the variance before the square root to avoid
            division by zero.
    """

    def __init__(self, venv, ob=True, ret=True, clipob=10., cliprew=10., gamma=0.99, epsilon=1e-8):
        VecEnvWrapper.__init__(self, venv)
        self.ob_rms = RunningMeanStd(shape=self.observation_space.shape) if ob else None
        self.ret_rms = RunningMeanStd(shape=()) if ret else None
        self.clipob = clipob
        self.cliprew = cliprew
        # Discounted return accumulated so far, one entry per sub-environment.
        self.ret = np.zeros(self.num_envs)
        self.gamma = gamma
        self.epsilon = epsilon

    def step_wait(self):
        """
        Collect the wrapped environment's step result and normalize it.

        Returns:
            The tuple ``(obs, rews, news, infos)`` from the wrapped
            environment, with ``obs`` passed through ``_obfilt`` and ``rews``
            scaled and clipped when return normalization is enabled.
        """
        obs, rews, news, infos = self.venv.step_wait()
        # This expression allocates a new array, so the in-place masking
        # below never writes into `rews`.
        self.ret = self.ret * self.gamma + rews
        obs = self._obfilt(obs)
        if self.ret_rms is not None:
            self.ret_rms.update(self.ret)
            rews = np.clip(rews / np.sqrt(self.ret_rms.var + self.epsilon), -self.cliprew, self.cliprew)
        # Start the accumulator from zero for every sub-environment whose
        # episode just ended, so returns do not leak across episodes and
        # inflate the variance estimate.
        self.ret[np.asarray(news, dtype=bool)] = 0.
        return obs, rews, news, infos

    def _obfilt(self, obs):
        """
        Update the observation statistics with ``obs`` and return the
        normalized, clipped observations.

        When observation normalization is disabled, ``obs`` is returned
        unchanged.
        """
        if self.ob_rms is None:
            return obs
        self.ob_rms.update(obs)
        normalized = (obs - self.ob_rms.mean) / np.sqrt(self.ob_rms.var + self.epsilon)
        return np.clip(normalized, -self.clipob, self.clipob)

    def reset(self):
        """
        Reset the wrapped environment and return its normalized observations.

        The discounted-return accumulator is cleared as well, since every
        sub-environment starts a new episode.
        """
        self.ret = np.zeros(self.num_envs)
        obs = self.venv.reset()
        return self._obfilt(obs)
|