save all variables to make sure we save the vec_normalize normalization

Peter Zhokhov
2018-08-01 10:12:19 -07:00
parent 151e48009e
commit 3528f7b992
7 changed files with 20 additions and 20 deletions


@@ -74,8 +74,8 @@ class Model(object):
         self.step = step_model.step
         self.value = step_model.value
         self.initial_state = step_model.initial_state
-        self.save = functools.partial(tf_util.save_variables, sess=sess, variables=params)
-        self.load = functools.partial(tf_util.load_variables, sess=sess, variables=params)
+        self.save = functools.partial(tf_util.save_variables, sess=sess)
+        self.load = functools.partial(tf_util.load_variables, sess=sess)
         tf.global_variables_initializer().run(session=sess)


@@ -83,8 +83,8 @@ class Model(object):
         self.train = train
-        self.save = functools.partial(save_variables, variables=params, sess=sess)
-        self.load = functools.partial(load_variables, variables=params, sess=sess)
+        self.save = functools.partial(save_variables, sess=sess)
+        self.load = functools.partial(load_variables, sess=sess)
         self.train_model = train_model
         self.step_model = step_model
         self.step = step_model.step


@@ -86,6 +86,7 @@ register_benchmark({
     'description': 'Some small 2D MuJoCo tasks, run for 1M timesteps',
     'tasks': [{'env_id': _envid, 'trials': 6, 'num_timesteps': int(1e6)} for _envid in _mujocosmall]
 })
+
 register_benchmark({
     'name': 'MujocoWalkers',
     'description': 'MuJoCo forward walkers, run for 8M, humanoid 100M',


@@ -46,10 +46,10 @@ class TfRunningMeanStd(object):
         _batch_count = tf.placeholder(shape=(), dtype=tf.float64)
-        with tf.variable_scope(scope, reuse=False):
-            _mean = tf.get_variable('mean', initializer=np.zeros(shape))
-            _var = tf.get_variable('std', initializer=np.ones(shape))
-            _count = tf.get_variable('count', initializer=np.ones(shape=())*epsilon)
+        with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
+            _mean = tf.get_variable('mean', initializer=np.zeros(shape, 'float64'), dtype=tf.float64)
+            _var = tf.get_variable('std', initializer=np.ones(shape, 'float64'), dtype=tf.float64)
+            _count = tf.get_variable('count', initializer=np.ones((), 'float64')*epsilon, dtype=tf.float64)
             delta = _batch_mean - _mean
             tot_count = _count + _batch_count
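As a rough illustration of the pattern above (TF1-style graph code; the shape, epsilon, and scope name here are assumed for the example, not taken from the diff): keeping the running statistics in float64 TF variables under an AUTO_REUSE scope means they show up in the graph's trainable variables, so a generic variable saver picks them up.

import numpy as np
import tensorflow as tf

# Assumed values for illustration only; the real class receives these as arguments.
shape = (4,)
epsilon = 1e-4
scope = 'observation_running_mean_std'

with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
    _mean = tf.get_variable('mean', initializer=np.zeros(shape, 'float64'), dtype=tf.float64)
    _var = tf.get_variable('std', initializer=np.ones(shape, 'float64'), dtype=tf.float64)
    _count = tf.get_variable('count', initializer=np.ones((), 'float64') * epsilon, dtype=tf.float64)

# The statistics now appear alongside the policy weights:
print([v.name for v in tf.trainable_variables()])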


@@ -325,20 +325,20 @@ def save_state(fname, sess=None):
 # The methods above and below are clearly doing the same thing, and in a rather similar way
 # TODO: ensure there is no subtle differences and remove one
-def save_variables(save_path, variables, sess=None):
-    if sess is None:
-        sess = get_session()
+def save_variables(save_path, variables=None, sess=None):
+    sess = sess or get_session()
+    variables = variables or tf.trainable_variables()
     ps = sess.run(variables)
     save_dict = {v.name: value for v, value in zip(variables, ps)}
     os.makedirs(os.path.dirname(save_path), exist_ok=True)
     joblib.dump(save_dict, save_path)
 
-def load_variables(load_path, variables, sess=None):
-    if sess is None:
-        sess = get_session()
-    loaded_params = joblib.load(load_path)
+def load_variables(load_path, variables=None, sess=None):
+    sess = sess or get_session()
+    variables = variables or tf.trainable_variables()
+    loaded_params = joblib.load(os.path.expanduser(load_path))
     restores = []
     for v in variables:
         restores.append(v.assign(loaded_params[v.name]))
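A minimal usage sketch (the file path is made up; assumes baselines.common.tf_util at this revision): with variables left unset, both helpers fall back to tf.trainable_variables(), which is what lets the VecNormalize running statistics ride along with the policy weights.

from baselines.common.tf_util import get_session, save_variables, load_variables

sess = get_session()
# No variables argument: defaults to every trainable variable in the graph,
# including the TfRunningMeanStd variables created by VecNormalize.
save_variables('/tmp/model_with_normalization.joblib', sess=sess)
load_variables('/tmp/model_with_normalization.joblib', sess=sess)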


@@ -8,8 +8,8 @@ class VecNormalize(VecEnvWrapper):
     """
     def __init__(self, venv, ob=True, ret=True, clipob=10., cliprew=10., gamma=0.99, epsilon=1e-8):
         VecEnvWrapper.__init__(self, venv)
-        # self.ob_rms = RunningMeanStd(shape=self.observation_space.shape) if ob else None
-        # self.ret_rms = RunningMeanStd(shape=()) if ret else None
+        #self.ob_rms = RunningMeanStd(shape=self.observation_space.shape) if ob else None
+        #self.ret_rms = RunningMeanStd(shape=()) if ret else None
         self.ob_rms = TfRunningMeanStd(shape=self.observation_space.shape, scope='observation_running_mean_std') if ob else None
         self.ret_rms = TfRunningMeanStd(shape=(), scope='return_running_mean_std') if ret else None
         self.clipob = clipob
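For context, the wrapper normalizes observations with those running statistics roughly as follows (a pure-numpy stand-in, not the wrapper's actual method; clipob and epsilon defaults are taken from the __init__ signature above). Because the statistics now live in TF variables via TfRunningMeanStd, they are serialized together with the policy weights when the model is saved.

import numpy as np

def normalize_obs(obs, mean, var, clipob=10., epsilon=1e-8):
    # Standardize with the running mean/variance, then clip to +/- clipob.
    return np.clip((obs - mean) / np.sqrt(var + epsilon), -clipob, clipob)

obs = np.array([1.0, -2.0, 0.5])
print(normalize_obs(obs, mean=np.zeros(3), var=np.ones(3)))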


@@ -81,9 +81,8 @@ class Model(object):
         self.value = act_model.value
         self.initial_state = act_model.initial_state
-        # If you want to load weights, also save/load observation scaling inside VecNormalize ?
-        self.save = functools.partial(save_variables, sess=sess, variables=params)
-        self.load = functools.partial(load_variables, sess=sess, variables=params)
+        self.save = functools.partial(save_variables, sess=sess)
+        self.load = functools.partial(load_variables, sess=sess)
 
         if MPI.COMM_WORLD.Get_rank() == 0:
             initialize()