@@ -54,7 +54,7 @@ def learn(env, policy, vf, gamma, lam, timesteps_per_batch, num_timesteps,
|
|||||||
stepsize = tf.Variable(initial_value=np.float32(np.array(0.03)), name='stepsize')
|
stepsize = tf.Variable(initial_value=np.float32(np.array(0.03)), name='stepsize')
|
||||||
inputs, loss, loss_sampled = policy.update_info
|
inputs, loss, loss_sampled = policy.update_info
|
||||||
optim = kfac.KfacOptimizer(learning_rate=stepsize, cold_lr=stepsize*(1-0.9), momentum=0.9, kfac_update=2,\
|
optim = kfac.KfacOptimizer(learning_rate=stepsize, cold_lr=stepsize*(1-0.9), momentum=0.9, kfac_update=2,\
|
||||||
epsilon=1e-2, stats_decay=0.99, async=1, cold_iter=1,
|
epsilon=1e-2, stats_decay=0.99, async_=1, cold_iter=1,
|
||||||
weight_decay_dict=policy.wd_dict, max_grad_norm=None)
|
weight_decay_dict=policy.wd_dict, max_grad_norm=None)
|
||||||
pi_var_list = []
|
pi_var_list = []
|
||||||
for var in tf.trainable_variables():
|
for var in tf.trainable_variables():
|
||||||
|
@@ -58,7 +58,7 @@ class Model(object):
|
|||||||
with tf.device('/gpu:0'):
|
with tf.device('/gpu:0'):
|
||||||
self.optim = optim = kfac.KfacOptimizer(learning_rate=PG_LR, clip_kl=kfac_clip,\
|
self.optim = optim = kfac.KfacOptimizer(learning_rate=PG_LR, clip_kl=kfac_clip,\
|
||||||
momentum=0.9, kfac_update=1, epsilon=0.01,\
|
momentum=0.9, kfac_update=1, epsilon=0.01,\
|
||||||
stats_decay=0.99, async=1, cold_iter=10, max_grad_norm=max_grad_norm)
|
stats_decay=0.99, async_=1, cold_iter=10, max_grad_norm=max_grad_norm)
|
||||||
|
|
||||||
update_stats_op = optim.compute_and_apply_stats(joint_fisher_loss, var_list=params)
|
update_stats_op = optim.compute_and_apply_stats(joint_fisher_loss, var_list=params)
|
||||||
train_op, q_runner = optim.apply_gradients(list(zip(grads,params)))
|
train_op, q_runner = optim.apply_gradients(list(zip(grads,params)))
|
||||||
@@ -97,7 +97,7 @@ def learn(network, env, seed, total_timesteps=int(40e6), gamma=0.99, log_interva
|
|||||||
kfac_clip=0.001, save_interval=None, lrschedule='linear', load_path=None, **network_kwargs):
|
kfac_clip=0.001, save_interval=None, lrschedule='linear', load_path=None, **network_kwargs):
|
||||||
set_global_seeds(seed)
|
set_global_seeds(seed)
|
||||||
|
|
||||||
|
|
||||||
if network == 'cnn':
|
if network == 'cnn':
|
||||||
network_kwargs['one_dim_bias'] = True
|
network_kwargs['one_dim_bias'] = True
|
||||||
|
|
||||||
@@ -115,7 +115,7 @@ def learn(network, env, seed, total_timesteps=int(40e6), gamma=0.99, log_interva
|
|||||||
with open(osp.join(logger.get_dir(), 'make_model.pkl'), 'wb') as fh:
|
with open(osp.join(logger.get_dir(), 'make_model.pkl'), 'wb') as fh:
|
||||||
fh.write(cloudpickle.dumps(make_model))
|
fh.write(cloudpickle.dumps(make_model))
|
||||||
model = make_model()
|
model = make_model()
|
||||||
|
|
||||||
if load_path is not None:
|
if load_path is not None:
|
||||||
model.load(load_path)
|
model.load(load_path)
|
||||||
|
|
||||||
|
@@ -10,14 +10,14 @@ KFAC_DEBUG = False
|
|||||||
|
|
||||||
class KfacOptimizer():
|
class KfacOptimizer():
|
||||||
|
|
||||||
def __init__(self, learning_rate=0.01, momentum=0.9, clip_kl=0.01, kfac_update=2, stats_accum_iter=60, full_stats_init=False, cold_iter=100, cold_lr=None, async=False, async_stats=False, epsilon=1e-2, stats_decay=0.95, blockdiag_bias=False, channel_fac=False, factored_damping=False, approxT2=False, use_float64=False, weight_decay_dict={},max_grad_norm=0.5):
|
def __init__(self, learning_rate=0.01, momentum=0.9, clip_kl=0.01, kfac_update=2, stats_accum_iter=60, full_stats_init=False, cold_iter=100, cold_lr=None, async_=False, async_stats=False, epsilon=1e-2, stats_decay=0.95, blockdiag_bias=False, channel_fac=False, factored_damping=False, approxT2=False, use_float64=False, weight_decay_dict={},max_grad_norm=0.5):
|
||||||
self.max_grad_norm = max_grad_norm
|
self.max_grad_norm = max_grad_norm
|
||||||
self._lr = learning_rate
|
self._lr = learning_rate
|
||||||
self._momentum = momentum
|
self._momentum = momentum
|
||||||
self._clip_kl = clip_kl
|
self._clip_kl = clip_kl
|
||||||
self._channel_fac = channel_fac
|
self._channel_fac = channel_fac
|
||||||
self._kfac_update = kfac_update
|
self._kfac_update = kfac_update
|
||||||
self._async = async
|
self._async = async_
|
||||||
self._async_stats = async_stats
|
self._async_stats = async_stats
|
||||||
self._epsilon = epsilon
|
self._epsilon = epsilon
|
||||||
self._stats_decay = stats_decay
|
self._stats_decay = stats_decay
|
||||||
|
@@ -21,7 +21,7 @@ class NeuralNetValueFunction(object):
|
|||||||
self._predict = U.function([X], vpred_n)
|
self._predict = U.function([X], vpred_n)
|
||||||
optim = kfac.KfacOptimizer(learning_rate=0.001, cold_lr=0.001*(1-0.9), momentum=0.9, \
|
optim = kfac.KfacOptimizer(learning_rate=0.001, cold_lr=0.001*(1-0.9), momentum=0.9, \
|
||||||
clip_kl=0.3, epsilon=0.1, stats_decay=0.95, \
|
clip_kl=0.3, epsilon=0.1, stats_decay=0.95, \
|
||||||
async=1, kfac_update=2, cold_iter=50, \
|
async_=1, kfac_update=2, cold_iter=50, \
|
||||||
weight_decay_dict=wd_dict, max_grad_norm=None)
|
weight_decay_dict=wd_dict, max_grad_norm=None)
|
||||||
vf_var_list = []
|
vf_var_list = []
|
||||||
for var in tf.trainable_variables():
|
for var in tf.trainable_variables():
|
||||||
|
Reference in New Issue
Block a user