From c6c0f45cb179f022b02f0b3021dd77e1f0ebea05 Mon Sep 17 00:00:00 2001
From: Tom
Date: Tue, 28 Aug 2018 03:36:43 +0800
Subject: [PATCH] fix 'async' is a reserved word in Python >= 3.7 (#495) (#542)

---
 baselines/acktr/acktr_cont.py      | 2 +-
 baselines/acktr/acktr_disc.py      | 6 +++---
 baselines/acktr/kfac.py            | 4 ++--
 baselines/acktr/value_functions.py | 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/baselines/acktr/acktr_cont.py b/baselines/acktr/acktr_cont.py
index 45f2fa2..397ac12 100644
--- a/baselines/acktr/acktr_cont.py
+++ b/baselines/acktr/acktr_cont.py
@@ -54,7 +54,7 @@ def learn(env, policy, vf, gamma, lam, timesteps_per_batch, num_timesteps,
     stepsize = tf.Variable(initial_value=np.float32(np.array(0.03)), name='stepsize')
     inputs, loss, loss_sampled = policy.update_info
     optim = kfac.KfacOptimizer(learning_rate=stepsize, cold_lr=stepsize*(1-0.9), momentum=0.9, kfac_update=2,\
-                               epsilon=1e-2, stats_decay=0.99, async=1, cold_iter=1,
+                               epsilon=1e-2, stats_decay=0.99, async_=1, cold_iter=1,
                                weight_decay_dict=policy.wd_dict, max_grad_norm=None)
     pi_var_list = []
     for var in tf.trainable_variables():
diff --git a/baselines/acktr/acktr_disc.py b/baselines/acktr/acktr_disc.py
index cfa028d..7e42bc6 100644
--- a/baselines/acktr/acktr_disc.py
+++ b/baselines/acktr/acktr_disc.py
@@ -58,7 +58,7 @@ class Model(object):
         with tf.device('/gpu:0'):
             self.optim = optim = kfac.KfacOptimizer(learning_rate=PG_LR, clip_kl=kfac_clip,\
                 momentum=0.9, kfac_update=1, epsilon=0.01,\
-                stats_decay=0.99, async=1, cold_iter=10, max_grad_norm=max_grad_norm)
+                stats_decay=0.99, async_=1, cold_iter=10, max_grad_norm=max_grad_norm)

             update_stats_op = optim.compute_and_apply_stats(joint_fisher_loss, var_list=params)
             train_op, q_runner = optim.apply_gradients(list(zip(grads,params)))
@@ -97,7 +97,7 @@ def learn(network, env, seed, total_timesteps=int(40e6), gamma=0.99, log_interva
             kfac_clip=0.001, save_interval=None, lrschedule='linear', load_path=None, **network_kwargs):
     set_global_seeds(seed)
-    
+
     if network == 'cnn':
         network_kwargs['one_dim_bias'] = True
@@ -115,7 +115,7 @@ def learn(network, env, seed, total_timesteps=int(40e6), gamma=0.99, log_interva
     with open(osp.join(logger.get_dir(), 'make_model.pkl'), 'wb') as fh:
         fh.write(cloudpickle.dumps(make_model))
     model = make_model()
-    
+
     if load_path is not None:
         model.load(load_path)
diff --git a/baselines/acktr/kfac.py b/baselines/acktr/kfac.py
index b420819..dc9077d 100644
--- a/baselines/acktr/kfac.py
+++ b/baselines/acktr/kfac.py
@@ -10,14 +10,14 @@ KFAC_DEBUG = False

 class KfacOptimizer():

-    def __init__(self, learning_rate=0.01, momentum=0.9, clip_kl=0.01, kfac_update=2, stats_accum_iter=60, full_stats_init=False, cold_iter=100, cold_lr=None, async=False, async_stats=False, epsilon=1e-2, stats_decay=0.95, blockdiag_bias=False, channel_fac=False, factored_damping=False, approxT2=False, use_float64=False, weight_decay_dict={},max_grad_norm=0.5):
+    def __init__(self, learning_rate=0.01, momentum=0.9, clip_kl=0.01, kfac_update=2, stats_accum_iter=60, full_stats_init=False, cold_iter=100, cold_lr=None, async_=False, async_stats=False, epsilon=1e-2, stats_decay=0.95, blockdiag_bias=False, channel_fac=False, factored_damping=False, approxT2=False, use_float64=False, weight_decay_dict={},max_grad_norm=0.5):
         self.max_grad_norm = max_grad_norm
         self._lr = learning_rate
         self._momentum = momentum
         self._clip_kl = clip_kl
         self._channel_fac = channel_fac
         self._kfac_update = kfac_update
-        self._async = async
+        self._async = async_
         self._async_stats = async_stats
         self._epsilon = epsilon
         self._stats_decay = stats_decay
diff --git a/baselines/acktr/value_functions.py b/baselines/acktr/value_functions.py
index d1e9e1a..3d9c519 100644
--- a/baselines/acktr/value_functions.py
+++ b/baselines/acktr/value_functions.py
@@ -21,7 +21,7 @@ class NeuralNetValueFunction(object):
         self._predict = U.function([X], vpred_n)
         optim = kfac.KfacOptimizer(learning_rate=0.001, cold_lr=0.001*(1-0.9), momentum=0.9, \
                                     clip_kl=0.3, epsilon=0.1, stats_decay=0.95, \
-                                    async=1, kfac_update=2, cold_iter=50, \
+                                    async_=1, kfac_update=2, cold_iter=50, \
                                     weight_decay_dict=wd_dict, max_grad_norm=None)
         vf_var_list = []
         for var in tf.trainable_variables():
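
Note for reviewers: in Python 3.7, 'async' and 'await' were promoted from soft
keywords to reserved keywords (using them as identifiers already raised a
DeprecationWarning in 3.6), so an 'async=...' parameter or keyword argument now
fails at parse time with a SyntaxError, before any code runs. Below is a
minimal sketch of the failure and of the PEP 8 trailing-underscore rename this
patch applies; the function name 'make_optimizer' is illustrative only, not
part of the codebase:

    # Python >= 3.7 rejects this at compile time, before anything executes:
    #     def make_optimizer(async=False): ...
    # SyntaxError: invalid syntax

    # PEP 8's convention for an identifier that collides with a keyword is a
    # single trailing underscore:
    def make_optimizer(async_=False):
        # '_async' is a different identifier from the keyword 'async', which
        # is why kfac.py can keep its private attribute self._async unchanged.
        run_async = async_
        return run_async

    make_optimizer(async_=1)  # call sites rename the keyword argument the same
                              # way, as in acktr_cont.py, acktr_disc.py, and
                              # value_functions.py above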