From a08af5d07dab1e3486af6244b5e2b13a231427fa Mon Sep 17 00:00:00 2001 From: pzhokhov Date: Fri, 22 Mar 2019 16:28:29 -0700 Subject: [PATCH] make tests use single-threaded session for determinism of KfacOptimizer (#298) * make tests use single-threaded session for determinism of KfacOptimizer * updated comment in kfac.py * remove unused sess_config --- baselines/acktr/acktr.py | 2 +- baselines/acktr/kfac.py | 2 +- baselines/common/tests/util.py | 10 ++++++++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/baselines/acktr/acktr.py b/baselines/acktr/acktr.py index 10ab32b..18f0f31 100644 --- a/baselines/acktr/acktr.py +++ b/baselines/acktr/acktr.py @@ -90,7 +90,7 @@ class Model(object): self.initial_state = step_model.initial_state tf.global_variables_initializer().run(session=sess) -def learn(network, env, seed, total_timesteps=int(40e6), gamma=0.99, log_interval=1, nprocs=32, nsteps=20, +def learn(network, env, seed, total_timesteps=int(40e6), gamma=0.99, log_interval=100, nprocs=32, nsteps=20, ent_coef=0.01, vf_coef=0.5, vf_fisher_coef=1.0, lr=0.25, max_grad_norm=0.5, kfac_clip=0.001, save_interval=None, lrschedule='linear', load_path=None, is_async=True, **network_kwargs): set_global_seeds(seed) diff --git a/baselines/acktr/kfac.py b/baselines/acktr/kfac.py index fac84f0..3d4a8c2 100644 --- a/baselines/acktr/kfac.py +++ b/baselines/acktr/kfac.py @@ -11,7 +11,7 @@ KFAC_DEBUG = False class KfacOptimizer(): - + # note that KfacOptimizer will be truly synchronous (and thus deterministic) only if a single-threaded session is used def __init__(self, learning_rate=0.01, momentum=0.9, clip_kl=0.01, kfac_update=2, stats_accum_iter=60, full_stats_init=False, cold_iter=100, cold_lr=None, is_async=False, async_stats=False, epsilon=1e-2, stats_decay=0.95, blockdiag_bias=False, channel_fac=False, factored_damping=False, approxT2=False, use_float64=False, weight_decay_dict={},max_grad_norm=0.5): self.max_grad_norm = max_grad_norm self._lr = learning_rate diff --git a/baselines/common/tests/util.py b/baselines/common/tests/util.py index 38ea4dc..441e3f7 100644 --- a/baselines/common/tests/util.py +++ b/baselines/common/tests/util.py @@ -5,6 +5,12 @@ from baselines.common.vec_env.dummy_vec_env import DummyVecEnv N_TRIALS = 10000 N_EPISODES = 100 +_sess_config = tf.ConfigProto( + allow_soft_placement=True, + intra_op_parallelism_threads=1, + inter_op_parallelism_threads=1 +) + def simple_test(env_fn, learn_fn, min_reward_fraction, n_trials=N_TRIALS): def seeded_env_fn(): env = env_fn() @@ -13,7 +19,7 @@ def simple_test(env_fn, learn_fn, min_reward_fraction, n_trials=N_TRIALS): np.random.seed(0) env = DummyVecEnv([seeded_env_fn]) - with tf.Graph().as_default(), tf.Session(config=tf.ConfigProto(allow_soft_placement=True)).as_default(): + with tf.Graph().as_default(), tf.Session(config=_sess_config).as_default(): tf.set_random_seed(0) model = learn_fn(env) sum_rew = 0 @@ -34,7 +40,7 @@ def simple_test(env_fn, learn_fn, min_reward_fraction, n_trials=N_TRIALS): def reward_per_episode_test(env_fn, learn_fn, min_avg_reward, n_trials=N_EPISODES): env = DummyVecEnv([env_fn]) - with tf.Graph().as_default(), tf.Session(config=tf.ConfigProto(allow_soft_placement=True)).as_default(): + with tf.Graph().as_default(), tf.Session(config=_sess_config).as_default(): model = learn_fn(env) N_TRIALS = 100 observations, actions, rewards = rollout(env, model, N_TRIALS)