make tests use single-threaded session for determinism of KfacOptimizer (#298)
* make tests use single-threaded session for determinism of KfacOptimizer
* updated comment in kfac.py
* remove unused sess_config
@@ -90,7 +90,7 @@ class Model(object):
         self.initial_state = step_model.initial_state
         tf.global_variables_initializer().run(session=sess)
 
-def learn(network, env, seed, total_timesteps=int(40e6), gamma=0.99, log_interval=1, nprocs=32, nsteps=20,
+def learn(network, env, seed, total_timesteps=int(40e6), gamma=0.99, log_interval=100, nprocs=32, nsteps=20,
         ent_coef=0.01, vf_coef=0.5, vf_fisher_coef=1.0, lr=0.25, max_grad_norm=0.5,
         kfac_clip=0.001, save_interval=None, lrschedule='linear', load_path=None, is_async=True, **network_kwargs):
     set_global_seeds(seed)
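For illustration, a hedged sketch of driving the learn() entry point above; the module path, environment choice, and hyperparameters are assumptions, not part of the commit. With the new default, progress is logged every 100 updates rather than after every update.

import gym

from baselines.acktr.acktr import learn  # module path assumed
from baselines.common.vec_env.dummy_vec_env import DummyVecEnv

# A toy vectorized env; any gym env id would do here.
env = DummyVecEnv([lambda: gym.make('CartPole-v0')])

# log_interval now defaults to 100, so this call logs once per 100 updates.
model = learn(network='mlp', env=env, seed=0, total_timesteps=3000, nprocs=1)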
@@ -11,7 +11,7 @@ KFAC_DEBUG = False
 
 
 class KfacOptimizer():
-
+    # note that KfacOptimizer will be truly synchronous (and thus deterministic) only if a single-threaded session is used
     def __init__(self, learning_rate=0.01, momentum=0.9, clip_kl=0.01, kfac_update=2, stats_accum_iter=60, full_stats_init=False, cold_iter=100, cold_lr=None, is_async=False, async_stats=False, epsilon=1e-2, stats_decay=0.95, blockdiag_bias=False, channel_fac=False, factored_damping=False, approxT2=False, use_float64=False, weight_decay_dict={},max_grad_norm=0.5):
         self.max_grad_norm = max_grad_norm
         self._lr = learning_rate
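A minimal sketch of the setup the new comment prescribes, assuming the import path baselines.acktr.kfac; the constructor arguments are the ones shown in the diff, everything else is illustrative.

import tensorflow as tf

from baselines.acktr.kfac import KfacOptimizer  # import path assumed

# One intra-op and one inter-op thread: the session executes ops strictly
# one at a time, so KFAC's stats and update ops cannot interleave and
# repeated runs from the same seed stay deterministic.
single_threaded = tf.ConfigProto(
    allow_soft_placement=True,
    intra_op_parallelism_threads=1,
    inter_op_parallelism_threads=1,
)

with tf.Graph().as_default(), tf.Session(config=single_threaded).as_default():
    tf.set_random_seed(0)
    # is_async=False keeps the synchronous code path the comment refers to.
    opt = KfacOptimizer(learning_rate=0.01, momentum=0.9, is_async=False)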
@@ -5,6 +5,12 @@ from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
 N_TRIALS = 10000
 N_EPISODES = 100
 
+_sess_config = tf.ConfigProto(
+    allow_soft_placement=True,
+    intra_op_parallelism_threads=1,
+    inter_op_parallelism_threads=1
+)
+
 def simple_test(env_fn, learn_fn, min_reward_fraction, n_trials=N_TRIALS):
     def seeded_env_fn():
         env = env_fn()
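To make the intent of _sess_config concrete, here is a standalone sketch (not part of the commit) of the property the tests rely on: under a single-threaded session, two runs from the same seed produce bit-identical results. All names below are illustrative.

import numpy as np
import tensorflow as tf

_sess_config = tf.ConfigProto(
    allow_soft_placement=True,
    intra_op_parallelism_threads=1,
    inter_op_parallelism_threads=1
)

def run_once(config, seed=0):
    # Build a tiny graph, take one SGD step, and return the updated weights.
    with tf.Graph().as_default(), tf.Session(config=config) as sess:
        tf.set_random_seed(seed)
        w = tf.get_variable('w', shape=[64, 64])
        loss = tf.reduce_sum(tf.square(tf.matmul(w, w)))
        train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)
        sess.run(tf.global_variables_initializer())
        sess.run(train_op)
        return sess.run(w)

# With one thread per op queue, the floating-point reduction order is
# fixed, so repeated runs match exactly.
assert np.array_equal(run_once(_sess_config), run_once(_sess_config))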
@@ -13,7 +19,7 @@ def simple_test(env_fn, learn_fn, min_reward_fraction, n_trials=N_TRIALS):
 
     np.random.seed(0)
     env = DummyVecEnv([seeded_env_fn])
-    with tf.Graph().as_default(), tf.Session(config=tf.ConfigProto(allow_soft_placement=True)).as_default():
+    with tf.Graph().as_default(), tf.Session(config=_sess_config).as_default():
         tf.set_random_seed(0)
         model = learn_fn(env)
         sum_rew = 0
@@ -34,7 +40,7 @@ def simple_test(env_fn, learn_fn, min_reward_fraction, n_trials=N_TRIALS):
 
 def reward_per_episode_test(env_fn, learn_fn, min_avg_reward, n_trials=N_EPISODES):
     env = DummyVecEnv([env_fn])
-    with tf.Graph().as_default(), tf.Session(config=tf.ConfigProto(allow_soft_placement=True)).as_default():
+    with tf.Graph().as_default(), tf.Session(config=_sess_config).as_default():
         model = learn_fn(env)
         N_TRIALS = 100
         observations, actions, rewards = rollout(env, model, N_TRIALS)
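Finally, a hypothetical example of how a test might drive these helpers; the agent and environment are illustrative, and the baselines.common.tests.util module path is assumed. Only simple_test's signature comes from the diff above.

import gym

from baselines.common.tests.util import simple_test  # module path assumed

def env_fn():
    return gym.make('CartPole-v0')

def learn_fn(env):
    from baselines.a2c.a2c import learn
    return learn(network='mlp', env=env, seed=0, total_timesteps=3000)

# simple_test now opens its session with _sess_config, so repeated
# invocations of this check behave identically from run to run.
simple_test(env_fn, learn_fn, min_reward_fraction=0.5)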