From f05b716b03edb810fb61b2bca0006591c736bdd6 Mon Sep 17 00:00:00 2001 From: Peter Zhokhov Date: Wed, 14 Nov 2018 11:01:05 -0800 Subject: [PATCH] trpo - increase reward and eplen buffer to 100 for consistency --- baselines/trpo_mpi/trpo_mpi.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/baselines/trpo_mpi/trpo_mpi.py b/baselines/trpo_mpi/trpo_mpi.py index cd1e7ea..66add9c 100644 --- a/baselines/trpo_mpi/trpo_mpi.py +++ b/baselines/trpo_mpi/trpo_mpi.py @@ -273,8 +273,8 @@ def learn(*, timesteps_so_far = 0 iters_so_far = 0 tstart = time.time() - lenbuffer = deque(maxlen=40) # rolling buffer for episode lengths - rewbuffer = deque(maxlen=40) # rolling buffer for episode rewards + lenbuffer = deque(maxlen=100) # rolling buffer for episode lengths + rewbuffer = deque(maxlen=100) # rolling buffer for episode rewards if sum([max_iters>0, total_timesteps>0, max_episodes>0])==0: # noththing to be done