mpi-less baselines (#689)
* make baselines run without mpi wip * squash-merged latest master * further removing MPI references where unnecessary * more MPI removal * syntax and flake8 * MpiAdam becomes regular Adam if Mpi not present * autopep8 * add assertion to test in mpi_adam; fix trpo_mpi failure without MPI on cartpole * mpiless ddpg
This commit is contained in:
@@ -12,8 +12,11 @@ import baselines.common.tf_util as U
|
||||
|
||||
from baselines import logger
|
||||
import numpy as np
|
||||
from mpi4py import MPI
|
||||
|
||||
try:
|
||||
from mpi4py import MPI
|
||||
except ImportError:
|
||||
MPI = None
|
||||
|
||||
def learn(network, env,
|
||||
seed=None,
|
||||
@@ -49,7 +52,11 @@ def learn(network, env,
|
||||
else:
|
||||
nb_epochs = 500
|
||||
|
||||
rank = MPI.COMM_WORLD.Get_rank()
|
||||
if MPI is not None:
|
||||
rank = MPI.COMM_WORLD.Get_rank()
|
||||
else:
|
||||
rank = 0
|
||||
|
||||
nb_actions = env.action_space.shape[-1]
|
||||
assert (np.abs(env.action_space.low) == env.action_space.high).all() # we assume symmetric actions.
|
||||
|
||||
@@ -199,7 +206,11 @@ def learn(network, env,
|
||||
eval_episode_rewards_history.append(eval_episode_reward[d])
|
||||
eval_episode_reward[d] = 0.0
|
||||
|
||||
mpi_size = MPI.COMM_WORLD.Get_size()
|
||||
if MPI is not None:
|
||||
mpi_size = MPI.COMM_WORLD.Get_size()
|
||||
else:
|
||||
mpi_size = 1
|
||||
|
||||
# Log stats.
|
||||
# XXX shouldn't call np.mean on variable length lists
|
||||
duration = time.time() - start_time
|
||||
@@ -233,7 +244,10 @@ def learn(network, env,
|
||||
else:
|
||||
raise ValueError('expected scalar, got %s'%x)
|
||||
|
||||
combined_stats_sums = MPI.COMM_WORLD.allreduce(np.array([ np.array(x).flatten()[0] for x in combined_stats.values()]))
|
||||
combined_stats_sums = np.array([ np.array(x).flatten()[0] for x in combined_stats.values()])
|
||||
if MPI is not None:
|
||||
combined_stats_sums = MPI.COMM_WORLD.allreduce(combined_stats_sums)
|
||||
|
||||
combined_stats = {k : v / mpi_size for (k,v) in zip(combined_stats.keys(), combined_stats_sums)}
|
||||
|
||||
# Total statistics.
|
||||
|
@@ -9,7 +9,10 @@ from baselines import logger
|
||||
from baselines.common.mpi_adam import MpiAdam
|
||||
import baselines.common.tf_util as U
|
||||
from baselines.common.mpi_running_mean_std import RunningMeanStd
|
||||
from mpi4py import MPI
|
||||
try:
|
||||
from mpi4py import MPI
|
||||
except ImportError:
|
||||
MPI = None
|
||||
|
||||
def normalize(x, stats):
|
||||
if stats is None:
|
||||
@@ -358,6 +361,11 @@ class DDPG(object):
|
||||
return stats
|
||||
|
||||
def adapt_param_noise(self):
|
||||
try:
|
||||
from mpi4py import MPI
|
||||
except ImportError:
|
||||
MPI = None
|
||||
|
||||
if self.param_noise is None:
|
||||
return 0.
|
||||
|
||||
@@ -371,7 +379,16 @@ class DDPG(object):
|
||||
self.param_noise_stddev: self.param_noise.current_stddev,
|
||||
})
|
||||
|
||||
mean_distance = MPI.COMM_WORLD.allreduce(distance, op=MPI.SUM) / MPI.COMM_WORLD.Get_size()
|
||||
if MPI is not None:
|
||||
mean_distance = MPI.COMM_WORLD.allreduce(distance, op=MPI.SUM) / MPI.COMM_WORLD.Get_size()
|
||||
else:
|
||||
mean_distance = distance
|
||||
|
||||
if MPI is not None:
|
||||
mean_distance = MPI.COMM_WORLD.allreduce(distance, op=MPI.SUM) / MPI.COMM_WORLD.Get_size()
|
||||
else:
|
||||
mean_distance = distance
|
||||
|
||||
self.param_noise.adapt(mean_distance)
|
||||
return mean_distance
|
||||
|
||||
|
Reference in New Issue
Block a user