mpi-less baselines (#689)

* make baselines run without MPI (WIP)
* squash-merged latest master
* further removing MPI references where unnecessary
* more MPI removal
* syntax and flake8
* MpiAdam becomes regular Adam if MPI is not present
* autopep8
* add assertion to test in mpi_adam; fix trpo_mpi failure without MPI on cartpole
* MPI-less DDPG
2018-10-31 11:15:41 -07:00
parent a071fa7630
commit ab59de6922
8 changed files with 124 additions and 45 deletions
--- a/baselines/ddpg/ddpg.py
+++ b/baselines/ddpg/ddpg.py
@@ -12,8 +12,11 @@ import baselines.common.tf_util as U

 from baselines import logger
 import numpy as np
-from mpi4py import MPI

+try:
+    from mpi4py import MPI
+except ImportError:
+    MPI = None

 def learn(network, env,
          seed=None,
@@ -49,7 +52,11 @@ def learn(network, env,
    else:
        nb_epochs = 500

-    rank = MPI.COMM_WORLD.Get_rank()
+    if MPI is not None:
+        rank = MPI.COMM_WORLD.Get_rank()
+    else:
+        rank = 0
+
    nb_actions = env.action_space.shape[-1]
    assert (np.abs(env.action_space.low) == env.action_space.high).all()  # we assume symmetric actions.

@@ -199,7 +206,11 @@ def learn(network, env,
                            eval_episode_rewards_history.append(eval_episode_reward[d])
                            eval_episode_reward[d] = 0.0

-        mpi_size = MPI.COMM_WORLD.Get_size()
+        if MPI is not None:
+            mpi_size = MPI.COMM_WORLD.Get_size()
+        else:
+            mpi_size = 1
+
        # Log stats.
        # XXX shouldn't call np.mean on variable length lists
        duration = time.time() - start_time
@@ -233,7 +244,10 @@ def learn(network, env,
            else:
                raise ValueError('expected scalar, got %s'%x)

-        combined_stats_sums = MPI.COMM_WORLD.allreduce(np.array([ np.array(x).flatten()[0] for x in combined_stats.values()]))
+        combined_stats_sums = np.array([ np.array(x).flatten()[0] for x in combined_stats.values()])
+        if MPI is not None:
+            combined_stats_sums = MPI.COMM_WORLD.allreduce(combined_stats_sums)
+
        combined_stats = {k : v / mpi_size for (k,v) in zip(combined_stats.keys(), combined_stats_sums)}

        # Total statistics.
--- a/baselines/ddpg/ddpg_learner.py
+++ b/baselines/ddpg/ddpg_learner.py
@@ -9,7 +9,10 @@ from baselines import logger
 from baselines.common.mpi_adam import MpiAdam
 import baselines.common.tf_util as U
 from baselines.common.mpi_running_mean_std import RunningMeanStd
-from mpi4py import MPI
+try:
+    from mpi4py import MPI
+except ImportError:
+    MPI = None

 def normalize(x, stats):
    if stats is None:
@@ -358,6 +361,11 @@ class DDPG(object):
        return stats

    def adapt_param_noise(self):
+        try:
+            from mpi4py import MPI
+        except ImportError:
+            MPI = None
+
        if self.param_noise is None:
            return 0.

@@ -371,7 +379,16 @@ class DDPG(object):
            self.param_noise_stddev: self.param_noise.current_stddev,
        })

-        mean_distance = MPI.COMM_WORLD.allreduce(distance, op=MPI.SUM) / MPI.COMM_WORLD.Get_size()
+        if MPI is not None:
+            mean_distance = MPI.COMM_WORLD.allreduce(distance, op=MPI.SUM) / MPI.COMM_WORLD.Get_size()
+        else:
+            mean_distance = distance
+
+        if MPI is not None:
+            mean_distance = MPI.COMM_WORLD.allreduce(distance, op=MPI.SUM) / MPI.COMM_WORLD.Get_size()
+        else:
+            mean_distance = distance
+
        self.param_noise.adapt(mean_distance)
        return mean_distance