diff --git a/baselines/common/models.py b/baselines/common/models.py
index 0798916..a6fe467 100644
--- a/baselines/common/models.py
+++ b/baselines/common/models.py
@@ -30,8 +30,17 @@ def build_impala_cnn(unscaled_images, depths=[16,32,32], **conv_kwargs):
     Model used in the paper "IMPALA: Scalable Distributed Deep-RL with
     Importance Weighted Actor-Learner Architectures" https://arxiv.org/abs/1802.01561
     """
+
+    layer_num = 0
+
+    def get_layer_num_str():
+        nonlocal layer_num
+        num_str = str(layer_num)
+        layer_num += 1
+        return num_str
+
     def conv_layer(out, depth):
-        return tf.layers.conv2d(out, depth, 3, padding='same')
+        return tf.layers.conv2d(out, depth, 3, padding='same', name='layer_' + get_layer_num_str())
 
     def residual_block(inputs):
         depth = inputs.get_shape()[-1].value
@@ -57,7 +66,7 @@ def build_impala_cnn(unscaled_images, depths=[16,32,32], **conv_kwargs):
 
     out = tf.layers.flatten(out)
     out = tf.nn.relu(out)
-    out = tf.layers.dense(out, 256, activation=tf.nn.relu)
+    out = tf.layers.dense(out, 256, activation=tf.nn.relu, name='layer_' + get_layer_num_str())
 
     return out
 
diff --git a/baselines/ppo2/microbatched_model.py b/baselines/ppo2/microbatched_model.py
index 8d8b688..a35b830 100644
--- a/baselines/ppo2/microbatched_model.py
+++ b/baselines/ppo2/microbatched_model.py
@@ -8,7 +8,7 @@ class MicrobatchedModel(Model):
     on the entire minibatch causes some overflow
     """
     def __init__(self, *, policy, ob_space, ac_space, nbatch_act, nbatch_train,
-                nsteps, ent_coef, vf_coef, max_grad_norm, mpi_rank_weight, microbatch_size):
+                nsteps, ent_coef, vf_coef, max_grad_norm, mpi_rank_weight, comm, microbatch_size):
 
         self.nmicrobatches = nbatch_train // microbatch_size
         self.microbatch_size = microbatch_size
@@ -24,7 +24,8 @@ class MicrobatchedModel(Model):
                 ent_coef=ent_coef,
                 vf_coef=vf_coef,
                 max_grad_norm=max_grad_norm,
-                mpi_rank_weight=mpi_rank_weight)
+                mpi_rank_weight=mpi_rank_weight,
+                comm=comm)
 
         self.grads_ph = [tf.placeholder(dtype=g.dtype, shape=g.shape) for g in self.grads]
         grads_ph_and_vars = list(zip(self.grads_ph, self.var))
diff --git a/baselines/ppo2/model.py b/baselines/ppo2/model.py
index 9370d5c..35a883d 100644
--- a/baselines/ppo2/model.py
+++ b/baselines/ppo2/model.py
@@ -25,9 +25,12 @@ class Model(object):
     - Save load the model
     """
     def __init__(self, *, policy, ob_space, ac_space, nbatch_act, nbatch_train,
-                nsteps, ent_coef, vf_coef, max_grad_norm, mpi_rank_weight=1, microbatch_size=None):
+                nsteps, ent_coef, vf_coef, max_grad_norm, mpi_rank_weight=1, comm=None, microbatch_size=None):
         self.sess = sess = get_session()
 
+        if MPI is not None and comm is None:
+            comm = MPI.COMM_WORLD
+
         with tf.variable_scope('ppo2_model', reuse=tf.AUTO_REUSE):
             # CREATE OUR TWO MODELS
             # act_model that is used for sampling
@@ -91,8 +94,8 @@ class Model(object):
         # 1. Get the model parameters
         params = tf.trainable_variables('ppo2_model')
         # 2. Build our trainer
-        if MPI is not None:
-            self.trainer = MpiAdamOptimizer(MPI.COMM_WORLD, learning_rate=LR, mpi_rank_weight=mpi_rank_weight, epsilon=1e-5)
+        if comm is not None and comm.Get_size() > 1:
+            self.trainer = MpiAdamOptimizer(comm, learning_rate=LR, mpi_rank_weight=mpi_rank_weight, epsilon=1e-5)
         else:
             self.trainer = tf.train.AdamOptimizer(learning_rate=LR, epsilon=1e-5)
         # 3. Calculate the gradients
diff --git a/baselines/ppo2/ppo2.py b/baselines/ppo2/ppo2.py
index 09bc933..f3a69d8 100644
--- a/baselines/ppo2/ppo2.py
+++ b/baselines/ppo2/ppo2.py
@@ -21,7 +21,7 @@ def constfn(val):
 def learn(*, network, env, total_timesteps, eval_env = None, seed=None, nsteps=2048, ent_coef=0.0, lr=3e-4,
             vf_coef=0.5,  max_grad_norm=0.5, gamma=0.99, lam=0.95,
             log_interval=10, nminibatches=4, noptepochs=4, cliprange=0.2,
-            save_interval=0, load_path=None, model_fn=None, update_fn=None, init_fn=None, mpi_rank_weight=1, **network_kwargs):
+            save_interval=0, load_path=None, model_fn=None, update_fn=None, init_fn=None, mpi_rank_weight=1, comm=None, **network_kwargs):
     '''
     Learn policy using PPO algorithm (https://arxiv.org/abs/1707.06347)
 
@@ -105,7 +105,7 @@ def learn(*, network, env, total_timesteps, eval_env = None, seed=None, nsteps=2
 
     model = model_fn(policy=policy, ob_space=ob_space, ac_space=ac_space, nbatch_act=nenvs, nbatch_train=nbatch_train,
                     nsteps=nsteps, ent_coef=ent_coef, vf_coef=vf_coef,
-                    max_grad_norm=max_grad_norm, mpi_rank_weight=mpi_rank_weight)
+                    max_grad_norm=max_grad_norm, comm=comm, mpi_rank_weight=mpi_rank_weight)
 
     if load_path is not None:
         model.load(load_path)