Files
baselines/baselines/common/mpi_util.py
pzhokhov 8c2aea2add refactor a2c, acer, acktr, ppo2, deepq, and trpo_mpi (#490)
* exported rl-algs

* more stuff from rl-algs

* run slow tests

* re-exported rl_algs

* re-exported rl_algs - fixed problems with serialization test and test_cartpole

* replaced atari_arg_parser with common_arg_parser

* run.py can run algos from both baselines and rl_algs

* added approximate humanoid reward with ppo2 into the README for reference

* dummy commit to RUN BENCHMARKS

* dummy commit to RUN BENCHMARKS

* dummy commit to RUN BENCHMARKS

* dummy commit to RUN BENCHMARKS

* very dummy commit to RUN BENCHMARKS

* serialize variables as a dict, not as a list

* running_mean_std uses tensorflow variables

* fixed import in vec_normalize

* dummy commit to RUN BENCHMARKS

* dummy commit to RUN BENCHMARKS

* flake8 complaints

* save all variables to make sure we save the vec_normalize normalization

* benchmarks on ppo2 only RUN BENCHMARKS

* make_atari_env compatible with mpi

* run ppo_mpi benchmarks only RUN BENCHMARKS

* hardcode names of retro environments

* add defaults

* changed default ppo2 lr schedule to linear RUN BENCHMARKS

* non-tf normalization benchmark RUN BENCHMARKS

* use ncpu=1 for mujoco sessions - gives a bit of a performance speedup

* reverted running_mean_std to user property decorators for mean, var, count

* reverted VecNormalize to use RunningMeanStd (no tf)

* reverted VecNormalize to use RunningMeanStd (no tf)

* profiling wip

* use VecNormalize with regular RunningMeanStd

* added acer runner (missing import)

* flake8 complaints

* added a note in README about TfRunningMeanStd and serialization of VecNormalize

* dummy commit to RUN BENCHMARKS

* merged benchmarks branch
2018-08-13 09:56:44 -07:00

102 lines
3.0 KiB
Python

from collections import defaultdict
from mpi4py import MPI
import os, numpy as np
import platform
import shutil
import subprocess
def sync_from_root(sess, variables, comm=None):
"""
Send the root node's parameters to every worker.
Arguments:
sess: the TensorFlow session.
variables: all parameter variables including optimizer's
"""
if comm is None: comm = MPI.COMM_WORLD
rank = comm.Get_rank()
for var in variables:
if rank == 0:
comm.Bcast(sess.run(var))
else:
import tensorflow as tf
returned_var = np.empty(var.shape, dtype='float32')
comm.Bcast(returned_var)
sess.run(tf.assign(var, returned_var))
def gpu_count():
"""
Count the GPUs on this machine.
"""
if shutil.which('nvidia-smi') is None:
return 0
output = subprocess.check_output(['nvidia-smi', '--query-gpu=gpu_name', '--format=csv'])
return max(0, len(output.split(b'\n')) - 2)
def setup_mpi_gpus():
"""
Set CUDA_VISIBLE_DEVICES using MPI.
"""
num_gpus = gpu_count()
if num_gpus == 0:
return
local_rank, _ = get_local_rank_size(MPI.COMM_WORLD)
os.environ['CUDA_VISIBLE_DEVICES'] = str(local_rank % num_gpus)
def get_local_rank_size(comm):
"""
Returns the rank of each process on its machine
The processes on a given machine will be assigned ranks
0, 1, 2, ..., N-1,
where N is the number of processes on this machine.
Useful if you want to assign one gpu per machine
"""
this_node = platform.node()
ranks_nodes = comm.allgather((comm.Get_rank(), this_node))
node2rankssofar = defaultdict(int)
local_rank = None
for (rank, node) in ranks_nodes:
if rank == comm.Get_rank():
local_rank = node2rankssofar[node]
node2rankssofar[node] += 1
assert local_rank is not None
return local_rank, node2rankssofar[this_node]
def share_file(comm, path):
"""
Copies the file from rank 0 to all other ranks
Puts it in the same place on all machines
"""
localrank, _ = get_local_rank_size(comm)
if comm.Get_rank() == 0:
with open(path, 'rb') as fh:
data = fh.read()
comm.bcast(data)
else:
data = comm.bcast(None)
if localrank == 0:
os.makedirs(os.path.dirname(path), exist_ok=True)
with open(path, 'wb') as fh:
fh.write(data)
comm.Barrier()
def dict_gather(comm, d, op='mean', assert_all_have_data=True):
if comm is None: return d
alldicts = comm.allgather(d)
size = comm.size
k2li = defaultdict(list)
for d in alldicts:
for (k,v) in d.items():
k2li[k].append(v)
result = {}
for (k,li) in k2li.items():
if assert_all_have_data:
assert len(li)==size, "only %i out of %i MPI workers have sent '%s'" % (len(li), size, k)
if op=='mean':
result[k] = np.mean(li, axis=0)
elif op=='sum':
result[k] = np.sum(li, axis=0)
else:
assert 0, op
return result