from collections import defaultdict
import os
import platform
import shutil
import subprocess

import numpy as np
from mpi4py import MPI


def sync_from_root(sess, variables, comm=None):
    """
    Send the root node's parameters to every worker.

    Arguments:
        sess: the TensorFlow session.
        variables: all parameter variables, including the optimizer's.
    """
    import tensorflow as tf
    if comm is None:
        comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    for var in variables:
        if rank == 0:
            # The root broadcasts its current value of this variable.
            comm.Bcast(sess.run(var))
        else:
            # Every other worker receives the value and assigns it to its copy.
            returned_var = np.empty(var.shape, dtype='float32')
            comm.Bcast(returned_var)
            sess.run(tf.assign(var, returned_var))
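
# Illustrative usage sketch (assumptions: a TensorFlow graph has already been
# built on every rank; `build_model` is a hypothetical constructor, not part of
# this module). Each rank initializes its own variables, then rank 0's values
# overwrite them so all MPI workers start from identical parameters.
#
#   import tensorflow as tf
#   sess = tf.Session()
#   build_model()
#   sess.run(tf.global_variables_initializer())
#   sync_from_root(sess, tf.global_variables())
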
def gpu_count():
    """
    Count the GPUs on this machine.
    """
    if shutil.which('nvidia-smi') is None:
        return 0
    output = subprocess.check_output(['nvidia-smi', '--query-gpu=gpu_name', '--format=csv'])
    # Subtract the CSV header line and the trailing newline from the line count.
    return max(0, len(output.split(b'\n')) - 2)


def setup_mpi_gpus():
    """
    Set CUDA_VISIBLE_DEVICES using MPI.
    """
    num_gpus = gpu_count()
    if num_gpus == 0:
        return
    local_rank, _ = get_local_rank_size(MPI.COMM_WORLD)
    # Assign each process on a machine to one of that machine's GPUs, round-robin.
    os.environ['CUDA_VISIBLE_DEVICES'] = str(local_rank % num_gpus)
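
# Illustrative usage sketch (assumption: the script is launched under MPI, e.g.
# `mpirun -np 8 python train.py`). Call setup_mpi_gpus() before creating the
# TensorFlow session so each local process is pinned to its assigned GPU.
#
#   setup_mpi_gpus()
#   print('rank %i sees CUDA_VISIBLE_DEVICES=%s'
#         % (MPI.COMM_WORLD.Get_rank(), os.environ.get('CUDA_VISIBLE_DEVICES')))
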
def get_local_rank_size(comm):
    """
    Returns the local rank of this process on its machine, and the number of
    processes on this machine.

    The processes on a given machine will be assigned local ranks
        0, 1, 2, ..., N-1,
    where N is the number of processes on this machine.

    Useful if you want to assign one gpu per machine.
    """
    this_node = platform.node()
    ranks_nodes = comm.allgather((comm.Get_rank(), this_node))
    node2rankssofar = defaultdict(int)
    local_rank = None
    for (rank, node) in ranks_nodes:
        if rank == comm.Get_rank():
            local_rank = node2rankssofar[node]
        node2rankssofar[node] += 1
    assert local_rank is not None
    return local_rank, node2rankssofar[this_node]
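
# Illustrative example (assumption: 4 processes run on host "a" and 2 on host
# "b"). The processes on "a" get (local_rank, local_size) of (0, 4), (1, 4),
# (2, 4), (3, 4), and the processes on "b" get (0, 2) and (1, 2), regardless of
# their global MPI ranks.
#
#   local_rank, local_size = get_local_rank_size(MPI.COMM_WORLD)
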
def share_file(comm, path):
    """
    Copies the file from rank 0 to all other ranks.
    Puts it in the same place on all machines.
    """
    localrank, _ = get_local_rank_size(comm)
    if comm.Get_rank() == 0:
        with open(path, 'rb') as fh:
            data = fh.read()
        comm.bcast(data)
    else:
        data = comm.bcast(None)
        if localrank == 0:
            # Only the first process on each machine writes the file to disk.
            os.makedirs(os.path.dirname(path), exist_ok=True)
            with open(path, 'wb') as fh:
                fh.write(data)
    comm.Barrier()
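
# Illustrative usage sketch (assumption: '/tmp/model.ckpt' is a hypothetical
# path that exists only on rank 0 before the call). After the barrier, every
# machine has the file at the same path, written once per machine by its local
# rank 0 process.
#
#   share_file(MPI.COMM_WORLD, '/tmp/model.ckpt')
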
def dict_gather(comm, d, op='mean', assert_all_have_data=True):
    """
    Gather the dicts from all workers and reduce the values of each key across
    workers with `op` ('mean' or 'sum').
    """
    if comm is None:
        return d
    alldicts = comm.allgather(d)
    size = comm.size
    k2li = defaultdict(list)
    for worker_dict in alldicts:
        for (k, v) in worker_dict.items():
            k2li[k].append(v)
    result = {}
    for (k, li) in k2li.items():
        if assert_all_have_data:
            assert len(li) == size, "only %i out of %i MPI workers have sent '%s'" % (len(li), size, k)
        if op == 'mean':
            result[k] = np.mean(li, axis=0)
        elif op == 'sum':
            result[k] = np.sum(li, axis=0)
        else:
            assert 0, op
    return result
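
# Illustrative usage sketch: average per-worker statistics across all MPI
# workers (the keys and values below are made up for the example).
#
#   stats = {'episode_reward': 123.4, 'episode_length': 200}
#   mean_stats = dict_gather(MPI.COMM_WORLD, stats, op='mean')
#   # mean_stats['episode_reward'] is the mean of that key over all workers.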