* joshim5 changes (width and height to WarpFrame wrapper) * match network output with action distribution via a linear layer only if necessary (#167) * support color vs. grayscale option in WarpFrame wrapper (#166) * support color vs. grayscale option in WarpFrame wrapper * Support color in other wrappers * Updated per Peters suggestions * fixing test failures * ppo2 with microbatches (#168) * pass microbatch_size to the model during construction * microbatch fixes and test (#169) * microbatch fixes and test * tiny cleanup * added assertions to the test * vpg-related fix * Peterz joshim5 subclass ppo2 model (#170) * microbatch fixes and test * tiny cleanup * added assertions to the test * vpg-related fix * subclassing the model to make microbatched version of model WIP * made microbatched model a subclass of ppo2 Model * flake8 complaint * mpi-less ppo2 (resolving merge conflict) * flake8 and mpi4py imports in ppo2/model.py * more un-mpying * merge master * updates to the benchmark viewer code + autopep8 (#184) * viz docs and syntactic sugar wip * update viewer yaml to use persistent volume claims * move plot_util to baselines.common, update links * use 1Tb hard drive for results viewer * small updates to benchmark vizualizer code * autopep8 * autopep8 * any folder can be a benchmark * massage games image a little bit * fixed --preload option in app.py * remove preload from run_viewer.sh * remove pdb breakpoints * update bench-viewer.yaml * fixed bug (#185) * fixed bug it's wrong to do the else statement, because no other nodes would start. 
* changed the fix slightly * Refactor her phase 1 (#194) * add monitor to the rollout envs in her RUN BENCHMARKS her * Slice -> Slide in her benchmarks RUN BENCHMARKS her * run her benchmark for 200 epochs * dummy commit to RUN BENCHMARKS her * her benchmark for 500 epochs RUN BENCHMARKS her * add num_timesteps to her benchmark to be compatible with viewer RUN BENCHMARKS her * add num_timesteps to her benchmark to be compatible with viewer RUN BENCHMARKS her * add num_timesteps to her benchmark to be compatible with viewer RUN BENCHMARKS her * disable saving of policies in her benchmark RUN BENCHMARKS her * run fetch benchmarks with ppo2 and ddpg RUN BENCHMARKS Fetch * run fetch benchmarks with ppo2 and ddpg RUN BENCHMARKS Fetch * launcher refactor wip * wip * her works on FetchReach * her runner refactor RUN BENCHMARKS Fetch1M * unit test for her * fixing warnings in mpi_average in her, skip test_fetchreach if mujoco is not present * pickle-based serialization in her * remove extra import from subproc_vec_env.py * investigating differences in rollout.py * try with old rollout code RUN BENCHMARKS her * temporarily use DummyVecEnv in cmd_util.py RUN BENCHMARKS her * dummy commit to RUN BENCHMARKS her * set info_values in rollout worker in her RUN BENCHMARKS her * bug in rollout_new.py RUN BENCHMARKS her * fixed bug in rollout_new.py RUN BENCHMARKS her * do not use last step because vecenv calls reset and returns obs after reset RUN BENCHMARKS her * updated buffer sizes RUN BENCHMARKS her * fixed loading/saving via joblib * dust off learning from demonstrations in HER, docs, refactor * add deprecation notice on her play and plot files * address comments by Matthias
121 lines
3.5 KiB
Python
121 lines
3.5 KiB
Python
# DEPRECATED, use baselines.common.plot_util instead
|
|
|
|
import os
|
|
import matplotlib.pyplot as plt
|
|
import numpy as np
|
|
import json
|
|
import seaborn as sns; sns.set()
|
|
import glob2
|
|
import argparse
|
|
|
|
|
|
def smooth_reward_curve(x, y):
    """Smooth ``y`` with an edge-normalised moving average.

    The window half-width is ``ceil(len(x) / 60)``, clamped so that the full
    window (``2 * halfwidth + 1`` samples) never exceeds ``len(y)``.  Each
    output sample is the windowed sum of ``y`` divided by the number of
    samples that actually fell inside the window, so values near the two
    ends are averaged over fewer points instead of being pulled towards zero.

    Args:
        x: sequence of x-coordinates; returned unchanged.
        y: 1-D sequence of values to smooth.

    Returns:
        Tuple ``(x, ysmoo)`` where ``ysmoo`` has the same length as ``y``.
    """
    n = len(y)
    if n == 0:
        # Nothing to smooth; np.convolve rejects empty input.
        return x, y

    halfwidth = int(np.ceil(len(x) / 60))  # Halfwidth of our smoothing convolution
    # np.convolve(mode='same') returns max(len(y), len(kernel)) samples, so a
    # kernel longer than the data would change the output length.
    halfwidth = min(halfwidth, (n - 1) // 2)

    kernel = np.ones(2 * halfwidth + 1)
    window_sum = np.convolve(y, kernel, mode='same')
    window_count = np.convolve(np.ones_like(y), kernel, mode='same')
    return x, window_sum / window_count
|
|
|
|
|
|
def load_results(file):
    """Load a comma-separated file with a header row into per-column arrays.

    The first line supplies the column names (whitespace-stripped); the
    remaining lines are parsed by ``np.genfromtxt`` with missing values
    filled with ``0.``.

    Args:
        file: path of the file to read.

    Returns:
        Dict mapping each column name to a 1-D array holding that column, or
        ``None`` if the file does not exist or has fewer than two lines
        (i.e. no data row).

    Raises:
        AssertionError: if the parsed data is not two-dimensional or its
            column count does not match the header.
    """
    if not os.path.exists(file):
        return None
    with open(file, 'r') as f:
        lines = f.readlines()
    if len(lines) < 2:
        return None

    keys = [name.strip() for name in lines[0].split(',')]
    data = np.genfromtxt(file, delimiter=',', skip_header=1, filling_values=0.)
    if data.ndim < 2:
        # genfromtxt squeezes its result: a single data row and a single
        # column both come back with fewer than two dimensions.  With one
        # header key the values form a column; otherwise they form one row.
        if len(keys) == 1:
            data = data.reshape(-1, 1)
        else:
            data = data.reshape(1, -1)
    assert data.ndim == 2
    assert data.shape[-1] == len(keys)

    return {key: data[:, idx] for idx, key in enumerate(keys)}
|
|
|
|
|
|
def pad(xs, value=np.nan):
    """Pad arrays along their first axis to a common length and stack them.

    Args:
        xs: non-empty sequence of numpy arrays that agree on every dimension
            except possibly the first.
        value: fill value appended to arrays shorter than the longest one.

    Returns:
        Array of shape ``(len(xs), maxlen) + trailing_shape`` with one row
        per input array, in input order.

    Raises:
        ValueError: if ``xs`` is empty (raised by ``np.max``).
    """
    maxlen = np.max([len(x) for x in xs])

    padded_xs = []
    for x in xs:
        if x.shape[0] >= maxlen:
            padded_xs.append(x)
            # Already full length: skip the padding path below, otherwise
            # this array would be appended a second time.
            continue

        padding = np.ones((maxlen - x.shape[0],) + x.shape[1:]) * value
        x_padded = np.concatenate([x, padding], axis=0)
        assert x_padded.shape[1:] == x.shape[1:]
        assert x_padded.shape[0] == maxlen
        padded_xs.append(x_padded)
    return np.array(padded_xs)
|
|
|
|
|
|
# Command line: a directory to scan for runs, and an integer switch that
# enables smoothing of the success-rate curves when non-zero.
parser = argparse.ArgumentParser()
parser.add_argument('dir', type=str)
parser.add_argument('--smooth', type=int, default=1)
args = parser.parse_args()

# Load all data.
# data[env_id][config] is a list of (x, y) curves, one per run directory.
data = {}
paths = [os.path.abspath(os.path.join(path, '..')) for path in glob2.glob(os.path.join(args.dir, '**', 'progress.csv'))]
for curr_path in paths:
    if not os.path.isdir(curr_path):
        continue
    results = load_results(os.path.join(curr_path, 'progress.csv'))
    if not results:
        print('skipping {}'.format(curr_path))
        continue
    print('loading {} ({})'.format(curr_path, len(results['epoch'])))
    with open(os.path.join(curr_path, 'params.json'), 'r') as f:
        params = json.load(f)

    success_rate = np.array(results['test/success_rate'])
    epoch = np.array(results['epoch']) + 1
    env_id = params['env_name']
    replay_strategy = params['replay_strategy']

    # Label the run from its replay strategy and from whether the
    # environment name contains 'Dense'.
    config = 'her' if replay_strategy == 'future' else 'ddpg'
    config += '-dense' if 'Dense' in env_id else '-sparse'
    # Drop 'Dense' so both variants of an environment share one figure.
    env_id = env_id.replace('Dense', '')

    # Process and smooth data.
    assert success_rate.shape == epoch.shape
    x = epoch
    y = success_rate
    if args.smooth:
        x, y = smooth_reward_curve(epoch, success_rate)
    assert x.shape == y.shape

    data.setdefault(env_id, {}).setdefault(config, []).append((x, y))

# Plot data.
for env_id in sorted(data.keys()):
    print('exporting {}'.format(env_id))
    plt.clf()

    for config in sorted(data[env_id].keys()):
        xs, ys = zip(*data[env_id][config])
        # Take the x-axis from the longest run.  After padding, shorter runs
        # carry NaN x-values, which would cut the plotted curve short if one
        # of them were used as the axis.
        x_axis = max(xs, key=len)
        xs, ys = pad(xs), pad(ys)
        assert xs.shape == ys.shape

        plt.plot(x_axis, np.nanmedian(ys, axis=0), label=config)
        plt.fill_between(x_axis, np.nanpercentile(ys, 25, axis=0), np.nanpercentile(ys, 75, axis=0), alpha=0.25)
    plt.title(env_id)
    plt.xlabel('Epoch')
    plt.ylabel('Median Success Rate')
    plt.legend()
    plt.savefig(os.path.join(args.dir, 'fig_{}.png'.format(env_id)))
|