Files
baselines/baselines/bench/monitor.py
pzhokhov fe06c6b4db continuous action spaces for codegen + some benchmarking (#82)
* add some docstrings

* start making big changes

* state machine redesign

* sampling seems to work

* some reorg

* fixed sampling of real vals

* json conversion

* made it possible to register new commands
got nontrivial version of Pred working

* consolidate command definitions

* add more macro blocks

* revived visualization

* rename Userdata -> CmdInterpreter
make AlgoSmInstance subclass of SmInstance that uses appropriate userdata argument

* replace userdata by ci when appropriate

* minor test fixes

* revamped handmade dir, can run ppo_metal

* seed to avoid random test failure

* implement AlgoAgent

* Autogenerated object that performs all ops and macros

* more CmdRecorder changes

* move files around

* move MatchProb and JtftProb

* remove obsolete

* fix tests involving AlgoAgent (pending the next commit on ppo_metal code)

* ppo_metal: reduce duplication in policy_gen, make sess an attribute of PpoAgent and StochasticPolicy instead of using get_default_session everywhere.

* maze_env reformatting, move algo_search script (but still broken)

* move agent.py

* fix test on handcrafted agents

* tuning/fixing ppo_metal baseline

* minor

* Fix ppo_metal baseline

* Don’t set epcount, tcount unless they’re being used

* get rid of old ppo_metal baseline

* fixes for handmade/run.py tuning

* fix codegen ppo

* fix handmade ppo hps

* fix test, go back to safe_div

* switch to more complex filtering

* make sure all handcrafted algos have finite probability

* train to maximize logprob of provided samples
Trex changes to avoid segfault

* AlgoSm also includes global hyperparams

* don’t duplicate global hyperparam defaults

* create generic_ob_ac_space function

* use sorted list of outkeys

* revive tsne

* todo changes

* determinism test

* todo + test fix

* remove a few deprecated files, rename other tests so they don’t run automatically, fix real test failure

* continuous control with codegen

* continuous control with codegen

* implement continuous action space algodistr

* ppo with trex RUN BENCHMARKS

* wrap trex in a monitor

* dummy commit to RUN BENCHMARKS

* adding monitor to trex env RUN BENCHMARKS

* adding monitor to trex RUN BENCHMARKS

* include monitor into trex env RUN BENCHMARKS

* generate nll and predmean using Distribution node

* dummy commit to RUN BENCHMARKS

* include pybullet into baselines optional dependencies

* dummy commit to RUN BENCHMARKS

* install games for cron rcall user RUN BENCHMARKS

* add --yes flag to install.py in rcall config for cron user RUN BENCHMARKS

* both continuous and discrete versions seem to run

* fixes to monitor to work with vecenv-like info and rewards RUN BENCHMARKS

* dummy commit to RUN BENCHMARKS

* removed shape check from one-hot encoding logic in distributions.CategoricalPd

* reset logger configuration in codegen/handmade/run.py to be in-line with baselines RUN BENCHMARKS

* merged peterz_codegen_benchmarks RUN BENCHMARKS

* skip tests RUN BENCHMARKS

* working on test failures

* save benchmark dicts RUN BENCHMARK

* merged peterz_codegen_benchmark RUN BENCHMARKS

* add get_git_commit_message to the baselines.common.console_util

* dummy commit to RUN BENCHMARKS

* merged fixes from peterz_codegen_benchmark RUN BENCHMARKS

* fixing failure in test_algo_nll WIP

* test_algo_nll passes with both ppo and softq

* re-enabled tests

* run trex on gpus for 100k total (horizon=100k / 16) RUN BENCHMARKS

* merged latest peterz_codegen_benchmarks RUN BENCHMARKS

* fixing codegen test failures (logging-related)

* fixed name collision in run-benchmarks-new.py RUN BENCHMARKS

* fixed name collision in run-benchmarks-new.py RUN BENCHMARKS

* fixed import in node_filters.py

* test_algo_search passes

* some cleanup

* dummy commit to RUN BENCHMARKS

* merge fast fail for subprocvecenv RUN BENCHMARKS

* use SubprocVecEnv in sonic_prob

* added deprecation note to shmem_vec_env

* allow indexing of distributions

* add timeout to pipeline.yaml

* typo in pipeline.yml

* run tests with --forked option

* resolved merge conflict in rl_algs.bench.benchmarks

* re-enable parallel tests

* fix remaining merge conflicts and syntax

* Update trex_prob.py

* fixes to ResultsWriter

* take baselines/run.py from peterz_codegen branch

* actually save stuff to file in VecMonitor RUN BENCHMARKS

* enable parallel tests

* merge stricter flake8

* merge peterz_codegen_benchmark, resolve conflicts

* autopep8

* remove traces of Monitor from trex env, check shapes before encoding in CategoricalPd

* asserts and warnings to make q -> distribution change more explicit

* fixed assert in CategoricalPd

* add header to vec_monitor output file RUN BENCHMARKS

* make VecMonitor write header to the output file

* remove deprecation message from shmem_vec_env RUN BENCHMARKS

* autopep8

* proper shape test in distributions.py

* ResultsWriter can take dict headers

* dummy commit to RUN BENCHMARKS

* replace assert len(qs)==1 with warning RUN BENCHMARKS

* removed pdb from ppo2 RUN BENCHMARKS
2018-09-14 15:43:49 -07:00


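# Monitor is a gym Wrapper that records per-episode reward ('r'), length ('l'),
# and wall-clock time ('t') to a *.monitor.csv file: a '#'-prefixed JSON header
# line followed by CSV rows. get_monitor_files/load_results locate and load them.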
__all__ = ['Monitor', 'get_monitor_files', 'load_results']

import csv
import json
import time
import os.path as osp
from glob import glob

import gym
from gym.core import Wrapper

class Monitor(Wrapper):
    EXT = "monitor.csv"

    def __init__(self, env, filename, allow_early_resets=False, reset_keywords=(), info_keywords=()):
        Wrapper.__init__(self, env=env)
        self.tstart = time.time()
        self.results_writer = ResultsWriter(
            filename,
            header={"t_start": self.tstart, 'env_id': env.spec and env.spec.id},
            extra_keys=reset_keywords + info_keywords
        )
        self.reset_keywords = reset_keywords
        self.info_keywords = info_keywords
        self.allow_early_resets = allow_early_resets
        self.rewards = None
        self.needs_reset = True
        self.episode_rewards = []
        self.episode_lengths = []
        self.episode_times = []
        self.total_steps = 0
        self.current_reset_info = {}  # extra info about the current episode, passed in during reset()

    def reset(self, **kwargs):
        self.reset_state()
        for k in self.reset_keywords:
            v = kwargs.get(k)
            if v is None:
                raise ValueError('Expected you to pass kwarg %s into reset' % k)
            self.current_reset_info[k] = v
        return self.env.reset(**kwargs)

    def reset_state(self):
        if not self.allow_early_resets and not self.needs_reset:
            raise RuntimeError("Tried to reset an environment before done. If you want to allow early resets, wrap your env with Monitor(env, path, allow_early_resets=True)")
        self.rewards = []
        self.needs_reset = False

    def step(self, action):
        if self.needs_reset:
            raise RuntimeError("Tried to step environment that needs reset")
        ob, rew, done, info = self.env.step(action)
        self.update(ob, rew, done, info)
        return (ob, rew, done, info)

    def update(self, ob, rew, done, info):
        self.rewards.append(rew)
        if done:
            self.needs_reset = True
            eprew = sum(self.rewards)
            eplen = len(self.rewards)
            # per-episode stats: total reward, length, and seconds since the monitor started
            epinfo = {"r": round(eprew, 6), "l": eplen, "t": round(time.time() - self.tstart, 6)}
            for k in self.info_keywords:
                epinfo[k] = info[k]
            self.episode_rewards.append(eprew)
            self.episode_lengths.append(eplen)
            self.episode_times.append(time.time() - self.tstart)
            epinfo.update(self.current_reset_info)
            self.results_writer.write_row(epinfo)
            if isinstance(info, dict):
                info['episode'] = epinfo
        self.total_steps += 1

    def close(self):
        # the output file is owned by the ResultsWriter, so close it there
        if self.results_writer.f is not None:
            self.results_writer.f.close()

    def get_total_steps(self):
        return self.total_steps

    def get_episode_rewards(self):
        return self.episode_rewards

    def get_episode_lengths(self):
        return self.episode_lengths

    def get_episode_times(self):
        return self.episode_times
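
# Minimal usage sketch (illustrative, not part of the original module): wrap an
# env with Monitor so every finished episode is appended to <path>.monitor.csv.
def _example_monitor_usage():  # hypothetical helper, for illustration only
    env = Monitor(gym.make("CartPole-v1"), "/tmp/example", allow_early_resets=True)
    env.reset()
    done, info = False, {}
    while not done:
        _, _, done, info = env.step(env.action_space.sample())
    # on episode end the wrapper injects the stats into info
    return info['episode']  # {'r': total_reward, 'l': episode_length, 't': seconds}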

class LoadMonitorResultsError(Exception):
    pass


class ResultsWriter(object):
    def __init__(self, filename=None, header='', extra_keys=()):
        self.extra_keys = extra_keys
        if filename is None:
            self.f = None
            self.logger = None
        else:
            if not filename.endswith(Monitor.EXT):
                if osp.isdir(filename):
                    filename = osp.join(filename, Monitor.EXT)
                else:
                    filename = filename + "." + Monitor.EXT
            self.f = open(filename, "wt")
            if isinstance(header, dict):
                header = '# {} \n'.format(json.dumps(header))
            self.f.write(header)
            self.logger = csv.DictWriter(self.f, fieldnames=('r', 'l', 't') + tuple(extra_keys))
            self.logger.writeheader()
            self.f.flush()

    def write_row(self, epinfo):
        if self.logger:
            self.logger.writerow(epinfo)
            self.f.flush()
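
# For reference (illustrative values): a file produced by ResultsWriter starts
# with a '#'-prefixed JSON metadata line, then a CSV header and one row per episode:
#
#   # {"t_start": 1536940000.0, "env_id": "CartPole-v1"}
#   r,l,t
#   200.0,200,12.5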

def get_monitor_files(dir):
    return glob(osp.join(dir, "*" + Monitor.EXT))


def load_results(dir):
    import pandas
    monitor_files = (
        glob(osp.join(dir, "*monitor.json")) +
        glob(osp.join(dir, "*monitor.csv")))  # get both csv and (old) json files
    if not monitor_files:
        raise LoadMonitorResultsError("no monitor files of the form *%s found in %s" % (Monitor.EXT, dir))
    dfs = []
    headers = []
    for fname in monitor_files:
        with open(fname, 'rt') as fh:
            if fname.endswith('csv'):
                firstline = fh.readline()
                if not firstline:
                    continue
                assert firstline[0] == '#'
                header = json.loads(firstline[1:])
                df = pandas.read_csv(fh, index_col=None)
                headers.append(header)
            elif fname.endswith('json'):  # Deprecated json format
                episodes = []
                lines = fh.readlines()
                header = json.loads(lines[0])
                headers.append(header)
                for line in lines[1:]:
                    episode = json.loads(line)
                    episodes.append(episode)
                df = pandas.DataFrame(episodes)
            else:
                assert 0, 'unreachable'
            df['t'] += header['t_start']
        dfs.append(df)
    df = pandas.concat(dfs)
    df.sort_values('t', inplace=True)
    df.reset_index(inplace=True)
    df['t'] -= min(header['t_start'] for header in headers)
    df.headers = headers  # HACK to preserve backwards compatibility
    return df
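
# Usage sketch (illustrative, not part of the original module): aggregate all
# monitor files under a log directory into one time-sorted DataFrame.
def _example_load_results(log_dir="/tmp/logs"):  # hypothetical helper
    df = load_results(log_dir)
    # 'r', 'l', 't' hold per-episode reward, length, and time since the earliest run started
    return df[['r', 'l', 't']].describe()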

def test_monitor():
    # these modules are only needed by the test, so import them locally
    import pandas
    import uuid
    import os

    env = gym.make("CartPole-v1")
    env.seed(0)
    mon_file = "/tmp/baselines-test-%s.monitor.csv" % uuid.uuid4()
    menv = Monitor(env, mon_file)
    menv.reset()
    for _ in range(1000):
        _, _, done, _ = menv.step(0)
        if done:
            menv.reset()

    f = open(mon_file, 'rt')

    firstline = f.readline()
    assert firstline.startswith('#')
    metadata = json.loads(firstline[1:])
    assert metadata['env_id'] == "CartPole-v1"
    # the header written by Monitor contains exactly t_start and env_id
    assert set(metadata.keys()) == {'env_id', 't_start'}, "Incorrect keys in monitor metadata"

    last_logline = pandas.read_csv(f, index_col=None)
    assert set(last_logline.keys()) == {'l', 't', 'r'}, "Incorrect keys in monitor logline"
    f.close()
    os.remove(mon_file)
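
# The test above can be run directly (illustrative invocation):
#   pytest baselines/baselines/bench/monitor.py::test_monitor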