Add benchmark support (#338)

* Warn if seed doesn't return a list

* Add preliminary BenchmarkRun support

* Add experimental benchmark registration

* Flesh out interface

* Make benchmarkrun upload recursive

* Add evaluation episodes

* Add benchmark scoring

* Tweak reward locations

* Tweak scoring

* Clear default metadata in Wrapper

* Improve scoring

* Expose registry; fix test

* Add initial_reset_timestamp

* Add back algorithm; fix tests
Greg Brockman authored 2016-09-23 01:04:26 -07:00, committed by GitHub
parent 0c8c055fca, commit 934b2acbb7
14 changed files with 427 additions and 20 deletions


@@ -14,18 +14,20 @@ import gym
logger = logging.getLogger()

class Uploader(object):
-    def __init__(self, training_dir, algorithm_id, writeup):
+    def __init__(self, training_dir, algorithm_id, benchmark_run_id, writeup):
        self.training_dir = training_dir
        self.algorithm_id = algorithm_id
+        self.benchmark_run_id = benchmark_run_id
        self.writeup = writeup

    def run(self):
-        gym.upload(self.training_dir, algorithm_id=self.algorithm_id, writeup=self.writeup)
+        gym.upload(self.training_dir, algorithm_id=self.algorithm_id, benchmark_run_id=self.benchmark_run_id, writeup=self.writeup)

def main():
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-t', '--training-dir', required=True, help='What directory to upload.')
    parser.add_argument('-a', '--algorithm_id', help='Set the algorithm id.')
+    parser.add_argument('-b', '--benchmark-run-id', help='Set the benchmark run id.')
    parser.add_argument('-w', '--writeup', help='Writeup to attach.')
    parser.add_argument('-v', '--verbose', action='count', dest='verbosity', default=0, help='Set verbosity.')
    args = parser.parse_args()

@@ -35,7 +37,7 @@ def main():
    elif args.verbosity >= 1:
        logger.setLevel(logging.DEBUG)

-    runner = Uploader(training_dir=args.training_dir, algorithm_id=args.algorithm_id, writeup=args.writeup)
+    runner = Uploader(training_dir=args.training_dir, algorithm_id=args.algorithm_id, benchmark_run_id=args.benchmark_run_id, writeup=args.writeup)
    runner.run()

    return 0


@@ -32,6 +32,7 @@ del logger_setup
sanity_check_dependencies()

from gym.core import Env, Space, Wrapper, ObservationWrapper, ActionWrapper, RewardWrapper
+from gym.benchmarks import benchmark_spec
from gym.envs import make, spec
from gym.scoreboard.api import upload


@@ -0,0 +1,76 @@
# EXPERIMENTAL: all may be removed soon

import numpy as np

from gym.benchmarks import scoring
from gym.benchmarks.registration import register_benchmark, benchmark_spec, registry

register_benchmark(
    id='Atari7Pixel-v0',
    scorer=scoring.ClipTo01ThenAverage(),
    description='7 Atari games, with pixel observations',
    task_groups={
        'BeamRider-v0': [{
            'seeds': 1,
            'timesteps': 10000000
        }],
        'Breakout-v0': [{
            'seeds': 1,
            'timesteps': 10000000
        }],
        'Enduro-v0': [{
            'seeds': 1,
            'timesteps': 10000000
        }],
        'Pong-v0': [{
            'seeds': 1,
            'timesteps': 10000000
        }],
        'Qbert-v0': [{
            'seeds': 1,
            'timesteps': 10000000
        }],
        'Seaquest-v0': [{
            'seeds': 1,
            'timesteps': 10000000
        }],
        'SpaceInvaders-v0': [{
            'seeds': 1,
            'timesteps': 10000000
        }],
    })

register_benchmark(
    id='Atari7Ram-v0',
    description='7 Atari games, with RAM observations',
    scorer=scoring.ClipTo01ThenAverage(),
    task_groups={
        'BeamRider-ram-v0': [{
            'seeds': 1,
            'timesteps': 10000000
        }],
        'Breakout-ram-v0': [{
            'seeds': 1,
            'timesteps': 10000000
        }],
        'Enduro-ram-v0': [{
            'seeds': 1,
            'timesteps': 10000000
        }],
        'Pong-ram-v0': [{
            'seeds': 1,
            'timesteps': 10000000
        }],
        'Qbert-ram-v0': [{
            'seeds': 1,
            'timesteps': 10000000
        }],
        'Seaquest-ram-v0': [{
            'seeds': 1,
            'timesteps': 10000000
        }],
        'SpaceInvaders-ram-v0': [{
            'seeds': 1,
            'timesteps': 10000000
        }],
    })
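As a quick orientation (not part of the diff), a minimal sketch of looking up one of the benchmarks registered above through the benchmark_spec helper that gym now re-exports; the printed values are only illustrative:

import gym

benchmark = gym.benchmark_spec('Atari7Ram-v0')
print(benchmark.id)           # 'Atari7Ram-v0'
print(benchmark.description)  # '7 Atari games, with RAM observations'

# Each env id maps to a list of Task objects (see the registration module below).
for env_id, tasks in benchmark.task_groups.items():
    for task in tasks:
        print('{}: {} seed(s), {} timesteps'.format(env_id, task.seeds, task.timesteps))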


@@ -0,0 +1,65 @@
# EXPERIMENTAL: all may be removed soon

import collections
import gym.envs
import logging

from gym import error

logger = logging.getLogger(__name__)

class Task(object):
    def __init__(self, env_id, seeds, timesteps, reward_floor, reward_ceiling):
        self.env_id = env_id
        self.seeds = seeds
        self.timesteps = timesteps
        self.reward_floor = reward_floor
        self.reward_ceiling = reward_ceiling

class Benchmark(object):
    def __init__(self, id, scorer, task_groups, description=None):
        self.id = id
        self.scorer = scorer
        self.description = description

        task_map = {}
        for env_id, tasks in task_groups.items():
            task_map[env_id] = []
            for task in tasks:
                task_map[env_id].append(Task(
                    env_id=env_id,
                    seeds=task['seeds'],
                    timesteps=task['timesteps'],
                    reward_floor=task.get('reward_floor', 0),
                    reward_ceiling=task.get('reward_ceiling', 100),
                ))
        self.task_groups = task_map

    def task_spec(self, env_id):
        try:
            return self.task_groups[env_id]
        except KeyError:
            raise error.Unregistered('No task with env_id {} registered for benchmark {}', env_id, self.id)

    def score_evaluation(self, env_id, episode_lengths, episode_rewards, episode_types, timestamps, initial_reset_timestamp):
        return self.scorer.score_evaluation(self, env_id, episode_lengths, episode_rewards, episode_types, timestamps, initial_reset_timestamp)

    def score_benchmark(self, score_map):
        return self.scorer.score_benchmark(self, score_map)

class Registry(object):
    def __init__(self):
        self.benchmarks = collections.OrderedDict()

    def register_benchmark(self, id, **kwargs):
        self.benchmarks[id] = Benchmark(id=id, **kwargs)

    def benchmark_spec(self, id):
        try:
            return self.benchmarks[id]
        except KeyError:
            raise error.UnregisteredBenchmark('No registered benchmark with id: {}'.format(id))

registry = Registry()
register_benchmark = registry.register_benchmark
benchmark_spec = registry.benchmark_spec
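To make the Registry/Benchmark/Task relationship concrete, a small hedged sketch (the benchmark id and the numbers below are invented for illustration):

from gym.benchmarks import registration, scoring

registration.register_benchmark(
    id='MyTinyBenchmark-v0',                  # hypothetical id
    scorer=scoring.ClipTo01ThenAverage(),
    description='Example registration',
    task_groups={
        'CartPole-v0': [{'seeds': 1, 'timesteps': 2000,
                         'reward_floor': 0, 'reward_ceiling': 200}],
    })

benchmark = registration.benchmark_spec('MyTinyBenchmark-v0')
tasks = benchmark.task_spec('CartPole-v0')    # list containing one Task
print(tasks[0].reward_ceiling)                # 200
# Unknown ids raise error.UnregisteredBenchmark (see the error-class changes below).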

gym/benchmarks/scoring.py (new file, 99 lines)

@@ -0,0 +1,99 @@
import logging

import numpy as np

from gym import envs

logger = logging.getLogger(__name__)

class ClipTo01ThenAverage(object):
    def __init__(self, num_episodes=100):
        self.num_episodes = num_episodes

    def score_evaluation(self, benchmark, env_id, episode_lengths, episode_rewards, episode_types, timestamps, initial_reset_timestamp):
        tasks = benchmark.task_groups[env_id]
        spec = envs.spec(env_id)

        (t_idx,) = np.where([t == 't' for t in episode_types]) # training episodes
        (e_idx,) = np.where([t == 'e' for t in episode_types]) # evaluation episodes
        if len(e_idx) == 0:
            # If no episodes marked for evaluation, consider
            # everything both a training and evaluation episode.
            (t_idx,) = np.where([True for t in episode_types])
            (e_idx,) = np.where([True for t in episode_types])

        training_lengths = np.array(episode_lengths)[t_idx]
        training_rewards = np.array(episode_rewards)[t_idx]

        evaluation_lengths = np.array(episode_lengths)[e_idx]
        evaluation_rewards = np.array(episode_rewards)[e_idx]

        # How many training timesteps have elapsed by the end of each
        # episode. Not to be confused with Unix timestamps.
        elapsed_timesteps = np.cumsum(training_lengths)

        scores = []
        solves = []
        rewards = []
        _timestamps = []
        for task in tasks:
            # Find the first episode where we're over the allotted
            # training timesteps.
            (cutoff,) = np.where(elapsed_timesteps > task.timesteps)
            if len(cutoff) > 0:
                cutoff_idx = cutoff[-1]
                orig_cutoff_idx = t_idx[cutoff_idx] # cutoff index in the original
                (allowed_e_idx,) = np.where(e_idx < orig_cutoff_idx) # restrict to earlier episodes
            else:
                # All episodes are fair game
                allowed_e_idx = e_idx

            if len(allowed_e_idx) > 0:
                last_timestamp = timestamps[allowed_e_idx[-1]]
            else:
                # If we don't have any evaluation episodes, then the
                # last valid timestamp is when we started.
                last_timestamp = initial_reset_timestamp

            # Grab the last num_episodes evaluation episodes from
            # before the cutoff (at which point we've gathered too
            # much experience).
            #
            # This probably won't work long-term but is fine for now.
            allowed_episode_rewards = np.array(episode_rewards)[allowed_e_idx]
            reward = allowed_episode_rewards[-self.num_episodes:]
            if len(reward) == 0:
                logger.info('No rewards for %s', env_id)
                scores.append(0)
                return

            floor = task.reward_floor
            ceiling = task.reward_ceiling

            # Grab the indexes where we reached the ceiling
            solved = reward >= ceiling
            # Linearly rescale rewards to between 0 and 1
            clipped = np.clip((reward - floor) / (ceiling - floor), 0, 1)
            # Take the mean rescaled score
            score = np.mean(clipped)
            scores.append(score)
            # Record the list of solved episodes
            solves.append(solved)
            # Record the list of rewards
            rewards.append(reward)
            # Record the timestamp of the last episode timestamp
            _timestamps.append(last_timestamp)

        return {
            'rewards': rewards,
            'scores': scores,
            'solves': solves,
            'timestamps': _timestamps,
        }

    def score_benchmark(self, benchmark, episode_scores):
        all_scores = []
        for env_id, scores in episode_scores.items():
            all_scores += scores

        return np.mean(all_scores)
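The core arithmetic of ClipTo01ThenAverage is easy to check by hand; a toy example with invented rewards for a task whose reward_floor is 0 and reward_ceiling is 200:

import numpy as np

reward = np.array([50.0, 150.0, 250.0])   # invented evaluation rewards
floor, ceiling = 0.0, 200.0

clipped = np.clip((reward - floor) / (ceiling - floor), 0, 1)   # [0.25, 0.75, 1.0]
score = np.mean(clipped)                                        # ~0.667
solved = reward >= ceiling                                      # [False, False, True]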


@@ -0,0 +1,55 @@
import numpy as np

import gym
from gym import monitoring
from gym.monitoring.tests import helpers

from gym.benchmarks import registration, scoring

def test():
    benchmark = registration.Benchmark(
        id='MyBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(),
        task_groups={
            'CartPole-v0': [{
                'seeds': 1,
                'timesteps': 5
            }, {
                'seeds': 1,
                'timesteps': 100
            }],
        })

    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env.monitor.start(temp, video_callable=False, seed=0)

        env.monitor.configure(mode='evaluation')
        rollout(env)

        env.monitor.configure(mode='training')
        for i in range(2):
            rollout(env)

        env.monitor.configure(mode='evaluation')
        rollout(env, good=True)

        env.monitor.close()
        results = monitoring.load_results(temp)
        evaluation_score = benchmark.score_evaluation('CartPole-v0', results['episode_lengths'], results['episode_rewards'], results['episode_types'], results['timestamps'], results['initial_reset_timestamp'])
        benchmark_score = benchmark.score_benchmark({
            'CartPole-v0': evaluation_score['scores'],
        })

        assert np.all(np.isclose(evaluation_score['scores'], [0.089999999999999997, 0.27000000000000002])), "evaluation_score={}".format(evaluation_score)
        assert np.isclose(benchmark_score, 0.18), "benchmark_score={}".format(benchmark_score)

def rollout(env, good=False):
    env.reset()

    action = 0
    d = False
    while not d:
        if good:
            action = 1 - action
        o,r,d,i = env.step(action)


@@ -5,7 +5,19 @@ class Error(Exception):
# Local errors

-class UnregisteredEnv(Error):
+class Unregistered(Error):
+    """Raised when the user requests an item from the registry that does
+    not actually exist.
+    """
+    pass
+
+class UnregisteredEnv(Unregistered):
+    """Raised when the user requests an env from the registry that does
+    not actually exist.
+    """
+    pass
+
+class UnregisteredBenchmark(Unregistered):
    """Raised when the user requests an env from the registry that does
    not actually exist.
    """


@@ -1,3 +1,3 @@
-from gym.monitoring.monitor import Monitor, load_results, _open_monitors
+from gym.monitoring.monitor import Monitor, load_results, detect_training_manifests, _open_monitors
from gym.monitoring.stats_recorder import StatsRecorder
from gym.monitoring.video_recorder import VideoRecorder


@@ -17,8 +17,10 @@ logger = logging.getLogger(__name__)
FILE_PREFIX = 'openaigym'
MANIFEST_PREFIX = FILE_PREFIX + '.manifest'

-def detect_training_manifests(training_dir):
-    return [os.path.join(training_dir, f) for f in os.listdir(training_dir) if f.startswith(MANIFEST_PREFIX + '.')]
+def detect_training_manifests(training_dir, files=None):
+    if files is None:
+        files = os.listdir(training_dir)
+    return [os.path.join(training_dir, f) for f in files if f.startswith(MANIFEST_PREFIX + '.')]

def detect_monitor_files(training_dir):
    return [os.path.join(training_dir, f) for f in os.listdir(training_dir) if f.startswith(FILE_PREFIX + '.')]
@@ -74,7 +76,6 @@ class Monitor(object):
    Attributes:
        id (Optional[str]): The ID of the monitored environment
    """

    def __init__(self, env):
@@ -216,15 +217,28 @@ class Monitor(object):
        logger.info('''Finished writing results. You can upload them to the scoreboard via gym.upload(%r)''', self.directory)

-    def configure(self, video_callable=None):
+    def configure(self, video_callable=None, mode=None):
        """Reconfigure the monitor.

            video_callable (function): Whether to record video to upload to the scoreboard.
+            mode (['evaluation', 'training']): Whether this is an evaluation or training episode.
        """
+        if not self.enabled:
+            raise error.Error('Can only configure an enabled monitor. (HINT: did you already close this monitor?)')
+
        if video_callable is not None:
            self.video_callable = video_callable
+        if mode is not None:
+            if mode == 'evaluation':
+                type = 'e'
+            elif mode == 'training':
+                type = 't'
+            else:
+                raise error.Error('Invalid mode {}: must be "training" or "evaluation"', mode)
+            self.stats_recorder.type = type

    def _before_step(self, action):
        if not self.enabled: return
        self.stats_recorder.before_step(action)
@@ -331,7 +345,7 @@ def load_results(training_dir):
            main_seeds.append(None)

    env_info = collapse_env_infos(env_infos, training_dir)
-    timestamps, episode_lengths, episode_rewards, initial_reset_timestamp = merge_stats_files(stats_files)
+    timestamps, episode_lengths, episode_rewards, episode_types, initial_reset_timestamp = merge_stats_files(stats_files)

    return {
'manifests': manifests, 'manifests': manifests,
@@ -339,6 +353,7 @@ def load_results(training_dir):
        'timestamps': timestamps,
        'episode_lengths': episode_lengths,
        'episode_rewards': episode_rewards,
+        'episode_types': episode_types,
        'initial_reset_timestamp': initial_reset_timestamp,
        'videos': videos,
        'main_seeds': main_seeds,
@@ -349,6 +364,7 @@ def merge_stats_files(stats_files):
    timestamps = []
    episode_lengths = []
    episode_rewards = []
+    episode_types = []
    initial_reset_timestamps = []

    for path in stats_files:
@@ -358,6 +374,8 @@ def merge_stats_files(stats_files):
        timestamps += content['timestamps']
        episode_lengths += content['episode_lengths']
        episode_rewards += content['episode_rewards']
+        # Recent addition
+        episode_types += content.get('episode_types', [])
        initial_reset_timestamps.append(content['initial_reset_timestamp'])

    idxs = np.argsort(timestamps)
@@ -365,12 +383,17 @@ def merge_stats_files(stats_files):
    episode_lengths = np.array(episode_lengths)[idxs].tolist()
    episode_rewards = np.array(episode_rewards)[idxs].tolist()
+    if episode_types:
+        episode_types = np.array(episode_types)[idxs].tolist()
+    else:
+        episode_types = None

    if len(initial_reset_timestamps) > 0:
        initial_reset_timestamp = min(initial_reset_timestamps)
    else:
        initial_reset_timestamp = 0

-    return timestamps, episode_lengths, episode_rewards, initial_reset_timestamp
+    return timestamps, episode_lengths, episode_rewards, episode_types, initial_reset_timestamp

def collapse_env_infos(env_infos, training_dir):
    assert len(env_infos) > 0
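Tying the monitor changes together, a hedged usage sketch (directory and episode counts are arbitrary): configure(mode=...) tags the episodes that follow, and load_results() now surfaces those tags as episode_types.

import tempfile

import gym
from gym import monitoring

training_dir = tempfile.mkdtemp()
env = gym.make('CartPole-v0')
env.monitor.start(training_dir, video_callable=False)

env.monitor.configure(mode='training')
for _ in range(3):
    env.reset()
    done = False
    while not done:
        _, _, done, _ = env.step(env.action_space.sample())

env.monitor.configure(mode='evaluation')   # subsequent resets are tagged 'e'
env.reset()
done = False
while not done:
    _, _, done, _ = env.step(env.action_space.sample())

env.monitor.close()
results = monitoring.load_results(training_dir)
print(results['episode_types'])            # e.g. ['t', 't', 't', 'e']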


@@ -12,6 +12,8 @@ class StatsRecorder(object):
        self.file_prefix = file_prefix
        self.episode_lengths = []
        self.episode_rewards = []
+        self.episode_types = [] # experimental addition
+        self._type = 't'
        self.timestamps = []
        self.steps = None
        self.rewards = None
@@ -22,6 +24,16 @@ class StatsRecorder(object):
        filename = '{}.stats.json'.format(self.file_prefix)
        self.path = os.path.join(self.directory, filename)

+    @property
+    def type(self):
+        return self._type
+
+    @type.setter
+    def type(self, type):
+        if type not in ['t', 'e']:
+            raise error.Error('Invalid episode type {}: must be t for training or e for evaluation', type)
+        self._type = type
+
    def before_step(self, action):
        assert not self.closed
@@ -47,6 +59,10 @@ class StatsRecorder(object):
        self.save_complete()
        self.steps = 0
        self.rewards = 0
+        # We write the type at the beginning of the episode. If a user
+        # changes the type, it's more natural for it to apply next
+        # time the user calls reset().
+        self.episode_types.append(self._type)

    def save_complete(self):
        if self.steps is not None:
@@ -69,4 +85,5 @@ class StatsRecorder(object):
                'timestamps': self.timestamps,
                'episode_lengths': self.episode_lengths,
                'episode_rewards': self.episode_rewards,
+                'episode_types': self.episode_types,
            }, f)


@@ -8,7 +8,7 @@ http://rst.ninjs.org/
import os

-from gym.scoreboard.client.resource import Algorithm, Evaluation, FileUpload
+from gym.scoreboard.client.resource import Algorithm, BenchmarkRun, Evaluation, FileUpload
from gym.scoreboard.registration import registry, add_task, add_group

# Discover API key from the environment. (You should never have to


@@ -15,17 +15,46 @@ logger = logging.getLogger(__name__)
video_name_re = re.compile('^[\w.-]+\.(mp4|avi|json)$')
metadata_name_re = re.compile('^[\w.-]+\.meta\.json$')

-def upload(training_dir, algorithm_id=None, writeup=None, api_key=None, ignore_open_monitors=False):
+def upload(training_dir, algorithm_id=None, writeup=None, benchmark_id=None, api_key=None, ignore_open_monitors=False):
    """Upload the results of training (as automatically recorded by your
    env's monitor) to OpenAI Gym.

    Args:
        training_dir (Optional[str]): A directory containing the results of a training run.
        algorithm_id (Optional[str]): An algorithm id indicating the particular version of the algorithm (including choices of parameters) you are running (visit https://gym.openai.com/algorithms to create an id)
+        benchmark_id (Optional[str]): The benchmark that these evaluations belong to. Will recursively search through training_dir for any Gym manifests. This feature is currently pre-release.
        writeup (Optional[str]): A Gist URL (of the form https://gist.github.com/<user>/<id>) containing your writeup for this evaluation.
        api_key (Optional[str]): Your OpenAI API key. Can also be provided as an environment variable (OPENAI_GYM_API_KEY).
    """

+    if benchmark_id:
+        # TODO: validate the number of matching evaluations
+        benchmark_run = resource.BenchmarkRun.create(benchmark_id=benchmark_id, algorithm_id=algorithm_id)
+        benchmark_run_id = benchmark_run.id
+        recurse = True
+        # Don't propagate algorithm_id to Evaluation if we're running as part of a benchmark
+        algorithm_id = None
+    else:
+        benchmark_run_id = None
+        recurse = False
+
+    # Discover training directories
+    directories = []
+    if recurse:
+        for name, _, files in os.walk(training_dir):
+            if monitoring.detect_training_manifests(name, files=files):
+                directories.append(name)
+    else:
+        directories.append(training_dir)
+
+    # Actually do the uploads.
+    for training_dir in directories:
+        _upload(training_dir, algorithm_id, writeup, benchmark_run_id, api_key, ignore_open_monitors)
+
+    return benchmark_run_id
+
+def _upload(training_dir, algorithm_id=None, writeup=None, benchmark_run_id=None, api_key=None, ignore_open_monitors=False):
    if not ignore_open_monitors:
        open_monitors = monitoring._open_monitors()
        if len(open_monitors) > 0:
@@ -57,6 +86,7 @@ def upload(training_dir, algorithm_id=None, writeup=None, api_key=None, ignore_o
        algorithm={
            'id': algorithm_id,
        },
+        benchmark_run_id=benchmark_run_id,
        writeup=writeup,
        gym_version=env_info['gym_version'],
        api_key=api_key,
@@ -89,6 +119,7 @@ def upload_training_data(training_dir, api_key=None):
    timestamps = results['timestamps']
    episode_lengths = results['episode_lengths']
    episode_rewards = results['episode_rewards']
+    episode_types = results['episode_types']
    main_seeds = results['main_seeds']
    seeds = results['seeds']
    videos = results['videos']
@@ -98,7 +129,7 @@ def upload_training_data(training_dir, api_key=None):
    # Do the relevant uploads
    if len(episode_lengths) > 0:
-        training_episode_batch = upload_training_episode_batch(episode_lengths, episode_rewards, timestamps, main_seeds, seeds, api_key, env_id=env_id)
+        training_episode_batch = upload_training_episode_batch(episode_lengths, episode_rewards, episode_types, timestamps, main_seeds, seeds, api_key, env_id=env_id)
    else:
        training_episode_batch = None
@@ -114,12 +145,13 @@ def upload_training_data(training_dir, api_key=None):
    return env_info, training_episode_batch, training_video

-def upload_training_episode_batch(episode_lengths, episode_rewards, timestamps, main_seeds, seeds, api_key=None, env_id=None):
+def upload_training_episode_batch(episode_lengths, episode_rewards, episode_types, timestamps, main_seeds, seeds, api_key=None, env_id=None):
    logger.info('[%s] Uploading %d episodes of training data', env_id, len(episode_lengths))
    file_upload = resource.FileUpload.create(purpose='episode_batch', api_key=api_key)
    file_upload.put({
        'episode_lengths': episode_lengths,
        'episode_rewards': episode_rewards,
+        'episode_types': episode_types,
        'timestamps': timestamps,
        'main_seeds': main_seeds,
        'seeds': seeds,
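A hedged sketch of the new upload path (the directory, benchmark id, and API key are placeholders, and the call talks to the scoreboard API): passing benchmark_id creates a BenchmarkRun, recursively discovers manifests under training_dir, and uploads one evaluation per discovered directory.

import gym

benchmark_run_id = gym.upload(
    '/tmp/benchmark-example',       # placeholder; searched recursively for gym manifests
    benchmark_id='Atari7Ram-v0',    # one of the benchmarks registered above
    api_key='sk-...',               # placeholder; or set OPENAI_GYM_API_KEY
)
print(benchmark_run_id)             # id of the created BenchmarkRun (None without benchmark_id)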


@@ -13,6 +13,7 @@ def convert_to_gym_object(resp, api_key):
    types = {
        'evaluation': Evaluation,
        'file': FileUpload,
+        'benchmark_run': BenchmarkRun,
    }

    if isinstance(resp, list):
@@ -229,7 +230,7 @@ class APIResource(GymObject):
        if cls == APIResource:
            raise NotImplementedError(
                'APIResource is an abstract class. You should perform '
-                'actions on its subclasses (e.g. Charge, Customer)')
+                'actions on its subclasses')
        return str(urllib.parse.quote_plus(cls.__name__.lower()))

    @classmethod
@@ -381,3 +382,11 @@ class Evaluation(CreateableAPIResource):
class Algorithm(CreateableAPIResource):
    pass

+class BenchmarkRun(CreateableAPIResource, UpdateableAPIResource):
+    @classmethod
+    def class_name(cls):
+        return 'benchmark_run'
+
+    def commit(self):
+        return self.request('post', '{}/commit'.format(self.instance_path()))
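For completeness, a minimal sketch of exercising the new BenchmarkRun resource directly, as the upload path above does (both ids are placeholders, and each call hits the scoreboard API):

from gym.scoreboard.client import resource

run = resource.BenchmarkRun.create(benchmark_id='Atari7Ram-v0',   # placeholder
                                   algorithm_id='alg_example')    # placeholder
print(run.id)    # server-assigned benchmark run id

# Once the per-environment evaluations are uploaded, the run can be finalized.
run.commit()     # POST <instance_path>/commit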


@@ -16,13 +16,14 @@ def score_from_remote(url):
    parsed = result.json()
    episode_lengths = parsed['episode_lengths']
    episode_rewards = parsed['episode_rewards']
+    episode_types = parsed.get('episode_types')
    timestamps = parsed['timestamps']
    # Handle legacy entries where initial_reset_timestamp wasn't set
    initial_reset_timestamp = parsed.get('initial_reset_timestamp', timestamps[0])
    env_id = parsed['env_id']

    spec = gym.spec(env_id)
-    return score_from_merged(episode_lengths, episode_rewards, timestamps, initial_reset_timestamp, spec.trials, spec.reward_threshold)
+    return score_from_merged(episode_lengths, episode_rewards, episode_types, timestamps, initial_reset_timestamp, spec.trials, spec.reward_threshold)

def score_from_local(directory):
    """Calculate score from a local results directory"""
@@ -33,15 +34,24 @@ def score_from_local(directory):
    episode_lengths = results['episode_lengths']
    episode_rewards = results['episode_rewards']
+    episode_types = results['episode_types']
    timestamps = results['timestamps']
    initial_reset_timestamp = results['initial_reset_timestamp']
    spec = gym.spec(results['env_info']['env_id'])

-    return score_from_merged(episode_lengths, episode_rewards, timestamps, initial_reset_timestamp, spec.trials, spec.reward_threshold)
+    return score_from_merged(episode_lengths, episode_rewards, episode_types, timestamps, initial_reset_timestamp, spec.trials, spec.reward_threshold)

-def score_from_merged(episode_lengths, episode_rewards, timestamps, initial_reset_timestamp, trials, reward_threshold):
-    """Method to calculate the score from merged monitor files.
+def score_from_merged(episode_lengths, episode_rewards, episode_types, timestamps, initial_reset_timestamp, trials, reward_threshold):
+    """Method to calculate the score from merged monitor files. Scores
+    only a single environment; mostly legacy.
    """
+    if episode_types is not None:
+        # Select only the training episodes
+        t_idx = np.where([e == 't' for e in episode_types])
+        episode_lengths = np.array(episode_lengths)[t_idx]
+        episode_rewards = np.array(episode_rewards)[t_idx]
+        timestamps = np.array(timestamps)[t_idx]

    # Make sure everything is a float -- no pesky ints.
    episode_rewards = np.array(episode_rewards, dtype='float64')
@@ -87,6 +97,12 @@ def score_from_merged(episode_lengths, episode_rewards, timestamps, initial_rese
        'seconds_in_total': seconds_in_total,
    }

+def benchmark_score_from_merged(benchmark, env_id, episode_lengths, episode_rewards, episode_types):
+    """Method to calculate an environment's benchmark score from merged
+    monitor files.
+    """
+    return benchmark.score(benchmark, env_id, episode_lengths, episode_rewards, episode_types)
+
def running_mean(x, N):
    x = np.array(x, dtype='float64')
    cumsum = np.cumsum(np.insert(x, 0, 0))
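Finally, the training-episode filter added to score_from_merged boils down to the following numpy selection (toy values, shown in isolation from the rest of the scoring code):

import numpy as np

episode_types   = ['t', 't', 'e', 't', 'e']
episode_lengths = [10, 12, 200, 9, 180]
episode_rewards = [10.0, 12.0, 200.0, 9.0, 180.0]

# Keep only the training ('t') episodes, as the scorer now does.
t_idx = np.where([e == 't' for e in episode_types])
print(np.array(episode_lengths)[t_idx])   # lengths of the three training episodes
print(np.array(episode_rewards)[t_idx])   # rewards of the three training episodes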