diff --git a/examples/scripts/upload b/examples/scripts/upload
index 625354506..1d2e348ba 100755
--- a/examples/scripts/upload
+++ b/examples/scripts/upload
@@ -14,18 +14,20 @@ import gym
 logger = logging.getLogger()
 
 class Uploader(object):
-    def __init__(self, training_dir, algorithm_id, writeup):
+    def __init__(self, training_dir, algorithm_id, benchmark_id, writeup):
         self.training_dir = training_dir
         self.algorithm_id = algorithm_id
+        self.benchmark_id = benchmark_id
         self.writeup = writeup
 
     def run(self):
-        gym.upload(self.training_dir, algorithm_id=self.algorithm_id, writeup=self.writeup)
+        gym.upload(self.training_dir, algorithm_id=self.algorithm_id, benchmark_id=self.benchmark_id, writeup=self.writeup)
 
 def main():
     parser = argparse.ArgumentParser(description=None)
     parser.add_argument('-t', '--training-dir', required=True, help='What directory to upload.')
     parser.add_argument('-a', '--algorithm_id', help='Set the algorithm id.')
+    parser.add_argument('-b', '--benchmark-id', help='Set the benchmark id.')
     parser.add_argument('-w', '--writeup', help='Writeup to attach.')
     parser.add_argument('-v', '--verbose', action='count', dest='verbosity', default=0, help='Set verbosity.')
     args = parser.parse_args()
@@ -35,7 +37,7 @@ def main():
     elif args.verbosity >= 1:
         logger.setLevel(logging.DEBUG)
 
-    runner = Uploader(training_dir=args.training_dir, algorithm_id=args.algorithm_id, writeup=args.writeup)
+    runner = Uploader(training_dir=args.training_dir, algorithm_id=args.algorithm_id, benchmark_id=args.benchmark_id, writeup=args.writeup)
     runner.run()
 
     return 0
diff --git a/gym/__init__.py b/gym/__init__.py
index 25989007c..f42746a33 100644
--- a/gym/__init__.py
+++ b/gym/__init__.py
@@ -32,6 +32,7 @@ del logger_setup
 sanity_check_dependencies()
 
 from gym.core import Env, Space, Wrapper, ObservationWrapper, ActionWrapper, RewardWrapper
+from gym.benchmarks import benchmark_spec
 from gym.envs import make, spec
 from gym.scoreboard.api import upload
diff --git a/gym/benchmarks/__init__.py b/gym/benchmarks/__init__.py
new file mode 100644
index 000000000..c286c2dbf
--- /dev/null
+++ b/gym/benchmarks/__init__.py
@@ -0,0 +1,76 @@
+# EXPERIMENTAL: all may be removed soon
+
+import numpy as np
+
+from gym.benchmarks import scoring
+from gym.benchmarks.registration import register_benchmark, benchmark_spec, registry
+
+register_benchmark(
+    id='Atari7Pixel-v0',
+    scorer=scoring.ClipTo01ThenAverage(),
+    description='7 Atari games, with pixel observations',
+    task_groups={
+        'BeamRider-v0': [{
+            'seeds': 1,
+            'timesteps': 10000000
+        }],
+        'Breakout-v0': [{
+            'seeds': 1,
+            'timesteps': 10000000
+        }],
+        'Enduro-v0': [{
+            'seeds': 1,
+            'timesteps': 10000000
+        }],
+        'Pong-v0': [{
+            'seeds': 1,
+            'timesteps': 10000000
+        }],
+        'Qbert-v0': [{
+            'seeds': 1,
+            'timesteps': 10000000
+        }],
+        'Seaquest-v0': [{
+            'seeds': 1,
+            'timesteps': 10000000
+        }],
+        'SpaceInvaders-v0': [{
+            'seeds': 1,
+            'timesteps': 10000000
+        }],
+    })
+
+register_benchmark(
+    id='Atari7Ram-v0',
+    description='7 Atari games, with RAM observations',
+    scorer=scoring.ClipTo01ThenAverage(),
+    task_groups={
+        'BeamRider-ram-v0': [{
+            'seeds': 1,
+            'timesteps': 10000000
+        }],
+        'Breakout-ram-v0': [{
+            'seeds': 1,
+            'timesteps': 10000000
+        }],
+        'Enduro-ram-v0': [{
+            'seeds': 1,
+            'timesteps': 10000000
+        }],
+        'Pong-ram-v0': [{
+            'seeds': 1,
+            'timesteps': 10000000
+        }],
+        'Qbert-ram-v0': [{
+            'seeds': 1,
+            'timesteps': 10000000
+        }],
+        'Seaquest-ram-v0': [{
+            'seeds': 1,
+            'timesteps': 10000000
+        }],
+        'SpaceInvaders-ram-v0': [{
+            'seeds': 1,
+            'timesteps': 10000000
+        }],
+    })
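Usage sketch (illustrative, not part of the patch): looking up one of the benchmarks registered above through the `gym.benchmark_spec` export added to `gym/__init__.py`. The `Task` attributes come from `gym/benchmarks/registration.py`, shown next; the loop body is just an example.

    import gym

    benchmark = gym.benchmark_spec('Atari7Ram-v0')
    for env_id, tasks in benchmark.task_groups.items():
        for task in tasks:
            # Each task carries a training budget plus reward bounds
            # (reward_floor/reward_ceiling default to 0 and 100).
            print(env_id, task.timesteps, task.reward_floor, task.reward_ceiling)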
diff --git a/gym/benchmarks/registration.py b/gym/benchmarks/registration.py
new file mode 100644
index 000000000..11f50b7c8
--- /dev/null
+++ b/gym/benchmarks/registration.py
@@ -0,0 +1,65 @@
+# EXPERIMENTAL: all may be removed soon
+
+import collections
+import gym.envs
+import logging
+
+from gym import error
+
+logger = logging.getLogger(__name__)
+
+class Task(object):
+    def __init__(self, env_id, seeds, timesteps, reward_floor, reward_ceiling):
+        self.env_id = env_id
+        self.seeds = seeds
+        self.timesteps = timesteps
+        self.reward_floor = reward_floor
+        self.reward_ceiling = reward_ceiling
+
+class Benchmark(object):
+    def __init__(self, id, scorer, task_groups, description=None):
+        self.id = id
+        self.scorer = scorer
+        self.description = description
+
+        task_map = {}
+        for env_id, tasks in task_groups.items():
+            task_map[env_id] = []
+            for task in tasks:
+                task_map[env_id].append(Task(
+                    env_id=env_id,
+                    seeds=task['seeds'],
+                    timesteps=task['timesteps'],
+                    reward_floor=task.get('reward_floor', 0),
+                    reward_ceiling=task.get('reward_ceiling', 100),
+                ))
+        self.task_groups = task_map
+
+    def task_spec(self, env_id):
+        try:
+            return self.task_groups[env_id]
+        except KeyError:
+            raise error.Unregistered('No task with env_id {} registered for benchmark {}'.format(env_id, self.id))
+
+    def score_evaluation(self, env_id, episode_lengths, episode_rewards, episode_types, timestamps, initial_reset_timestamp):
+        return self.scorer.score_evaluation(self, env_id, episode_lengths, episode_rewards, episode_types, timestamps, initial_reset_timestamp)
+
+    def score_benchmark(self, score_map):
+        return self.scorer.score_benchmark(self, score_map)
+
+class Registry(object):
+    def __init__(self):
+        self.benchmarks = collections.OrderedDict()
+
+    def register_benchmark(self, id, **kwargs):
+        self.benchmarks[id] = Benchmark(id=id, **kwargs)
+
+    def benchmark_spec(self, id):
+        try:
+            return self.benchmarks[id]
+        except KeyError:
+            raise error.UnregisteredBenchmark('No registered benchmark with id: {}'.format(id))
+
+registry = Registry()
+register_benchmark = registry.register_benchmark
+benchmark_spec = registry.benchmark_spec
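A second sketch (illustrative only): registering a custom benchmark against this registry and reading a task spec back. The 'MyCartPole-v0' id and the numbers are invented for the example.

    from gym import error
    from gym.benchmarks import registration, scoring

    registration.register_benchmark(
        id='MyCartPole-v0',  # hypothetical id
        scorer=scoring.ClipTo01ThenAverage(),
        task_groups={
            'CartPole-v0': [{
                'seeds': 1,
                'timesteps': 2000,
                'reward_floor': 0,      # optional, defaults to 0
                'reward_ceiling': 200,  # optional, defaults to 100
            }],
        })

    benchmark = registration.benchmark_spec('MyCartPole-v0')
    try:
        tasks = benchmark.task_spec('CartPole-v0')
    except error.Unregistered:
        # Raised when the env is not part of this benchmark
        raise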
diff --git a/gym/benchmarks/scoring.py b/gym/benchmarks/scoring.py
new file mode 100644
index 000000000..19d691cc8
--- /dev/null
+++ b/gym/benchmarks/scoring.py
@@ -0,0 +1,99 @@
+import logging
+import numpy as np
+from gym import envs
+
+logger = logging.getLogger(__name__)
+
+class ClipTo01ThenAverage(object):
+    def __init__(self, num_episodes=100):
+        self.num_episodes = num_episodes
+
+    def score_evaluation(self, benchmark, env_id, episode_lengths, episode_rewards, episode_types, timestamps, initial_reset_timestamp):
+        tasks = benchmark.task_groups[env_id]
+        spec = envs.spec(env_id)
+
+        (t_idx,) = np.where([t == 't' for t in episode_types]) # training episodes
+        (e_idx,) = np.where([t == 'e' for t in episode_types]) # evaluation episodes
+        if len(e_idx) == 0:
+            # If no episodes marked for evaluation, consider
+            # everything both a training and evaluation episode.
+            (t_idx,) = np.where([True for t in episode_types])
+            (e_idx,) = np.where([True for t in episode_types])
+
+        training_lengths = np.array(episode_lengths)[t_idx]
+        training_rewards = np.array(episode_rewards)[t_idx]
+
+        evaluation_lengths = np.array(episode_lengths)[e_idx]
+        evaluation_rewards = np.array(episode_rewards)[e_idx]
+
+        # How many training timesteps have elapsed by the end of each
+        # episode. Not to be confused with Unix timestamps.
+        elapsed_timesteps = np.cumsum(training_lengths)
+
+        scores = []
+        solves = []
+        rewards = []
+        _timestamps = []
+        for task in tasks:
+            # Find the first episode where we're over the allotted
+            # training timesteps.
+            (cutoff,) = np.where(elapsed_timesteps > task.timesteps)
+            if len(cutoff) > 0:
+                cutoff_idx = cutoff[0]
+                orig_cutoff_idx = t_idx[cutoff_idx] # cutoff index in the original
+                (allowed_e_idx,) = np.where(e_idx < orig_cutoff_idx) # restrict to earlier episodes
+            else:
+                # All episodes are fair game
+                allowed_e_idx = e_idx
+
+            if len(allowed_e_idx) > 0:
+                last_timestamp = timestamps[allowed_e_idx[-1]]
+            else:
+                # If we don't have any evaluation episodes, then the
+                # last valid timestamp is when we started.
+                last_timestamp = initial_reset_timestamp
+
+            # Grab the last num_episodes evaluation episodes from
+            # before the cutoff (at which point we've gathered too
+            # much experience).
+            #
+            # This probably won't work long-term but is fine for now.
+            allowed_episode_rewards = np.array(episode_rewards)[allowed_e_idx]
+            reward = allowed_episode_rewards[-self.num_episodes:]
+
+            if len(reward) == 0:
+                logger.info('No rewards for %s', env_id)
+                scores.append(0)
+                continue
+
+            floor = task.reward_floor
+            ceiling = task.reward_ceiling
+
+            # Mark the episodes where we reached the ceiling
+            solved = reward >= ceiling
+            # Linearly rescale rewards to between 0 and 1
+            clipped = np.clip((reward - floor) / (ceiling - floor), 0, 1)
+
+            # Take the mean rescaled score
+            score = np.mean(clipped)
+            scores.append(score)
+            # Record the list of solved episodes
+            solves.append(solved)
+            # Record the list of rewards
+            rewards.append(reward)
+            # Record the timestamp of the last episode
+            _timestamps.append(last_timestamp)
+
+        return {
+            'rewards': rewards,
+            'scores': scores,
+            'solves': solves,
+            'timestamps': _timestamps,
+        }
+
+    def score_benchmark(self, benchmark, episode_scores):
+        all_scores = []
+        for env_id, scores in episode_scores.items():
+            all_scores += scores
+
+        return np.mean(all_scores)
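Worked example (illustrative, not part of the patch) of the clipping step in ClipTo01ThenAverage, with made-up rewards and a task whose reward_floor is 0 and reward_ceiling is 100:

    import numpy as np

    reward = np.array([50.0, 120.0, -10.0, 75.0])  # last evaluation episodes before the cutoff
    floor, ceiling = 0.0, 100.0

    solved = reward >= ceiling                                     # [False, True, False, False]
    clipped = np.clip((reward - floor) / (ceiling - floor), 0, 1)  # [0.5, 1.0, 0.0, 0.75]
    score = np.mean(clipped)                                       # 0.5625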
"benchmark_score={}".format(benchmark_score) + +def rollout(env, good=False): + env.reset() + + action = 0 + d = False + while not d: + if good: + action = 1 - action + o,r,d,i = env.step(action) diff --git a/gym/error.py b/gym/error.py index 328ca0550..88e8d1e18 100644 --- a/gym/error.py +++ b/gym/error.py @@ -5,7 +5,19 @@ class Error(Exception): # Local errors -class UnregisteredEnv(Error): +class Unregistered(Error): + """Raised when the user requests an item from the registry that does + not actually exist. + """ + pass + +class UnregisteredEnv(Unregistered): + """Raised when the user requests an env from the registry that does + not actually exist. + """ + pass + +class UnregisteredBenchmark(Unregistered): """Raised when the user requests an env from the registry that does not actually exist. """ diff --git a/gym/monitoring/__init__.py b/gym/monitoring/__init__.py index 94e742aad..221f41dc5 100644 --- a/gym/monitoring/__init__.py +++ b/gym/monitoring/__init__.py @@ -1,3 +1,3 @@ -from gym.monitoring.monitor import Monitor, load_results, _open_monitors +from gym.monitoring.monitor import Monitor, load_results, detect_training_manifests, _open_monitors from gym.monitoring.stats_recorder import StatsRecorder from gym.monitoring.video_recorder import VideoRecorder diff --git a/gym/monitoring/monitor.py b/gym/monitoring/monitor.py index 49e102625..6f33503d5 100644 --- a/gym/monitoring/monitor.py +++ b/gym/monitoring/monitor.py @@ -17,8 +17,10 @@ logger = logging.getLogger(__name__) FILE_PREFIX = 'openaigym' MANIFEST_PREFIX = FILE_PREFIX + '.manifest' -def detect_training_manifests(training_dir): - return [os.path.join(training_dir, f) for f in os.listdir(training_dir) if f.startswith(MANIFEST_PREFIX + '.')] +def detect_training_manifests(training_dir, files=None): + if files is None: + files = os.listdir(training_dir) + return [os.path.join(training_dir, f) for f in files if f.startswith(MANIFEST_PREFIX + '.')] def detect_monitor_files(training_dir): return [os.path.join(training_dir, f) for f in os.listdir(training_dir) if f.startswith(FILE_PREFIX + '.')] @@ -74,7 +76,6 @@ class Monitor(object): Attributes: id (Optional[str]): The ID of the monitored environment - """ def __init__(self, env): @@ -216,15 +217,28 @@ class Monitor(object): logger.info('''Finished writing results. You can upload them to the scoreboard via gym.upload(%r)''', self.directory) - def configure(self, video_callable=None): + def configure(self, video_callable=None, mode=None): """Reconfigure the monitor. video_callable (function): Whether to record video to upload to the scoreboard. + mode (['evaluation', 'training']): Whether this is an evaluation or training episode. """ + if not self.enabled: + raise error.Error('Can only configure an enabled monitor. 
diff --git a/gym/monitoring/monitor.py b/gym/monitoring/monitor.py
index 49e102625..6f33503d5 100644
--- a/gym/monitoring/monitor.py
+++ b/gym/monitoring/monitor.py
@@ -17,8 +17,10 @@ logger = logging.getLogger(__name__)
 FILE_PREFIX = 'openaigym'
 MANIFEST_PREFIX = FILE_PREFIX + '.manifest'
 
-def detect_training_manifests(training_dir):
-    return [os.path.join(training_dir, f) for f in os.listdir(training_dir) if f.startswith(MANIFEST_PREFIX + '.')]
+def detect_training_manifests(training_dir, files=None):
+    if files is None:
+        files = os.listdir(training_dir)
+    return [os.path.join(training_dir, f) for f in files if f.startswith(MANIFEST_PREFIX + '.')]
 
 def detect_monitor_files(training_dir):
     return [os.path.join(training_dir, f) for f in os.listdir(training_dir) if f.startswith(FILE_PREFIX + '.')]
@@ -74,7 +76,6 @@ class Monitor(object):
 
     Attributes:
         id (Optional[str]): The ID of the monitored environment
-
     """
 
     def __init__(self, env):
@@ -216,15 +217,28 @@ class Monitor(object):
             logger.info('''Finished writing results. You can upload them to the scoreboard via gym.upload(%r)''', self.directory)
 
-    def configure(self, video_callable=None):
+    def configure(self, video_callable=None, mode=None):
         """Reconfigure the monitor.
 
            video_callable (function): Whether to record video to upload to the scoreboard.
+           mode (['evaluation', 'training']): Whether this is an evaluation or training episode.
         """
+        if not self.enabled:
+            raise error.Error('Can only configure an enabled monitor. (HINT: did you already close this monitor?)')
+
         if video_callable is not None:
             self.video_callable = video_callable
 
+        if mode is not None:
+            if mode == 'evaluation':
+                type = 'e'
+            elif mode == 'training':
+                type = 't'
+            else:
+                raise error.Error('Invalid mode {}: must be "training" or "evaluation"'.format(mode))
+            self.stats_recorder.type = type
+
     def _before_step(self, action):
         if not self.enabled: return
         self.stats_recorder.before_step(action)
@@ -331,7 +345,7 @@ def load_results(training_dir):
            main_seeds.append(None)
 
     env_info = collapse_env_infos(env_infos, training_dir)
-    timestamps, episode_lengths, episode_rewards, initial_reset_timestamp = merge_stats_files(stats_files)
+    timestamps, episode_lengths, episode_rewards, episode_types, initial_reset_timestamp = merge_stats_files(stats_files)
 
     return {
         'manifests': manifests,
@@ -339,6 +353,7 @@ def load_results(training_dir):
         'timestamps': timestamps,
        'episode_lengths': episode_lengths,
         'episode_rewards': episode_rewards,
+        'episode_types': episode_types,
         'initial_reset_timestamp': initial_reset_timestamp,
         'videos': videos,
         'main_seeds': main_seeds,
@@ -349,6 +364,7 @@ def merge_stats_files(stats_files):
     timestamps = []
     episode_lengths = []
     episode_rewards = []
+    episode_types = []
     initial_reset_timestamps = []
 
     for path in stats_files:
@@ -358,6 +374,8 @@ def merge_stats_files(stats_files):
         timestamps += content['timestamps']
         episode_lengths += content['episode_lengths']
         episode_rewards += content['episode_rewards']
+        # Recent addition
+        episode_types += content.get('episode_types', [])
         initial_reset_timestamps.append(content['initial_reset_timestamp'])
 
     idxs = np.argsort(timestamps)
@@ -365,12 +383,17 @@ def merge_stats_files(stats_files):
     episode_lengths = np.array(episode_lengths)[idxs].tolist()
     episode_rewards = np.array(episode_rewards)[idxs].tolist()
 
+    if episode_types:
+        episode_types = np.array(episode_types)[idxs].tolist()
+    else:
+        episode_types = None
+
     if len(initial_reset_timestamps) > 0:
         initial_reset_timestamp = min(initial_reset_timestamps)
     else:
         initial_reset_timestamp = 0
 
-    return timestamps, episode_lengths, episode_rewards, initial_reset_timestamp
+    return timestamps, episode_lengths, episode_rewards, episode_types, initial_reset_timestamp
 
 def collapse_env_infos(env_infos, training_dir):
     assert len(env_infos) > 0
diff --git a/gym/monitoring/stats_recorder.py b/gym/monitoring/stats_recorder.py
index 9aaabd126..d32c2470f 100644
--- a/gym/monitoring/stats_recorder.py
+++ b/gym/monitoring/stats_recorder.py
@@ -12,6 +12,8 @@ class StatsRecorder(object):
         self.file_prefix = file_prefix
         self.episode_lengths = []
         self.episode_rewards = []
+        self.episode_types = [] # experimental addition
+        self._type = 't'
         self.timestamps = []
         self.steps = None
         self.rewards = None
@@ -22,6 +24,16 @@ class StatsRecorder(object):
         filename = '{}.stats.json'.format(self.file_prefix)
         self.path = os.path.join(self.directory, filename)
 
+    @property
+    def type(self):
+        return self._type
+
+    @type.setter
+    def type(self, type):
+        if type not in ['t', 'e']:
+            raise error.Error('Invalid episode type {}: must be t for training or e for evaluation'.format(type))
+        self._type = type
+
     def before_step(self, action):
         assert not self.closed
 
@@ -47,6 +59,10 @@ class StatsRecorder(object):
             self.save_complete()
 
         self.steps = 0
         self.rewards = 0
+        # We write the type at the beginning of the episode. If a user
+        # changes the type, it's more natural for it to apply next
+        # time the user calls reset().
+        self.episode_types.append(self._type)
 
     def save_complete(self):
         if self.steps is not None:
@@ -69,4 +85,5 @@ class StatsRecorder(object):
             'timestamps': self.timestamps,
             'episode_lengths': self.episode_lengths,
             'episode_rewards': self.episode_rewards,
+            'episode_types': self.episode_types,
         }, f)
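Usage sketch (illustrative only) of the new `mode` switch; the output directory is a placeholder. As the stats recorder comment above notes, a mode change takes effect at the next `reset()`.

    import gym

    env = gym.make('CartPole-v0')
    env.monitor.start('/tmp/cartpole-run', video_callable=False, seed=0)

    env.monitor.configure(mode='training')    # subsequent episodes are recorded with type 't'
    # ... run training episodes ...

    env.monitor.configure(mode='evaluation')  # subsequent episodes are recorded with type 'e'
    # ... run evaluation episodes ...

    env.monitor.close()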
diff --git a/gym/scoreboard/__init__.py b/gym/scoreboard/__init__.py
index 4b0b39530..086c61de6 100644
--- a/gym/scoreboard/__init__.py
+++ b/gym/scoreboard/__init__.py
@@ -8,7 +8,7 @@ http://rst.ninjs.org/
 
 import os
 
-from gym.scoreboard.client.resource import Algorithm, Evaluation, FileUpload
+from gym.scoreboard.client.resource import Algorithm, BenchmarkRun, Evaluation, FileUpload
 from gym.scoreboard.registration import registry, add_task, add_group
 
 # Discover API key from the environment. (You should never have to
diff --git a/gym/scoreboard/api.py b/gym/scoreboard/api.py
index 0b5152d7b..70ac50d16 100644
--- a/gym/scoreboard/api.py
+++ b/gym/scoreboard/api.py
@@ -15,17 +15,46 @@ logger = logging.getLogger(__name__)
 video_name_re = re.compile('^[\w.-]+\.(mp4|avi|json)$')
 metadata_name_re = re.compile('^[\w.-]+\.meta\.json$')
 
-def upload(training_dir, algorithm_id=None, writeup=None, api_key=None, ignore_open_monitors=False):
+def upload(training_dir, algorithm_id=None, writeup=None, benchmark_id=None, api_key=None, ignore_open_monitors=False):
     """Upload the results of training (as automatically recorded by your
     env's monitor) to OpenAI Gym.
 
     Args:
         training_dir (Optional[str]): A directory containing the results of a training run.
         algorithm_id (Optional[str]): An algorithm id indicating the particular version of the algorithm (including choices of parameters) you are running (visit https://gym.openai.com/algorithms to create an id)
+        benchmark_id (Optional[str]): The benchmark that these evaluations belong to. Will recursively search through training_dir for any Gym manifests. This feature is currently pre-release.
         writeup (Optional[str]): A Gist URL (of the form https://gist.github.com//) containing your writeup for this evaluation.
         api_key (Optional[str]): Your OpenAI API key. Can also be provided as an environment variable (OPENAI_GYM_API_KEY).
     """
 
+    if benchmark_id:
+        # TODO: validate the number of matching evaluations
+        benchmark_run = resource.BenchmarkRun.create(benchmark_id=benchmark_id, algorithm_id=algorithm_id)
+        benchmark_run_id = benchmark_run.id
+        recurse = True
+
+        # Don't propagate algorithm_id to Evaluation if we're running as part of a benchmark
+        algorithm_id = None
+    else:
+        benchmark_run_id = None
+        recurse = False
+
+    # Discover training directories
+    directories = []
+    if recurse:
+        for name, _, files in os.walk(training_dir):
+            if monitoring.detect_training_manifests(name, files=files):
+                directories.append(name)
+    else:
+        directories.append(training_dir)
+
+    # Actually do the uploads.
+    for training_dir in directories:
+        _upload(training_dir, algorithm_id, writeup, benchmark_run_id, api_key, ignore_open_monitors)
+
+    return benchmark_run_id
+
+def _upload(training_dir, algorithm_id=None, writeup=None, benchmark_run_id=None, api_key=None, ignore_open_monitors=False):
     if not ignore_open_monitors:
         open_monitors = monitoring._open_monitors()
         if len(open_monitors) > 0:
@@ -57,6 +86,7 @@ def upload(training_dir, algorithm_id=None, writeup=None, api_key=None, ignore_o
             algorithm={
                 'id': algorithm_id,
             },
+            benchmark_run_id=benchmark_run_id,
             writeup=writeup,
             gym_version=env_info['gym_version'],
             api_key=api_key,
@@ -89,6 +119,7 @@ def upload_training_data(training_dir, api_key=None):
     timestamps = results['timestamps']
     episode_lengths = results['episode_lengths']
     episode_rewards = results['episode_rewards']
+    episode_types = results['episode_types']
     main_seeds = results['main_seeds']
     seeds = results['seeds']
     videos = results['videos']
@@ -98,7 +129,7 @@ def upload_training_data(training_dir, api_key=None):
 
     # Do the relevant uploads
     if len(episode_lengths) > 0:
-        training_episode_batch = upload_training_episode_batch(episode_lengths, episode_rewards, timestamps, main_seeds, seeds, api_key, env_id=env_id)
+        training_episode_batch = upload_training_episode_batch(episode_lengths, episode_rewards, episode_types, timestamps, main_seeds, seeds, api_key, env_id=env_id)
     else:
         training_episode_batch = None
 
@@ -114,12 +145,13 @@ def upload_training_data(training_dir, api_key=None):
 
     return env_info, training_episode_batch, training_video
 
-def upload_training_episode_batch(episode_lengths, episode_rewards, timestamps, main_seeds, seeds, api_key=None, env_id=None):
+def upload_training_episode_batch(episode_lengths, episode_rewards, episode_types, timestamps, main_seeds, seeds, api_key=None, env_id=None):
     logger.info('[%s] Uploading %d episodes of training data', env_id, len(episode_lengths))
     file_upload = resource.FileUpload.create(purpose='episode_batch', api_key=api_key)
     file_upload.put({
         'episode_lengths': episode_lengths,
         'episode_rewards': episode_rewards,
+        'episode_types': episode_types,
         'timestamps': timestamps,
         'main_seeds': main_seeds,
         'seeds': seeds,
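Illustrative sketch of an upload under a benchmark run (not part of the patch). The directory is a placeholder, 'Atari7Ram-v0' is one of the benchmarks registered above, and the API key is read from OPENAI_GYM_API_KEY as described in the docstring.

    import gym

    # Creates a BenchmarkRun, recursively discovers monitor manifests under
    # the directory, uploads each one against that run, and returns the run id.
    benchmark_run_id = gym.upload('/tmp/atari-runs', benchmark_id='Atari7Ram-v0')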
diff --git a/gym/scoreboard/client/resource.py b/gym/scoreboard/client/resource.py
index 667a413ea..2696f97ff 100644
--- a/gym/scoreboard/client/resource.py
+++ b/gym/scoreboard/client/resource.py
@@ -13,6 +13,7 @@ def convert_to_gym_object(resp, api_key):
     types = {
         'evaluation': Evaluation,
         'file': FileUpload,
+        'benchmark_run': BenchmarkRun,
     }
 
     if isinstance(resp, list):
@@ -229,7 +230,7 @@ class APIResource(GymObject):
         if cls == APIResource:
             raise NotImplementedError(
                 'APIResource is an abstract class. You should perform '
-                'actions on its subclasses (e.g. Charge, Customer)')
+                'actions on its subclasses')
         return str(urllib.parse.quote_plus(cls.__name__.lower()))
 
     @classmethod
@@ -381,3 +382,11 @@ class Evaluation(CreateableAPIResource):
 
 class Algorithm(CreateableAPIResource):
     pass
+
+class BenchmarkRun(CreateableAPIResource, UpdateableAPIResource):
+    @classmethod
+    def class_name(cls):
+        return 'benchmark_run'
+
+    def commit(self):
+        return self.request('post', '{}/commit'.format(self.instance_path()))
diff --git a/gym/scoreboard/scoring.py b/gym/scoreboard/scoring.py
index 6e60e5dbd..bc9edf5e1 100644
--- a/gym/scoreboard/scoring.py
+++ b/gym/scoreboard/scoring.py
@@ -16,13 +16,14 @@ def score_from_remote(url):
     parsed = result.json()
     episode_lengths = parsed['episode_lengths']
     episode_rewards = parsed['episode_rewards']
+    episode_types = parsed.get('episode_types')
     timestamps = parsed['timestamps']
     # Handle legacy entries where initial_reset_timestamp wasn't set
     initial_reset_timestamp = parsed.get('initial_reset_timestamp', timestamps[0])
     env_id = parsed['env_id']
 
     spec = gym.spec(env_id)
-    return score_from_merged(episode_lengths, episode_rewards, timestamps, initial_reset_timestamp, spec.trials, spec.reward_threshold)
+    return score_from_merged(episode_lengths, episode_rewards, episode_types, timestamps, initial_reset_timestamp, spec.trials, spec.reward_threshold)
 
 def score_from_local(directory):
     """Calculate score from a local results directory"""
@@ -33,15 +34,24 @@ def score_from_local(directory):
 
     episode_lengths = results['episode_lengths']
     episode_rewards = results['episode_rewards']
+    episode_types = results['episode_types']
     timestamps = results['timestamps']
     initial_reset_timestamp = results['initial_reset_timestamp']
     spec = gym.spec(results['env_info']['env_id'])
 
-    return score_from_merged(episode_lengths, episode_rewards, timestamps, initial_reset_timestamp, spec.trials, spec.reward_threshold)
+    return score_from_merged(episode_lengths, episode_rewards, episode_types, timestamps, initial_reset_timestamp, spec.trials, spec.reward_threshold)
 
-def score_from_merged(episode_lengths, episode_rewards, timestamps, initial_reset_timestamp, trials, reward_threshold):
-    """Method to calculate the score from merged monitor files.
+def score_from_merged(episode_lengths, episode_rewards, episode_types, timestamps, initial_reset_timestamp, trials, reward_threshold):
+    """Method to calculate the score from merged monitor files. Scores
+    only a single environment; mostly legacy.
     """
+    if episode_types is not None:
+        # Select only the training episodes
+        (t_idx,) = np.where([e == 't' for e in episode_types])
+        episode_lengths = np.array(episode_lengths)[t_idx]
+        episode_rewards = np.array(episode_rewards)[t_idx]
+        timestamps = np.array(timestamps)[t_idx]
+
     # Make sure everything is a float -- no pesky ints.
     episode_rewards = np.array(episode_rewards, dtype='float64')
 
@@ -87,6 +97,12 @@ def score_from_merged(episode_lengths, episode_rewards, timestamps, initial_rese
         'seconds_in_total': seconds_in_total,
     }
 
+def benchmark_score_from_merged(benchmark, env_id, episode_lengths, episode_rewards, episode_types):
+    """Method to calculate an environment's benchmark score from merged
+    monitor files.
+    """
+    return benchmark.score(benchmark, env_id, episode_lengths, episode_rewards, episode_types)
+
 def running_mean(x, N):
     x = np.array(x, dtype='float64')
     cumsum = np.cumsum(np.insert(x, 0, 0))
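Small demo (illustrative only) of the training-episode filter added to score_from_merged, with made-up data:

    import numpy as np

    episode_types   = ['e', 't', 't', 'e']
    episode_rewards = [10.0, 1.0, 2.0, 50.0]

    (t_idx,) = np.where([e == 't' for e in episode_types])
    training_rewards = np.array(episode_rewards)[t_idx]  # array([1., 2.])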