Mirror of https://github.com/Farama-Foundation/Gymnasium.git, synced 2025-08-19 21:42:02 +00:00
Add benchmark support (#338)
* Warn if seed doesn't return a list
* Add preliminary BenchmarkRun support
* Add experimental benchmark registration
* Flesh out interface
* Make benchmarkrun upload recursive
* Add evaluation episodes
* Add benchmark scoring
* Tweak reward locations
* Tweak scoring
* Clear default metadata in Wrapper
* Improve scoring
* Expose registry; fix test
* Add initial_reset_timestamp
* Add back algorithm; fix tests
@@ -14,18 +14,20 @@ import gym
 logger = logging.getLogger()
 
 class Uploader(object):
-    def __init__(self, training_dir, algorithm_id, writeup):
+    def __init__(self, training_dir, algorithm_id, benchmark_run_id, writeup):
         self.training_dir = training_dir
         self.algorithm_id = algorithm_id
+        self.benchmark_run_id = benchmark_run_id
         self.writeup = writeup
 
     def run(self):
-        gym.upload(self.training_dir, algorithm_id=self.algorithm_id, writeup=self.writeup)
+        gym.upload(self.training_dir, algorithm_id=self.algorithm_id, benchmark_run_id=self.benchmark_run_id, writeup=self.writeup)
 
 def main():
     parser = argparse.ArgumentParser(description=None)
     parser.add_argument('-t', '--training-dir', required=True, help='What directory to upload.')
     parser.add_argument('-a', '--algorithm_id', help='Set the algorithm id.')
+    parser.add_argument('-b', '--benchmark-run-id', help='Set the benchmark run id.')
     parser.add_argument('-w', '--writeup', help='Writeup to attach.')
     parser.add_argument('-v', '--verbose', action='count', dest='verbosity', default=0, help='Set verbosity.')
     args = parser.parse_args()

@@ -35,7 +37,7 @@ def main():
     elif args.verbosity >= 1:
         logger.setLevel(logging.DEBUG)
 
-    runner = Uploader(training_dir=args.training_dir, algorithm_id=args.algorithm_id, writeup=args.writeup)
+    runner = Uploader(training_dir=args.training_dir, algorithm_id=args.algorithm_id, benchmark_run_id=args.benchmark_run_id, writeup=args.writeup)
     runner.run()
 
     return 0
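For readers unfamiliar with argparse's flag-to-attribute mapping, a minimal sketch (not part of this commit; the id below is a placeholder) of why main() can read the new flag as args.benchmark_run_id:

    import argparse

    # argparse turns '--benchmark-run-id' into the attribute 'benchmark_run_id'.
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-b', '--benchmark-run-id', help='Set the benchmark run id.')
    args = parser.parse_args(['-b', 'bmrun_abc123'])  # made-up id for illustration
    assert args.benchmark_run_id == 'bmrun_abc123'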
@@ -32,6 +32,7 @@ del logger_setup
 sanity_check_dependencies()
 
 from gym.core import Env, Space, Wrapper, ObservationWrapper, ActionWrapper, RewardWrapper
+from gym.benchmarks import benchmark_spec
 from gym.envs import make, spec
 from gym.scoreboard.api import upload
 
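With benchmark_spec re-exported at the package level, a registered benchmark can be looked up directly from gym. A minimal sketch (not part of this commit), using the Atari7Ram-v0 registration added below:

    import gym

    # Looks up the Benchmark object created in gym/benchmarks/__init__.py.
    benchmark = gym.benchmark_spec('Atari7Ram-v0')
    print(benchmark.id)                   # 'Atari7Ram-v0'
    print(sorted(benchmark.task_groups))  # the seven '-ram-v0' environment ids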
gym/benchmarks/__init__.py (new file)
@@ -0,0 +1,76 @@
+# EXPERIMENTAL: all may be removed soon
+
+import numpy as np
+
+from gym.benchmarks import scoring
+from gym.benchmarks.registration import register_benchmark, benchmark_spec, registry
+
+register_benchmark(
+    id='Atari7Pixel-v0',
+    scorer=scoring.ClipTo01ThenAverage(),
+    description='7 Atari games, with pixel observations',
+    task_groups={
+        'BeamRider-v0': [{
+            'seeds': 1,
+            'timesteps': 10000000
+        }],
+        'Breakout-v0': [{
+            'seeds': 1,
+            'timesteps': 10000000
+        }],
+        'Enduro-v0': [{
+            'seeds': 1,
+            'timesteps': 10000000
+        }],
+        'Pong-v0': [{
+            'seeds': 1,
+            'timesteps': 10000000
+        }],
+        'Qbert-v0': [{
+            'seeds': 1,
+            'timesteps': 10000000
+        }],
+        'Seaquest-v0': [{
+            'seeds': 1,
+            'timesteps': 10000000
+        }],
+        'SpaceInvaders-v0': [{
+            'seeds': 1,
+            'timesteps': 10000000
+        }],
+    })
+
+register_benchmark(
+    id='Atari7Ram-v0',
+    description='7 Atari games, with RAM observations',
+    scorer=scoring.ClipTo01ThenAverage(),
+    task_groups={
+        'BeamRider-ram-v0': [{
+            'seeds': 1,
+            'timesteps': 10000000
+        }],
+        'Breakout-ram-v0': [{
+            'seeds': 1,
+            'timesteps': 10000000
+        }],
+        'Enduro-ram-v0': [{
+            'seeds': 1,
+            'timesteps': 10000000
+        }],
+        'Pong-ram-v0': [{
+            'seeds': 1,
+            'timesteps': 10000000
+        }],
+        'Qbert-ram-v0': [{
+            'seeds': 1,
+            'timesteps': 10000000
+        }],
+        'Seaquest-ram-v0': [{
+            'seeds': 1,
+            'timesteps': 10000000
+        }],
+        'SpaceInvaders-ram-v0': [{
+            'seeds': 1,
+            'timesteps': 10000000
+        }],
+    })
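The registration call also accepts per-task reward_floor and reward_ceiling keys (they default to 0 and 100 in registration.py below). A sketch of registering a custom benchmark; the benchmark id, environment, and numbers here are invented for illustration:

    from gym.benchmarks import register_benchmark, benchmark_spec
    from gym.benchmarks import scoring

    register_benchmark(
        id='MyCartPole-v0',                                   # made-up benchmark id
        scorer=scoring.ClipTo01ThenAverage(num_episodes=10),
        description='CartPole with an explicit reward range',
        task_groups={
            'CartPole-v0': [{
                'seeds': 1,
                'timesteps': 2000,
                'reward_floor': 0,      # optional, defaults to 0
                'reward_ceiling': 200,  # optional, defaults to 100
            }],
        })

    spec = benchmark_spec('MyCartPole-v0')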
gym/benchmarks/registration.py (new file)
@@ -0,0 +1,65 @@
+# EXPERIMENTAL: all may be removed soon
+
+import collections
+import gym.envs
+import logging
+
+from gym import error
+
+logger = logging.getLogger(__name__)
+
+class Task(object):
+    def __init__(self, env_id, seeds, timesteps, reward_floor, reward_ceiling):
+        self.env_id = env_id
+        self.seeds = seeds
+        self.timesteps = timesteps
+        self.reward_floor = reward_floor
+        self.reward_ceiling = reward_ceiling
+
+class Benchmark(object):
+    def __init__(self, id, scorer, task_groups, description=None):
+        self.id = id
+        self.scorer = scorer
+        self.description = description
+
+        task_map = {}
+        for env_id, tasks in task_groups.items():
+            task_map[env_id] = []
+            for task in tasks:
+                task_map[env_id].append(Task(
+                    env_id=env_id,
+                    seeds=task['seeds'],
+                    timesteps=task['timesteps'],
+                    reward_floor=task.get('reward_floor', 0),
+                    reward_ceiling=task.get('reward_ceiling', 100),
+                ))
+        self.task_groups = task_map
+
+    def task_spec(self, env_id):
+        try:
+            return self.task_groups[env_id]
+        except KeyError:
+            raise error.Unregistered('No task with env_id {} registered for benchmark {}'.format(env_id, self.id))
+
+    def score_evaluation(self, env_id, episode_lengths, episode_rewards, episode_types, timestamps, initial_reset_timestamp):
+        return self.scorer.score_evaluation(self, env_id, episode_lengths, episode_rewards, episode_types, timestamps, initial_reset_timestamp)
+
+    def score_benchmark(self, score_map):
+        return self.scorer.score_benchmark(self, score_map)
+
+class Registry(object):
+    def __init__(self):
+        self.benchmarks = collections.OrderedDict()
+
+    def register_benchmark(self, id, **kwargs):
+        self.benchmarks[id] = Benchmark(id=id, **kwargs)
+
+    def benchmark_spec(self, id):
+        try:
+            return self.benchmarks[id]
+        except KeyError:
+            raise error.UnregisteredBenchmark('No registered benchmark with id: {}'.format(id))
+
+registry = Registry()
+register_benchmark = registry.register_benchmark
+benchmark_spec = registry.benchmark_spec
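A sketch (not from this commit) of how lookups and failures behave against this registry; the benchmark id is made up, and the scorer is left as None because it is never invoked here:

    from gym import error
    from gym.benchmarks import registration

    registry = registration.Registry()
    registry.register_benchmark(
        id='Tiny-v0',
        scorer=None,   # any object exposing score_evaluation/score_benchmark would do
        task_groups={'CartPole-v0': [{'seeds': 1, 'timesteps': 100}]},
    )

    tasks = registry.benchmark_spec('Tiny-v0').task_spec('CartPole-v0')
    assert tasks[0].reward_ceiling == 100   # default filled in by Benchmark.__init__

    try:
        registry.benchmark_spec('Missing-v0')
    except error.UnregisteredBenchmark:
        pass   # unknown ids raise rather than returning None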
gym/benchmarks/scoring.py (new file)
@@ -0,0 +1,99 @@
+import logging
+import numpy as np
+from gym import envs
+
+logger = logging.getLogger(__name__)
+
+class ClipTo01ThenAverage(object):
+    def __init__(self, num_episodes=100):
+        self.num_episodes = num_episodes
+
+    def score_evaluation(self, benchmark, env_id, episode_lengths, episode_rewards, episode_types, timestamps, initial_reset_timestamp):
+        tasks = benchmark.task_groups[env_id]
+        spec = envs.spec(env_id)
+
+        (t_idx,) = np.where([t == 't' for t in episode_types]) # training episodes
+        (e_idx,) = np.where([t == 'e' for t in episode_types]) # evaluation episodes
+        if len(e_idx) == 0:
+            # If no episodes marked for evaluation, consider
+            # everything both a training and evaluation episode.
+            (t_idx,) = np.where([True for t in episode_types])
+            (e_idx,) = np.where([True for t in episode_types])
+
+        training_lengths = np.array(episode_lengths)[t_idx]
+        training_rewards = np.array(episode_rewards)[t_idx]
+
+        evaluation_lengths = np.array(episode_lengths)[e_idx]
+        evaluation_rewards = np.array(episode_rewards)[e_idx]
+
+        # How many training timesteps have elapsed by the end of each
+        # episode. Not to be confused with Unix timestamps.
+        elapsed_timesteps = np.cumsum(training_lengths)
+
+        scores = []
+        solves = []
+        rewards = []
+        _timestamps = []
+        for task in tasks:
+            # Find the first episode where we're over the allotted
+            # training timesteps.
+            (cutoff,) = np.where(elapsed_timesteps > task.timesteps)
+            if len(cutoff) > 0:
+                cutoff_idx = cutoff[-1]
+                orig_cutoff_idx = t_idx[cutoff_idx] # cutoff index in the original
+                (allowed_e_idx,) = np.where(e_idx < orig_cutoff_idx) # restrict to earlier episodes
+            else:
+                # All episodes are fair game
+                allowed_e_idx = e_idx
+
+            if len(allowed_e_idx) > 0:
+                last_timestamp = timestamps[allowed_e_idx[-1]]
+            else:
+                # If we don't have any evaluation episodes, then the
+                # last valid timestamp is when we started.
+                last_timestamp = initial_reset_timestamp
+
+            # Grab the last num_episodes evaluation episodes from
+            # before the cutoff (at which point we've gathered too
+            # much experience).
+            #
+            # This probably won't work long-term but is fine for now.
+            allowed_episode_rewards = np.array(episode_rewards)[allowed_e_idx]
+            reward = allowed_episode_rewards[-self.num_episodes:]
+
+            if len(reward) == 0:
+                logger.info('No rewards for %s', env_id)
+                scores.append(0)
+                return
+
+            floor = task.reward_floor
+            ceiling = task.reward_ceiling
+
+            # Grab the indexes where we reached the ceiling
+            solved = reward >= ceiling
+            # Linearly rescale rewards to between 0 and 1
+            clipped = np.clip((reward - floor) / (ceiling - floor), 0, 1)
+
+            # Take the mean rescaled score
+            score = np.mean(clipped)
+            scores.append(score)
+            # Record the list of solved episodes
+            solves.append(solved)
+            # Record the list of rewards
+            rewards.append(reward)
+            # Record the timestamp of the last episode
+            _timestamps.append(last_timestamp)
+
+        return {
+            'rewards': rewards,
+            'scores': scores,
+            'solves': solves,
+            'timestamps': _timestamps,
+        }
+
+    def score_benchmark(self, benchmark, episode_scores):
+        all_scores = []
+        for env_id, scores in episode_scores.items():
+            all_scores += scores
+
+        return np.mean(all_scores)
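The heart of ClipTo01ThenAverage is the linear rescaling of each evaluation reward into [0, 1] before averaging. A worked example with invented numbers, assuming a task with reward_floor=0 and reward_ceiling=100:

    import numpy as np

    reward = np.array([-5.0, 20.0, 50.0, 120.0])   # hypothetical evaluation rewards
    floor, ceiling = 0.0, 100.0

    clipped = np.clip((reward - floor) / (ceiling - floor), 0, 1)
    # -> [0.0, 0.2, 0.5, 1.0]: the -5 clips up to 0, the 120 clips down to 1
    score = np.mean(clipped)    # 0.425
    solved = reward >= ceiling  # [False, False, False, True]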
gym/benchmarks/tests/test_benchmark.py (new file)
@@ -0,0 +1,55 @@
+import numpy as np
+
+import gym
+from gym import monitoring
+from gym.monitoring.tests import helpers
+
+from gym.benchmarks import registration, scoring
+
+def test():
+    benchmark = registration.Benchmark(
+        id='MyBenchmark-v0',
+        scorer=scoring.ClipTo01ThenAverage(),
+        task_groups={
+            'CartPole-v0': [{
+                'seeds': 1,
+                'timesteps': 5
+            }, {
+                'seeds': 1,
+                'timesteps': 100
+            }],
+        })
+
+    with helpers.tempdir() as temp:
+        env = gym.make('CartPole-v0')
+        env.monitor.start(temp, video_callable=False, seed=0)
+
+        env.monitor.configure(mode='evaluation')
+        rollout(env)
+
+        env.monitor.configure(mode='training')
+        for i in range(2):
+            rollout(env)
+
+        env.monitor.configure(mode='evaluation')
+        rollout(env, good=True)
+
+        env.monitor.close()
+        results = monitoring.load_results(temp)
+        evaluation_score = benchmark.score_evaluation('CartPole-v0', results['episode_lengths'], results['episode_rewards'], results['episode_types'], results['timestamps'], results['initial_reset_timestamp'])
+        benchmark_score = benchmark.score_benchmark({
+            'CartPole-v0': evaluation_score['scores'],
+        })
+
+        assert np.all(np.isclose(evaluation_score['scores'], [0.089999999999999997, 0.27000000000000002])), "evaluation_score={}".format(evaluation_score)
+        assert np.isclose(benchmark_score, 0.18), "benchmark_score={}".format(benchmark_score)
+
+def rollout(env, good=False):
+    env.reset()
+
+    action = 0
+    d = False
+    while not d:
+        if good:
+            action = 1 - action
+        o, r, d, i = env.step(action)
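The final assertion in the test follows directly from ClipTo01ThenAverage.score_benchmark, which just flattens and averages the per-task scores. A sketch of the arithmetic, reusing the expected scores from the assertion above:

    import numpy as np

    episode_scores = {'CartPole-v0': [0.09, 0.27]}   # the two per-task scores
    all_scores = []
    for env_id, scores in episode_scores.items():
        all_scores += scores
    assert np.isclose(np.mean(all_scores), 0.18)     # matches the benchmark_score assertion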
gym/error.py
@@ -5,7 +5,19 @@ class Error(Exception):
 
 # Local errors
 
-class UnregisteredEnv(Error):
+class Unregistered(Error):
+    """Raised when the user requests an item from the registry that does
+    not actually exist.
+    """
+    pass
+
+class UnregisteredEnv(Unregistered):
+    """Raised when the user requests an env from the registry that does
+    not actually exist.
+    """
+    pass
+
+class UnregisteredBenchmark(Unregistered):
     """Raised when the user requests an env from the registry that does
     not actually exist.
     """
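A sketch (not from this commit) of how the new hierarchy reads from calling code; because UnregisteredBenchmark subclasses Unregistered, the base class catches both missing envs and missing benchmarks:

    import gym
    from gym import error

    try:
        gym.benchmark_spec('DoesNotExist-v0')   # made-up id
    except error.Unregistered as e:
        print(e)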
@@ -1,3 +1,3 @@
-from gym.monitoring.monitor import Monitor, load_results, _open_monitors
+from gym.monitoring.monitor import Monitor, load_results, detect_training_manifests, _open_monitors
 from gym.monitoring.stats_recorder import StatsRecorder
 from gym.monitoring.video_recorder import VideoRecorder
@@ -17,8 +17,10 @@ logger = logging.getLogger(__name__)
 FILE_PREFIX = 'openaigym'
 MANIFEST_PREFIX = FILE_PREFIX + '.manifest'
 
-def detect_training_manifests(training_dir):
-    return [os.path.join(training_dir, f) for f in os.listdir(training_dir) if f.startswith(MANIFEST_PREFIX + '.')]
+def detect_training_manifests(training_dir, files=None):
+    if files is None:
+        files = os.listdir(training_dir)
+    return [os.path.join(training_dir, f) for f in files if f.startswith(MANIFEST_PREFIX + '.')]
 
 def detect_monitor_files(training_dir):
     return [os.path.join(training_dir, f) for f in os.listdir(training_dir) if f.startswith(FILE_PREFIX + '.')]

@@ -74,7 +76,6 @@ class Monitor(object):
 
     Attributes:
         id (Optional[str]): The ID of the monitored environment
-
    """
 
    def __init__(self, env):

@@ -216,15 +217,28 @@ class Monitor(object):
 
        logger.info('''Finished writing results. You can upload them to the scoreboard via gym.upload(%r)''', self.directory)
 
-    def configure(self, video_callable=None):
+    def configure(self, video_callable=None, mode=None):
        """Reconfigure the monitor.
 
            video_callable (function): Whether to record video to upload to the scoreboard.
+            mode (['evaluation', 'training']): Whether this is an evaluation or training episode.
        """
+
+        if not self.enabled:
+            raise error.Error('Can only configure an enabled monitor. (HINT: did you already close this monitor?)')
 
        if video_callable is not None:
            self.video_callable = video_callable
 
+        if mode is not None:
+            if mode == 'evaluation':
+                type = 'e'
+            elif mode == 'training':
+                type = 't'
+            else:
+                raise error.Error('Invalid mode {}: must be "training" or "evaluation"'.format(mode))
+            self.stats_recorder.type = type
+
    def _before_step(self, action):
        if not self.enabled: return
        self.stats_recorder.before_step(action)

@@ -331,7 +345,7 @@ def load_results(training_dir):
        main_seeds.append(None)
 
    env_info = collapse_env_infos(env_infos, training_dir)
-    timestamps, episode_lengths, episode_rewards, initial_reset_timestamp = merge_stats_files(stats_files)
+    timestamps, episode_lengths, episode_rewards, episode_types, initial_reset_timestamp = merge_stats_files(stats_files)
 
    return {
        'manifests': manifests,

@@ -339,6 +353,7 @@ def load_results(training_dir):
        'timestamps': timestamps,
        'episode_lengths': episode_lengths,
        'episode_rewards': episode_rewards,
+        'episode_types': episode_types,
        'initial_reset_timestamp': initial_reset_timestamp,
        'videos': videos,
        'main_seeds': main_seeds,

@@ -349,6 +364,7 @@ def merge_stats_files(stats_files):
    timestamps = []
    episode_lengths = []
    episode_rewards = []
+    episode_types = []
    initial_reset_timestamps = []
 
    for path in stats_files:

@@ -358,6 +374,8 @@ def merge_stats_files(stats_files):
        timestamps += content['timestamps']
        episode_lengths += content['episode_lengths']
        episode_rewards += content['episode_rewards']
+        # Recent addition
+        episode_types += content.get('episode_types', [])
        initial_reset_timestamps.append(content['initial_reset_timestamp'])
 
    idxs = np.argsort(timestamps)

@@ -365,12 +383,17 @@ def merge_stats_files(stats_files):
    episode_lengths = np.array(episode_lengths)[idxs].tolist()
    episode_rewards = np.array(episode_rewards)[idxs].tolist()
 
+    if episode_types:
+        episode_types = np.array(episode_types)[idxs].tolist()
+    else:
+        episode_types = None
+
    if len(initial_reset_timestamps) > 0:
        initial_reset_timestamp = min(initial_reset_timestamps)
    else:
        initial_reset_timestamp = 0
 
-    return timestamps, episode_lengths, episode_rewards, initial_reset_timestamp
+    return timestamps, episode_lengths, episode_rewards, episode_types, initial_reset_timestamp
 
 def collapse_env_infos(env_infos, training_dir):
    assert len(env_infos) > 0
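The new files= parameter lets callers test a directory listing they already have (for example from os.walk) without an extra os.listdir call; this is exactly how the recursive benchmark upload below uses it. A sketch with a placeholder path:

    import os
    from gym import monitoring

    for dirname, _, files in os.walk('/tmp/benchmark-results'):   # placeholder path
        if monitoring.detect_training_manifests(dirname, files=files):
            print('found monitor output in', dirname)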
@@ -12,6 +12,8 @@ class StatsRecorder(object):
        self.file_prefix = file_prefix
        self.episode_lengths = []
        self.episode_rewards = []
+        self.episode_types = [] # experimental addition
+        self._type = 't'
        self.timestamps = []
        self.steps = None
        self.rewards = None

@@ -22,6 +24,16 @@ class StatsRecorder(object):
        filename = '{}.stats.json'.format(self.file_prefix)
        self.path = os.path.join(self.directory, filename)
 
+    @property
+    def type(self):
+        return self._type
+
+    @type.setter
+    def type(self, type):
+        if type not in ['t', 'e']:
+            raise error.Error('Invalid episode type {}: must be t for training or e for evaluation'.format(type))
+        self._type = type
+
    def before_step(self, action):
        assert not self.closed
 

@@ -47,6 +59,10 @@ class StatsRecorder(object):
        self.save_complete()
        self.steps = 0
        self.rewards = 0
+        # We write the type at the beginning of the episode. If a user
+        # changes the type, it's more natural for it to apply next
+        # time the user calls reset().
+        self.episode_types.append(self._type)
 
    def save_complete(self):
        if self.steps is not None:

@@ -69,4 +85,5 @@ class StatsRecorder(object):
            'timestamps': self.timestamps,
            'episode_lengths': self.episode_lengths,
            'episode_rewards': self.episode_rewards,
+            'episode_types': self.episode_types,
        }, f)
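For context, a sketch of the shape of the stats JSON written by the recorder after this change; the numbers are invented, and keys outside this hunk (such as initial_reset_timestamp) are assumed from how merge_stats_files reads the file:

    stats = {
        'initial_reset_timestamp': 1461234567.0,   # assumed key, not shown in this hunk
        'timestamps': [1461234570.1, 1461234571.3, 1461234572.9],
        'episode_lengths': [9, 200, 45],
        'episode_rewards': [9.0, 200.0, 45.0],
        'episode_types': ['e', 't', 'e'],           # one entry per reset(), 't' or 'e'
    }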
@@ -8,7 +8,7 @@ http://rst.ninjs.org/
 
 import os
 
-from gym.scoreboard.client.resource import Algorithm, Evaluation, FileUpload
+from gym.scoreboard.client.resource import Algorithm, BenchmarkRun, Evaluation, FileUpload
 from gym.scoreboard.registration import registry, add_task, add_group
 
 # Discover API key from the environment. (You should never have to
@@ -15,17 +15,46 @@ logger = logging.getLogger(__name__)
 video_name_re = re.compile('^[\w.-]+\.(mp4|avi|json)$')
 metadata_name_re = re.compile('^[\w.-]+\.meta\.json$')
 
-def upload(training_dir, algorithm_id=None, writeup=None, api_key=None, ignore_open_monitors=False):
+def upload(training_dir, algorithm_id=None, writeup=None, benchmark_id=None, api_key=None, ignore_open_monitors=False):
    """Upload the results of training (as automatically recorded by your
    env's monitor) to OpenAI Gym.
 
    Args:
        training_dir (Optional[str]): A directory containing the results of a training run.
        algorithm_id (Optional[str]): An algorithm id indicating the particular version of the algorithm (including choices of parameters) you are running (visit https://gym.openai.com/algorithms to create an id)
+        benchmark_id (Optional[str]): The benchmark that these evaluations belong to. Will recursively search through training_dir for any Gym manifests. This feature is currently pre-release.
        writeup (Optional[str]): A Gist URL (of the form https://gist.github.com/<user>/<id>) containing your writeup for this evaluation.
        api_key (Optional[str]): Your OpenAI API key. Can also be provided as an environment variable (OPENAI_GYM_API_KEY).
    """
 
+    if benchmark_id:
+        # TODO: validate the number of matching evaluations
+        benchmark_run = resource.BenchmarkRun.create(benchmark_id=benchmark_id, algorithm_id=algorithm_id)
+        benchmark_run_id = benchmark_run.id
+        recurse = True
+
+        # Don't propagate algorithm_id to Evaluation if we're running as part of a benchmark
+        algorithm_id = None
+    else:
+        benchmark_run_id = None
+        recurse = False
+
+    # Discover training directories
+    directories = []
+    if recurse:
+        for name, _, files in os.walk(training_dir):
+            if monitoring.detect_training_manifests(name, files=files):
+                directories.append(name)
+    else:
+        directories.append(training_dir)
+
+    # Actually do the uploads.
+    for training_dir in directories:
+        _upload(training_dir, algorithm_id, writeup, benchmark_run_id, api_key, ignore_open_monitors)
+
+    return benchmark_run_id
+
+def _upload(training_dir, algorithm_id=None, writeup=None, benchmark_run_id=None, api_key=None, ignore_open_monitors=False):
    if not ignore_open_monitors:
        open_monitors = monitoring._open_monitors()
        if len(open_monitors) > 0:

@@ -57,6 +86,7 @@ def upload(training_dir, algorithm_id=None, writeup=None, api_key=None, ignore_o
        algorithm={
            'id': algorithm_id,
        },
+        benchmark_run_id=benchmark_run_id,
        writeup=writeup,
        gym_version=env_info['gym_version'],
        api_key=api_key,

@@ -89,6 +119,7 @@ def upload_training_data(training_dir, api_key=None):
    timestamps = results['timestamps']
    episode_lengths = results['episode_lengths']
    episode_rewards = results['episode_rewards']
+    episode_types = results['episode_types']
    main_seeds = results['main_seeds']
    seeds = results['seeds']
    videos = results['videos']

@@ -98,7 +129,7 @@ def upload_training_data(training_dir, api_key=None):
 
    # Do the relevant uploads
    if len(episode_lengths) > 0:
-        training_episode_batch = upload_training_episode_batch(episode_lengths, episode_rewards, timestamps, main_seeds, seeds, api_key, env_id=env_id)
+        training_episode_batch = upload_training_episode_batch(episode_lengths, episode_rewards, episode_types, timestamps, main_seeds, seeds, api_key, env_id=env_id)
    else:
        training_episode_batch = None
 

@@ -114,12 +145,13 @@ def upload_training_data(training_dir, api_key=None):
 
    return env_info, training_episode_batch, training_video
 
-def upload_training_episode_batch(episode_lengths, episode_rewards, timestamps, main_seeds, seeds, api_key=None, env_id=None):
+def upload_training_episode_batch(episode_lengths, episode_rewards, episode_types, timestamps, main_seeds, seeds, api_key=None, env_id=None):
    logger.info('[%s] Uploading %d episodes of training data', env_id, len(episode_lengths))
    file_upload = resource.FileUpload.create(purpose='episode_batch', api_key=api_key)
    file_upload.put({
        'episode_lengths': episode_lengths,
        'episode_rewards': episode_rewards,
+        'episode_types': episode_types,
        'timestamps': timestamps,
        'main_seeds': main_seeds,
        'seeds': seeds,
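Taken together, the new upload path can be exercised as in the following sketch (not from this commit; the directory and ids are placeholders):

    import gym

    benchmark_run_id = gym.upload(
        '/tmp/benchmark-results',        # searched recursively for Gym manifests
        benchmark_id='Atari7Ram-v0',     # creates a BenchmarkRun server-side
        algorithm_id='alg_MyAlgorithm',  # attached to the run, not to each Evaluation
    )
    print(benchmark_run_id)              # id of the newly created benchmark run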
@@ -13,6 +13,7 @@ def convert_to_gym_object(resp, api_key):
    types = {
        'evaluation': Evaluation,
        'file': FileUpload,
+        'benchmark_run': BenchmarkRun,
    }
 
    if isinstance(resp, list):

@@ -229,7 +230,7 @@ class APIResource(GymObject):
        if cls == APIResource:
            raise NotImplementedError(
                'APIResource is an abstract class. You should perform '
-                'actions on its subclasses (e.g. Charge, Customer)')
+                'actions on its subclasses')
        return str(urllib.parse.quote_plus(cls.__name__.lower()))
 
    @classmethod

@@ -381,3 +382,11 @@ class Evaluation(CreateableAPIResource):
 
 class Algorithm(CreateableAPIResource):
    pass
+
+class BenchmarkRun(CreateableAPIResource, UpdateableAPIResource):
+    @classmethod
+    def class_name(cls):
+        return 'benchmark_run'
+
+    def commit(self):
+        return self.request('post', '{}/commit'.format(self.instance_path()))
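A sketch (not from this commit) of the new resource in use, mirroring the call made in gym/scoreboard/api.py; the ids are placeholders:

    from gym.scoreboard.client import resource

    run = resource.BenchmarkRun.create(
        benchmark_id='Atari7Ram-v0',
        algorithm_id='alg_MyAlgorithm',
    )
    print(run.id)   # forwarded to each Evaluation as benchmark_run_id
    run.commit()    # POSTs to '<instance_path>/commit' once the evaluations are uploaded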
@@ -16,13 +16,14 @@ def score_from_remote(url):
    parsed = result.json()
    episode_lengths = parsed['episode_lengths']
    episode_rewards = parsed['episode_rewards']
+    episode_types = parsed.get('episode_types')
    timestamps = parsed['timestamps']
    # Handle legacy entries where initial_reset_timestamp wasn't set
    initial_reset_timestamp = parsed.get('initial_reset_timestamp', timestamps[0])
    env_id = parsed['env_id']
 
    spec = gym.spec(env_id)
-    return score_from_merged(episode_lengths, episode_rewards, timestamps, initial_reset_timestamp, spec.trials, spec.reward_threshold)
+    return score_from_merged(episode_lengths, episode_rewards, episode_types, timestamps, initial_reset_timestamp, spec.trials, spec.reward_threshold)
 
 def score_from_local(directory):
    """Calculate score from a local results directory"""

@@ -33,15 +34,24 @@ def score_from_local(directory):
 
    episode_lengths = results['episode_lengths']
    episode_rewards = results['episode_rewards']
+    episode_types = results['episode_types']
    timestamps = results['timestamps']
    initial_reset_timestamp = results['initial_reset_timestamp']
    spec = gym.spec(results['env_info']['env_id'])
 
-    return score_from_merged(episode_lengths, episode_rewards, timestamps, initial_reset_timestamp, spec.trials, spec.reward_threshold)
+    return score_from_merged(episode_lengths, episode_rewards, episode_types, timestamps, initial_reset_timestamp, spec.trials, spec.reward_threshold)
 
-def score_from_merged(episode_lengths, episode_rewards, timestamps, initial_reset_timestamp, trials, reward_threshold):
-    """Method to calculate the score from merged monitor files.
+def score_from_merged(episode_lengths, episode_rewards, episode_types, timestamps, initial_reset_timestamp, trials, reward_threshold):
+    """Method to calculate the score from merged monitor files. Scores
+    only a single environment; mostly legacy.
    """
+    if episode_types is not None:
+        # Select only the training episodes
+        t_idx = np.where([e == 't' for e in episode_types])
+        episode_lengths = np.array(episode_lengths)[t_idx]
+        episode_rewards = np.array(episode_rewards)[t_idx]
+        timestamps = np.array(timestamps)[t_idx]
+
    # Make sure everything is a float -- no pesky ints.
    episode_rewards = np.array(episode_rewards, dtype='float64')
 

@@ -87,6 +97,12 @@ def score_from_merged(episode_lengths, episode_rewards, timestamps, initial_rese
        'seconds_in_total': seconds_in_total,
    }
 
+def benchmark_score_from_merged(benchmark, env_id, episode_lengths, episode_rewards, episode_types):
+    """Method to calculate an environment's benchmark score from merged
+    monitor files.
+    """
+    return benchmark.score(benchmark, env_id, episode_lengths, episode_rewards, episode_types)
+
 def running_mean(x, N):
    x = np.array(x, dtype='float64')
    cumsum = np.cumsum(np.insert(x, 0, 0))
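A worked example (values invented) of the new training-episode filter in score_from_merged: only episodes tagged 't' are kept before the score is computed.

    import numpy as np

    episode_types   = ['e', 't', 't', 'e', 't']
    episode_rewards = [9.0, 50.0, 60.0, 45.0, 70.0]

    t_idx = np.where([e == 't' for e in episode_types])
    kept = np.array(episode_rewards)[t_idx]
    # kept -> array([50., 60., 70.])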