Remove scoreboard references (#750)

Greg Brockman
2017-10-15 16:10:02 -07:00
committed by GitHub
parent b5576dc23a
commit 7b5aceaa84
21 changed files with 9 additions and 2794 deletions

View File

@@ -242,10 +242,9 @@ Examples
See the ``examples`` directory.
- Run `examples/agents/random_agent.py <https://github.com/openai/gym/blob/master/examples/agents/random_agent.py>`_ to run a simple random agent and upload the results to the scoreboard.
- Run `examples/agents/cem.py <https://github.com/openai/gym/blob/master/examples/agents/cem.py>`_ to run an actual learning agent (using the cross-entropy method) and upload the results to the scoreboard.
- Run `examples/agents/random_agent.py <https://github.com/openai/gym/blob/master/examples/agents/random_agent.py>`_ to run a simple random agent (a minimal sketch of what it does appears below).
- Run `examples/agents/cem.py <https://github.com/openai/gym/blob/master/examples/agents/cem.py>`_ to run an actual learning agent (using the cross-entropy method).
- Run `examples/scripts/list_envs <https://github.com/openai/gym/blob/master/examples/scripts/list_envs>`_ to generate a list of all environments. (You can also just `browse <https://gym.openai.com/docs>`_ the list on our site.)
- Run `examples/scripts/upload <https://github.com/openai/gym/blob/master/examples/scripts/upload>`_ to upload the recorded output from ``random_agent.py`` or ``cem.py``. Make sure to obtain an `API key <https://gym.openai.com/settings/profile>`_.
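As a point of reference, here is a minimal sketch of what ``random_agent.py`` boils down to under the classic Gym API of this era; the environment id and episode count are arbitrary examples, not taken from the script:

```python
import gym

# Hypothetical minimal random agent, roughly what examples/agents/random_agent.py does.
env = gym.make('CartPole-v0')
for episode in range(3):
    observation = env.reset()
    done = False
    total_reward = 0.0
    while not done:
        action = env.action_space.sample()                  # sample a random action
        observation, reward, done, info = env.step(action)  # old 4-tuple step API
        total_reward += reward
    print('episode %d finished with total reward %.1f' % (episode, total_reward))
env.close()
```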
Testing
=======

View File

@@ -96,6 +96,3 @@ if __name__ == '__main__':
writefile('info.json', json.dumps(info))
env.close()
logger.info("Successfully ran cross-entropy method. Now trying to upload results to the scoreboard. If it breaks, you can always just try re-uploading the same results.")
gym.upload(outdir)

View File

@@ -61,8 +61,3 @@ if __name__ == '__main__':
# Close the env and write monitor result info to disk
env.close()
# Upload to the scoreboard. We could also do this from another
# process if we wanted.
logger.info("Successfully ran RandomAgent. Now trying to upload results to the scoreboard. If it breaks, you can always just try re-uploading the same results.")
gym.upload(outdir)
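With the upload call gone from both example agents, anything recorded by the Monitor wrapper simply stays on disk. Below is a hedged sketch of how those results can still be read back locally using gym.monitoring.load_results, the same helper the deleted upload path relied on; the output directory, environment id, and episode count are placeholders:

```python
import gym
from gym import monitoring, wrappers

outdir = '/tmp/random-agent-results'   # example path; force=True just overwrites a previous run
env = wrappers.Monitor(gym.make('CartPole-v0'), outdir, force=True)

for _ in range(5):
    done = False
    env.reset()
    while not done:
        _, reward, done, _ = env.step(env.action_space.sample())
env.close()

# Where gym.upload(outdir) used to go, the recorded data can simply be inspected locally.
results = monitoring.load_results(outdir)
print('episodes recorded:', len(results['episode_rewards']))
print('episode rewards:', results['episode_rewards'])
```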

View File

@@ -1,46 +0,0 @@
#!/usr/bin/env python
#
# This script assumes you have set an OPENAI_GYM_API_KEY environment
# variable. You can find your API key in the web interface:
# https://gym.openai.com/settings/profile.
import argparse
import logging
import os
import sys
import gym
# In modules, use `logger = logging.getLogger(__name__)`
logger = logging.getLogger()
class Uploader(object):
def __init__(self, training_dir, algorithm_id, benchmark_run_id, writeup):
self.training_dir = training_dir
self.algorithm_id = algorithm_id
self.benchmark_run_id = benchmark_run_id
self.writeup = writeup
def run(self):
gym.upload(self.training_dir, algorithm_id=self.algorithm_id, benchmark_run_id=self.benchmark_run_id, writeup=self.writeup)
def main():
parser = argparse.ArgumentParser(description=None)
parser.add_argument('-t', '--training-dir', required=True, help='What directory to upload.')
parser.add_argument('-a', '--algorithm_id', help='Set the algorithm id.')
parser.add_argument('-b', '--benchmark-run-id', help='Set the benchmark run id.')
parser.add_argument('-w', '--writeup', help='Writeup to attach.')
parser.add_argument('-v', '--verbose', action='count', dest='verbosity', default=0, help='Set verbosity.')
args = parser.parse_args()
if args.verbosity == 0:
logger.setLevel(logging.INFO)
elif args.verbosity >= 1:
logger.setLevel(logging.DEBUG)
runner = Uploader(training_dir=args.training_dir, algorithm_id=args.algorithm_id, benchmark_run_id=args.benchmark_run_id, writeup=args.writeup)
runner.run()
return 0
if __name__ == '__main__':
sys.exit(main())
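For context, the deleted script above is a thin argparse wrapper around gym.upload; the equivalent direct call, matching the signature documented further down in this diff, looked roughly like the sketch below, with every value an illustrative placeholder. After this commit, gym.upload raises NotImplementedError instead.

```python
import gym

# All arguments here are placeholders, not real ids or keys.
gym.upload(
    '/tmp/random-agent-results',                     # training_dir written by the Monitor wrapper
    algorithm_id='alg_example',                      # optional algorithm identifier
    writeup='https://gist.github.com/<user>/<id>',   # optional Gist writeup URL
    api_key='example-key',                           # or export OPENAI_GYM_API_KEY instead
)
```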

File diff suppressed because it is too large

View File

@@ -1,283 +1,2 @@
import logging
import json
import os
import re
import tarfile
import tempfile
from gym import benchmark_spec, error, monitoring
from gym.scoreboard.client import resource, util
import numpy as np
MAX_VIDEOS = 100
logger = logging.getLogger(__name__)
video_name_re = re.compile('^[\w.-]+\.(mp4|avi|json)$')
metadata_name_re = re.compile('^[\w.-]+\.meta\.json$')
def upload(training_dir, algorithm_id=None, writeup=None, tags=None, benchmark_id=None, api_key=None, ignore_open_monitors=False, skip_videos=False):
"""Upload the results of training (as automatically recorded by your
env's monitor) to OpenAI Gym.
Args:
training_dir (str): A directory containing the results of a training run.
algorithm_id (Optional[str]): An algorithm id indicating the particular version of the algorithm (including choices of parameters) you are running (visit https://gym.openai.com/algorithms to create an id). If the id doesn't match an existing server id it will create a new algorithm using algorithm_id as the name
benchmark_id (Optional[str]): The benchmark that these evaluations belong to. Will recursively search through training_dir for any Gym manifests. This feature is currently pre-release.
writeup (Optional[str]): A Gist URL (of the form https://gist.github.com/<user>/<id>) containing your writeup for this evaluation.
tags (Optional[dict]): A dictionary of key/values to store with the benchmark run (ignored for nonbenchmark evaluations). Must be jsonable.
api_key (Optional[str]): Your OpenAI API key. Can also be provided as an environment variable (OPENAI_GYM_API_KEY).
ignore_open_monitors (Optional[bool]): If True, skip the check for open monitors before uploading. An open monitor can indicate that data has not been completely written. Defaults to False.
skip_videos (Optional[bool]): Whether to skip videos when uploading. Can be useful when submitting a benchmark with many trials. Defaults to False.
"""
if benchmark_id:
return _upload_benchmark(
training_dir,
algorithm_id,
benchmark_id,
benchmark_run_tags=tags,
api_key=api_key,
ignore_open_monitors=ignore_open_monitors,
skip_videos=skip_videos,
)
else:
if tags is not None:
logger.warning("Tags are NOT uploaded for evaluation submissions.")
# Single evaluation upload
evaluation = _upload(
training_dir,
algorithm_id,
writeup,
benchmark_run_id=None,
api_key=api_key,
ignore_open_monitors=ignore_open_monitors,
skip_videos=skip_videos,
)
logger.info("""
****************************************************
You successfully uploaded your evaluation on %s to
OpenAI Gym! You can find it at:
%s
****************************************************
""".rstrip(), evaluation.env, evaluation.web_url())
return None
def _upload_benchmark(training_dir, algorithm_id, benchmark_id, benchmark_run_tags, api_key, ignore_open_monitors, skip_videos):
# We're uploading a benchmark run.
directories = []
env_ids = []
for name, _, files in os.walk(training_dir):
manifests = monitoring.detect_training_manifests(name, files=files)
if manifests:
env_info = monitoring.load_env_info_from_manifests(manifests, training_dir)
env_ids.append(env_info['env_id'])
directories.append(name)
# Validate against benchmark spec
try:
spec = benchmark_spec(benchmark_id)
except error.UnregisteredBenchmark:
raise error.Error("Invalid benchmark id: {}. Are you using a benchmark registered in gym/benchmarks/__init__.py?".format(benchmark_id))
spec_env_ids = [task.env_id for task in spec.tasks for _ in range(task.trials)]
if not env_ids:
raise error.Error("Could not find any evaluations in {}".format(training_dir))
# This could be more stringent about mixing evaluations
if sorted(env_ids) != sorted(spec_env_ids):
logger.info("WARNING: Evaluations do not match spec for benchmark %s. In %s, we found evaluations for %s, expected %s", benchmark_id, training_dir, sorted(env_ids), sorted(spec_env_ids))
tags = json.dumps(benchmark_run_tags)
_create_with_retries = util.retry_exponential_backoff(
resource.BenchmarkRun.create,
(error.APIConnectionError,),
max_retries=5,
interval=3,
)
benchmark_run = _create_with_retries(benchmark_id=benchmark_id, algorithm_id=algorithm_id, tags=tags)
benchmark_run_id = benchmark_run.id
# Actually do the uploads.
for training_dir in directories:
# N.B. we don't propagate algorithm_id to Evaluation if we're running as part of a benchmark
_upload_with_retries = util.retry_exponential_backoff(
_upload,
(error.APIConnectionError,),
max_retries=5,
interval=3,
)
_upload_with_retries(training_dir, None, None, benchmark_run_id, api_key, ignore_open_monitors, skip_videos)
logger.info("""
****************************************************
You successfully uploaded your benchmark on %s to
OpenAI Gym! You can find it at:
%s
****************************************************
""".rstrip(), benchmark_id, benchmark_run.web_url())
return benchmark_run_id
def _upload(training_dir, algorithm_id=None, writeup=None, benchmark_run_id=None, api_key=None, ignore_open_monitors=False, skip_videos=False):
if not ignore_open_monitors:
open_monitors = monitoring._open_monitors()
if len(open_monitors) > 0:
envs = [m.env.spec.id if m.env.spec else '(unknown)' for m in open_monitors]
raise error.Error("Still have an open monitor on {}. You must run 'env.close()' before uploading.".format(', '.join(envs)))
env_info, training_episode_batch, training_video = upload_training_data(training_dir, api_key=api_key, skip_videos=skip_videos)
env_id = env_info['env_id']
training_episode_batch_id = training_video_id = None
if training_episode_batch:
training_episode_batch_id = training_episode_batch.id
if training_video:
training_video_id = training_video.id
if logger.level <= logging.INFO:
if training_episode_batch_id is not None and training_video_id is not None:
logger.info('[%s] Creating evaluation object from %s with learning curve and training video', env_id, training_dir)
elif training_episode_batch_id is not None:
logger.info('[%s] Creating evaluation object from %s with learning curve', env_id, training_dir)
elif training_video_id is not None:
logger.info('[%s] Creating evaluation object from %s with training video', env_id, training_dir)
else:
raise error.Error("[%s] You didn't have any recorded training data in %s. Once you've used 'env = gym.wrappers.Monitor(env, directory)' to start recording, you need to actually run some rollouts. Please join the community chat on https://gym.openai.com if you have any issues."%(env_id, training_dir))
evaluation = resource.Evaluation.create(
training_episode_batch=training_episode_batch_id,
training_video=training_video_id,
env=env_info['env_id'],
algorithm={
'id': algorithm_id,
},
benchmark_run_id=benchmark_run_id,
writeup=writeup,
gym_version=env_info['gym_version'],
api_key=api_key,
)
return evaluation
def upload_training_data(training_dir, api_key=None, skip_videos=False):
# Could have multiple manifests
results = monitoring.load_results(training_dir)
if not results:
raise error.Error('''Could not find any manifest files in {}.
(HINT: this usually means you did not yet close() your env.monitor and have not yet exited the process. You should call 'env.monitor.start(training_dir)' at the start of training and 'env.close()' at the end, or exit the process.)'''.format(training_dir))
manifests = results['manifests']
env_info = results['env_info']
data_sources = results['data_sources']
timestamps = results['timestamps']
episode_lengths = results['episode_lengths']
episode_rewards = results['episode_rewards']
episode_types = results['episode_types']
initial_reset_timestamps = results['initial_reset_timestamps']
videos = results['videos'] if not skip_videos else []
env_id = env_info['env_id']
logger.debug('[%s] Uploading data from manifest %s', env_id, ', '.join(manifests))
# Do the relevant uploads
if len(episode_lengths) > 0:
training_episode_batch = upload_training_episode_batch(data_sources, episode_lengths, episode_rewards, episode_types, initial_reset_timestamps, timestamps, api_key, env_id=env_id)
else:
training_episode_batch = None
if len(videos) > MAX_VIDEOS:
logger.warning('[%s] You recorded videos for %s episodes, but the scoreboard only supports up to %s. We will automatically subsample for you, but you also might wish to adjust your video recording rate.', env_id, len(videos), MAX_VIDEOS)
subsample_inds = np.linspace(0, len(videos)-1, MAX_VIDEOS).astype('int') #pylint: disable=E1101
videos = [videos[i] for i in subsample_inds]
if len(videos) > 0:
training_video = upload_training_video(videos, api_key, env_id=env_id)
else:
training_video = None
return env_info, training_episode_batch, training_video
def upload_training_episode_batch(data_sources, episode_lengths, episode_rewards, episode_types, initial_reset_timestamps, timestamps, api_key=None, env_id=None):
logger.info('[%s] Uploading %d episodes of training data', env_id, len(episode_lengths))
file_upload = resource.FileUpload.create(purpose='episode_batch', api_key=api_key)
file_upload.put({
'data_sources': data_sources,
'episode_lengths': episode_lengths,
'episode_rewards': episode_rewards,
'episode_types': episode_types,
'initial_reset_timestamps': initial_reset_timestamps,
'timestamps': timestamps,
})
return file_upload
def upload_training_video(videos, api_key=None, env_id=None):
"""videos: should be list of (video_path, metadata_path) tuples"""
with tempfile.TemporaryFile() as archive_file:
write_archive(videos, archive_file, env_id=env_id)
archive_file.seek(0)
logger.info('[%s] Uploading videos of %d training episodes (%d bytes)', env_id, len(videos), util.file_size(archive_file))
file_upload = resource.FileUpload.create(purpose='video', content_type='application/vnd.openai.video+x-compressed', api_key=api_key)
file_upload.put(archive_file, encode=None)
return file_upload
def write_archive(videos, archive_file, env_id=None):
if len(videos) > MAX_VIDEOS:
raise error.Error('[{}] Trying to upload {} videos, but there is a limit of {} currently. If you actually want to upload this many videos, please email gym@openai.com with your use-case.'.format(env_id, len(videos), MAX_VIDEOS))
logger.debug('[%s] Preparing an archive of %d videos: %s', env_id, len(videos), videos)
# Double check that there are no collisions
basenames = set()
manifest = {
'version': 0,
'videos': []
}
with tarfile.open(fileobj=archive_file, mode='w:gz') as tar:
for video_path, metadata_path in videos:
video_name = os.path.basename(video_path)
metadata_name = os.path.basename(metadata_path)
if not os.path.exists(video_path):
raise error.Error('[{}] No such video file {}. (HINT: Your video recorder may have broken midway through the run. You can check this with `video_recorder.functional`.)'.format(env_id, video_path))
elif not os.path.exists(metadata_path):
raise error.Error('[{}] No such metadata file {}. (HINT: this should be automatically created when using a VideoRecorder instance.)'.format(env_id, metadata_path))
# Do some sanity checking
if video_name in basenames:
raise error.Error('[{}] Duplicated video name {} in video list: {}'.format(env_id, video_name, videos))
elif metadata_name in basenames:
raise error.Error('[{}] Duplicated metadata file name {} in video list: {}'.format(env_id, metadata_name, videos))
elif not video_name_re.search(video_name):
raise error.Error('[{}] Invalid video name {} (must match {})'.format(env_id, video_name, video_name_re.pattern))
elif not metadata_name_re.search(metadata_name):
raise error.Error('[{}] Invalid metadata file name {} (must match {})'.format(env_id, metadata_name, metadata_name_re.pattern))
# Record that we've seen these names; add to manifest
basenames.add(video_name)
basenames.add(metadata_name)
manifest['videos'].append((video_name, metadata_name))
# Import the files into the archive
tar.add(video_path, arcname=video_name, recursive=False)
tar.add(metadata_path, arcname=metadata_name, recursive=False)
f = tempfile.NamedTemporaryFile(mode='w+', delete=False)
try:
json.dump(manifest, f)
f.close()
tar.add(f.name, arcname='manifest.json')
finally:
f.close()
os.remove(f.name)
def upload(*args, **kwargs):
raise NotImplementedError('The Gym website has been end-of-lifed. This library is the focus of the project. See https://github.com/openai/gym/issues/718#issuecomment-329661594 for details.')
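One detail in the deleted upload_training_data above is worth spelling out: when more than MAX_VIDEOS videos were recorded, it kept an evenly spaced subset chosen with np.linspace. A small self-contained worked example of that subsampling:

```python
import numpy as np

MAX_VIDEOS = 100
videos = ['video_%03d.mp4' % i for i in range(250)]   # pretend 250 videos were recorded

# Evenly spaced indices over the full range, always keeping the first and last video.
subsample_inds = np.linspace(0, len(videos) - 1, MAX_VIDEOS).astype('int')
subsampled = [videos[i] for i in subsample_inds]

print(len(subsampled))                  # 100
print(subsampled[0], subsampled[-1])    # video_000.mp4 video_249.mp4
```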

View File

@@ -1,4 +0,0 @@
# Client
This client was forked from the [Stripe Python](https://github.com/stripe/stripe-python) bindings.

View File

@@ -1,6 +0,0 @@
import logging
import os
from gym import error
logger = logging.getLogger(__name__)

View File

@@ -1,159 +0,0 @@
import json
import platform
import six.moves.urllib.parse as urlparse
from six import iteritems
from gym import error, version
import gym.scoreboard.client
from gym.scoreboard.client import http_client
verify_ssl_certs = True # [SECURITY CRITICAL] only turn this off while debugging
http_client = http_client.RequestsClient(verify_ssl_certs=verify_ssl_certs)
def _build_api_url(url, query):
scheme, netloc, path, base_query, fragment = urlparse.urlsplit(url)
if base_query:
query = '%s&%s' % (base_query, query)
return urlparse.urlunsplit((scheme, netloc, path, query, fragment))
def _strip_nulls(params):
if isinstance(params, dict):
stripped = {}
for key, value in iteritems(params):
value = _strip_nulls(value)
if value is not None:
stripped[key] = value
return stripped
else:
return params
class APIRequestor(object):
def __init__(self, key=None, api_base=None):
self.api_base = api_base or gym.scoreboard.api_base
self.api_key = key
self._client = http_client
def request(self, method, url, params=None, headers=None):
rbody, rcode, rheaders, my_api_key = self.request_raw(
method.lower(), url, params, headers)
resp = self.interpret_response(rbody, rcode, rheaders)
return resp, my_api_key
def handle_api_error(self, rbody, rcode, resp, rheaders):
# Rate limits were previously coded as 400's with code 'rate_limit'
if rcode == 429:
raise error.RateLimitError(
resp.get('detail'), rbody, rcode, resp, rheaders)
elif rcode in [400, 404]:
type = resp.get('type')
if type == 'about:blank':
type = None
raise error.InvalidRequestError(
resp.get('detail'), type,
rbody, rcode, resp, rheaders)
elif rcode == 401:
raise error.AuthenticationError(
resp.get('detail'), rbody, rcode, resp,
rheaders)
else:
detail = resp.get('detail')
# This information will only be returned to developers of
# the OpenAI Gym Scoreboard.
dev_info = resp.get('dev_info')
if dev_info:
detail = "{}\n\n<dev_info>\n{}\n</dev_info>".format(detail, dev_info['traceback'])
raise error.APIError(detail, rbody, rcode, resp,
rheaders)
def request_raw(self, method, url, params=None, supplied_headers=None):
"""
Mechanism for issuing an API call
"""
if self.api_key:
my_api_key = self.api_key
else:
my_api_key = gym.scoreboard.api_key
if my_api_key is None:
raise error.AuthenticationError("""You must provide an OpenAI Gym API key.
(HINT: Set your API key using "gym.scoreboard.api_key = .." or "export OPENAI_GYM_API_KEY=..."). You can find your API key in the OpenAI Gym web interface: https://gym.openai.com/settings/profile.""")
abs_url = '%s%s' % (self.api_base, url)
if params:
encoded_params = json.dumps(_strip_nulls(params))
else:
encoded_params = None
if method == 'get' or method == 'delete':
if params:
abs_url = _build_api_url(abs_url, encoded_params)
post_data = None
elif method == 'post':
post_data = encoded_params
else:
raise error.APIConnectionError(
'Unrecognized HTTP method %r. This may indicate a bug in the '
'OpenAI Gym bindings. Please contact gym@openai.com for '
'assistance.' % (method,))
ua = {
'bindings_version': version.VERSION,
'lang': 'python',
'publisher': 'openai',
'httplib': self._client.name,
}
for attr, func in [['lang_version', platform.python_version],
['platform', platform.platform]]:
try:
val = func()
except Exception as e:
val = "!! %s" % (e,)
ua[attr] = val
headers = {
'Openai-Gym-User-Agent': json.dumps(ua),
'User-Agent': 'Openai-Gym/v1 PythonBindings/%s' % (version.VERSION,),
'Authorization': 'Bearer %s' % (my_api_key,)
}
if method == 'post':
headers['Content-Type'] = 'application/json'
if supplied_headers is not None:
for key, value in supplied_headers.items():
headers[key] = value
rbody, rcode, rheaders = self._client.request(
method, abs_url, headers, post_data)
return rbody, rcode, rheaders, my_api_key
def interpret_response(self, rbody, rcode, rheaders):
content_type = rheaders.get('Content-Type', '')
if content_type.startswith('text/plain'):
# Pass through plain text
resp = rbody
if not (200 <= rcode < 300):
self.handle_api_error(rbody, rcode, {}, rheaders)
else:
# TODO: Be strict about other Content-Types
try:
if hasattr(rbody, 'decode'):
rbody = rbody.decode('utf-8')
resp = json.loads(rbody)
except Exception:
raise error.APIError(
"Invalid response body from API: %s "
"(HTTP response code was %d)" % (rbody, rcode),
rbody, rcode, rheaders)
if not (200 <= rcode < 300):
self.handle_api_error(rbody, rcode, resp, rheaders)
return resp
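The two module-level helpers at the top of this deleted requestor are small enough to demonstrate on their own. Here is a sketch of how GET parameters were folded into the URL (None values stripped, the remainder JSON-encoded into the query string); the hostname is a made-up placeholder and the imports use the Python 3 stdlib rather than six:

```python
import json
from urllib.parse import urlsplit, urlunsplit

def strip_nulls(params):
    """Recursively drop None values, mirroring the deleted _strip_nulls."""
    if isinstance(params, dict):
        stripped = {}
        for key, value in params.items():
            value = strip_nulls(value)
            if value is not None:
                stripped[key] = value
        return stripped
    return params

def build_api_url(url, query):
    """Append an already-encoded query string, preserving any existing query."""
    scheme, netloc, path, base_query, fragment = urlsplit(url)
    if base_query:
        query = '%s&%s' % (base_query, query)
    return urlunsplit((scheme, netloc, path, query, fragment))

params = {'algorithm_id': 'alg_example', 'writeup': None}
encoded = json.dumps(strip_nulls(params))
print(build_api_url('https://api.example.com/v1/evaluations', encoded))
# https://api.example.com/v1/evaluations?{"algorithm_id": "alg_example"}
```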

View File

@@ -1,94 +0,0 @@
import logging
import requests
import textwrap
import six
from gym import error
from gym.scoreboard.client import util
logger = logging.getLogger(__name__)
warned = False
def render_post_data(post_data):
if hasattr(post_data, 'fileno'): # todo: is this the right way of checking if it's a file?
return '%r (%d bytes)' % (post_data, util.file_size(post_data))
elif isinstance(post_data, (six.string_types, six.binary_type)):
return '%r (%d bytes)' % (post_data, len(post_data))
else:
return None
class RequestsClient(object):
name = 'requests'
def __init__(self, verify_ssl_certs=True):
self._verify_ssl_certs = verify_ssl_certs
self.session = requests.Session()
def request(self, method, url, headers, post_data=None, files=None):
global warned
kwargs = {}
# Really, really only turn this off while debugging.
if not self._verify_ssl_certs:
if not warned:
logger.warn('You have disabled SSL cert verification in OpenAI Gym, so we will not verify SSL certs. This means an attacker with control of your network could snoop on or modify your data in transit.')
warned = True
kwargs['verify'] = False
try:
try:
result = self.session.request(method,
url,
headers=headers,
data=post_data,
timeout=200,
files=files,
**kwargs)
except TypeError as e:
raise TypeError(
'Warning: It looks like your installed version of the '
'"requests" library is not compatible with OpenAI Gym\'s'
'usage thereof. (HINT: The most likely cause is that '
'your "requests" library is out of date. You can fix '
'that by running "pip install -U requests".) The '
'underlying error was: %s' % (e,))
# This causes the content to actually be read, which could cause
# e.g. a socket timeout. TODO: The other fetch methods probably
# are susceptible to the same and should be updated.
content = result.content
status_code = result.status_code
except Exception as e:
# Would catch just requests.exceptions.RequestException, but can
# also raise ValueError, RuntimeError, etc.
self._handle_request_error(e, method, url)
if logger.level <= logging.DEBUG:
logger.debug(
"""API request to %s returned (response code, response body) of
(%d, %r)
Request body was: %s""", url, status_code, content, render_post_data(post_data))
elif logger.level <= logging.INFO:
logger.info('HTTP request: %s %s %d', method.upper(), url, status_code)
return content, status_code, result.headers
def _handle_request_error(self, e, method, url):
if isinstance(e, requests.exceptions.RequestException):
msg = ("Unexpected error communicating with OpenAI Gym "
"(while calling {} {}). "
"If this problem persists, let us know at "
"gym@openai.com.".format(method, url))
err = "%s: %s" % (type(e).__name__, str(e))
else:
msg = ("Unexpected error communicating with OpenAI Gym. "
"It looks like there's probably a configuration "
"issue locally. If this problem persists, let us "
"know at gym@openai.com.")
err = "A %s was raised" % (type(e).__name__,)
if str(e):
err += " with error message %s" % (str(e),)
else:
err += " with no error message"
msg = textwrap.fill(msg, width=140) + "\n\n(Network error: %s)" % (err,)
raise error.APIConnectionError(msg)
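Stripped of the logging and version checks, the deleted client boils down to a familiar pattern: one shared requests.Session, a hard timeout, and every transport failure normalized into a single error type. A minimal stand-in sketch of that pattern (not the original class; the URL below is just a public echo endpoint used for illustration):

```python
import requests

class SimpleHTTPClient(object):
    """Minimal stand-in for the pattern the deleted RequestsClient implements."""

    def __init__(self, timeout=200):
        self.session = requests.Session()   # reuse one session/connection pool
        self.timeout = timeout

    def request(self, method, url, headers=None, post_data=None):
        try:
            result = self.session.request(method, url, headers=headers,
                                           data=post_data, timeout=self.timeout)
        except requests.exceptions.RequestException as e:
            # Collapse all transport-level failures into one exception type.
            raise RuntimeError('Unexpected error communicating with the API: %s' % e)
        return result.content, result.status_code, result.headers

client = SimpleHTTPClient()
body, status_code, headers = client.request('get', 'https://httpbin.org/get')
print(status_code)
```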

View File

@@ -1,395 +0,0 @@
import json
import warnings
import sys
from six import string_types
from six import iteritems
import six.moves.urllib as urllib
import gym
from gym import error
from gym.scoreboard.client import api_requestor, util
def convert_to_gym_object(resp, api_key):
types = {
'evaluation': Evaluation,
'file': FileUpload,
'benchmark_run': BenchmarkRun,
}
if isinstance(resp, list):
return [convert_to_gym_object(i, api_key) for i in resp]
elif isinstance(resp, dict) and not isinstance(resp, GymObject):
resp = resp.copy()
klass_name = resp.get('object')
if isinstance(klass_name, string_types):
klass = types.get(klass_name, GymObject)
else:
klass = GymObject
return klass.construct_from(resp, api_key)
else:
return resp
def populate_headers(idempotency_key):
if idempotency_key is not None:
return {"Idempotency-Key": idempotency_key}
return None
def _compute_diff(current, previous):
if isinstance(current, dict):
previous = previous or {}
diff = current.copy()
for key in set(previous.keys()) - set(diff.keys()):
diff[key] = ""
return diff
return current if current is not None else ""
class GymObject(dict):
def __init__(self, id=None, api_key=None, **params):
super(GymObject, self).__init__()
self._unsaved_values = set()
self._transient_values = set()
self._retrieve_params = params
self._previous = None
object.__setattr__(self, 'api_key', api_key)
if id:
self['id'] = id
def update(self, update_dict):
for k in update_dict:
self._unsaved_values.add(k)
return super(GymObject, self).update(update_dict)
def __setattr__(self, k, v):
if k[0] == '_' or k in self.__dict__:
return super(GymObject, self).__setattr__(k, v)
else:
self[k] = v
def __getattr__(self, k):
if k[0] == '_':
raise AttributeError(k)
try:
return self[k]
except KeyError as err:
raise AttributeError(*err.args)
def __delattr__(self, k):
if k[0] == '_' or k in self.__dict__:
return super(GymObject, self).__delattr__(k)
else:
del self[k]
def __setitem__(self, k, v):
if v == "":
raise ValueError(
"You cannot set %s to an empty string. "
"We interpret empty strings as None in requests."
"You may set %s.%s = None to delete the property" % (
k, str(self), k))
super(GymObject, self).__setitem__(k, v)
# Allows for unpickling in Python 3.x
if not hasattr(self, '_unsaved_values'):
self._unsaved_values = set()
self._unsaved_values.add(k)
def __getitem__(self, k):
try:
return super(GymObject, self).__getitem__(k)
except KeyError as err:
if k in self._transient_values:
raise KeyError(
"%r. HINT: The %r attribute was set in the past."
"It was then wiped when refreshing the object with "
"the result returned by Rl_Gym's API, probably as a "
"result of a save(). The attributes currently "
"available on this object are: %s" %
(k, k, ', '.join(self.keys())))
else:
raise err
def __delitem__(self, k):
super(GymObject, self).__delitem__(k)
# Allows for unpickling in Python 3.x
if hasattr(self, '_unsaved_values'):
self._unsaved_values.remove(k)
@classmethod
def construct_from(cls, values, key):
instance = cls(values.get('id'), api_key=key)
instance.refresh_from(values, api_key=key)
return instance
def refresh_from(self, values, api_key=None, partial=False):
self.api_key = api_key or getattr(values, 'api_key', None)
# Wipe old state before setting new. This is useful for e.g.
# updating a customer, where there is no persistent card
# parameter. Mark those values which don't persist as transient
if partial:
self._unsaved_values = (self._unsaved_values - set(values))
else:
removed = set(self.keys()) - set(values)
self._transient_values = self._transient_values | removed
self._unsaved_values = set()
self.clear()
self._transient_values = self._transient_values - set(values)
for k, v in iteritems(values):
super(GymObject, self).__setitem__(
k, convert_to_gym_object(v, api_key))
self._previous = values
@classmethod
def api_base(cls):
return None
def request(self, method, url, params=None, headers=None):
if params is None:
params = self._retrieve_params
requestor = api_requestor.APIRequestor(
key=self.api_key, api_base=self.api_base())
response, api_key = requestor.request(method, url, params, headers)
return convert_to_gym_object(response, api_key)
def __repr__(self):
ident_parts = [type(self).__name__]
if isinstance(self.get('object'), string_types):
ident_parts.append(self.get('object'))
if isinstance(self.get('id'), string_types):
ident_parts.append('id=%s' % (self.get('id'),))
unicode_repr = '<%s at %s> JSON: %s' % (
' '.join(ident_parts), hex(id(self)), str(self))
if sys.version_info[0] < 3:
return unicode_repr.encode('utf-8')
else:
return unicode_repr
def __str__(self):
return json.dumps(self, sort_keys=True, indent=2)
def to_dict(self):
warnings.warn(
'The `to_dict` method is deprecated and will be removed in '
'version 2.0 of the Rl_Gym bindings. The GymObject is '
'itself now a subclass of `dict`.',
DeprecationWarning)
return dict(self)
@property
def gym_id(self):
return self.id
def serialize(self, previous):
params = {}
unsaved_keys = self._unsaved_values or set()
previous = previous or self._previous or {}
for k, v in self.items():
if k == 'id' or (isinstance(k, str) and k.startswith('_')):
continue
elif isinstance(v, APIResource):
continue
elif hasattr(v, 'serialize'):
params[k] = v.serialize(previous.get(k, None))
elif k in unsaved_keys:
params[k] = _compute_diff(v, previous.get(k, None))
return params
class APIResource(GymObject):
@classmethod
def retrieve(cls, id, api_key=None, **params):
instance = cls(id, api_key, **params)
instance.refresh()
return instance
def refresh(self):
self.refresh_from(self.request('get', self.instance_path()))
return self
@classmethod
def class_name(cls):
if cls == APIResource:
raise NotImplementedError(
'APIResource is an abstract class. You should perform '
'actions on its subclasses')
return str(urllib.parse.quote_plus(cls.__name__.lower()))
@classmethod
def class_path(cls):
cls_name = cls.class_name()
return "/v1/%ss" % (cls_name,)
def instance_path(self):
id = self.get('id')
if not id:
raise error.InvalidRequestError(
'Could not determine which URL to request: %s instance '
'has invalid ID: %r' % (type(self).__name__, id), 'id')
id = util.utf8(id)
base = self.class_path()
extn = urllib.parse.quote_plus(id)
return "%s/%s" % (base, extn)
class ListObject(GymObject):
def list(self, **params):
return self.request('get', self['url'], params)
def all(self, **params):
warnings.warn("The `all` method is deprecated and will"
"be removed in future versions. Please use the "
"`list` method instead",
DeprecationWarning)
return self.list(**params)
def auto_paging_iter(self):
page = self
params = dict(self._retrieve_params)
while True:
item_id = None
for item in page:
item_id = item.get('id', None)
yield item
if not getattr(page, 'has_more', False) or item_id is None:
return
params['starting_after'] = item_id
page = self.list(**params)
def create(self, idempotency_key=None, **params):
headers = populate_headers(idempotency_key)
return self.request('post', self['url'], params, headers)
def retrieve(self, id, **params):
base = self.get('url')
id = util.utf8(id)
extn = urllib.parse.quote_plus(id)
url = "%s/%s" % (base, extn)
return self.request('get', url, params)
def __iter__(self):
return getattr(self, 'data', []).__iter__()
# Classes of API operations
class ListableAPIResource(APIResource):
@classmethod
def all(cls, *args, **params):
warnings.warn("The `all` class method is deprecated and will"
"be removed in future versions. Please use the "
"`list` class method instead",
DeprecationWarning)
return cls.list(*args, **params)
@classmethod
def auto_paging_iter(self, *args, **params):
return self.list(*args, **params).auto_paging_iter()
@classmethod
def list(cls, api_key=None, idempotency_key=None, **params):
requestor = api_requestor.APIRequestor(api_key)
url = cls.class_path()
response, api_key = requestor.request('get', url, params)
return convert_to_gym_object(response, api_key)
class CreateableAPIResource(APIResource):
@classmethod
def create(cls, api_key=None, idempotency_key=None, **params):
requestor = api_requestor.APIRequestor(api_key)
url = cls.class_path()
headers = populate_headers(idempotency_key)
response, api_key = requestor.request('post', url, params, headers)
return convert_to_gym_object(response, api_key)
class UpdateableAPIResource(APIResource):
def save(self, idempotency_key=None):
updated_params = self.serialize(None)
headers = populate_headers(idempotency_key)
if updated_params:
self.refresh_from(self.request('post', self.instance_path(),
updated_params, headers))
else:
util.logger.debug("Trying to save already saved object %r", self)
return self
class DeletableAPIResource(APIResource):
def delete(self, **params):
self.refresh_from(self.request('delete', self.instance_path(), params))
return self
## Our resources
class FileUpload(ListableAPIResource):
@classmethod
def class_name(cls):
return 'file'
@classmethod
def create(cls, api_key=None, **params):
requestor = api_requestor.APIRequestor(
api_key, api_base=cls.api_base())
url = cls.class_path()
response, api_key = requestor.request(
'post', url, params=params)
return convert_to_gym_object(response, api_key)
def put(self, contents, encode='json'):
supplied_headers = {
"Content-Type": self.content_type
}
if encode == 'json':
contents = json.dumps(contents)
elif encode is None:
pass
else:
raise error.Error('Encode request for put must be "json" or None, not {}'.format(encode))
files = {'file': contents}
body, code, headers = api_requestor.http_client.request(
'post', self.post_url, post_data=self.post_fields, files=files, headers={})
if code != 204:
raise error.Error("Upload to S3 failed. If error persists, please contact us at gym@openai.com this message. S3 returned '{} -- {}'. Tried 'POST {}' with fields {}.".format(code, body, self.post_url, self.post_fields))
class Evaluation(CreateableAPIResource):
def web_url(self):
return "%s/evaluations/%s" % (gym.scoreboard.web_base, self.get('id'))
class Algorithm(CreateableAPIResource):
pass
class BenchmarkRun(CreateableAPIResource, UpdateableAPIResource):
@classmethod
def class_name(cls):
return 'benchmark_run'
def web_url(self):
return "%s/benchmark_runs/%s" % (gym.scoreboard.web_base, self.get('id'))
def commit(self):
return self.request('post', '{}/commit'.format(self.instance_path()))
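The resource hierarchy above follows the Stripe-bindings pattern it was forked from: a dict subclass that also exposes keys as attributes and keeps track of which keys changed. A stripped-down sketch of just that core idea (not the full deleted GymObject; the values are illustrative):

```python
class AttrDict(dict):
    """dict subclass exposing keys as attributes and tracking keys set since construction."""

    def __init__(self, **values):
        super(AttrDict, self).__init__(**values)
        object.__setattr__(self, '_unsaved', set())

    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)

    def __setattr__(self, name, value):
        if name.startswith('_'):
            object.__setattr__(self, name, value)
        else:
            self[name] = value
            self._unsaved.add(name)

evaluation = AttrDict(id='eval_example', env='CartPole-v0')
evaluation.writeup = 'https://gist.github.com/<user>/<id>'
print(evaluation['env'], evaluation.writeup)   # CartPole-v0 https://gist.github.com/<user>/<id>
print(evaluation._unsaved)                     # {'writeup'}
```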

View File

@@ -1,32 +0,0 @@
import mock
import unittest
import uuid
def fake_id(prefix):
entropy = ''.join([a for a in str(uuid.uuid4()) if a.isalnum()])
return '{}_{}'.format(prefix, entropy)
class APITestCase(unittest.TestCase):
def setUp(self):
super(APITestCase, self).setUp()
self.requestor_patcher = mock.patch('gym.scoreboard.client.api_requestor.APIRequestor')
requestor_class_mock = self.requestor_patcher.start()
self.requestor_mock = requestor_class_mock.return_value
def mock_response(self, res):
self.requestor_mock.request = mock.Mock(return_value=(res, 'reskey'))
class TestData(object):
@classmethod
def file_upload_response(cls):
return {
'id': fake_id('file'),
'object': 'file',
}
@classmethod
def evaluation_response(cls):
return {
'id': fake_id('file'),
'object': 'evaluation',
}

View File

@@ -1,16 +0,0 @@
from gym.scoreboard.client.tests import helper
from gym import scoreboard
class EvaluationTest(helper.APITestCase):
def test_create_evaluation(self):
self.mock_response(helper.TestData.evaluation_response())
evaluation = scoreboard.Evaluation.create()
assert isinstance(evaluation, scoreboard.Evaluation)
self.requestor_mock.request.assert_called_with(
'post',
'/v1/evaluations',
{},
None
)

View File

@@ -1,15 +0,0 @@
from gym.scoreboard.client.tests import helper
from gym import scoreboard
class FileUploadTest(helper.APITestCase):
def test_create_file_upload(self):
self.mock_response(helper.TestData.file_upload_response())
file_upload = scoreboard.FileUpload.create()
assert isinstance(file_upload, scoreboard.FileUpload), 'File upload is: {!r}'.format(file_upload)
self.requestor_mock.request.assert_called_with(
'post',
'/v1/files',
params={},
)

View File

@@ -1,45 +0,0 @@
import functools
import logging
import os
import random
import sys
import time
from gym import error
logger = logging.getLogger(__name__)
def utf8(value):
if sys.version_info < (3, 0) and isinstance(value, unicode):  # check the version first so 'unicode' is never evaluated on Python 3
return value.encode('utf-8')
else:
return value
def file_size(f):
return os.fstat(f.fileno()).st_size
def retry_exponential_backoff(f, errors, max_retries=5, interval=1):
@functools.wraps(f)
def wrapped(*args, **kwargs):
num_retries = 0
caught_errors = []
while True:
try:
result = f(*args, **kwargs)
except errors as e:
logger.error("Caught error in %s: %s" % (f.__name__, e))
caught_errors.append(e)
if num_retries < max_retries:
backoff = random.randint(1, 2 ** num_retries) * interval
logger.error("Retrying in %.1fs..." % backoff)
time.sleep(backoff)
num_retries += 1
else:
msg = "Exceeded allowed retries. Here are the individual error messages:\n\n"
msg += "\n\n".join("%s: %s" % (type(e).__name__, str(e)) for e in caught_errors)
raise error.RetriesExceededError(msg)
else:
break
return result
return wrapped
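Before this commit, the retry helper above (it lives in gym.scoreboard.client.util) wrapped the scoreboard API calls. A hedged usage sketch: the flaky function and error type below are made up for illustration, and the import only resolves on a checkout that still contains the scoreboard package:

```python
import random

from gym.scoreboard.client.util import retry_exponential_backoff  # pre-#750 checkouts only

class TransientError(Exception):
    """Stand-in for error.APIConnectionError in the deleted upload path."""

def create_benchmark_run():
    # Fails most of the time so the retry-with-backoff path actually gets exercised.
    if random.random() < 0.7:
        raise TransientError('temporary failure')
    return 'benchmark_run_created'

create_with_retries = retry_exponential_backoff(
    create_benchmark_run, (TransientError,), max_retries=5, interval=3)

print(create_with_retries())  # retries with randomized exponential backoff, then returns or raises
```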

View File

@@ -1,60 +0,0 @@
import collections
import gym.envs
import logging
logger = logging.getLogger(__name__)
class RegistrationError(Exception):
pass
class Registry(object):
def __init__(self):
self.groups = collections.OrderedDict()
self.envs = collections.OrderedDict()
self.benchmarks = collections.OrderedDict()
def env(self, id):
return self.envs[id]
def add_group(self, id, name, description, universe=False):
self.groups[id] = {
'id': id,
'name': name,
'description': description,
'envs': [],
'universe': universe,
}
def add_task(self, id, group, summary=None, description=None, background=None, deprecated=False, experimental=False, contributor=None):
self.envs[id] = {
'group': group,
'id': id,
'summary': summary,
'description': description,
'background': background,
'deprecated': deprecated,
'experimental': experimental,
'contributor': contributor,
}
if not deprecated:
self.groups[group]['envs'].append(id)
def add_benchmark(self, id, name, description, unavailable):
self.benchmarks[id] = {
'id': id,
'name': name,
'description': description,
'unavailable': unavailable,
}
def finalize(self, strict=False):
# We used to check whether the scoreboard and environment ID
# registries matched here. However, we now support various
# registrations living in various repos, so this is less
# important.
pass
registry = Registry()
add_group = registry.add_group
add_task = registry.add_task
add_benchmark = registry.add_benchmark
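The registry above was filled in declaratively elsewhere in the scoreboard package (the test in the next file imports it as gym.scoreboard.registration). A hedged sketch of how the module-level helpers are called; the group and task below are invented for illustration and are not real registrations:

```python
# Only works on a pre-#750 checkout, since this commit deletes the module.
from gym.scoreboard.registration import add_group, add_task, registry

add_group(
    id='example_group',
    name='Example group',
    description='A made-up group to show the registration API.',
)
add_task(
    id='ExampleEnv-v0',
    group='example_group',
    summary='A made-up task entry for illustration.',
)

print(registry.env('ExampleEnv-v0')['group'])     # example_group
print(registry.groups['example_group']['envs'])   # ['ExampleEnv-v0']
```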

View File

@@ -1,7 +0,0 @@
from gym.scoreboard import registration
def test_correct_registration():
try:
registration.registry.finalize(strict=True)
except registration.RegistrationError as e:
assert False, "Caught: {}".format(e)

View File

@@ -1,442 +0,0 @@
import numpy as np
from collections import defaultdict
from gym.benchmarks import registration, scoring
import gym
gym.undo_logger_setup()
benchmark = registration.Benchmark(
id='TestBenchmark-v0',
scorer=scoring.ClipTo01ThenAverage(),
tasks=[
{'env_id': 'CartPole-v0',
'trials': 1,
'max_timesteps': 100,
},
{'env_id': 'Pendulum-v0',
'trials': 1,
'max_timesteps': 100,
},
]
)
def _is_close(x, target):
return np.all(np.isclose(x, target))
def _eq_list_of_arrays(x, y):
return np.all([len(a) == len(b) and np.all(a == b) for a, b in zip(x, y)])
def _assert_evaluation_result(result, score=None, solves=None, rewards=None, lengths=None, timestamps=None):
debug_str = "score_evaluation={}".format(result)
if score is not None:
assert _is_close(result['scores'], score), debug_str
if solves is not None:
assert _eq_list_of_arrays(result['solves'], solves), debug_str
if rewards is not None:
assert _eq_list_of_arrays(result['rewards'], rewards), debug_str
if lengths is not None:
assert _eq_list_of_arrays(result['lengths'], lengths), debug_str
def _assert_benchmark_result(result, score=None, solves=None, summed_training_seconds=None, start_to_finish_seconds=None):
debug_str = "benchmark_result={}".format(result)
if score is not None:
assert _is_close(result['scores'], score), debug_str
if solves is not None:
assert np.all(result['solves']) == solves, debug_str
def _assert_benchmark_score(scores, score=None, num_envs_solved=None, summed_training_seconds=None, summed_task_wall_time=None, start_to_finish_seconds=None):
debug_str = "scores={} score={} num_envs_solved={} summed_training_seconds={} summed_wall_task_time={} start_to_finish_seconds={}".format(scores, score, num_envs_solved, summed_training_seconds, summed_task_wall_time, start_to_finish_seconds)
if score is not None:
assert _is_close(scores['score'], score), debug_str
if num_envs_solved is not None:
assert scores['num_envs_solved'] == num_envs_solved, debug_str
if summed_training_seconds is not None:
assert _is_close(scores['summed_training_seconds'], summed_training_seconds), debug_str
if summed_task_wall_time is not None:
assert _is_close(scores['summed_task_wall_time'], summed_task_wall_time), debug_str
if start_to_finish_seconds is not None:
assert _is_close(scores['start_to_finish_seconds'], start_to_finish_seconds), debug_str
def _benchmark_result_helper(benchmark, **kwargs):
for k, defval in dict(
env_id='CartPole-v0',
data_sources=[0],
initial_reset_timestamps=[1],
episode_lengths=[1],
episode_rewards=[1],
episode_types=['t'],
timestamps=[2]).items():
kwargs.setdefault(k, defval)
return benchmark.score_evaluation(**kwargs)
def test_clip_average_evaluation_scoring():
benchmark = registration.Benchmark(
id='TestBenchmark-v0',
scorer=scoring.ClipTo01ThenAverage(num_episodes=1),
tasks=[
{'env_id': 'CartPole-v0',
'trials': 1,
'max_timesteps': 5,
},
]
)
# simple scoring
benchmark_result = _benchmark_result_helper(benchmark)
_assert_benchmark_result(benchmark_result, score=0.01)
# test a successful run
benchmark_result = _benchmark_result_helper(benchmark, episode_rewards=[100, 100], episode_lengths=[1, 1])
_assert_benchmark_result(benchmark_result, score=1.0, solves=True)
def test_clip_average_evaluation_not_enough_rewards():
benchmark = registration.Benchmark(
id='TestBenchmark-v0',
scorer=scoring.ClipTo01ThenAverage(num_episodes=2),
tasks=[
{'env_id': 'CartPole-v0',
'trials': 1,
'max_timesteps': 5,
},
]
)
# simple scoring
benchmark_result = _benchmark_result_helper(benchmark)
_assert_evaluation_result(
benchmark_result,
score=0.005,
rewards=[np.array([1, 0])],
lengths=[np.array([1, 0])],
)
def test_clip_average_max_timesteps():
benchmark = registration.Benchmark(
id='TestBenchmark-v0',
scorer=scoring.ClipTo01ThenAverage(num_episodes=2),
tasks=[
{'env_id': 'CartPole-v0',
'trials': 1,
'max_timesteps': 2,
},
]
)
benchmark_result = _benchmark_result_helper(benchmark, data_sources=[0,0], episode_lengths=[1,1], episode_rewards=[1,1], episode_types=['t','t'], timestamps=[2,3])
_assert_benchmark_result(benchmark_result, score=0.01)
# make sure we only include the first result because of timesteps
benchmark_result = _benchmark_result_helper(benchmark, data_sources=[0,0,0], episode_lengths=[1,100,100], episode_rewards=[1,100,100], episode_types=['t','t','t'], timestamps=[2,102,202])
_assert_benchmark_result(benchmark_result, score=0.005, solves=False)
def test_clip_average_max_seconds():
benchmark = registration.Benchmark(
id='TestBenchmark-v0',
scorer=scoring.ClipTo01ThenAverage(num_episodes=2),
tasks=[
{'env_id': 'CartPole-v0',
'trials': 1,
'max_seconds': 1,
},
]
)
benchmark_result = _benchmark_result_helper(benchmark, data_sources=[0,0], episode_lengths=[100,100], episode_rewards=[0,100], episode_types=['t','t'], timestamps=[1.5, 2])
_assert_benchmark_result(benchmark_result, score=0.5)
# make sure we only include the first result because of wall clock time
benchmark_result = _benchmark_result_helper(benchmark, data_sources=[0,0,0], episode_lengths=[100,100,100], episode_rewards=[0,100,100], episode_types=['t','t','t'], timestamps=[2,102,202])
_assert_benchmark_result(benchmark_result, score=0.0)
def test_clip_average_benchmark_scoring():
benchmark_results = defaultdict(list)
for i, task in enumerate(benchmark.tasks):
env_id = task.env_id
benchmark_results[env_id].append(_benchmark_result_helper(benchmark, env_id=env_id, timestamps=[i + 2]))
scores = scoring.benchmark_aggregate_score(benchmark, benchmark_results)
_assert_benchmark_score(scores, score=0.0001, num_envs_solved=0, summed_training_seconds=3.0, start_to_finish_seconds=2.0)
def test_clip_average_benchmark_empty():
scores = scoring.benchmark_aggregate_score(benchmark, {})
benchmark_results = defaultdict(list)
task = benchmark.tasks[0]
env_id = task.env_id
benchmark_results[env_id].append(_benchmark_result_helper(benchmark, env_id=env_id))
scores = scoring.benchmark_aggregate_score(benchmark, benchmark_results)
_assert_benchmark_score(scores, score=0.00005, num_envs_solved=0, summed_training_seconds=1.0, start_to_finish_seconds=1.0)
def test_clip_average_benchmark_solved():
benchmark_results = defaultdict(list)
N = 200
for i, task in enumerate(benchmark.tasks):
env_id = task.env_id
benchmark_results[env_id].append(benchmark.score_evaluation(
env_id,
data_sources=[0] * N,
initial_reset_timestamps=[1],
episode_lengths=[1] * N,
episode_rewards=[1000] * N,
episode_types=['t'] * N,
timestamps=list(range(N)),
))
scores = scoring.benchmark_aggregate_score(benchmark, benchmark_results)
_assert_benchmark_score(scores, score=1.0, num_envs_solved=len(benchmark.tasks))
def test_clip_average_benchmark_incomplete():
benchmark_results = defaultdict(list)
env_id = benchmark.tasks[0].env_id
benchmark_results[env_id].append(_benchmark_result_helper(benchmark, env_id=env_id, timestamps=[2]))
scores = scoring.benchmark_aggregate_score(benchmark, benchmark_results)
_assert_benchmark_score(scores, score=0.00005, num_envs_solved=0, summed_training_seconds=1.0, start_to_finish_seconds=1.0)
def test_clip_average_benchmark_extra():
benchmark_results = defaultdict(list)
for i, task in enumerate(benchmark.tasks):
env_id = task.env_id
benchmark_results[env_id].append(_benchmark_result_helper(benchmark, env_id=env_id, timestamps=[i + 2]))
# add one more at the end with a high reward
benchmark_results[env_id].append(_benchmark_result_helper(benchmark, env_id=env_id, episode_rewards=[100], timestamps=[2]))
scores = scoring.benchmark_aggregate_score(benchmark, benchmark_results)
_assert_benchmark_score(scores, score=0.0001, num_envs_solved=0, summed_training_seconds=3.0, summed_task_wall_time=3.0, start_to_finish_seconds=2.0)
def test_clip_average_benchmark_eval_handling():
# make sure we handle separate evaluation, training episodes properly
benchmark_results = defaultdict(list)
for i, task in enumerate(benchmark.tasks):
env_id = task.env_id
benchmark_results[env_id].append(benchmark.score_evaluation(
env_id,
data_sources=[0, 1, 1],
initial_reset_timestamps=[1, 1],
episode_lengths=[1, 1, 1],
episode_rewards=[1, 2, 3],
episode_types=['e', 't', 'e'],
timestamps=[i + 2, i + 3, i + 4],
))
scores = scoring.benchmark_aggregate_score(benchmark, benchmark_results)
_assert_benchmark_score(scores, score=0.0004, num_envs_solved=0, summed_training_seconds=5.0, summed_task_wall_time=5.0, start_to_finish_seconds=3.0)
# Tests for total reward scoring
def test_clip_scoring():
benchmark = registration.Benchmark(
id='TestBenchmark-v0',
scorer=scoring.TotalReward(),
tasks=[
{'env_id': 'CartPole-v0',
'trials': 1,
'max_timesteps': 5,
},
]
)
# simple scoring
benchmark_result = _benchmark_result_helper(benchmark)
_assert_benchmark_result(benchmark_result, score=0.01)
# test a successful run
benchmark_result = _benchmark_result_helper(benchmark, episode_rewards=[100])
_assert_benchmark_result(benchmark_result, score=1.0, solves=True)
def test_max_timesteps():
benchmark = registration.Benchmark(
id='TestBenchmark-v0',
scorer=scoring.TotalReward(),
tasks=[
{'env_id': 'CartPole-v0',
'trials': 1,
'max_timesteps': 2,
},
]
)
benchmark_result = _benchmark_result_helper(benchmark, data_sources=[0,0], episode_lengths=[1,1], episode_rewards=[1,1], episode_types=['t','t'], timestamps=[2,3])
_assert_benchmark_result(benchmark_result, score=0.01)
# make sure we only include the first result because of timesteps
benchmark_result = _benchmark_result_helper(benchmark, data_sources=[0,0,0], episode_lengths=[1,100,100], episode_rewards=[1,100,100], episode_types=['t','t','t'], timestamps=[2,102,202])
_assert_benchmark_result(benchmark_result, score=0.01, solves=False)
def test_max_seconds():
benchmark = registration.Benchmark(
id='TestBenchmark-v0',
scorer=scoring.TotalReward(),
tasks=[
{'env_id': 'CartPole-v0',
'trials': 1,
'max_seconds': 1,
},
]
)
benchmark_result = _benchmark_result_helper(benchmark, data_sources=[0,0], episode_lengths=[100,100], episode_rewards=[0,100], episode_types=['t','t'], timestamps=[1.5, 2])
_assert_benchmark_result(benchmark_result, score=0.5)
# make sure we only include the first result because of wall clock time
benchmark_result = _benchmark_result_helper(benchmark, data_sources=[0,0,0], episode_lengths=[100,100,100], episode_rewards=[0,100,100], episode_types=['t','t','t'], timestamps=[2,102,202])
_assert_benchmark_result(benchmark_result, score=0.0)
reward_benchmark = registration.Benchmark(
id='TestBenchmark-v0',
scorer=scoring.TotalReward(),
tasks=[
{'env_id': 'CartPole-v0',
'trials': 1,
'max_timesteps': 5,
},
{'env_id': 'Pendulum-v0',
'trials': 1,
'max_timesteps': 5,
},
]
)
def test_total_reward_evaluation_scoring():
benchmark_result = _benchmark_result_helper(reward_benchmark)
_assert_evaluation_result(
benchmark_result,
score=0.01,
rewards=[np.array([1])],
lengths=[np.array([1])],
)
def test_total_reward_benchmark_scoring():
benchmark_results = defaultdict(list)
for i, task in enumerate(reward_benchmark.tasks):
env_id = task.env_id
benchmark_results[env_id].append(_benchmark_result_helper(reward_benchmark, env_id=env_id, timestamps=[i + 2]))
scores = scoring.benchmark_aggregate_score(reward_benchmark, benchmark_results)
_assert_benchmark_score(scores, score=0.01, num_envs_solved=0, summed_training_seconds=3.0, summed_task_wall_time=3.0, start_to_finish_seconds=2.0)
def test_total_reward_benchmark_empty():
scores = scoring.benchmark_aggregate_score(reward_benchmark, {})
benchmark_results = defaultdict(list)
task = reward_benchmark.tasks[0]
env_id = task.env_id
benchmark_results[env_id].append(_benchmark_result_helper(reward_benchmark, env_id=env_id))
scores = scoring.benchmark_aggregate_score(reward_benchmark, benchmark_results)
_assert_benchmark_score(scores, score=0.005, num_envs_solved=0, summed_training_seconds=1.0, start_to_finish_seconds=1.0)
def test_total_reward_benchmark_solved():
benchmark_results = defaultdict(list)
N = 200
for i, task in enumerate(reward_benchmark.tasks):
env_id = task.env_id
benchmark_results[env_id].append(reward_benchmark.score_evaluation(
env_id,
data_sources=[0] * N,
initial_reset_timestamps=[1],
episode_lengths=[1] * N,
episode_rewards=[1000] * N,
episode_types=['t'] * N,
timestamps=list(range(N)),
))
scores = scoring.benchmark_aggregate_score(reward_benchmark, benchmark_results)
_assert_benchmark_score(scores, score=1.0, num_envs_solved=len(reward_benchmark.tasks))
def test_benchmark_incomplete():
benchmark_results = defaultdict(list)
env_id = reward_benchmark.tasks[0].env_id
benchmark_results[env_id].append(_benchmark_result_helper(reward_benchmark, env_id=env_id, timestamps=[2]))
scores = scoring.benchmark_aggregate_score(reward_benchmark, benchmark_results)
_assert_benchmark_score(scores, score=0.005, num_envs_solved=0, summed_training_seconds=1.0, start_to_finish_seconds=1.0)
def test_benchmark_extra():
benchmark_results = defaultdict(list)
for i, task in enumerate(reward_benchmark.tasks):
env_id = task.env_id
benchmark_results[env_id].append(_benchmark_result_helper(reward_benchmark, env_id=env_id, timestamps=[i + 2]))
# add one more at the end with a high reward
benchmark_results[env_id].append(_benchmark_result_helper(reward_benchmark, env_id=env_id, episode_rewards=[100], timestamps=[2]))
scores = scoring.benchmark_aggregate_score(reward_benchmark, benchmark_results)
_assert_benchmark_score(scores, score=0.01, num_envs_solved=0, summed_training_seconds=3.0, start_to_finish_seconds=2.0)
def test_benchmark_simple():
# TODO what is this testing?
benchmark_results = defaultdict(list)
for i, task in enumerate(reward_benchmark.tasks):
env_id = task.env_id
benchmark_results[env_id].append(_benchmark_result_helper(reward_benchmark, env_id=env_id, timestamps=[i + 2]))
scores = scoring.benchmark_aggregate_score(reward_benchmark, benchmark_results)
_assert_benchmark_score(scores, score=0.01, num_envs_solved=0, summed_training_seconds=3.0, start_to_finish_seconds=2.0)
def test_benchmark_eval_handling():
# make sure we count all episodes
benchmark_results = defaultdict(list)
for i, task in enumerate(reward_benchmark.tasks):
env_id = task.env_id
benchmark_results[env_id].append(reward_benchmark.score_evaluation(
env_id,
data_sources=[0, 1, 1],
initial_reset_timestamps=[1, 2],
episode_lengths=[1, 1, 1],
episode_rewards=[1, 2, 3],
episode_types=['e', 't', 'e'],
timestamps=[i + 2, i + 3, i + 4],
))
scores = scoring.benchmark_aggregate_score(reward_benchmark, benchmark_results)
_assert_benchmark_score(scores, score=0.02, num_envs_solved=0, summed_training_seconds=8.0, summed_task_wall_time=7.0, start_to_finish_seconds=4.0)
reward_per_time_benchmark = registration.Benchmark(
id='TestBenchmark-v0',
scorer=scoring.RewardPerTime(),
tasks=[
{'env_id': 'CartPole-v0',
'trials': 1,
'max_timesteps': 5,
},
{'env_id': 'Pendulum-v0',
'trials': 1,
'max_timesteps': 5,
},
]
)
def test_reward_per_time_benchmark_scoring():
benchmark_results = defaultdict(list)
for i, task in enumerate(reward_per_time_benchmark.tasks):
env_id = task.env_id
benchmark_results[env_id].append(_benchmark_result_helper(reward_per_time_benchmark, env_id=env_id, timestamps=[i + 2]))
scores = scoring.benchmark_aggregate_score(reward_per_time_benchmark, benchmark_results)
_assert_benchmark_score(scores, score=0.0075, num_envs_solved=0, summed_training_seconds=3.0, summed_task_wall_time=3.0, start_to_finish_seconds=2.0)
def test_reward_per_time_benchmark_empty():
scores = scoring.benchmark_aggregate_score(reward_per_time_benchmark, {})
benchmark_results = defaultdict(list)
task = reward_per_time_benchmark.tasks[0]
env_id = task.env_id
benchmark_results[env_id].append(_benchmark_result_helper(reward_per_time_benchmark, env_id=env_id, episode_lengths=[10]))
scores = scoring.benchmark_aggregate_score(reward_per_time_benchmark, benchmark_results)
_assert_benchmark_score(scores, score=0.0, num_envs_solved=0, summed_training_seconds=0.0, start_to_finish_seconds=0.0)
def test_reward_per_time_benchmark_solved():
benchmark_results = defaultdict(list)
N = 200
for i, task in enumerate(reward_per_time_benchmark.tasks):
env_id = task.env_id
benchmark_results[env_id].append(reward_per_time_benchmark.score_evaluation(
env_id,
data_sources=[0] * N,
initial_reset_timestamps=[1],
episode_lengths=[1] * N,
episode_rewards=[1000] * N,
episode_types=['t'] * N,
timestamps=list(range(N)),
))
scores = scoring.benchmark_aggregate_score(reward_per_time_benchmark, benchmark_results)
# Currently reward per time has no solved functionality, so num_envs_solved
# is 0
_assert_benchmark_score(scores, score=1.0, num_envs_solved=0)

View File

@@ -2,7 +2,6 @@ import gym
from gym import Wrapper
from gym import error, version
import os, json, logging, numpy as np, six
from gym.monitoring import stats_recorder, video_recorder
from gym.utils import atomic_write, closer
from gym.utils.json_utils import json_encode_np
@@ -384,3 +383,6 @@ def collapse_env_infos(env_infos, training_dir):
if key not in first:
raise error.Error("env_info {} from training directory {} is missing expected key {}. This is unexpected and likely indicates a bug in gym.".format(first, training_dir, key))
return first
# Put circular import at the bottom. Even better: break circular import
from gym.monitoring import stats_recorder, video_recorder