Remove scoreboard references (#750)

Greg Brockman
2017-10-15 16:10:02 -07:00
committed by GitHub
parent b5576dc23a
commit 7b5aceaa84
21 changed files with 9 additions and 2794 deletions

View File

@@ -242,10 +242,9 @@ Examples
See the ``examples`` directory.
- Run `examples/agents/random_agent.py <https://github.com/openai/gym/blob/master/examples/agents/random_agent.py>`_ to run a simple random agent and upload the results to the scoreboard.
- Run `examples/agents/cem.py <https://github.com/openai/gym/blob/master/examples/agents/cem.py>`_ to run an actual learning agent (using the cross-entropy method) and upload the results to the scoreboard.
- Run `examples/agents/random_agent.py <https://github.com/openai/gym/blob/master/examples/agents/random_agent.py>`_ to run a simple random agent (a minimal sketch of what it does appears below).
- Run `examples/agents/cem.py <https://github.com/openai/gym/blob/master/examples/agents/cem.py>`_ to run an actual learning agent (using the cross-entropy method).
- Run `examples/scripts/list_envs <https://github.com/openai/gym/blob/master/examples/scripts/list_envs>`_ to generate a list of all environments. (You can also just `browse <https://gym.openai.com/docs>`_ the list on our site.)
- Run `examples/scripts/upload <https://github.com/openai/gym/blob/master/examples/scripts/upload>`_ to upload the recorded output from ``random_agent.py`` or ``cem.py``. Make sure to obtain an `API key <https://gym.openai.com/settings/profile>`_.
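As a point of reference, here is a minimal sketch of what ``random_agent.py`` boils down to under the classic Gym API of this era; the environment id and episode count are arbitrary examples, not taken from the script:

```python
import gym

# Hypothetical minimal random agent, roughly what examples/agents/random_agent.py does.
env = gym.make('CartPole-v0')
for episode in range(3):
    observation = env.reset()
    done = False
    total_reward = 0.0
    while not done:
        action = env.action_space.sample()                  # sample a random action
        observation, reward, done, info = env.step(action)  # old 4-tuple step API
        total_reward += reward
    print('episode %d finished with total reward %.1f' % (episode, total_reward))
env.close()
```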
Testing
=======

View File

@@ -96,6 +96,3 @@ if __name__ == '__main__':
writefile('info.json', json.dumps(info))
env.close()
logger.info("Successfully ran cross-entropy method. Now trying to upload results to the scoreboard. If it breaks, you can always just try re-uploading the same results.")
gym.upload(outdir)

View File

@@ -61,8 +61,3 @@ if __name__ == '__main__':
# Close the env and write monitor result info to disk
env.close()
# Upload to the scoreboard. We could also do this from another
# process if we wanted.
logger.info("Successfully ran RandomAgent. Now trying to upload results to the scoreboard. If it breaks, you can always just try re-uploading the same results.")
gym.upload(outdir)
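With the upload call gone from both example agents, anything recorded by the Monitor wrapper simply stays on disk. Below is a hedged sketch of how those results can still be read back locally using gym.monitoring.load_results, the same helper the deleted upload path relied on; the output directory, environment id, and episode count are placeholders:

```python
import gym
from gym import monitoring, wrappers

outdir = '/tmp/random-agent-results'   # example path; force=True just overwrites a previous run
env = wrappers.Monitor(gym.make('CartPole-v0'), outdir, force=True)

for _ in range(5):
    done = False
    env.reset()
    while not done:
        _, reward, done, _ = env.step(env.action_space.sample())
env.close()

# Where gym.upload(outdir) used to go, the recorded data can simply be inspected locally.
results = monitoring.load_results(outdir)
print('episodes recorded:', len(results['episode_rewards']))
print('episode rewards:', results['episode_rewards'])
```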

View File

@@ -1,46 +0,0 @@
#!/usr/bin/env python
#
# This script assumes you have set an OPENAI_GYM_API_KEY environment
# variable. You can find your API key in the web interface:
# https://gym.openai.com/settings/profile.
import argparse
import logging
import os
import sys
import gym
# In modules, use `logger = logging.getLogger(__name__)`
logger = logging.getLogger()
class Uploader(object):
def __init__(self, training_dir, algorithm_id, benchmark_run_id, writeup):
self.training_dir = training_dir
self.algorithm_id = algorithm_id
self.benchmark_run_id = benchmark_run_id
self.writeup = writeup
def run(self):
gym.upload(self.training_dir, algorithm_id=self.algorithm_id, benchmark_run_id=self.benchmark_run_id, writeup=self.writeup)
def main():
parser = argparse.ArgumentParser(description=None)
parser.add_argument('-t', '--training-dir', required=True, help='What directory to upload.')
parser.add_argument('-a', '--algorithm_id', help='Set the algorithm id.')
parser.add_argument('-b', '--benchmark-run-id', help='Set the benchmark run id.')
parser.add_argument('-w', '--writeup', help='Writeup to attach.')
parser.add_argument('-v', '--verbose', action='count', dest='verbosity', default=0, help='Set verbosity.')
args = parser.parse_args()
if args.verbosity == 0:
logger.setLevel(logging.INFO)
elif args.verbosity >= 1:
logger.setLevel(logging.DEBUG)
runner = Uploader(training_dir=args.training_dir, algorithm_id=args.algorithm_id, benchmark_run_id=args.benchmark_run_id, writeup=args.writeup)
runner.run()
return 0
if __name__ == '__main__':
sys.exit(main())
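For context, the deleted script above is a thin argparse wrapper around gym.upload; the equivalent direct call, matching the signature documented further down in this diff, looked roughly like the sketch below, with every value an illustrative placeholder. After this commit, gym.upload raises NotImplementedError instead.

```python
import gym

# All arguments here are placeholders, not real ids or keys.
gym.upload(
    '/tmp/random-agent-results',                     # training_dir written by the Monitor wrapper
    algorithm_id='alg_example',                      # optional algorithm identifier
    writeup='https://gist.github.com/<user>/<id>',   # optional Gist writeup URL
    api_key='example-key',                           # or export OPENAI_GYM_API_KEY instead
)
```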

File diff suppressed because it is too large

View File

@@ -1,283 +1,2 @@
import logging
import json
import os
import re
import tarfile
import tempfile
from gym import benchmark_spec, error, monitoring
from gym.scoreboard.client import resource, util
import numpy as np
MAX_VIDEOS = 100
logger = logging.getLogger(__name__)
video_name_re = re.compile('^[\w.-]+\.(mp4|avi|json)$')
metadata_name_re = re.compile('^[\w.-]+\.meta\.json$')
def upload(training_dir, algorithm_id=None, writeup=None, tags=None, benchmark_id=None, api_key=None, ignore_open_monitors=False, skip_videos=False):
"""Upload the results of training (as automatically recorded by your
env's monitor) to OpenAI Gym.
Args:
training_dir (str): A directory containing the results of a training run.
algorithm_id (Optional[str]): An algorithm id indicating the particular version of the algorithm (including choices of parameters) you are running (visit https://gym.openai.com/algorithms to create an id). If the id doesn't match an existing server id it will create a new algorithm using algorithm_id as the name
benchmark_id (Optional[str]): The benchmark that these evaluations belong to. Will recursively search through training_dir for any Gym manifests. This feature is currently pre-release.
writeup (Optional[str]): A Gist URL (of the form https://gist.github.com/<user>/<id>) containing your writeup for this evaluation.
tags (Optional[dict]): A dictionary of key/values to store with the benchmark run (ignored for nonbenchmark evaluations). Must be jsonable.
api_key (Optional[str]): Your OpenAI API key. Can also be provided as an environment variable (OPENAI_GYM_API_KEY).
ignore_open_monitors (Optional[bool]): If True, skip the check for open monitors before uploading. An open monitor can indicate that data has not been completely written. Defaults to False.
skip_videos (Optional[bool]): Whether to skip videos when uploading. Can be useful when submitting a benchmark with many trials. Defaults to False.
"""
if benchmark_id:
return _upload_benchmark(
training_dir,
algorithm_id,
benchmark_id,
benchmark_run_tags=tags,
api_key=api_key,
ignore_open_monitors=ignore_open_monitors,
skip_videos=skip_videos,
)
else:
if tags is not None:
logger.warning("Tags are NOT uploaded for evaluation submissions.")
# Single evaluation upload
evaluation = _upload(
training_dir,
algorithm_id,
writeup,
benchmark_run_id=None,
api_key=api_key,
ignore_open_monitors=ignore_open_monitors,
skip_videos=skip_videos,
)
logger.info("""
****************************************************
You successfully uploaded your evaluation on %s to
OpenAI Gym! You can find it at:
%s
****************************************************
""".rstrip(), evaluation.env, evaluation.web_url())
return None
def _upload_benchmark(training_dir, algorithm_id, benchmark_id, benchmark_run_tags, api_key, ignore_open_monitors, skip_videos):
# We're uploading a benchmark run.
directories = []
env_ids = []
for name, _, files in os.walk(training_dir):
manifests = monitoring.detect_training_manifests(name, files=files)
if manifests:
env_info = monitoring.load_env_info_from_manifests(manifests, training_dir)
env_ids.append(env_info['env_id'])
directories.append(name)
# Validate against benchmark spec
try:
spec = benchmark_spec(benchmark_id)
except error.UnregisteredBenchmark:
raise error.Error("Invalid benchmark id: {}. Are you using a benchmark registered in gym/benchmarks/__init__.py?".format(benchmark_id))
spec_env_ids = [task.env_id for task in spec.tasks for _ in range(task.trials)]
if not env_ids:
raise error.Error("Could not find any evaluations in {}".format(training_dir))
# This could be more stringent about mixing evaluations
if sorted(env_ids) != sorted(spec_env_ids):
logger.info("WARNING: Evaluations do not match spec for benchmark %s. In %s, we found evaluations for %s, expected %s", benchmark_id, training_dir, sorted(env_ids), sorted(spec_env_ids))
tags = json.dumps(benchmark_run_tags)
_create_with_retries = util.retry_exponential_backoff(
resource.BenchmarkRun.create,
(error.APIConnectionError,),
max_retries=5,
interval=3,
)
benchmark_run = _create_with_retries(benchmark_id=benchmark_id, algorithm_id=algorithm_id, tags=tags)
benchmark_run_id = benchmark_run.id
# Actually do the uploads.
for training_dir in directories:
# N.B. we don't propagate algorithm_id to Evaluation if we're running as part of a benchmark
_upload_with_retries = util.retry_exponential_backoff(
_upload,
(error.APIConnectionError,),
max_retries=5,
interval=3,
)
_upload_with_retries(training_dir, None, None, benchmark_run_id, api_key, ignore_open_monitors, skip_videos)
logger.info("""
****************************************************
You successfully uploaded your benchmark on %s to
OpenAI Gym! You can find it at:
%s
****************************************************
""".rstrip(), benchmark_id, benchmark_run.web_url())
return benchmark_run_id
def _upload(training_dir, algorithm_id=None, writeup=None, benchmark_run_id=None, api_key=None, ignore_open_monitors=False, skip_videos=False):
if not ignore_open_monitors:
open_monitors = monitoring._open_monitors()
if len(open_monitors) > 0:
envs = [m.env.spec.id if m.env.spec else '(unknown)' for m in open_monitors]
raise error.Error("Still have an open monitor on {}. You must run 'env.close()' before uploading.".format(', '.join(envs)))
env_info, training_episode_batch, training_video = upload_training_data(training_dir, api_key=api_key, skip_videos=skip_videos)
env_id = env_info['env_id']
training_episode_batch_id = training_video_id = None
if training_episode_batch:
training_episode_batch_id = training_episode_batch.id
if training_video:
training_video_id = training_video.id
if logger.level <= logging.INFO:
if training_episode_batch_id is not None and training_video_id is not None:
logger.info('[%s] Creating evaluation object from %s with learning curve and training video', env_id, training_dir)
elif training_episode_batch_id is not None:
logger.info('[%s] Creating evaluation object from %s with learning curve', env_id, training_dir)
elif training_video_id is not None:
logger.info('[%s] Creating evaluation object from %s with training video', env_id, training_dir)
else:
raise error.Error("[%s] You didn't have any recorded training data in %s. Once you've used 'env = gym.wrappers.Monitor(env, directory)' to start recording, you need to actually run some rollouts. Please join the community chat on https://gym.openai.com if you have any issues."%(env_id, training_dir))
evaluation = resource.Evaluation.create(
training_episode_batch=training_episode_batch_id,
training_video=training_video_id,
env=env_info['env_id'],
algorithm={
'id': algorithm_id,
},
benchmark_run_id=benchmark_run_id,
writeup=writeup,
gym_version=env_info['gym_version'],
api_key=api_key,
)
return evaluation
def upload_training_data(training_dir, api_key=None, skip_videos=False):
# Could have multiple manifests
results = monitoring.load_results(training_dir)
if not results:
raise error.Error('''Could not find any manifest files in {}.
(HINT: this usually means you did not yet close() your env.monitor and have not yet exited the process. You should call 'env.monitor.start(training_dir)' at the start of training and 'env.close()' at the end, or exit the process.)'''.format(training_dir))
manifests = results['manifests']
env_info = results['env_info']
data_sources = results['data_sources']
timestamps = results['timestamps']
episode_lengths = results['episode_lengths']
episode_rewards = results['episode_rewards']
episode_types = results['episode_types']
initial_reset_timestamps = results['initial_reset_timestamps']
videos = results['videos'] if not skip_videos else []
env_id = env_info['env_id']
logger.debug('[%s] Uploading data from manifest %s', env_id, ', '.join(manifests))
# Do the relevant uploads
if len(episode_lengths) > 0:
training_episode_batch = upload_training_episode_batch(data_sources, episode_lengths, episode_rewards, episode_types, initial_reset_timestamps, timestamps, api_key, env_id=env_id)
else:
training_episode_batch = None
if len(videos) > MAX_VIDEOS:
logger.warning('[%s] You recorded videos for %s episodes, but the scoreboard only supports up to %s. We will automatically subsample for you, but you also might wish to adjust your video recording rate.', env_id, len(videos), MAX_VIDEOS)
subsample_inds = np.linspace(0, len(videos)-1, MAX_VIDEOS).astype('int') #pylint: disable=E1101
videos = [videos[i] for i in subsample_inds]
if len(videos) > 0:
training_video = upload_training_video(videos, api_key, env_id=env_id)
else:
training_video = None
return env_info, training_episode_batch, training_video
def upload_training_episode_batch(data_sources, episode_lengths, episode_rewards, episode_types, initial_reset_timestamps, timestamps, api_key=None, env_id=None):
logger.info('[%s] Uploading %d episodes of training data', env_id, len(episode_lengths))
file_upload = resource.FileUpload.create(purpose='episode_batch', api_key=api_key)
file_upload.put({
'data_sources': data_sources,
'episode_lengths': episode_lengths,
'episode_rewards': episode_rewards,
'episode_types': episode_types,
'initial_reset_timestamps': initial_reset_timestamps,
'timestamps': timestamps,
})
return file_upload
def upload_training_video(videos, api_key=None, env_id=None):
"""videos: should be list of (video_path, metadata_path) tuples"""
with tempfile.TemporaryFile() as archive_file:
write_archive(videos, archive_file, env_id=env_id)
archive_file.seek(0)
logger.info('[%s] Uploading videos of %d training episodes (%d bytes)', env_id, len(videos), util.file_size(archive_file))
file_upload = resource.FileUpload.create(purpose='video', content_type='application/vnd.openai.video+x-compressed', api_key=api_key)
file_upload.put(archive_file, encode=None)
return file_upload
def write_archive(videos, archive_file, env_id=None):
if len(videos) > MAX_VIDEOS:
raise error.Error('[{}] Trying to upload {} videos, but there is a limit of {} currently. If you actually want to upload this many videos, please email gym@openai.com with your use-case.'.format(env_id, len(videos), MAX_VIDEOS))
logger.debug('[%s] Preparing an archive of %d videos: %s', env_id, len(videos), videos)
# Double check that there are no collisions
basenames = set()
manifest = {
'version': 0,
'videos': []
}
with tarfile.open(fileobj=archive_file, mode='w:gz') as tar:
for video_path, metadata_path in videos:
video_name = os.path.basename(video_path)
metadata_name = os.path.basename(metadata_path)
if not os.path.exists(video_path):
raise error.Error('[{}] No such video file {}. (HINT: Your video recorder may have broken midway through the run. You can check this with `video_recorder.functional`.)'.format(env_id, video_path))
elif not os.path.exists(metadata_path):
raise error.Error('[{}] No such metadata file {}. (HINT: this should be automatically created when using a VideoRecorder instance.)'.format(env_id, metadata_path))
# Do some sanity checking
if video_name in basenames:
raise error.Error('[{}] Duplicated video name {} in video list: {}'.format(env_id, video_name, videos))
elif metadata_name in basenames:
raise error.Error('[{}] Duplicated metadata file name {} in video list: {}'.format(env_id, metadata_name, videos))
elif not video_name_re.search(video_name):
raise error.Error('[{}] Invalid video name {} (must match {})'.format(env_id, video_name, video_name_re.pattern))
elif not metadata_name_re.search(metadata_name):
raise error.Error('[{}] Invalid metadata file name {} (must match {})'.format(env_id, metadata_name, metadata_name_re.pattern))
# Record that we've seen these names; add to manifest
basenames.add(video_name)
basenames.add(metadata_name)
manifest['videos'].append((video_name, metadata_name))
# Import the files into the archive
tar.add(video_path, arcname=video_name, recursive=False)
tar.add(metadata_path, arcname=metadata_name, recursive=False)
f = tempfile.NamedTemporaryFile(mode='w+', delete=False)
try:
json.dump(manifest, f)
f.close()
tar.add(f.name, arcname='manifest.json')
finally:
f.close()
os.remove(f.name)
def upload(*args, **kwargs):
raise NotImplementedError('The Gym website has been end-of-lifed. This library is the focus of the project. See https://github.com/openai/gym/issues/718#issuecomment-329661594 for details.')
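One detail in the deleted upload_training_data above is worth spelling out: when more than MAX_VIDEOS videos were recorded, it kept an evenly spaced subset chosen with np.linspace. A small self-contained worked example of that subsampling:

```python
import numpy as np

MAX_VIDEOS = 100
videos = ['video_%03d.mp4' % i for i in range(250)]   # pretend 250 videos were recorded

# Evenly spaced indices over the full range, always keeping the first and last video.
subsample_inds = np.linspace(0, len(videos) - 1, MAX_VIDEOS).astype('int')
subsampled = [videos[i] for i in subsample_inds]

print(len(subsampled))                  # 100
print(subsampled[0], subsampled[-1])    # video_000.mp4 video_249.mp4
```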

View File

@@ -1,4 +0,0 @@
# Client
This client was forked from the [Stripe Python](https://github.com/stripe/stripe-python) bindings.

View File

@@ -1,6 +0,0 @@
import logging
import os
from gym import error
logger = logging.getLogger(__name__)

View File

@@ -1,159 +0,0 @@
import json
import platform
import six.moves.urllib.parse as urlparse
from six import iteritems
from gym import error, version
import gym.scoreboard.client
from gym.scoreboard.client import http_client
verify_ssl_certs = True # [SECURITY CRITICAL] only turn this off while debugging
http_client = http_client.RequestsClient(verify_ssl_certs=verify_ssl_certs)
def _build_api_url(url, query):
scheme, netloc, path, base_query, fragment = urlparse.urlsplit(url)
if base_query:
query = '%s&%s' % (base_query, query)
return urlparse.urlunsplit((scheme, netloc, path, query, fragment))
def _strip_nulls(params):
if isinstance(params, dict):
stripped = {}
for key, value in iteritems(params):
value = _strip_nulls(value)
if value is not None:
stripped[key] = value
return stripped
else:
return params
class APIRequestor(object):
def __init__(self, key=None, api_base=None):
self.api_base = api_base or gym.scoreboard.api_base
self.api_key = key
self._client = http_client
def request(self, method, url, params=None, headers=None):
rbody, rcode, rheaders, my_api_key = self.request_raw(
method.lower(), url, params, headers)
resp = self.interpret_response(rbody, rcode, rheaders)
return resp, my_api_key
def handle_api_error(self, rbody, rcode, resp, rheaders):
# Rate limits were previously coded as 400's with code 'rate_limit'
if rcode == 429:
raise error.RateLimitError(
resp.get('detail'), rbody, rcode, resp, rheaders)
elif rcode in [400, 404]:
type = resp.get('type')
if type == 'about:blank':
type = None
raise error.InvalidRequestError(
resp.get('detail'), type,
rbody, rcode, resp, rheaders)
elif rcode == 401:
raise error.AuthenticationError(
resp.get('detail'), rbody, rcode, resp,
rheaders)
else:
detail = resp.get('detail')
# This information will only be returned to developers of
# the OpenAI Gym Scoreboard.
dev_info = resp.get('dev_info')
if dev_info:
detail = "{}\n\n<dev_info>\n{}\n</dev_info>".format(detail, dev_info['traceback'])
raise error.APIError(detail, rbody, rcode, resp,
rheaders)
def request_raw(self, method, url, params=None, supplied_headers=None):
"""
Mechanism for issuing an API call
"""
if self.api_key:
my_api_key = self.api_key
else:
my_api_key = gym.scoreboard.api_key
if my_api_key is None:
raise error.AuthenticationError("""You must provide an OpenAI Gym API key.
(HINT: Set your API key using "gym.scoreboard.api_key = .." or "export OPENAI_GYM_API_KEY=..."). You can find your API key in the OpenAI Gym web interface: https://gym.openai.com/settings/profile.""")
abs_url = '%s%s' % (self.api_base, url)
if params:
encoded_params = json.dumps(_strip_nulls(params))
else:
encoded_params = None
if method == 'get' or method == 'delete':
if params:
abs_url = _build_api_url(abs_url, encoded_params)
post_data = None
elif method == 'post':
post_data = encoded_params
else:
raise error.APIConnectionError(
'Unrecognized HTTP method %r. This may indicate a bug in the '
'OpenAI Gym bindings. Please contact gym@openai.com for '
'assistance.' % (method,))
ua = {
'bindings_version': version.VERSION,
'lang': 'python',
'publisher': 'openai',
'httplib': self._client.name,
}
for attr, func in [['lang_version', platform.python_version],
['platform', platform.platform]]:
try:
val = func()
except Exception as e:
val = "!! %s" % (e,)
ua[attr] = val
headers = {
'Openai-Gym-User-Agent': json.dumps(ua),
'User-Agent': 'Openai-Gym/v1 PythonBindings/%s' % (version.VERSION,),
'Authorization': 'Bearer %s' % (my_api_key,)
}
if method == 'post':
headers['Content-Type'] = 'application/json'
if supplied_headers is not None:
for key, value in supplied_headers.items():
headers[key] = value
rbody, rcode, rheaders = self._client.request(
method, abs_url, headers, post_data)
return rbody, rcode, rheaders, my_api_key
def interpret_response(self, rbody, rcode, rheaders):
content_type = rheaders.get('Content-Type', '')
if content_type.startswith('text/plain'):
# Pass through plain text
resp = rbody
if not (200 <= rcode < 300):
self.handle_api_error(rbody, rcode, {}, rheaders)
else:
# TODO: Be strict about other Content-Types
try:
if hasattr(rbody, 'decode'):
rbody = rbody.decode('utf-8')
resp = json.loads(rbody)
except Exception:
raise error.APIError(
"Invalid response body from API: %s "
"(HTTP response code was %d)" % (rbody, rcode),
rbody, rcode, rheaders)
if not (200 <= rcode < 300):
self.handle_api_error(rbody, rcode, resp, rheaders)
return resp
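The two module-level helpers at the top of this deleted requestor are small enough to demonstrate on their own. Here is a sketch of how GET parameters were folded into the URL (None values stripped, the remainder JSON-encoded into the query string); the hostname is a made-up placeholder and the imports use the Python 3 stdlib rather than six:

```python
import json
from urllib.parse import urlsplit, urlunsplit

def strip_nulls(params):
    """Recursively drop None values, mirroring the deleted _strip_nulls."""
    if isinstance(params, dict):
        stripped = {}
        for key, value in params.items():
            value = strip_nulls(value)
            if value is not None:
                stripped[key] = value
        return stripped
    return params

def build_api_url(url, query):
    """Append an already-encoded query string, preserving any existing query."""
    scheme, netloc, path, base_query, fragment = urlsplit(url)
    if base_query:
        query = '%s&%s' % (base_query, query)
    return urlunsplit((scheme, netloc, path, query, fragment))

params = {'algorithm_id': 'alg_example', 'writeup': None}
encoded = json.dumps(strip_nulls(params))
print(build_api_url('https://api.example.com/v1/evaluations', encoded))
# https://api.example.com/v1/evaluations?{"algorithm_id": "alg_example"}
```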

View File

@@ -1,94 +0,0 @@
import logging
import requests
import textwrap
import six
from gym import error
from gym.scoreboard.client import util
logger = logging.getLogger(__name__)
warned = False
def render_post_data(post_data):
if hasattr(post_data, 'fileno'): # todo: is this the right way of checking if it's a file?
return '%r (%d bytes)' % (post_data, util.file_size(post_data))
elif isinstance(post_data, (six.string_types, six.binary_type)):
return '%r (%d bytes)' % (post_data, len(post_data))
else:
return None
class RequestsClient(object):
name = 'requests'
def __init__(self, verify_ssl_certs=True):
self._verify_ssl_certs = verify_ssl_certs
self.session = requests.Session()
def request(self, method, url, headers, post_data=None, files=None):
global warned
kwargs = {}
# Really, really only turn this off while debugging.
if not self._verify_ssl_certs:
if not warned:
logger.warn('You have disabled SSL cert verification in OpenAI Gym, so we will not verify SSL certs. This means an attacker with control of your network could snoop on or modify your data in transit.')
warned = True
kwargs['verify'] = False
try:
try:
result = self.session.request(method,
url,
headers=headers,
data=post_data,
timeout=200,
files=files,
**kwargs)
except TypeError as e:
raise TypeError(
'Warning: It looks like your installed version of the '
'"requests" library is not compatible with OpenAI Gym\'s'
'usage thereof. (HINT: The most likely cause is that '
'your "requests" library is out of date. You can fix '
'that by running "pip install -U requests".) The '
'underlying error was: %s' % (e,))
# This causes the content to actually be read, which could cause
# e.g. a socket timeout. TODO: The other fetch methods probably
# are susceptible to the same and should be updated.
content = result.content
status_code = result.status_code
except Exception as e:
# Would catch just requests.exceptions.RequestException, but can
# also raise ValueError, RuntimeError, etc.
self._handle_request_error(e, method, url)
if logger.level <= logging.DEBUG:
logger.debug(
"""API request to %s returned (response code, response body) of
(%d, %r)
Request body was: %s""", url, status_code, content, render_post_data(post_data))
elif logger.level <= logging.INFO:
logger.info('HTTP request: %s %s %d', method.upper(), url, status_code)
return content, status_code, result.headers
def _handle_request_error(self, e, method, url):
if isinstance(e, requests.exceptions.RequestException):
msg = ("Unexpected error communicating with OpenAI Gym "
"(while calling {} {}). "
"If this problem persists, let us know at "
"gym@openai.com.".format(method, url))
err = "%s: %s" % (type(e).__name__, str(e))
else:
msg = ("Unexpected error communicating with OpenAI Gym. "
"It looks like there's probably a configuration "
"issue locally. If this problem persists, let us "
"know at gym@openai.com.")
err = "A %s was raised" % (type(e).__name__,)
if str(e):
err += " with error message %s" % (str(e),)
else:
err += " with no error message"
msg = textwrap.fill(msg, width=140) + "\n\n(Network error: %s)" % (err,)
raise error.APIConnectionError(msg)
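Stripped of the logging and version checks, the deleted client boils down to a familiar pattern: one shared requests.Session, a hard timeout, and every transport failure normalized into a single error type. A minimal stand-in sketch of that pattern (not the original class; the URL below is just a public echo endpoint used for illustration):

```python
import requests

class SimpleHTTPClient(object):
    """Minimal stand-in for the pattern the deleted RequestsClient implements."""

    def __init__(self, timeout=200):
        self.session = requests.Session()   # reuse one session/connection pool
        self.timeout = timeout

    def request(self, method, url, headers=None, post_data=None):
        try:
            result = self.session.request(method, url, headers=headers,
                                           data=post_data, timeout=self.timeout)
        except requests.exceptions.RequestException as e:
            # Collapse all transport-level failures into one exception type.
            raise RuntimeError('Unexpected error communicating with the API: %s' % e)
        return result.content, result.status_code, result.headers

client = SimpleHTTPClient()
body, status_code, headers = client.request('get', 'https://httpbin.org/get')
print(status_code)
```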

View File

@@ -1,395 +0,0 @@
import json
import warnings
import sys
from six import string_types
from six import iteritems
import six.moves.urllib as urllib
import gym
from gym import error
from gym.scoreboard.client import api_requestor, util
def convert_to_gym_object(resp, api_key):
types = {
'evaluation': Evaluation,
'file': FileUpload,
'benchmark_run': BenchmarkRun,
}
if isinstance(resp, list):
return [convert_to_gym_object(i, api_key) for i in resp]
elif isinstance(resp, dict) and not isinstance(resp, GymObject):
resp = resp.copy()
klass_name = resp.get('object')
if isinstance(klass_name, string_types):
klass = types.get(klass_name, GymObject)
else:
klass = GymObject
return klass.construct_from(resp, api_key)
else:
return resp
def populate_headers(idempotency_key):
if idempotency_key is not None:
return {"Idempotency-Key": idempotency_key}
return None
def _compute_diff(current, previous):
if isinstance(current, dict):
previous = previous or {}
diff = current.copy()
for key in set(previous.keys()) - set(diff.keys()):
diff[key] = ""
return diff
return current if current is not None else ""
class GymObject(dict):
def __init__(self, id=None, api_key=None, **params):
super(GymObject, self).__init__()
self._unsaved_values = set()
self._transient_values = set()
self._retrieve_params = params
self._previous = None
object.__setattr__(self, 'api_key', api_key)
if id:
self['id'] = id
def update(self, update_dict):
for k in update_dict:
self._unsaved_values.add(k)
return super(GymObject, self).update(update_dict)
def __setattr__(self, k, v):
if k[0] == '_' or k in self.__dict__:
return super(GymObject, self).__setattr__(k, v)
else:
self[k] = v
def __getattr__(self, k):
if k[0] == '_':
raise AttributeError(k)
try:
return self[k]
except KeyError as err:
raise AttributeError(*err.args)
def __delattr__(self, k):
if k[0] == '_' or k in self.__dict__:
return super(GymObject, self).__delattr__(k)
else:
del self[k]
def __setitem__(self, k, v):
if v == "":
raise ValueError(
"You cannot set %s to an empty string. "
"We interpret empty strings as None in requests."
"You may set %s.%s = None to delete the property" % (
k, str(self), k))
super(GymObject, self).__setitem__(k, v)
# Allows for unpickling in Python 3.x
if not hasattr(self, '_unsaved_values'):
self._unsaved_values = set()
self._unsaved_values.add(k)
def __getitem__(self, k):
try:
return super(GymObject, self).__getitem__(k)
except KeyError as err:
if k in self._transient_values:
raise KeyError(
"%r. HINT: The %r attribute was set in the past."
"It was then wiped when refreshing the object with "
"the result returned by Rl_Gym's API, probably as a "
"result of a save(). The attributes currently "
"available on this object are: %s" %
(k, k, ', '.join(self.keys())))
else:
raise err
def __delitem__(self, k):
super(GymObject, self).__delitem__(k)
# Allows for unpickling in Python 3.x
if hasattr(self, '_unsaved_values'):
self._unsaved_values.remove(k)
@classmethod
def construct_from(cls, values, key):
instance = cls(values.get('id'), api_key=key)
instance.refresh_from(values, api_key=key)
return instance
def refresh_from(self, values, api_key=None, partial=False):
self.api_key = api_key or getattr(values, 'api_key', None)
# Wipe old state before setting new. This is useful for e.g.
# updating a customer, where there is no persistent card
# parameter. Mark those values which don't persist as transient
if partial:
self._unsaved_values = (self._unsaved_values - set(values))
else:
removed = set(self.keys()) - set(values)
self._transient_values = self._transient_values | removed
self._unsaved_values = set()
self.clear()
self._transient_values = self._transient_values - set(values)
for k, v in iteritems(values):
super(GymObject, self).__setitem__(
k, convert_to_gym_object(v, api_key))
self._previous = values
@classmethod
def api_base(cls):
return None
def request(self, method, url, params=None, headers=None):
if params is None:
params = self._retrieve_params
requestor = api_requestor.APIRequestor(
key=self.api_key, api_base=self.api_base())
response, api_key = requestor.request(method, url, params, headers)
return convert_to_gym_object(response, api_key)
def __repr__(self):
ident_parts = [type(self).__name__]
if isinstance(self.get('object'), string_types):
ident_parts.append(self.get('object'))
if isinstance(self.get('id'), string_types):
ident_parts.append('id=%s' % (self.get('id'),))
unicode_repr = '<%s at %s> JSON: %s' % (
' '.join(ident_parts), hex(id(self)), str(self))
if sys.version_info[0] < 3:
return unicode_repr.encode('utf-8')
else:
return unicode_repr
def __str__(self):
return json.dumps(self, sort_keys=True, indent=2)
def to_dict(self):
warnings.warn(
'The `to_dict` method is deprecated and will be removed in '
'version 2.0 of the Rl_Gym bindings. The GymObject is '
'itself now a subclass of `dict`.',
DeprecationWarning)
return dict(self)
@property
def gym_id(self):
return self.id
def serialize(self, previous):
params = {}
unsaved_keys = self._unsaved_values or set()
previous = previous or self._previous or {}
for k, v in self.items():
if k == 'id' or (isinstance(k, str) and k.startswith('_')):
continue
elif isinstance(v, APIResource):
continue
elif hasattr(v, 'serialize'):
params[k] = v.serialize(previous.get(k, None))
elif k in unsaved_keys:
params[k] = _compute_diff(v, previous.get(k, None))
return params
class APIResource(GymObject):
@classmethod
def retrieve(cls, id, api_key=None, **params):
instance = cls(id, api_key, **params)
instance.refresh()
return instance
def refresh(self):
self.refresh_from(self.request('get', self.instance_path()))
return self
@classmethod
def class_name(cls):
if cls == APIResource:
raise NotImplementedError(
'APIResource is an abstract class. You should perform '
'actions on its subclasses')
return str(urllib.parse.quote_plus(cls.__name__.lower()))
@classmethod
def class_path(cls):
cls_name = cls.class_name()
return "/v1/%ss" % (cls_name,)
def instance_path(self):
id = self.get('id')
if not id:
raise error.InvalidRequestError(
'Could not determine which URL to request: %s instance '
'has invalid ID: %r' % (type(self).__name__, id), 'id')
id = util.utf8(id)
base = self.class_path()
extn = urllib.parse.quote_plus(id)
return "%s/%s" % (base, extn)
class ListObject(GymObject):
def list(self, **params):
return self.request('get', self['url'], params)
def all(self, **params):
warnings.warn("The `all` method is deprecated and will"
"be removed in future versions. Please use the "
"`list` method instead",
DeprecationWarning)
return self.list(**params)
def auto_paging_iter(self):
page = self
params = dict(self._retrieve_params)
while True:
item_id = None
for item in page:
item_id = item.get('id', None)
yield item
if not getattr(page, 'has_more', False) or item_id is None:
return
params['starting_after'] = item_id
page = self.list(**params)
def create(self, idempotency_key=None, **params):
headers = populate_headers(idempotency_key)
return self.request('post', self['url'], params, headers)
def retrieve(self, id, **params):
base = self.get('url')
id = util.utf8(id)
extn = urllib.parse.quote_plus(id)
url = "%s/%s" % (base, extn)
return self.request('get', url, params)
def __iter__(self):
return getattr(self, 'data', []).__iter__()
# Classes of API operations
class ListableAPIResource(APIResource):
@classmethod
def all(cls, *args, **params):
warnings.warn("The `all` class method is deprecated and will"
"be removed in future versions. Please use the "
"`list` class method instead",
DeprecationWarning)
return cls.list(*args, **params)
@classmethod
def auto_paging_iter(self, *args, **params):
return self.list(*args, **params).auto_paging_iter()
@classmethod
def list(cls, api_key=None, idempotency_key=None, **params):
requestor = api_requestor.APIRequestor(api_key)
url = cls.class_path()
response, api_key = requestor.request('get', url, params)
return convert_to_gym_object(response, api_key)
class CreateableAPIResource(APIResource):
@classmethod
def create(cls, api_key=None, idempotency_key=None, **params):
requestor = api_requestor.APIRequestor(api_key)
url = cls.class_path()
headers = populate_headers(idempotency_key)
response, api_key = requestor.request('post', url, params, headers)
return convert_to_gym_object(response, api_key)
class UpdateableAPIResource(APIResource):
def save(self, idempotency_key=None):
updated_params = self.serialize(None)
headers = populate_headers(idempotency_key)
if updated_params:
self.refresh_from(self.request('post', self.instance_path(),
updated_params, headers))
else:
util.logger.debug("Trying to save already saved object %r", self)
return self
class DeletableAPIResource(APIResource):
def delete(self, **params):
self.refresh_from(self.request('delete', self.instance_path(), params))
return self
## Our resources
class FileUpload(ListableAPIResource):
@classmethod
def class_name(cls):
return 'file'
@classmethod
def create(cls, api_key=None, **params):
requestor = api_requestor.APIRequestor(
api_key, api_base=cls.api_base())
url = cls.class_path()
response, api_key = requestor.request(
'post', url, params=params)
return convert_to_gym_object(response, api_key)
def put(self, contents, encode='json'):
supplied_headers = {
"Content-Type": self.content_type
}
if encode == 'json':
contents = json.dumps(contents)
elif encode is None:
pass
else:
raise error.Error('Encode request for put must be "json" or None, not {}'.format(encode))
files = {'file': contents}
body, code, headers = api_requestor.http_client.request(
'post', self.post_url, post_data=self.post_fields, files=files, headers={})
if code != 204:
raise error.Error("Upload to S3 failed. If error persists, please contact us at gym@openai.com this message. S3 returned '{} -- {}'. Tried 'POST {}' with fields {}.".format(code, body, self.post_url, self.post_fields))
class Evaluation(CreateableAPIResource):
def web_url(self):
return "%s/evaluations/%s" % (gym.scoreboard.web_base, self.get('id'))
class Algorithm(CreateableAPIResource):
pass
class BenchmarkRun(CreateableAPIResource, UpdateableAPIResource):
@classmethod
def class_name(cls):
return 'benchmark_run'
def web_url(self):
return "%s/benchmark_runs/%s" % (gym.scoreboard.web_base, self.get('id'))
def commit(self):
return self.request('post', '{}/commit'.format(self.instance_path()))
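The resource hierarchy above follows the Stripe-bindings pattern it was forked from: a dict subclass that also exposes keys as attributes and keeps track of which keys changed. A stripped-down sketch of just that core idea (not the full deleted GymObject; the values are illustrative):

```python
class AttrDict(dict):
    """dict subclass exposing keys as attributes and tracking keys set since construction."""

    def __init__(self, **values):
        super(AttrDict, self).__init__(**values)
        object.__setattr__(self, '_unsaved', set())

    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)

    def __setattr__(self, name, value):
        if name.startswith('_'):
            object.__setattr__(self, name, value)
        else:
            self[name] = value
            self._unsaved.add(name)

evaluation = AttrDict(id='eval_example', env='CartPole-v0')
evaluation.writeup = 'https://gist.github.com/<user>/<id>'
print(evaluation['env'], evaluation.writeup)   # CartPole-v0 https://gist.github.com/<user>/<id>
print(evaluation._unsaved)                     # {'writeup'}
```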

View File

@@ -1,32 +0,0 @@
import mock
import unittest
import uuid
def fake_id(prefix):
entropy = ''.join([a for a in str(uuid.uuid4()) if a.isalnum()])
return '{}_{}'.format(prefix, entropy)
class APITestCase(unittest.TestCase):
def setUp(self):
super(APITestCase, self).setUp()
self.requestor_patcher = mock.patch('gym.scoreboard.client.api_requestor.APIRequestor')
requestor_class_mock = self.requestor_patcher.start()
self.requestor_mock = requestor_class_mock.return_value
def mock_response(self, res):
self.requestor_mock.request = mock.Mock(return_value=(res, 'reskey'))
class TestData(object):
@classmethod
def file_upload_response(cls):
return {
'id': fake_id('file'),
'object': 'file',
}
@classmethod
def evaluation_response(cls):
return {
'id': fake_id('file'),
'object': 'evaluation',
}

View File

@@ -1,16 +0,0 @@
from gym.scoreboard.client.tests import helper
from gym import scoreboard
class EvaluationTest(helper.APITestCase):
def test_create_evaluation(self):
self.mock_response(helper.TestData.evaluation_response())
evaluation = scoreboard.Evaluation.create()
assert isinstance(evaluation, scoreboard.Evaluation)
self.requestor_mock.request.assert_called_with(
'post',
'/v1/evaluations',
{},
None
)

View File

@@ -1,15 +0,0 @@
from gym.scoreboard.client.tests import helper
from gym import scoreboard
class FileUploadTest(helper.APITestCase):
def test_create_file_upload(self):
self.mock_response(helper.TestData.file_upload_response())
file_upload = scoreboard.FileUpload.create()
assert isinstance(file_upload, scoreboard.FileUpload), 'File upload is: {!r}'.format(file_upload)
self.requestor_mock.request.assert_called_with(
'post',
'/v1/files',
params={},
)

View File

@@ -1,45 +0,0 @@
import functools
import logging
import os
import random
import sys
import time
from gym import error
logger = logging.getLogger(__name__)
def utf8(value):
if sys.version_info < (3, 0) and isinstance(value, unicode):  # check the version first so 'unicode' is never evaluated on Python 3
return value.encode('utf-8')
else:
return value
def file_size(f):
return os.fstat(f.fileno()).st_size
def retry_exponential_backoff(f, errors, max_retries=5, interval=1):
@functools.wraps(f)
def wrapped(*args, **kwargs):
num_retries = 0
caught_errors = []
while True:
try:
result = f(*args, **kwargs)
except errors as e:
logger.error("Caught error in %s: %s" % (f.__name__, e))
caught_errors.append(e)
if num_retries < max_retries:
backoff = random.randint(1, 2 ** num_retries) * interval
logger.error("Retrying in %.1fs..." % backoff)
time.sleep(backoff)
num_retries += 1
else:
msg = "Exceeded allowed retries. Here are the individual error messages:\n\n"
msg += "\n\n".join("%s: %s" % (type(e).__name__, str(e)) for e in caught_errors)
raise error.RetriesExceededError(msg)
else:
break
return result
return wrapped
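Before this commit, the retry helper above (it lives in gym.scoreboard.client.util) wrapped the scoreboard API calls. A hedged usage sketch: the flaky function and error type below are made up for illustration, and the import only resolves on a checkout that still contains the scoreboard package:

```python
import random

from gym.scoreboard.client.util import retry_exponential_backoff  # pre-#750 checkouts only

class TransientError(Exception):
    """Stand-in for error.APIConnectionError in the deleted upload path."""

def create_benchmark_run():
    # Fails most of the time so the retry-with-backoff path actually gets exercised.
    if random.random() < 0.7:
        raise TransientError('temporary failure')
    return 'benchmark_run_created'

create_with_retries = retry_exponential_backoff(
    create_benchmark_run, (TransientError,), max_retries=5, interval=3)

print(create_with_retries())  # retries with randomized exponential backoff, then returns or raises
```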

View File

@@ -1,60 +0,0 @@
import collections
import gym.envs
import logging
logger = logging.getLogger(__name__)
class RegistrationError(Exception):
pass
class Registry(object):
def __init__(self):
self.groups = collections.OrderedDict()
self.envs = collections.OrderedDict()
self.benchmarks = collections.OrderedDict()
def env(self, id):
return self.envs[id]
def add_group(self, id, name, description, universe=False):
self.groups[id] = {
'id': id,
'name': name,
'description': description,
'envs': [],
'universe': universe,
}
def add_task(self, id, group, summary=None, description=None, background=None, deprecated=False, experimental=False, contributor=None):
self.envs[id] = {
'group': group,
'id': id,
'summary': summary,
'description': description,
'background': background,
'deprecated': deprecated,
'experimental': experimental,
'contributor': contributor,
}
if not deprecated:
self.groups[group]['envs'].append(id)
def add_benchmark(self, id, name, description, unavailable):
self.benchmarks[id] = {
'id': id,
'name': name,
'description': description,
'unavailable': unavailable,
}
def finalize(self, strict=False):
# We used to check whether the scoreboard and environment ID
# registries matched here. However, we now support various
# registrations living in various repos, so this is less
# important.
pass
registry = Registry()
add_group = registry.add_group
add_task = registry.add_task
add_benchmark = registry.add_benchmark
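The registry above was filled in declaratively elsewhere in the scoreboard package (the test in the next file imports it as gym.scoreboard.registration). A hedged sketch of how the module-level helpers are called; the group and task below are invented for illustration and are not real registrations:

```python
# Only works on a pre-#750 checkout, since this commit deletes the module.
from gym.scoreboard.registration import add_group, add_task, registry

add_group(
    id='example_group',
    name='Example group',
    description='A made-up group to show the registration API.',
)
add_task(
    id='ExampleEnv-v0',
    group='example_group',
    summary='A made-up task entry for illustration.',
)

print(registry.env('ExampleEnv-v0')['group'])     # example_group
print(registry.groups['example_group']['envs'])   # ['ExampleEnv-v0']
```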

View File

@@ -1,7 +0,0 @@
from gym.scoreboard import registration
def test_correct_registration():
try:
registration.registry.finalize(strict=True)
except registration.RegistrationError as e:
assert False, "Caught: {}".format(e)

View File

@@ -1,442 +0,0 @@
import numpy as np
from collections import defaultdict
from gym.benchmarks import registration, scoring
import gym
gym.undo_logger_setup()
benchmark = registration.Benchmark(
id='TestBenchmark-v0',
scorer=scoring.ClipTo01ThenAverage(),
tasks=[
{'env_id': 'CartPole-v0',
'trials': 1,
'max_timesteps': 100,
},
{'env_id': 'Pendulum-v0',
'trials': 1,
'max_timesteps': 100,
},
]
)
def _is_close(x, target):
return np.all(np.isclose(x, target))
def _eq_list_of_arrays(x, y):
return np.all([len(a) == len(b) and np.all(a == b) for a, b in zip(x, y)])
def _assert_evaluation_result(result, score=None, solves=None, rewards=None, lengths=None, timestamps=None):
debug_str = "score_evaluation={}".format(result)
if score is not None:
assert _is_close(result['scores'], score), debug_str
if solves is not None:
assert _eq_list_of_arrays(result['solves'], solves), debug_str
if rewards is not None:
assert _eq_list_of_arrays(result['rewards'], rewards), debug_str
if lengths is not None:
assert _eq_list_of_arrays(result['lengths'], lengths), debug_str
def _assert_benchmark_result(result, score=None, solves=None, summed_training_seconds=None, start_to_finish_seconds=None):
debug_str = "benchmark_result={}".format(result)
if score is not None:
assert _is_close(result['scores'], score), debug_str
if solves is not None:
assert np.all(result['solves']) == solves, debug_str
def _assert_benchmark_score(scores, score=None, num_envs_solved=None, summed_training_seconds=None, summed_task_wall_time=None, start_to_finish_seconds=None):
debug_str = "scores={} score={} num_envs_solved={} summed_training_seconds={} summed_wall_task_time={} start_to_finish_seconds={}".format(scores, score, num_envs_solved, summed_training_seconds, summed_task_wall_time, start_to_finish_seconds)
if score is not None:
assert _is_close(scores['score'], score), debug_str
if num_envs_solved is not None:
assert scores['num_envs_solved'] == num_envs_solved, debug_str
if summed_training_seconds is not None:
assert _is_close(scores['summed_training_seconds'], summed_training_seconds), debug_str
if summed_task_wall_time is not None:
assert _is_close(scores['summed_task_wall_time'], summed_task_wall_time), debug_str
if start_to_finish_seconds is not None:
assert _is_close(scores['start_to_finish_seconds'], start_to_finish_seconds), debug_str
def _benchmark_result_helper(benchmark, **kwargs):
for k, defval in dict(
env_id='CartPole-v0',
data_sources=[0],
initial_reset_timestamps=[1],
episode_lengths=[1],
episode_rewards=[1],
episode_types=['t'],
timestamps=[2]).items():
kwargs.setdefault(k, defval)
return benchmark.score_evaluation(**kwargs)
def test_clip_average_evaluation_scoring():
benchmark = registration.Benchmark(
id='TestBenchmark-v0',
scorer=scoring.ClipTo01ThenAverage(num_episodes=1),
tasks=[
{'env_id': 'CartPole-v0',
'trials': 1,
'max_timesteps': 5,
},
]
)
# simple scoring
benchmark_result = _benchmark_result_helper(benchmark)
_assert_benchmark_result(benchmark_result, score=0.01)
# test a successful run
benchmark_result = _benchmark_result_helper(benchmark, episode_rewards=[100, 100], episode_lengths=[1, 1])
_assert_benchmark_result(benchmark_result, score=1.0, solves=True)
def test_clip_average_evaluation_not_enough_rewards():
benchmark = registration.Benchmark(
id='TestBenchmark-v0',
scorer=scoring.ClipTo01ThenAverage(num_episodes=2),
tasks=[
{'env_id': 'CartPole-v0',
'trials': 1,
'max_timesteps': 5,
},
]
)
# simple scoring
benchmark_result = _benchmark_result_helper(benchmark)
_assert_evaluation_result(
benchmark_result,
score=0.005,
rewards=[np.array([1, 0])],
lengths=[np.array([1, 0])],
)
def test_clip_average_max_timesteps():
benchmark = registration.Benchmark(
id='TestBenchmark-v0',
scorer=scoring.ClipTo01ThenAverage(num_episodes=2),
tasks=[
{'env_id': 'CartPole-v0',
'trials': 1,
'max_timesteps': 2,
},
]
)
benchmark_result = _benchmark_result_helper(benchmark, data_sources=[0,0], episode_lengths=[1,1], episode_rewards=[1,1], episode_types=['t','t'], timestamps=[2,3])
_assert_benchmark_result(benchmark_result, score=0.01)
# make sure we only include the first result because of timesteps
benchmark_result = _benchmark_result_helper(benchmark, data_sources=[0,0,0], episode_lengths=[1,100,100], episode_rewards=[1,100,100], episode_types=['t','t','t'], timestamps=[2,102,202])
_assert_benchmark_result(benchmark_result, score=0.005, solves=False)
def test_clip_average_max_seconds():
benchmark = registration.Benchmark(
id='TestBenchmark-v0',
scorer=scoring.ClipTo01ThenAverage(num_episodes=2),
tasks=[
{'env_id': 'CartPole-v0',
'trials': 1,
'max_seconds': 1,
},
]
)
benchmark_result = _benchmark_result_helper(benchmark, data_sources=[0,0], episode_lengths=[100,100], episode_rewards=[0,100], episode_types=['t','t'], timestamps=[1.5, 2])
_assert_benchmark_result(benchmark_result, score=0.5)
# make sure we only include the first result because of wall clock time
benchmark_result = _benchmark_result_helper(benchmark, data_sources=[0,0,0], episode_lengths=[100,100,100], episode_rewards=[0,100,100], episode_types=['t','t','t'], timestamps=[2,102,202])
_assert_benchmark_result(benchmark_result, score=0.0)
def test_clip_average_benchmark_scoring():
benchmark_results = defaultdict(list)
for i, task in enumerate(benchmark.tasks):
env_id = task.env_id
benchmark_results[env_id].append(_benchmark_result_helper(benchmark, env_id=env_id, timestamps=[i + 2]))
scores = scoring.benchmark_aggregate_score(benchmark, benchmark_results)
_assert_benchmark_score(scores, score=0.0001, num_envs_solved=0, summed_training_seconds=3.0, start_to_finish_seconds=2.0)
def test_clip_average_benchmark_empty():
scores = scoring.benchmark_aggregate_score(benchmark, {})
benchmark_results = defaultdict(list)
task = benchmark.tasks[0]
env_id = task.env_id
benchmark_results[env_id].append(_benchmark_result_helper(benchmark, env_id=env_id))
scores = scoring.benchmark_aggregate_score(benchmark, benchmark_results)
_assert_benchmark_score(scores, score=0.00005, num_envs_solved=0, summed_training_seconds=1.0, start_to_finish_seconds=1.0)
def test_clip_average_benchmark_solved():
benchmark_results = defaultdict(list)
N = 200
for i, task in enumerate(benchmark.tasks):
env_id = task.env_id
benchmark_results[env_id].append(benchmark.score_evaluation(
env_id,
data_sources=[0] * N,
initial_reset_timestamps=[1],
episode_lengths=[1] * N,
episode_rewards=[1000] * N,
episode_types=['t'] * N,
timestamps=list(range(N)),
))
scores = scoring.benchmark_aggregate_score(benchmark, benchmark_results)
_assert_benchmark_score(scores, score=1.0, num_envs_solved=len(benchmark.tasks))
def test_clip_average_benchmark_incomplete():
benchmark_results = defaultdict(list)
env_id = benchmark.tasks[0].env_id
benchmark_results[env_id].append(_benchmark_result_helper(benchmark, env_id=env_id, timestamps=[2]))
scores = scoring.benchmark_aggregate_score(benchmark, benchmark_results)
_assert_benchmark_score(scores, score=0.00005, num_envs_solved=0, summed_training_seconds=1.0, start_to_finish_seconds=1.0)
def test_clip_average_benchmark_extra():
benchmark_results = defaultdict(list)
for i, task in enumerate(benchmark.tasks):
env_id = task.env_id
benchmark_results[env_id].append(_benchmark_result_helper(benchmark, env_id=env_id, timestamps=[i + 2]))
# add one more at the end with a high reward
benchmark_results[env_id].append(_benchmark_result_helper(benchmark, env_id=env_id, episode_rewards=[100], timestamps=[2]))
scores = scoring.benchmark_aggregate_score(benchmark, benchmark_results)
_assert_benchmark_score(scores, score=0.0001, num_envs_solved=0, summed_training_seconds=3.0, summed_task_wall_time=3.0, start_to_finish_seconds=2.0)
def test_clip_average_benchmark_eval_handling():
# make sure we handle separate evaluation, training episodes properly
benchmark_results = defaultdict(list)
for i, task in enumerate(benchmark.tasks):
env_id = task.env_id
benchmark_results[env_id].append(benchmark.score_evaluation(
env_id,
data_sources=[0, 1, 1],
initial_reset_timestamps=[1, 1],
episode_lengths=[1, 1, 1],
episode_rewards=[1, 2, 3],
episode_types=['e', 't', 'e'],
timestamps=[i + 2, i + 3, i + 4],
))
scores = scoring.benchmark_aggregate_score(benchmark, benchmark_results)
_assert_benchmark_score(scores, score=0.0004, num_envs_solved=0, summed_training_seconds=5.0, summed_task_wall_time=5.0, start_to_finish_seconds=3.0)
# Tests for total reward scoring
def test_clip_scoring():
benchmark = registration.Benchmark(
id='TestBenchmark-v0',
scorer=scoring.TotalReward(),
tasks=[
{'env_id': 'CartPole-v0',
'trials': 1,
'max_timesteps': 5,
},
]
)
# simple scoring
benchmark_result = _benchmark_result_helper(benchmark)
_assert_benchmark_result(benchmark_result, score=0.01)
# test a successful run
benchmark_result = _benchmark_result_helper(benchmark, episode_rewards=[100])
_assert_benchmark_result(benchmark_result, score=1.0, solves=True)
def test_max_timesteps():
benchmark = registration.Benchmark(
id='TestBenchmark-v0',
scorer=scoring.TotalReward(),
tasks=[
{'env_id': 'CartPole-v0',
'trials': 1,
'max_timesteps': 2,
},
]
)
benchmark_result = _benchmark_result_helper(benchmark, data_sources=[0,0], episode_lengths=[1,1], episode_rewards=[1,1], episode_types=['t','t'], timestamps=[2,3])
_assert_benchmark_result(benchmark_result, score=0.01)
# make sure we only include the first result because of timesteps
benchmark_result = _benchmark_result_helper(benchmark, data_sources=[0,0,0], episode_lengths=[1,100,100], episode_rewards=[1,100,100], episode_types=['t','t','t'], timestamps=[2,102,202])
_assert_benchmark_result(benchmark_result, score=0.01, solves=False)
def test_max_seconds():
benchmark = registration.Benchmark(
id='TestBenchmark-v0',
scorer=scoring.TotalReward(),
tasks=[
{'env_id': 'CartPole-v0',
'trials': 1,
'max_seconds': 1,
},
]
)
benchmark_result = _benchmark_result_helper(benchmark, data_sources=[0,0], episode_lengths=[100,100], episode_rewards=[0,100], episode_types=['t','t'], timestamps=[1.5, 2])
_assert_benchmark_result(benchmark_result, score=0.5)
# make sure we only include the first result because of wall clock time
benchmark_result = _benchmark_result_helper(benchmark, data_sources=[0,0,0], episode_lengths=[100,100,100], episode_rewards=[0,100,100], episode_types=['t','t','t'], timestamps=[2,102,202])
_assert_benchmark_result(benchmark_result, score=0.0)
reward_benchmark = registration.Benchmark(
id='TestBenchmark-v0',
scorer=scoring.TotalReward(),
tasks=[
{'env_id': 'CartPole-v0',
'trials': 1,
'max_timesteps': 5,
},
{'env_id': 'Pendulum-v0',
'trials': 1,
'max_timesteps': 5,
},
]
)
def test_total_reward_evaluation_scoring():
benchmark_result = _benchmark_result_helper(reward_benchmark)
_assert_evaluation_result(
benchmark_result,
score=0.01,
rewards=[np.array([1])],
lengths=[np.array([1])],
)
def test_total_reward_benchmark_scoring():
benchmark_results = defaultdict(list)
for i, task in enumerate(reward_benchmark.tasks):
env_id = task.env_id
benchmark_results[env_id].append(_benchmark_result_helper(reward_benchmark, env_id=env_id, timestamps=[i + 2]))
scores = scoring.benchmark_aggregate_score(reward_benchmark, benchmark_results)
_assert_benchmark_score(scores, score=0.01, num_envs_solved=0, summed_training_seconds=3.0, summed_task_wall_time=3.0, start_to_finish_seconds=2.0)
def test_total_reward_benchmark_empty():
scores = scoring.benchmark_aggregate_score(reward_benchmark, {})
benchmark_results = defaultdict(list)
task = reward_benchmark.tasks[0]
env_id = task.env_id
benchmark_results[env_id].append(_benchmark_result_helper(reward_benchmark, env_id=env_id))
scores = scoring.benchmark_aggregate_score(reward_benchmark, benchmark_results)
_assert_benchmark_score(scores, score=0.005, num_envs_solved=0, summed_training_seconds=1.0, start_to_finish_seconds=1.0)
def test_total_reward_benchmark_solved():
benchmark_results = defaultdict(list)
N = 200
for i, task in enumerate(reward_benchmark.tasks):
env_id = task.env_id
benchmark_results[env_id].append(reward_benchmark.score_evaluation(
env_id,
data_sources=[0] * N,
initial_reset_timestamps=[1],
episode_lengths=[1] * N,
episode_rewards=[1000] * N,
episode_types=['t'] * N,
timestamps=list(range(N)),
))
scores = scoring.benchmark_aggregate_score(reward_benchmark, benchmark_results)
_assert_benchmark_score(scores, score=1.0, num_envs_solved=len(reward_benchmark.tasks))
def test_benchmark_incomplete():
benchmark_results = defaultdict(list)
env_id = reward_benchmark.tasks[0].env_id
benchmark_results[env_id].append(_benchmark_result_helper(reward_benchmark, env_id=env_id, timestamps=[2]))
scores = scoring.benchmark_aggregate_score(reward_benchmark, benchmark_results)
_assert_benchmark_score(scores, score=0.005, num_envs_solved=0, summed_training_seconds=1.0, start_to_finish_seconds=1.0)
def test_benchmark_extra():
benchmark_results = defaultdict(list)
for i, task in enumerate(reward_benchmark.tasks):
env_id = task.env_id
benchmark_results[env_id].append(_benchmark_result_helper(reward_benchmark, env_id=env_id, timestamps=[i + 2]))
# add one more at the end with a high reward
benchmark_results[env_id].append(_benchmark_result_helper(reward_benchmark, env_id=env_id, episode_rewards=[100], timestamps=[2]))
scores = scoring.benchmark_aggregate_score(reward_benchmark, benchmark_results)
_assert_benchmark_score(scores, score=0.01, num_envs_solved=0, summed_training_seconds=3.0, start_to_finish_seconds=2.0)
def test_benchmark_simple():
# TODO what is this testing?
benchmark_results = defaultdict(list)
for i, task in enumerate(reward_benchmark.tasks):
env_id = task.env_id
benchmark_results[env_id].append(_benchmark_result_helper(reward_benchmark, env_id=env_id, timestamps=[i + 2]))
scores = scoring.benchmark_aggregate_score(reward_benchmark, benchmark_results)
_assert_benchmark_score(scores, score=0.01, num_envs_solved=0, summed_training_seconds=3.0, start_to_finish_seconds=2.0)
def test_benchmark_eval_handling():
# make sure we count all episodes
benchmark_results = defaultdict(list)
for i, task in enumerate(reward_benchmark.tasks):
env_id = task.env_id
benchmark_results[env_id].append(reward_benchmark.score_evaluation(
env_id,
data_sources=[0, 1, 1],
initial_reset_timestamps=[1, 2],
episode_lengths=[1, 1, 1],
episode_rewards=[1, 2, 3],
episode_types=['e', 't', 'e'],
timestamps=[i + 2, i + 3, i + 4],
))
scores = scoring.benchmark_aggregate_score(reward_benchmark, benchmark_results)
_assert_benchmark_score(scores, score=0.02, num_envs_solved=0, summed_training_seconds=8.0, summed_task_wall_time=7.0, start_to_finish_seconds=4.0)
reward_per_time_benchmark = registration.Benchmark(
id='TestBenchmark-v0',
scorer=scoring.RewardPerTime(),
tasks=[
{'env_id': 'CartPole-v0',
'trials': 1,
'max_timesteps': 5,
},
{'env_id': 'Pendulum-v0',
'trials': 1,
'max_timesteps': 5,
},
]
)
def test_reward_per_time_benchmark_scoring():
benchmark_results = defaultdict(list)
for i, task in enumerate(reward_per_time_benchmark.tasks):
env_id = task.env_id
benchmark_results[env_id].append(_benchmark_result_helper(reward_per_time_benchmark, env_id=env_id, timestamps=[i + 2]))
scores = scoring.benchmark_aggregate_score(reward_per_time_benchmark, benchmark_results)
_assert_benchmark_score(scores, score=0.0075, num_envs_solved=0, summed_training_seconds=3.0, summed_task_wall_time=3.0, start_to_finish_seconds=2.0)
def test_reward_per_time_benchmark_empty():
scores = scoring.benchmark_aggregate_score(reward_per_time_benchmark, {})
benchmark_results = defaultdict(list)
task = reward_per_time_benchmark.tasks[0]
env_id = task.env_id
benchmark_results[env_id].append(_benchmark_result_helper(reward_per_time_benchmark, env_id=env_id, episode_lengths=[10]))
scores = scoring.benchmark_aggregate_score(reward_per_time_benchmark, benchmark_results)
_assert_benchmark_score(scores, score=0.0, num_envs_solved=0, summed_training_seconds=0.0, start_to_finish_seconds=0.0)
def test_reward_per_time_benchmark_solved():
benchmark_results = defaultdict(list)
N = 200
for i, task in enumerate(reward_per_time_benchmark.tasks):
env_id = task.env_id
benchmark_results[env_id].append(reward_per_time_benchmark.score_evaluation(
env_id,
data_sources=[0] * N,
initial_reset_timestamps=[1],
episode_lengths=[1] * N,
episode_rewards=[1000] * N,
episode_types=['t'] * N,
timestamps=list(range(N)),
))
scores = scoring.benchmark_aggregate_score(reward_per_time_benchmark, benchmark_results)
# Currently reward per time has no solved functionality, so num_envs_solved
# is 0
_assert_benchmark_score(scores, score=1.0, num_envs_solved=0)

View File

@@ -2,7 +2,6 @@ import gym
from gym import Wrapper
from gym import error, version
import os, json, logging, numpy as np, six
from gym.monitoring import stats_recorder, video_recorder
from gym.utils import atomic_write, closer
from gym.utils.json_utils import json_encode_np
@@ -384,3 +383,6 @@ def collapse_env_infos(env_infos, training_dir):
if key not in first:
raise error.Error("env_info {} from training directory {} is missing expected key {}. This is unexpected and likely indicates a bug in gym.".format(first, training_dir, key))
return first
# Put circular import at the bottom. Even better: break circular import
from gym.monitoring import stats_recorder, video_recorder