TimeLimit refactor with Monitor Simplification (#482)

* fix double reset, as suggested by @jietang

* better floors and ceilings

* add convenience methods to monitor

* add wrappers to gym namespace

* allow playing Atari games, with potentially more coming in the future

* simplify example in docs

* Move play out of the Env

* fix tests

* no more deprecation warnings

* remove env.monitor

* monitor simplification

* monitor simplifications

* monitor related fixes

* a few changes suggested by linter

* timestep_limit fixes

* keep track of gym env variables for future compatibility

* timestep_limit => max_episode_timesteps

* don't apply TimeLimit wrapper in make for VNC envs

* Respect old timestep_limit argument

* Pass max_episode_seconds through registration

* Don't include deprecation warnings yet
This commit is contained in:
Tom Brown
2017-02-01 13:10:59 -08:00
committed by GitHub
parent c17ac6cc55
commit d337f4e571
19 changed files with 689 additions and 521 deletions

View File

@@ -1,9 +1,8 @@
import logging
import pkg_resources
import re
import sys
from gym import error
import warnings
logger = logging.getLogger(__name__)
# This format is true today, but it's *not* an official spec.
@@ -37,7 +36,7 @@ class EnvSpec(object):
trials (int): The number of trials run in official evaluation
"""
def __init__(self, id, entry_point=None, trials=100, reward_threshold=None, local_only=False, kwargs=None, nondeterministic=False, tags=None, timestep_limit=None):
def __init__(self, id, entry_point=None, trials=100, reward_threshold=None, local_only=False, kwargs=None, nondeterministic=False, tags=None, max_episode_steps=None, max_episode_seconds=None, timestep_limit=None):
self.id = id
# Evaluation parameters
self.trials = trials
@@ -49,7 +48,24 @@ class EnvSpec(object):
tags = {}
self.tags = tags
self.timestep_limit = timestep_limit
# BACKWARDS COMPAT 2017/1/18
if tags.get('wrapper_config.TimeLimit.max_episode_steps'):
max_episode_steps = tags.get('wrapper_config.TimeLimit.max_episode_steps')
# TODO: Add the following deprecation warning after 2017/02/18
# warnings.warn("DEPRECATION WARNING wrapper_config.TimeLimit has been deprecated. Replace any calls to `register(tags={'wrapper_config.TimeLimit.max_episode_steps': 200})` with `register(max_episode_steps=200)`. This change was made 2017/1/31 and is included in gym version 0.8.0. If you are getting many of these warnings, you may need to update universe past version 0.21.3")
tags['wrapper_config.TimeLimit.max_episode_steps'] = max_episode_steps
######
# BACKWARDS COMPAT 2017/1/31
if timestep_limit is not None:
max_episode_steps = timestep_limit
# TODO: Add the following deprecation warning after 2017/03/01
# warnings.warn("register(timestep_limit={}) is deprecated. Use register(max_episode_steps={}) instead.".format(timestep_limit, timestep_limit))
######
self.max_episode_steps = max_episode_steps
self.max_episode_seconds = max_episode_seconds
# We may make some of these other parameters public if they're
# useful.
@@ -71,6 +87,7 @@ class EnvSpec(object):
# Make the environment aware of which spec it came from.
env.spec = self
return env
def __repr__(self):
@@ -78,15 +95,12 @@ class EnvSpec(object):
@property
def timestep_limit(self):
logger.warn("DEPRECATION WARNING: env.spec.timestep_limit has been deprecated. Replace your call to `env.spec.timestep_limit` with `env.spec.tags.get('wrapper_config.TimeLimit.max_episode_steps')`. This change was made 12/28/2016 and is included in version 0.7.0")
return self.tags.get('wrapper_config.TimeLimit.max_episode_steps')
return self.max_episode_steps
@timestep_limit.setter
def timestep_limit(self, timestep_limit):
if timestep_limit is not None:
logger.warn(
"DEPRECATION WARNING: env.spec.timestep_limit has been deprecated. Replace any calls to `register(timestep_limit=200)` with `register(tags={'wrapper_config.TimeLimit.max_episode_steps': 200)}`, . This change was made 12/28/2016 and is included in gym version 0.7.0. If you are getting many of these warnings, you may need to update universe past version 0.21.1")
self.tags['wrapper_config.TimeLimit.max_episode_steps'] = timestep_limit
def timestep_limit(self, value):
self.max_episode_steps = value
class EnvRegistry(object):
"""Register an env by ID. IDs remain stable over time and are
@@ -102,7 +116,14 @@ class EnvRegistry(object):
def make(self, id):
logger.info('Making new env: %s', id)
spec = self.spec(id)
return spec.make()
env = spec.make()
if (env.spec.timestep_limit is not None) and not spec.tags.get('vnc'):
from gym.wrappers.time_limit import TimeLimit
env = TimeLimit(env,
max_episode_steps=env.spec.max_episode_steps,
max_episode_seconds=env.spec.max_episode_seconds)
return env
def all(self):
return self.env_specs.values()