diff --git a/gym/__init__.py b/gym/__init__.py
index 8fc7c62ab..25989007c 100644
--- a/gym/__init__.py
+++ b/gym/__init__.py
@@ -31,7 +31,7 @@ del logger_setup
 
 sanity_check_dependencies()
 
-from gym.core import Env, Space, Wrapper
+from gym.core import Env, Space, Wrapper, ObservationWrapper, ActionWrapper, RewardWrapper
 from gym.envs import make, spec
 from gym.scoreboard.api import upload
diff --git a/gym/core.py b/gym/core.py
index a5b59f798..174b2cae9 100644
--- a/gym/core.py
+++ b/gym/core.py
@@ -85,6 +85,9 @@ class Env(object):
         raise NotImplementedError
 
     def _seed(self, seed=None): return []
 
+    # Do not override
+    _owns_render = True
+
     @property
     def monitor(self):
         """Lazily creates a monitor instance.
@@ -122,14 +125,16 @@ class Env(object):
         return observation, reward, done, info
 
     def reset(self):
-        """
-        Resets the state of the environment and returns an initial observation.
+        """Resets the state of the environment and returns an initial
+        observation. Will call 'configure()' if not already called.
 
-        Returns:
-            observation (object): the initial observation of the space. (Initial reward is assumed to be 0.)
+        Returns: observation (object): the initial observation of the
+            space. (Initial reward is assumed to be 0.)
         """
         if self.metadata.get('configure.required') and not self._configured:
-            raise error.Error("{} requires calling 'configure()' before 'reset()'".format(self))
+            raise error.Error("{} requires manually calling 'configure()' before 'reset()'".format(self))
+        elif not self._configured:
+            self.configure()
 
         self.monitor._before_reset()
         observation = self._reset()
@@ -197,9 +202,11 @@ class Env(object):
         if not hasattr(self, '_closed') or self._closed:
             return
 
-        # Automatically close the monitor and any render window
-        self.monitor.close()
-        self.render(close=True)
+        # Automatically close the monitor and any render window.
+        if hasattr(self, '_monitor'):
+            self.monitor.close()
+        if self._owns_render:
+            self.render(close=True)
 
         self._close()
         env_closer.unregister(self._env_closer_id)
@@ -236,7 +243,7 @@ class Env(object):
         self._configured = True
 
         try:
-            return self._configure(*args, **kwargs)
+            self._configure(*args, **kwargs)
         except TypeError as e:
             # It can be confusing if you have the wrong environment
             # and try calling with unsupported arguments, since your
@@ -301,14 +308,24 @@ class Space(object):
         return sample_n
 
 class Wrapper(Env):
-    def __init__(self, env):
+    _owns_render = False
+
+    # Make sure self.env is always defined, even if things break
+    # early.
+    env = None
+
+    def __init__(self, env=None):
         self.env = env
-        self.metadata = env.metadata
-        self.action_space = env.action_space
-        self.observation_space = env.observation_space
-        self.reward_range = env.reward_range
-        self._spec = env.spec
-        self._unwrapped = env.unwrapped
+        # Merge with the base metadata
+        metadata = self.metadata
+        self.metadata = self.env.metadata.copy()
+        self.metadata.update(metadata)
+
+        self.action_space = self.env.action_space
+        self.observation_space = self.env.observation_space
+        self.reward_range = self.env.reward_range
+        self._spec = self.env.spec
+        self._unwrapped = self.env.unwrapped
 
     def _step(self, action):
         return self.env.step(action)
@@ -317,9 +334,13 @@
         return self.env.reset()
 
     def _render(self, mode='human', close=False):
+        if self.env is None:
+            return
         return self.env.render(mode, close)
 
     def _close(self):
+        if self.env is None:
+            return
         return self.env.close()
 
     def _configure(self, *args, **kwargs):
@@ -329,7 +350,10 @@
         return self.env.seed(seed)
 
     def __str__(self):
-        return '<{}{} instance>'.format(type(self).__name__, self.env)
+        return '<{}{}>'.format(type(self).__name__, self.env)
+
+    def __repr__(self):
+        return str(self)
 
     @property
     def spec(self):
@@ -340,6 +364,43 @@
     @spec.setter
     def spec(self, spec):
         # Won't have an env attr while in the __new__ from gym.Env
-        if hasattr(self, 'env'):
+        if self.env is not None:
             self.env.spec = spec
         self._spec = spec
+
+class ObservationWrapper(Wrapper):
+    def _reset(self):
+        observation = self.env.reset()
+        return self._observation(observation)
+
+    def _step(self, action):
+        observation, reward, done, info = self.env.step(action)
+        return self.observation(observation), reward, done, info
+
+    def observation(self, observation):
+        return self._observation(observation)
+
+    def _observation(self, observation):
+        raise NotImplementedError
+
+class RewardWrapper(Wrapper):
+    def _step(self, action):
+        observation, reward, done, info = self.env.step(action)
+        return observation, self.reward(reward), done, info
+
+    def reward(self, reward):
+        return self._reward(reward)
+
+    def _reward(self, reward):
+        raise NotImplementedError
+
+class ActionWrapper(Wrapper):
+    def _step(self, action):
+        action = self.action(action)
+        return self.env.step(action)
+
+    def action(self, action):
+        return self._action(action)
+
+    def _action(self, action):
+        raise NotImplementedError
diff --git a/gym/envs/__init__.py b/gym/envs/__init__.py
index dfa49a1bd..94ff413f7 100644
--- a/gym/envs/__init__.py
+++ b/gym/envs/__init__.py
@@ -280,6 +280,15 @@ for game in ['air_raid', 'alien', 'amidar', 'assault', 'asterix', 'asteroids', '
         nondeterministic=nondeterministic,
     )
 
+    register(
+        id='{}-v1'.format(name),
+        entry_point='gym.envs.atari:AtariEnv',
+        kwargs={'game': game, 'obs_type': obs_type},
+        timestep_limit=100000,
+        nondeterministic=nondeterministic,
+    )
+
+    # Standard Deterministic (as in the original DeepMind paper)
     if game == 'space_invaders':
         frameskip = 3
     else:
@@ -294,6 +303,33 @@ for game in ['air_raid', 'alien', 'amidar', 'assault', 'asterix', 'asteroids', '
         nondeterministic=nondeterministic,
     )
 
+    register(
+        # Use a deterministic frame skip.
+        id='{}Deterministic-v1'.format(name),
+        entry_point='gym.envs.atari:AtariEnv',
+        kwargs={'game': game, 'obs_type': obs_type, 'frameskip': frameskip},
+        timestep_limit=100000,
+        nondeterministic=nondeterministic,
+    )
+
+    # No frameskip. (Atari has no entropy source, so these are
+    # deterministic environments.)
+    register(
+        id='{}NoFrameskip-v0'.format(name),
+        entry_point='gym.envs.atari:AtariEnv',
+        kwargs={'game': game, 'obs_type': obs_type, 'frameskip': 1}, # A frameskip of 1 means we get every frame
+        timestep_limit=40000,
+        nondeterministic=nondeterministic,
+    )
+
+    register(
+        id='{}NoFrameskip-v1'.format(name),
+        entry_point='gym.envs.atari:AtariEnv',
+        kwargs={'game': game, 'obs_type': obs_type, 'frameskip': 1}, # A frameskip of 1 means we get every frame
+        timestep_limit=400000,
+        nondeterministic=nondeterministic,
+    )
+
 # Board games
 # ----------------------------------------
diff --git a/gym/envs/registration.py b/gym/envs/registration.py
index 946af0ec9..d4d470c6d 100644
--- a/gym/envs/registration.py
+++ b/gym/envs/registration.py
@@ -27,6 +27,7 @@ class EnvSpec(object):
         local_only: True iff the environment is to be used only on the local machine (e.g. debugging envs)
         kwargs (dict): The kwargs to pass to the environment class
         nondeterministic (bool): Whether this environment is non-deterministic even after seeding
+        tags (list[str]): A set of arbitrary tags on this environment
 
     Attributes:
         id (str): The official environment ID
@@ -34,7 +35,7 @@ class EnvSpec(object):
         trials (int): The number of trials run in official evaluation
     """
 
-    def __init__(self, id, entry_point=None, timestep_limit=1000, trials=100, reward_threshold=None, local_only=False, kwargs=None, nondeterministic=False):
+    def __init__(self, id, entry_point=None, timestep_limit=1000, trials=100, reward_threshold=None, local_only=False, kwargs=None, nondeterministic=False, tags=[]):
        self.id = id
         # Evaluation parameters
         self.timestep_limit = timestep_limit
@@ -42,6 +43,7 @@ class EnvSpec(object):
         self.trials = trials
         self.reward_threshold = reward_threshold
         # Environment properties
         self.nondeterministic = nondeterministic
+        self.tags = tags
 
         # We may make some of these other parameters public if they're
         # useful.
diff --git a/gym/monitoring/monitor.py b/gym/monitoring/monitor.py
index 07592dc5f..7d9cc6a3b 100644
--- a/gym/monitoring/monitor.py
+++ b/gym/monitoring/monitor.py
@@ -204,7 +204,8 @@ class Monitor(object):
                 logger.error('Could not close renderer for %s: %s', key, e)
 
         # Remove the env's pointer to this monitor
-        del env._monitor
+        if hasattr(env, '_monitor'):
+            del env._monitor
 
         # Stop tracking this for autoclose
         monitor_closer.unregister(self._monitor_id)
diff --git a/gym/version.py b/gym/version.py
index 09e82fd65..0ad6b235f 100644
--- a/gym/version.py
+++ b/gym/version.py
@@ -1 +1 @@
-VERSION = '0.2.6'
+VERSION = '0.2.11'
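
Usage sketch (not part of the patch): the new ObservationWrapper, RewardWrapper, and ActionWrapper base classes exported from gym are meant to be subclassed by overriding _observation, _reward, or _action respectively, and wrappers can be stacked since each one is itself an Env. The ScaledObservation and ClippedReward names below, and the 'AirRaidNoFrameskip-v0' id (derived from the 'air_raid' entry in the Atari registration loop), are illustrative assumptions rather than anything defined in this diff.

import numpy as np
import gym

class ScaledObservation(gym.ObservationWrapper):
    # Hypothetical example wrapper: rescale uint8 pixel observations to floats in [0, 1].
    def _observation(self, observation):
        return np.asarray(observation, dtype=np.float32) / 255.0

class ClippedReward(gym.RewardWrapper):
    # Hypothetical example wrapper: clip rewards to {-1, 0, +1}.
    def _reward(self, reward):
        return float(np.sign(reward))

env = ClippedReward(ScaledObservation(gym.make('AirRaidNoFrameskip-v0')))
observation = env.reset()  # reset() now calls configure() automatically if needed
observation, reward, done, info = env.step(env.action_space.sample())
env.close()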