Gymnasium/gym/core.py

import logging
logger = logging.getLogger(__name__)

import numpy as np

from gym import error, monitoring
from gym.utils import closer

env_closer = closer.Closer()

# Env-related abstractions

class Env(object):
    """The main OpenAI Gym class. It encapsulates an environment with
    arbitrary behind-the-scenes dynamics. An environment can be
    partially or fully observed.

    The main API methods that users of this class need to know are:

        reset
        step
        render
        close

    When implementing an environment, override the following methods
    in your subclass:

        _step
        _reset
        _render

    And set the following attributes:

        action_space: The Space object corresponding to valid actions
        observation_space: The Space object corresponding to valid observations
        reward_range: A tuple corresponding to the min and max possible rewards

    The methods are accessed publicly as "step", "reset", etc. The
    non-underscored versions are wrapper methods to which we may add
    functionality over time.
    """

    def __new__(cls, *args, **kwargs):
        # We use __new__ since we want the env author to be able to
        # override __init__ without remembering to call super.
        # The constructor arguments are deliberately not forwarded:
        # __init__ receives them on its own once __new__ returns.
        env = super(Env, cls).__new__(cls)
        env._env_closer_id = env_closer.register(env)
        env._closed = False

        # Will be automatically set when creating an environment via 'make'
        env.spec = None
        return env

    # Set this in SOME subclasses
    metadata = {'render.modes': []}
    reward_range = (-np.inf, np.inf)

    # Override in SOME subclasses
    def _close(self):
        """Cleanup hook invoked by close(); does nothing by default."""
        pass

    # Set these in ALL subclasses
    action_space = None
    observation_space = None

    # Override in ALL subclasses
    def _step(self, action):
        """Environment-specific implementation behind step()."""
        raise NotImplementedError

    def _reset(self):
        """Environment-specific implementation behind reset()."""
        raise NotImplementedError

    def _render(self, mode='human', close=False):
        """Environment-specific implementation behind render().

        The default accepts a close request as a no-op (there is nothing
        to close) and raises NotImplementedError for any real render.
        """
        if close:
            return
        raise NotImplementedError

    @property
    def monitor(self):
        """The Monitor attached to this env, created lazily on first access."""
        if not hasattr(self, '_monitor'):
            self._monitor = monitoring.Monitor(self)
        return self._monitor

    def step(self, action):
        """Run one timestep of the environment's dynamics. When end of
        episode is reached, you are responsible for calling `reset()`
        to reset this environment's state.

        Accepts an action and returns a tuple (observation, reward, done, info).

        An action or observation that falls outside the declared space is
        only logged as a warning; the step still runs.

        Args:
            action (object): the action to apply, supplied by the caller

        Returns:
            observation (object): agent's observation of the current environment
            reward (float) : amount of reward returned after previous action
            done (boolean): whether the episode has ended, in which case further step() calls will return undefined results
            info (dict): contains auxiliary diagnostic information (helpful for debugging, and sometimes learning)
        """
        if not self.action_space.contains(action):
            logger.warning("Action '{}' is not contained within action space '{}'.".format(action, self.action_space))

        self.monitor._before_step(action)
        observation, reward, done, info = self._step(action)
        if not self.observation_space.contains(observation):
            logger.warning("Observation '{}' is not contained within observation space '{}'.".format(observation, self.observation_space))

        # The monitor's return value replaces the env's own `done` flag.
        done = self.monitor._after_step(observation, reward, done, info)
        return observation, reward, done, info

    def reset(self):
        """
        Resets the state of the environment and returns an initial observation.

        Returns:
            observation (object): the initial observation of the space. (Initial reward is assumed to be 0.)
        """
        self.monitor._before_reset()
        observation = self._reset()
        self.monitor._after_reset(observation)
        return observation

    def render(self, mode='human', close=False):
        """Renders the environment.

        The set of supported modes varies per environment. (And some
        environments do not support rendering at all.) By convention,
        if mode is:

        - human: render to the current display or terminal and
          return nothing. Usually for human consumption.
        - rgb_array: Return a numpy.ndarray with shape (x, y, 3),
          representing RGB values for an x-by-y pixel image, suitable
          for turning into a video.
        - ansi: Return a string (str) or StringIO.StringIO containing a
          terminal-style text representation. The text can include newlines
          and ANSI escape sequences (e.g. for colors).

        Note:
            Make sure that your class's metadata 'render.modes' key includes
              the list of supported modes. It's recommended to call super()
              in implementations to use the functionality of this method.

        Args:
            mode (str): the mode to render with
            close (bool): close all open renderings

        Raises:
            error.UnsupportedMode: if the environment declares no render
                modes, or `mode` is not one of the declared modes.

        Example:

        class MyEnv(Env):
            metadata = {'render.modes': ['human', 'rgb_array']}

            def render(self, mode='human'):
                if mode == 'rgb_array':
                    return np.array(...) # return RGB frame suitable for video
                elif mode == 'human':
                    ... # pop up a window and render
                else:
                    super(MyEnv, self).render(mode=mode) # just raise an exception
        """
        # A close request skips mode validation and is forwarded with
        # _render's default mode.
        if close:
            return self._render(close=close)

        # This code can be useful for calling super() in a subclass.
        modes = self.metadata.get('render.modes', [])
        if not modes:
            raise error.UnsupportedMode('{} does not support rendering (requested mode: {})'.format(self, mode))
        elif mode not in modes:
            raise error.UnsupportedMode('Unsupported rendering mode: {}. (Supported modes for {}: {})'.format(mode, self, modes))

        return self._render(mode=mode, close=close)

    def close(self):
        """Override _close in your subclass to perform any necessary cleanup.

        Environments will automatically close() themselves when
        garbage collected or when the program exits. Calling close()
        on an already-closed environment is a no-op.
        """
        # `_closed` is missing when the instance was never fully built
        # (e.g. __new__ raised before setting it) yet __del__ still fires;
        # treat that as "nothing to close" instead of an AttributeError.
        if getattr(self, '_closed', True):
            return

        self._close()
        env_closer.unregister(self._env_closer_id)
        # If an error occurs before this line, it's possible to
        # end up with double close.
        self._closed = True

    def __del__(self):
        self.close()

    def __str__(self):
        return '<{} instance>'.format(type(self).__name__)

# Space-related abstractions

class Space(object):
    """
    Base class for observation and action spaces.

    Giving every space the same interface lets generic code (for
    example, picking a random action) work with any Environment.
    """

    def sample(self, seed=0):
        """
        Uniformly sample a random element of this space.

        Must be implemented by subclasses; `seed` is not used by this
        base implementation.
        """
        raise NotImplementedError

    def contains(self, x):
        """
        Tell whether x is a valid member of this space (boolean).

        Must be implemented by subclasses.
        """
        raise NotImplementedError

    def to_jsonable(self, sample_n):
        """Turn a batch of samples from this space into a JSONable data type."""
        # The base class assumes the samples are already JSONable and
        # hands the very same object back.
        jsonable = sample_n
        return jsonable

    def from_jsonable(self, sample_n):
        """Turn a JSONable data type back into a batch of samples from this space."""
        # Inverse of to_jsonable: the default is again the identity.
        samples = sample_n
        return samples
Initial release. Hello world :). 2016-04-27 08:00:58 -07:00			`import logging`
Warn instead of raising on invalid action 2016-05-03 22:27:26 -04:00			`logger = logging.getLogger(__name__)`

Initial release. Hello world :). 2016-04-27 08:00:58 -07:00			`import numpy as np`

			`from gym import error, monitoring`
Improve closer implementation and docstrings (#126) * Improve auto close implementation - Register all envs at initialization time, not just ones created via make - Simplify names and add more documentation on interface - Move closer instances into the relevant modules review-requested: @jietang * Close environments in the tests This isn't strictly needed, but means there are fewer Doom subprocesses hanging around while the tests run. * Use 4 space indent in comment * Improve docstrings in core * Don't pass through args to __new__ The __init__ method gets called once __new__ returns, so these arguments are either ignored (Python 2) or result in an error (Python 3). The __init__ method automatically gets called with the correct arguments. * Fixup comments 2016-05-27 12:16:35 -07:00			`from gym.utils import closer`

			`env_closer = closer.Closer()`
Initial release. Hello world :). 2016-04-27 08:00:58 -07:00
			`# Env-related abstractions`

			`class Env(object):`
			`"""The main OpenAI Gym class. It encapsulates an environment with`
Update Env docstrings 2016-04-28 10:33:37 -07:00			`arbitrary behind-the-scenes dynamics. An environment can be`
			`partially or fully observed.`

			`The main API methods that users of this class need to know are:`

			`reset`
			`step`
			`render`
Add close() to Env, add functionality to call close() on exit 2016-05-15 15:59:02 -07:00			`close`
Initial release. Hello world :). 2016-04-27 08:00:58 -07:00
			`When implementing an environment, override the following methods`
			`in your subclass:`

			`_step`
			`_reset`
			`_render`

			`And set the following attributes:`

			`action_space: The Space object corresponding to valid actions`
			`observation_space: The Space object corresponding to valid observations`
Improve closer implementation and docstrings (#126) * Improve auto close implementation - Register all envs at initialization time, not just ones created via make - Simplify names and add more documentation on interface - Move closer instances into the relevant modules review-requested: @jietang * Close environments in the tests This isn't strictly needed, but means there are fewer Doom subprocesses hanging around while the tests run. * Use 4 space indent in comment * Improve docstrings in core * Don't pass through args to __new__ The __init__ method gets called once __new__ returns, so these arguments are either ignored (Python 2) or result in an error (Python 3). The __init__ method automatically gets called with the correct arguments. * Fixup comments 2016-05-27 12:16:35 -07:00			`reward_range: A tuple corresponding to the min and max possible rewards`
Initial release. Hello world :). 2016-04-27 08:00:58 -07:00
			`The methods are accessed publicly as "step", "reset", etc.. The`
			`non-underscored versions are wrapper methods to which we may add`
			`functionality to over time.`
			`"""`

Improve closer implementation and docstrings (#126) * Improve auto close implementation - Register all envs at initialization time, not just ones created via make - Simplify names and add more documentation on interface - Move closer instances into the relevant modules review-requested: @jietang * Close environments in the tests This isn't strictly needed, but means there are fewer Doom subprocesses hanging around while the tests run. * Use 4 space indent in comment * Improve docstrings in core * Don't pass through args to __new__ The __init__ method gets called once __new__ returns, so these arguments are either ignored (Python 2) or result in an error (Python 3). The __init__ method automatically gets called with the correct arguments. * Fixup comments 2016-05-27 12:16:35 -07:00			`def __new__(cls, args, *kwargs):`
			`# We use __new__ since we want the env author to be able to`
			`# override __init__ without remebering to call super.`
			`env = super(Env, cls).__new__(cls)`
			`env._env_closer_id = env_closer.register(env)`
			`env._closed = False`

			`# Will be automatically set when creating an environment via 'make'`
			`env.spec = None`
			`return env`

Initial release. Hello world :). 2016-04-27 08:00:58 -07:00			`# Set this in SOME subclasses`
			`metadata = {'render.modes': []}`
Add reward_range parameter to Env Closes #33 2016-04-30 22:53:10 -04:00			`reward_range = (-np.inf, np.inf)`
Initial release. Hello world :). 2016-04-27 08:00:58 -07:00
Add close() to Env, add functionality to call close() on exit 2016-05-15 15:59:02 -07:00			`# Override in SOME subclasses`
			`def _close(self):`
			`pass`

Initial release. Hello world :). 2016-04-27 08:00:58 -07:00			`# Set these in ALL subclasses`
			`action_space = None`
			`observation_space = None`

			`# Override in ALL subclasses`
			`def _step(self, action): raise NotImplementedError`
			`def _reset(self): raise NotImplementedError`
			`def _render(self, mode='human', close=False):`
			`if close:`
			`return`
			`raise NotImplementedError`

			`@property`
			`def monitor(self):`
			`if not hasattr(self, '_monitor'):`
			`self._monitor = monitoring.Monitor(self)`
			`return self._monitor`

			`def step(self, action):`
Update Env docstrings 2016-04-28 10:33:37 -07:00			`"""Run one timestep of the environment's dynamics. When end of`
			episode is reached, you are responsible for calling `reset()`
			`to reset this environment's state.`
Initial release. Hello world :). 2016-04-27 08:00:58 -07:00
Improve closer implementation and docstrings (#126) * Improve auto close implementation - Register all envs at initialization time, not just ones created via make - Simplify names and add more documentation on interface - Move closer instances into the relevant modules review-requested: @jietang * Close environments in the tests This isn't strictly needed, but means there are fewer Doom subprocesses hanging around while the tests run. * Use 4 space indent in comment * Improve docstrings in core * Don't pass through args to __new__ The __init__ method gets called once __new__ returns, so these arguments are either ignored (Python 2) or result in an error (Python 3). The __init__ method automatically gets called with the correct arguments. * Fixup comments 2016-05-27 12:16:35 -07:00			`Accepts an action and returns a tuple (observation, reward, done, info).`
Initial release. Hello world :). 2016-04-27 08:00:58 -07:00
Improve closer implementation and docstrings (#126) * Improve auto close implementation - Register all envs at initialization time, not just ones created via make - Simplify names and add more documentation on interface - Move closer instances into the relevant modules review-requested: @jietang * Close environments in the tests This isn't strictly needed, but means there are fewer Doom subprocesses hanging around while the tests run. * Use 4 space indent in comment * Improve docstrings in core * Don't pass through args to __new__ The __init__ method gets called once __new__ returns, so these arguments are either ignored (Python 2) or result in an error (Python 3). The __init__ method automatically gets called with the correct arguments. * Fixup comments 2016-05-27 12:16:35 -07:00			`Args:`
			`action (object): an action provided by the environment`
Initial release. Hello world :). 2016-04-27 08:00:58 -07:00
Improve closer implementation and docstrings (#126) * Improve auto close implementation - Register all envs at initialization time, not just ones created via make - Simplify names and add more documentation on interface - Move closer instances into the relevant modules review-requested: @jietang * Close environments in the tests This isn't strictly needed, but means there are fewer Doom subprocesses hanging around while the tests run. * Use 4 space indent in comment * Improve docstrings in core * Don't pass through args to __new__ The __init__ method gets called once __new__ returns, so these arguments are either ignored (Python 2) or result in an error (Python 3). The __init__ method automatically gets called with the correct arguments. * Fixup comments 2016-05-27 12:16:35 -07:00			`Returns:`
			`observation (object): agent's observation of the current environment`
			`reward (float) : amount of reward returned after previous action`
			`done (boolean): whether the episode has ended, in which case further step() calls will return undefined results`
			`info (dict): contains auxiliary diagnostic information (helpful for debugging, and sometimes learning)`
Initial release. Hello world :). 2016-04-27 08:00:58 -07:00			`"""`
Set restriction on selected actions for MountainCar-v0 (#47) * Set restriction on selected actions * Used self.action_space instead of custom set * Move action validation to core.py 2016-05-04 11:40:24 +10:00			`if not self.action_space.contains(action):`
Fix action space warning (fixes #108) 2016-05-23 10:59:42 -07:00			`logger.warn("Action '{}' is not contained within action space '{}'.".format(action, self.action_space))`
Set restriction on selected actions for MountainCar-v0 (#47) * Set restriction on selected actions * Used self.action_space instead of custom set * Move action validation to core.py 2016-05-04 11:40:24 +10:00
Initial release. Hello world :). 2016-04-27 08:00:58 -07:00			`self.monitor._before_step(action)`
			`observation, reward, done, info = self._step(action)`
Cartpole observations can occur outside of observation space limits - Issue #88 (#89) * Set restriction on selected actions * Used self.action_space instead of custom set * Move action validation to core.py * Fix for Cartpole observations outside of observation_space 2016-05-11 22:50:35 +10:00			`if not self.observation_space.contains(observation):`
			`logger.warn("Observation '{}' is not contained within observation space '{}'.".format(observation, self.observation_space))`
Fix observation_space for BipedalWalker (#84) * Fix observation_space for Bipedal_walker and add warning if observation doesn't fit observation_space * Remove observation state check on reset. Multiple environments call reset before action and observation spaces are initialized. 2016-05-11 05:36:09 +02:00
Initial release. Hello world :). 2016-04-27 08:00:58 -07:00			`done = self.monitor._after_step(observation, reward, done, info)`
			`return observation, reward, done, info`

			`def reset(self):`
			`"""`
			`Resets the state of the environment and returns an initial observation.`

Improve closer implementation and docstrings (#126) * Improve auto close implementation - Register all envs at initialization time, not just ones created via make - Simplify names and add more documentation on interface - Move closer instances into the relevant modules review-requested: @jietang * Close environments in the tests This isn't strictly needed, but means there are fewer Doom subprocesses hanging around while the tests run. * Use 4 space indent in comment * Improve docstrings in core * Don't pass through args to __new__ The __init__ method gets called once __new__ returns, so these arguments are either ignored (Python 2) or result in an error (Python 3). The __init__ method automatically gets called with the correct arguments. * Fixup comments 2016-05-27 12:16:35 -07:00			`Returns:`
			`observation (object): the initial observation of the space. (Initial reward is assumed to be 0.)`
Initial release. Hello world :). 2016-04-27 08:00:58 -07:00			`"""`
			`self.monitor._before_reset()`
			`observation = self._reset()`
			`self.monitor._after_reset(observation)`
			`return observation`

			`def render(self, mode='human', close=False):`
			`"""Renders the environment.`

			`The set of supported modes varies per environment. (And some`
			`environments do not support rendering at all.) By convention,`
			`if mode is:`

			`- human: render to the current display or terminal and`
			`return nothing. Usually for human consumption.`
			`- rgb_array: Return an numpy.ndarray with shape (x, y, 3),`
			`representing RGB values for an x-by-y pixel image, suitable`
			`for turning into a video.`
			`- ansi: Return a string (str) or StringIO.StringIO containing a`
			`terminal-style text representation. The text can include newlines`
			`and ANSI escape sequences (e.g. for colors).`

			`Note:`
			`Make sure that your class's metadata 'render.modes' key includes`
			`the list of supported modes. It's recommended to call super()`
			`in implementations to use the functionality of this method.`

			`Args:`
			`mode (str): the mode to render with`
			`close (bool): close all open renderings`

			`Example:`

			`class MyEnv(Env):`
Improve closer implementation and docstrings (#126) * Improve auto close implementation - Register all envs at initialization time, not just ones created via make - Simplify names and add more documentation on interface - Move closer instances into the relevant modules review-requested: @jietang * Close environments in the tests This isn't strictly needed, but means there are fewer Doom subprocesses hanging around while the tests run. * Use 4 space indent in comment * Improve docstrings in core * Don't pass through args to __new__ The __init__ method gets called once __new__ returns, so these arguments are either ignored (Python 2) or result in an error (Python 3). The __init__ method automatically gets called with the correct arguments. * Fixup comments 2016-05-27 12:16:35 -07:00			`metadata = {'render.modes': ['human', 'rgb_array']}`

			`def render(self, mode='human'):`
			`if mode == 'rgb_array':`
			`return np.array(...) # return RGB frame suitable for video`
			`elif mode is 'human':`
			`... # pop up a window and render`
			`else:`
			`super(MyEnv, self).render(mode=mode) # just raise an exception`
Initial release. Hello world :). 2016-04-27 08:00:58 -07:00			`"""`
			`if close:`
			`return self._render(close=close)`

			`# This code can be useful for calling super() in a subclass.`
			`modes = self.metadata.get('render.modes', [])`
			`if len(modes) == 0:`
			`raise error.UnsupportedMode('{} does not support rendering (requested mode: {})'.format(self, mode))`
			`elif mode not in modes:`
			`raise error.UnsupportedMode('Unsupported rendering mode: {}. (Supported modes for {}: {})'.format(mode, self, modes))`

			`return self._render(mode=mode, close=close)`

Add close() to Env, add functionality to call close() on exit 2016-05-15 15:59:02 -07:00			`def close(self):`
Improve closer implementation and docstrings (#126) * Improve auto close implementation - Register all envs at initialization time, not just ones created via make - Simplify names and add more documentation on interface - Move closer instances into the relevant modules review-requested: @jietang * Close environments in the tests This isn't strictly needed, but means there are fewer Doom subprocesses hanging around while the tests run. * Use 4 space indent in comment * Improve docstrings in core * Don't pass through args to __new__ The __init__ method gets called once __new__ returns, so these arguments are either ignored (Python 2) or result in an error (Python 3). The __init__ method automatically gets called with the correct arguments. * Fixup comments 2016-05-27 12:16:35 -07:00			`"""Override _close in your subclass to perform any necessary cleanup.`

			`Environments will automatically close() themselves when`
			`garbage collected or when the program exits.`
Add close() to Env, add functionality to call close() on exit 2016-05-15 15:59:02 -07:00			`"""`
Improve closer implementation and docstrings (#126) * Improve auto close implementation - Register all envs at initialization time, not just ones created via make - Simplify names and add more documentation on interface - Move closer instances into the relevant modules review-requested: @jietang * Close environments in the tests This isn't strictly needed, but means there are fewer Doom subprocesses hanging around while the tests run. * Use 4 space indent in comment * Improve docstrings in core * Don't pass through args to __new__ The __init__ method gets called once __new__ returns, so these arguments are either ignored (Python 2) or result in an error (Python 3). The __init__ method automatically gets called with the correct arguments. * Fixup comments 2016-05-27 12:16:35 -07:00			`if self._closed:`
			`return`

			`self._close()`
			`env_closer.unregister(self._env_closer_id)`
			`# If an error occurs before this line, it's possible to`
			`# end up with double close.`
			`self._closed = True`
Add close() to Env, add functionality to call close() on exit 2016-05-15 15:59:02 -07:00
			`def __del__(self):`
			`self.close()`

Initial release. Hello world :). 2016-04-27 08:00:58 -07:00			`def __str__(self):`
			`return '<{} instance>'.format(type(self).__name__)`

			`# Space-related abstractions`

			`class Space(object):`
			`"""`
			`Provides a classification state spaces and action spaces,`
			`so you can write generic code that applies to any Environment.`
			`E.g. to choose a random action.`
			`"""`

			`def sample(self, seed=0):`
			`"""`
			`Uniformly randomly sample a random elemnt of this space`
			`"""`
			`raise NotImplementedError`

			`def contains(self, x):`
			`"""`
			`Return boolean specifying if x is a valid`
			`member of this space`
			`"""`
			`raise NotImplementedError`

			`def to_jsonable(self, sample_n):`
			`"""Convert a batch of samples from this space to a JSONable data type."""`
			`# By default, assume identity is JSONable`
			`return sample_n`

			`def from_jsonable(self, sample_n):`
			`"""Convert a JSONable data type to a batch of samples from this space."""`
			`# By default, assume identity is JSONable`
			`return sample_n`