Gymnasium/gym/envs/atari/atari_env.py

import numpy as np
import os
import gym
from gym import error, spaces
from gym import utils
from gym.utils import seeding

try:
    import atari_py
except ImportError as e:
    raise error.DependencyNotInstalled("{}. (HINT: you can install Atari dependencies by running 'pip install gym[atari]'.)".format(e))

import logging
logger = logging.getLogger(__name__)

def to_ram(ale):
    """Return the emulator's RAM as a new ``uint8`` NumPy array.

    A zero-initialised buffer of ``ale.getRAMSize()`` bytes is allocated and
    handed to ``ale.getRAM``, which is expected to fill it in place; the
    buffer is then returned.
    """
    buffer = np.zeros(ale.getRAMSize(), dtype=np.uint8)
    ale.getRAM(buffer)
    return buffer

class AtariEnv(gym.Env, utils.EzPickle):
    """Gym environment wrapping an Atari game run by the ALE emulator.

    Observations are either the emulator RAM (``obs_type='ram'``) or the RGB
    screen (``obs_type='image'``). Discrete actions index into the minimal
    action set that ALE reports for the loaded ROM.
    """
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, game='pong', obs_type='ram', frameskip=(2, 5), repeat_action_probability=0.):
        """Create the emulator, load the ROM and build the gym spaces.

        game: name of the game, resolved to a ROM path by atari_py.
        obs_type: 'ram' or 'image'.
        frameskip: either an int (the action is repeated exactly that many
            frames per step) or a tuple indicating a random range to choose
            from, with the top value excluded.
        repeat_action_probability: forwarded to ALE's setting of the same
            name (see https://github.com/openai/gym/issues/349).

        Raises IOError if the ROM path does not exist.
        """
        # Record *all* constructor arguments. EzPickle rebuilds the object by
        # calling the constructor again with the recorded arguments, so
        # leaving out frameskip / repeat_action_probability would make a
        # pickled or copied env silently fall back to the defaults.
        utils.EzPickle.__init__(self, game, obs_type, frameskip, repeat_action_probability)
        assert obs_type in ('ram', 'image')

        self.game_path = atari_py.get_game_path(game)
        if not os.path.exists(self.game_path):
            raise IOError('You asked for game %s but path %s does not exist'%(game, self.game_path))
        self._obs_type = obs_type
        self.frameskip = frameskip
        self.ale = atari_py.ALEInterface()
        self.viewer = None

        # Tune (or disable) ALE's action repeat:
        # https://github.com/openai/gym/issues/349
        # This is set before _seed() below, which is where the ROM is loaded.
        assert isinstance(repeat_action_probability, (float, int)), "Invalid repeat_action_probability: {!r}".format(repeat_action_probability)
        self.ale.setFloat('repeat_action_probability'.encode('utf-8'), repeat_action_probability)

        # Seeds the emulator and loads the ROM.
        self._seed()

        self._action_set = self.ale.getMinimalActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))

        (screen_width, screen_height) = self.ale.getScreenDims()
        if self._obs_type == 'ram':
            self.observation_space = spaces.Box(low=np.zeros(128), high=np.zeros(128)+255)
        elif self._obs_type == 'image':
            self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3))
        else:
            raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))

    def _seed(self, seed=None):
        """Seed the env's PRNG and the emulator, then (re)load the ROM.

        Returns the list ``[seed1, seed2]`` of seeds used: ``seed1`` for
        ``self.np_random`` and the derived ``seed2`` for ALE.
        """
        self.np_random, seed1 = seeding.np_random(seed)
        # Derive a random seed. This gets passed as a uint, but gets
        # checked as an int elsewhere, so we need to keep it below
        # 2**31.
        seed2 = seeding.hash_seed(seed1 + 1) % 2**31
        # Empirically, we need to seed before loading the ROM.
        self.ale.setInt(b'random_seed', seed2)
        self.ale.loadROM(self.game_path)
        return [seed1, seed2]

    def _step(self, a):
        """Apply action index ``a`` for one (frame-skipped) environment step.

        The ALE action is repeated ``num_steps`` times and the rewards are
        summed. Returns ``(observation, reward, done, info)`` where ``done``
        is ALE's game-over flag and ``info`` carries the remaining lives
        under the key ``"ale.lives"``.
        """
        reward = 0.0
        action = self._action_set[a]

        if isinstance(self.frameskip, int):
            num_steps = self.frameskip
        else:
            # Random frameskip drawn from [frameskip[0], frameskip[1]).
            num_steps = self.np_random.randint(self.frameskip[0], self.frameskip[1])
        for _ in range(num_steps):
            reward += self.ale.act(action)
        ob = self._get_obs()

        return ob, reward, self.ale.game_over(), {"ale.lives": self.ale.lives()}

    def _get_image(self):
        """Return the current screen as given by ``ale.getScreenRGB2()``."""
        return self.ale.getScreenRGB2()

    def _get_ram(self):
        """Return the current emulator RAM as a uint8 array."""
        return to_ram(self.ale)

    @property
    def _n_actions(self):
        """Number of actions in the minimal action set."""
        return len(self._action_set)

    def _get_obs(self):
        """Return the current observation according to ``self._obs_type``."""
        if self._obs_type == 'ram':
            return self._get_ram()
        elif self._obs_type == 'image':
            return self._get_image()
        # Previously this fell through to an unbound local variable; fail
        # with the same error type/message the constructor uses instead.
        raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))

    def _reset(self):
        """Reset the game and return the initial observation."""
        self.ale.reset_game()
        return self._get_obs()

    def _render(self, mode='human', close=False):
        """Render the current frame.

        With ``close=True`` any open viewer is closed and discarded. In
        'rgb_array' mode the screen image is returned; in 'human' mode it is
        shown in a (lazily created) SimpleImageViewer window.
        """
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        img = self._get_image()
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            # Imported lazily so that headless use does not need the
            # rendering dependencies.
            from gym.envs.classic_control import rendering
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)

    def get_action_meanings(self):
        """Return the human-readable name of every action in the action set."""
        return [ACTION_MEANING[i] for i in self._action_set]

    def get_keys_to_action(self):
        """Map keyboard key combinations to action indices.

        Returns a dict whose keys are sorted tuples of key codes (WASD for
        directions, space for FIRE) and whose values are indices into the
        action space. An action whose meaning contains none of the keywords
        (e.g. NOOP) maps from the empty tuple.
        """
        KEYWORD_TO_KEY = {
            'UP':      ord('w'),
            'DOWN':    ord('s'),
            'LEFT':    ord('a'),
            'RIGHT':   ord('d'),
            'FIRE':    ord(' '),
        }

        keys_to_action = {}

        for action_id, action_meaning in enumerate(self.get_action_meanings()):
            # A compound meaning such as "UPRIGHTFIRE" contains several
            # keywords, so it maps to the combination of their keys.
            keys = tuple(sorted(
                key for keyword, key in KEYWORD_TO_KEY.items()
                if keyword in action_meaning
            ))

            assert keys not in keys_to_action
            keys_to_action[keys] = action_id

        return keys_to_action

    # def save_state(self):
    #     return self.ale.saveState()

    # def load_state(self):
    #     return self.ale.loadState()

    # def clone_state(self):
    #     return self.ale.cloneState()

    # def restore_state(self, state):
    #     return self.ale.restoreState(state)


# Human-readable names of the ALE actions, keyed by their integer action id
# (the position of each name in the tuple below, starting at 0).
ACTION_MEANING = dict(enumerate((
    "NOOP",           # 0
    "FIRE",           # 1
    "UP",             # 2
    "RIGHT",          # 3
    "LEFT",           # 4
    "DOWN",           # 5
    "UPRIGHT",        # 6
    "UPLEFT",         # 7
    "DOWNRIGHT",      # 8
    "DOWNLEFT",       # 9
    "UPFIRE",         # 10
    "RIGHTFIRE",      # 11
    "LEFTFIRE",       # 12
    "DOWNFIRE",       # 13
    "UPRIGHTFIRE",    # 14
    "UPLEFTFIRE",     # 15
    "DOWNRIGHTFIRE",  # 16
    "DOWNLEFTFIRE",   # 17
)))
Initial release. Hello world :). 2016-04-27 08:00:58 -07:00			`import numpy as np`
			`import os`
			`import gym`
			`from gym import error, spaces`
			`from gym import utils`
[WIP] add support for seeding environments (#135) * Make environments seedable * Fix monitor bugs - Set monitor_id before setting the infix. This was a bug that would yield incorrect results with multiple monitors. - Remove extra pid from stats recorder filename. This should be purely cosmetic. * Start uploading seeds in episode_batch * Fix _bigint_from_bytes for python3 * Set seed explicitly in random_agent * Pass through seed argument * Also pass through random state to spaces * Pass random state into the observation/action spaces * Make all _seed methods return the list of used seeds * Switch over to np.random where possible * Start hashing seeds, and also seed doom engine * Fixup seeding determinism in many cases * Seed before loading the ROM * Make seeding more Python3 friendly * Make the MuJoCo skipping a bit more forgiving * Remove debugging PDB calls * Make setInt argument into raw bytes * Validate and upload seeds * Skip box2d * Make seeds smaller, and change representation of seeds in upload * Handle long seeds * Fix RandomAgent example to be deterministic * Handle integer types correctly in Python2 and Python3 * Try caching pip * Try adding swap * Add df and free calls * Bump swap * Bump swap size * Try setting overcommit * Try other sysctls * Try fixing overcommit * Try just setting overcommit_memory=1 * Add explanatory comment * Add what's new section to readme * BUG: Mark ElevatorAction-ram-v0 as non-deterministic for now * Document seed * Move nondetermistic check into spec 2016-05-29 09:07:09 -07:00			`from gym.utils import seeding`
Initial release. Hello world :). 2016-04-27 08:00:58 -07:00
			`try:`
			`import atari_py`
Fix Python3 compat of import dependencies 2016-04-27 18:03:29 -07:00			`except ImportError as e:`
Fixed quotes in NotInstalled error message (#261) s/.)'/'.)/ -- the quote accidentally went around too much. 2016-08-06 02:15:12 -05:00			`raise error.DependencyNotInstalled("{}. (HINT: you can install Atari dependencies by running 'pip install gym[atari]'.)".format(e))`
Initial release. Hello world :). 2016-04-27 08:00:58 -07:00
			`import logging`
			`logger = logging.getLogger(__name__)`

			`def to_ram(ale):`
			`ram_size = ale.getRAMSize()`
			`ram = np.zeros((ram_size),dtype=np.uint8)`
			`ale.getRAM(ram)`
			`return ram`

			`class AtariEnv(gym.Env, utils.EzPickle):`
			`metadata = {'render.modes': ['human', 'rgb_array']}`

Disable ALE's underlying repeat_action_probability (#354) * Disable ALE's underlying repeat_action_probability * Add assertion for repeat_action_probability * Properly encode for py3 2016-09-21 00:36:56 -07:00			`def __init__(self, game='pong', obs_type='ram', frameskip=(2, 5), repeat_action_probability=0.):`
Add Atari environments with deterministic frameskip 2016-08-25 08:58:09 -07:00			`"""Frameskip should be either a tuple (indicating a random range to`
			`choose from, with the top value exclude), or an int."""`

Initial release. Hello world :). 2016-04-27 08:00:58 -07:00			`utils.EzPickle.__init__(self, game, obs_type)`
			`assert obs_type in ('ram', 'image')`
[WIP] add support for seeding environments (#135) * Make environments seedable * Fix monitor bugs - Set monitor_id before setting the infix. This was a bug that would yield incorrect results with multiple monitors. - Remove extra pid from stats recorder filename. This should be purely cosmetic. * Start uploading seeds in episode_batch * Fix _bigint_from_bytes for python3 * Set seed explicitly in random_agent * Pass through seed argument * Also pass through random state to spaces * Pass random state into the observation/action spaces * Make all _seed methods return the list of used seeds * Switch over to np.random where possible * Start hashing seeds, and also seed doom engine * Fixup seeding determinism in many cases * Seed before loading the ROM * Make seeding more Python3 friendly * Make the MuJoCo skipping a bit more forgiving * Remove debugging PDB calls * Make setInt argument into raw bytes * Validate and upload seeds * Skip box2d * Make seeds smaller, and change representation of seeds in upload * Handle long seeds * Fix RandomAgent example to be deterministic * Handle integer types correctly in Python2 and Python3 * Try caching pip * Try adding swap * Add df and free calls * Bump swap * Bump swap size * Try setting overcommit * Try other sysctls * Try fixing overcommit * Try just setting overcommit_memory=1 * Add explanatory comment * Add what's new section to readme * BUG: Mark ElevatorAction-ram-v0 as non-deterministic for now * Document seed * Move nondetermistic check into spec 2016-05-29 09:07:09 -07:00
			`self.game_path = atari_py.get_game_path(game)`
			`if not os.path.exists(self.game_path):`
			`raise IOError('You asked for game %s but path %s does not exist'%(game, self.game_path))`
Initial release. Hello world :). 2016-04-27 08:00:58 -07:00			`self._obs_type = obs_type`
Add Atari environments with deterministic frameskip 2016-08-25 08:58:09 -07:00			`self.frameskip = frameskip`
[WIP] add support for seeding environments (#135) * Make environments seedable * Fix monitor bugs - Set monitor_id before setting the infix. This was a bug that would yield incorrect results with multiple monitors. - Remove extra pid from stats recorder filename. This should be purely cosmetic. * Start uploading seeds in episode_batch * Fix _bigint_from_bytes for python3 * Set seed explicitly in random_agent * Pass through seed argument * Also pass through random state to spaces * Pass random state into the observation/action spaces * Make all _seed methods return the list of used seeds * Switch over to np.random where possible * Start hashing seeds, and also seed doom engine * Fixup seeding determinism in many cases * Seed before loading the ROM * Make seeding more Python3 friendly * Make the MuJoCo skipping a bit more forgiving * Remove debugging PDB calls * Make setInt argument into raw bytes * Validate and upload seeds * Skip box2d * Make seeds smaller, and change representation of seeds in upload * Handle long seeds * Fix RandomAgent example to be deterministic * Handle integer types correctly in Python2 and Python3 * Try caching pip * Try adding swap * Add df and free calls * Bump swap * Bump swap size * Try setting overcommit * Try other sysctls * Try fixing overcommit * Try just setting overcommit_memory=1 * Add explanatory comment * Add what's new section to readme * BUG: Mark ElevatorAction-ram-v0 as non-deterministic for now * Document seed * Move nondetermistic check into spec 2016-05-29 09:07:09 -07:00			`self.ale = atari_py.ALEInterface()`
Initial release. Hello world :). 2016-04-27 08:00:58 -07:00			`self.viewer = None`

Move repeat_action_probability before loading the ROM (#362) Closes #349 2016-09-29 02:49:48 -07:00			`# Tune (or disable) ALE's action repeat:`
			`# https://github.com/openai/gym/issues/349`
			`assert isinstance(repeat_action_probability, (float, int)), "Invalid repeat_action_probability: {!r}".format(repeat_action_probability)`
			`self.ale.setFloat('repeat_action_probability'.encode('utf-8'), repeat_action_probability)`

[WIP] add support for seeding environments (#135) * Make environments seedable * Fix monitor bugs - Set monitor_id before setting the infix. This was a bug that would yield incorrect results with multiple monitors. - Remove extra pid from stats recorder filename. This should be purely cosmetic. * Start uploading seeds in episode_batch * Fix _bigint_from_bytes for python3 * Set seed explicitly in random_agent * Pass through seed argument * Also pass through random state to spaces * Pass random state into the observation/action spaces * Make all _seed methods return the list of used seeds * Switch over to np.random where possible * Start hashing seeds, and also seed doom engine * Fixup seeding determinism in many cases * Seed before loading the ROM * Make seeding more Python3 friendly * Make the MuJoCo skipping a bit more forgiving * Remove debugging PDB calls * Make setInt argument into raw bytes * Validate and upload seeds * Skip box2d * Make seeds smaller, and change representation of seeds in upload * Handle long seeds * Fix RandomAgent example to be deterministic * Handle integer types correctly in Python2 and Python3 * Try caching pip * Try adding swap * Add df and free calls * Bump swap * Bump swap size * Try setting overcommit * Try other sysctls * Try fixing overcommit * Try just setting overcommit_memory=1 * Add explanatory comment * Add what's new section to readme * BUG: Mark ElevatorAction-ram-v0 as non-deterministic for now * Document seed * Move nondetermistic check into spec 2016-05-29 09:07:09 -07:00			`self._seed()`
Initial release. Hello world :). 2016-04-27 08:00:58 -07:00
Remove unnecessary copies from atari environments (#317) 2016-08-24 00:15:27 -07:00			`(screen_width, screen_height) = self.ale.getScreenDims()`

Switch to a global PRNG for action/observation spaces (#144) cf https://github.com/openai/gym/commit/58e6aa95e5af2c738557431f812abb81c505a7cf#commitcomment-17669277 2016-05-30 18:07:59 -07:00			`self._action_set = self.ale.getMinimalActionSet()`
			`self.action_space = spaces.Discrete(len(self._action_set))`

			`(screen_width,screen_height) = self.ale.getScreenDims()`
			`if self._obs_type == 'ram':`
			`self.observation_space = spaces.Box(low=np.zeros(128), high=np.zeros(128)+255)`
			`elif self._obs_type == 'image':`
			`self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3))`
			`else:`
			`raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))`

[WIP] add support for seeding environments (#135) * Make environments seedable * Fix monitor bugs - Set monitor_id before setting the infix. This was a bug that would yield incorrect results with multiple monitors. - Remove extra pid from stats recorder filename. This should be purely cosmetic. * Start uploading seeds in episode_batch * Fix _bigint_from_bytes for python3 * Set seed explicitly in random_agent * Pass through seed argument * Also pass through random state to spaces * Pass random state into the observation/action spaces * Make all _seed methods return the list of used seeds * Switch over to np.random where possible * Start hashing seeds, and also seed doom engine * Fixup seeding determinism in many cases * Seed before loading the ROM * Make seeding more Python3 friendly * Make the MuJoCo skipping a bit more forgiving * Remove debugging PDB calls * Make setInt argument into raw bytes * Validate and upload seeds * Skip box2d * Make seeds smaller, and change representation of seeds in upload * Handle long seeds * Fix RandomAgent example to be deterministic * Handle integer types correctly in Python2 and Python3 * Try caching pip * Try adding swap * Add df and free calls * Bump swap * Bump swap size * Try setting overcommit * Try other sysctls * Try fixing overcommit * Try just setting overcommit_memory=1 * Add explanatory comment * Add what's new section to readme * BUG: Mark ElevatorAction-ram-v0 as non-deterministic for now * Document seed * Move nondetermistic check into spec 2016-05-29 09:07:09 -07:00			`def _seed(self, seed=None):`
			`self.np_random, seed1 = seeding.np_random(seed)`
			`# Derive a random seed. This gets passed as a uint, but gets`
			`# checked as an int elsewhere, so we need to keep it below`
			`# 2**31.`
			`seed2 = seeding.hash_seed(seed1 + 1) % 2**31`
			`# Empirically, we need to seed before loading the ROM.`
			`self.ale.setInt(b'random_seed', seed2)`
			`self.ale.loadROM(self.game_path)`
			`return [seed1, seed2]`
Initial release. Hello world :). 2016-04-27 08:00:58 -07:00
			`def _step(self, a):`
			`reward = 0.0`
			`action = self._action_set[a]`
Add Atari environments with deterministic frameskip 2016-08-25 08:58:09 -07:00
			`if isinstance(self.frameskip, int):`
			`num_steps = self.frameskip`
			`else:`
			`num_steps = self.np_random.randint(self.frameskip[0], self.frameskip[1])`
Fix Python3 compat of import dependencies 2016-04-27 18:03:29 -07:00			`for _ in range(num_steps):`
Initial release. Hello world :). 2016-04-27 08:00:58 -07:00			`reward += self.ale.act(action)`
			`ob = self._get_obs()`

Expose ale lives in info dict 2017-01-13 14:08:42 -08:00			`return ob, reward, self.ale.game_over(), {"ale.lives": self.ale.lives()}`
Initial release. Hello world :). 2016-04-27 08:00:58 -07:00
			`def _get_image(self):`
Use getScreenRGB2 2017-03-28 11:35:09 -07:00			`return self.ale.getScreenRGB2()`
Remove unnecessary copies from atari environments (#317) 2016-08-24 00:15:27 -07:00
Initial release. Hello world :). 2016-04-27 08:00:58 -07:00			`def _get_ram(self):`
			`return to_ram(self.ale)`

			`@property`
			`def _n_actions(self):`
			`return len(self._action_set)`

			`def _get_obs(self):`
			`if self._obs_type == 'ram':`
			`return self._get_ram()`
			`elif self._obs_type == 'image':`
			`img = self._get_image()`
			`return img`

			`# return: (states, observations)`
			`def _reset(self):`
			`self.ale.reset_game()`
			`return self._get_obs()`

Fix Python3 compat of import dependencies 2016-04-27 18:03:29 -07:00			`def _render(self, mode='human', close=False):`
Initial release. Hello world :). 2016-04-27 08:00:58 -07:00			`if close:`
			`if self.viewer is not None:`
			`self.viewer.close()`
Discard viewer object after render with close=True Fixes #95 2016-05-15 17:22:38 -07:00			`self.viewer = None`
Initial release. Hello world :). 2016-04-27 08:00:58 -07:00			`return`
			`img = self._get_image()`
			`if mode == 'rgb_array':`
			`return img`
Replaced `is` string comparison with `==` (#77) 2016-05-09 20:51:04 -04:00			`elif mode == 'human':`
Initial release. Hello world :). 2016-04-27 08:00:58 -07:00			`from gym.envs.classic_control import rendering`
			`if self.viewer is None:`
			`self.viewer = rendering.SimpleImageViewer()`
			`self.viewer.imshow(img)`
Fix Python3 compat of import dependencies 2016-04-27 18:03:29 -07:00
Initial release. Hello world :). 2016-04-27 08:00:58 -07:00			`def get_action_meanings(self):`
			`return [ACTION_MEANING[i] for i in self._action_set]`

TimeLimit refactor with Monitor Simplification (#482) * fix double reset, as suggested by @jietang * better floors and ceilings * add convenience methods to monitor * add wrappers to gym namespace * allow playing Atari games, with potentially more coming in the future * simplify example in docs * Move play out of the Env * fix tests * no more deprecation warnings * remove env.monitor * monitor simplification * monitor simplifications * monitor related fixes * a few changes suggested by linter * timestep_limit fixes * keep track of gym env variables for future compatibility * timestep_limit => max_episode_timesteps * don't apply TimeLimit wrapper in make for VNC envs * Respect old timestep_limit argument * Pass max_episode_seconds through registration * Don't include deprecation warnings yet 2017-02-01 13:10:59 -08:00			`def get_keys_to_action(self):`
			`KEYWORD_TO_KEY = {`
			`'UP': ord('w'),`
			`'DOWN': ord('s'),`
			`'LEFT': ord('a'),`
			`'RIGHT': ord('d'),`
			`'FIRE': ord(' '),`
			`}`

			`keys_to_action = {}`

			`for action_id, action_meaning in enumerate(self.get_action_meanings()):`
			`keys = []`
			`for keyword, key in KEYWORD_TO_KEY.items():`
			`if keyword in action_meaning:`
			`keys.append(key)`
			`keys = tuple(sorted(keys))`

			`assert keys not in keys_to_action`
			`keys_to_action[keys] = action_id`

			`return keys_to_action`

exposing save, load, clone and restore states at environment level (#229) * exposing save, load, clone and restore states at environment level for more visibility * add save, load, restore, clone states as a comment 2016-06-28 17:59:53 -07:00			`# def save_state(self):`
			`# return self.ale.saveState()`

			`# def load_state(self):`
			`# return self.ale.loadState()`

			`# def clone_state(self):`
			`# return self.ale.cloneState()`

			`# def restore_state(self, state):`
			`# return self.ale.restoreState(state)`
Initial release. Hello world :). 2016-04-27 08:00:58 -07:00

			`ACTION_MEANING = {`
			`0 : "NOOP",`
			`1 : "FIRE",`
			`2 : "UP",`
			`3 : "RIGHT",`
			`4 : "LEFT",`
			`5 : "DOWN",`
			`6 : "UPRIGHT",`
			`7 : "UPLEFT",`
			`8 : "DOWNRIGHT",`
			`9 : "DOWNLEFT",`
			`10 : "UPFIRE",`
			`11 : "RIGHTFIRE",`
			`12 : "LEFTFIRE",`
			`13 : "DOWNFIRE",`
			`14 : "UPRIGHTFIRE",`
			`15 : "UPLEFTFIRE",`
			`16 : "DOWNRIGHTFIRE",`
			`17 : "DOWNLEFTFIRE",`
Fix Python3 compat of import dependencies 2016-04-27 18:03:29 -07:00			`}`