Files
Gymnasium/gym/envs/atari/atari_env.py

241 lines
7.5 KiB
Python
Raw Normal View History

2016-04-27 08:00:58 -07:00
import numpy as np
import os
import gym
from gym import error, spaces
from gym import utils
[WIP] add support for seeding environments (#135) * Make environments seedable * Fix monitor bugs - Set monitor_id before setting the infix. This was a bug that would yield incorrect results with multiple monitors. - Remove extra pid from stats recorder filename. This should be purely cosmetic. * Start uploading seeds in episode_batch * Fix _bigint_from_bytes for python3 * Set seed explicitly in random_agent * Pass through seed argument * Also pass through random state to spaces * Pass random state into the observation/action spaces * Make all _seed methods return the list of used seeds * Switch over to np.random where possible * Start hashing seeds, and also seed doom engine * Fixup seeding determinism in many cases * Seed before loading the ROM * Make seeding more Python3 friendly * Make the MuJoCo skipping a bit more forgiving * Remove debugging PDB calls * Make setInt argument into raw bytes * Validate and upload seeds * Skip box2d * Make seeds smaller, and change representation of seeds in upload * Handle long seeds * Fix RandomAgent example to be deterministic * Handle integer types correctly in Python2 and Python3 * Try caching pip * Try adding swap * Add df and free calls * Bump swap * Bump swap size * Try setting overcommit * Try other sysctls * Try fixing overcommit * Try just setting overcommit_memory=1 * Add explanatory comment * Add what's new section to readme * BUG: Mark ElevatorAction-ram-v0 as non-deterministic for now * Document seed * Move nondetermistic check into spec
2016-05-29 09:07:09 -07:00
from gym.utils import seeding
2016-04-27 08:00:58 -07:00
try:
import atari_py
except ImportError as e:
raise error.DependencyNotInstalled(
2021-07-29 12:42:48 -04:00
"{}. (HINT: you can install Atari dependencies by running " "'pip install gym[atari]'.)".format(e)
2021-07-29 02:26:34 +02:00
)
2016-04-27 08:00:58 -07:00
def to_ram(ale):
ram_size = ale.getRAMSize()
ram = np.zeros((ram_size), dtype=np.uint8)
2016-04-27 08:00:58 -07:00
ale.getRAM(ram)
return ram
2016-04-27 08:00:58 -07:00
class AtariEnv(gym.Env, utils.EzPickle):
2021-07-29 02:26:34 +02:00
metadata = {"render.modes": ["human", "rgb_array"]}
2016-04-27 08:00:58 -07:00
def __init__(
2021-07-29 02:26:34 +02:00
self,
game="pong",
mode=None,
difficulty=None,
obs_type="ram",
frameskip=(2, 5),
repeat_action_probability=0.0,
full_action_space=False,
):
"""Frameskip should be either a tuple (indicating a random range to
choose from, with the top value exclude), or an int."""
utils.EzPickle.__init__(
2021-07-29 02:26:34 +02:00
self,
game,
mode,
difficulty,
obs_type,
frameskip,
repeat_action_probability,
full_action_space,
)
assert obs_type in ("ram", "image")
[WIP] add support for seeding environments (#135) * Make environments seedable * Fix monitor bugs - Set monitor_id before setting the infix. This was a bug that would yield incorrect results with multiple monitors. - Remove extra pid from stats recorder filename. This should be purely cosmetic. * Start uploading seeds in episode_batch * Fix _bigint_from_bytes for python3 * Set seed explicitly in random_agent * Pass through seed argument * Also pass through random state to spaces * Pass random state into the observation/action spaces * Make all _seed methods return the list of used seeds * Switch over to np.random where possible * Start hashing seeds, and also seed doom engine * Fixup seeding determinism in many cases * Seed before loading the ROM * Make seeding more Python3 friendly * Make the MuJoCo skipping a bit more forgiving * Remove debugging PDB calls * Make setInt argument into raw bytes * Validate and upload seeds * Skip box2d * Make seeds smaller, and change representation of seeds in upload * Handle long seeds * Fix RandomAgent example to be deterministic * Handle integer types correctly in Python2 and Python3 * Try caching pip * Try adding swap * Add df and free calls * Bump swap * Bump swap size * Try setting overcommit * Try other sysctls * Try fixing overcommit * Try just setting overcommit_memory=1 * Add explanatory comment * Add what's new section to readme * BUG: Mark ElevatorAction-ram-v0 as non-deterministic for now * Document seed * Move nondetermistic check into spec
2016-05-29 09:07:09 -07:00
2019-06-21 19:50:54 -04:00
self.game = game
[WIP] add support for seeding environments (#135) * Make environments seedable * Fix monitor bugs - Set monitor_id before setting the infix. This was a bug that would yield incorrect results with multiple monitors. - Remove extra pid from stats recorder filename. This should be purely cosmetic. * Start uploading seeds in episode_batch * Fix _bigint_from_bytes for python3 * Set seed explicitly in random_agent * Pass through seed argument * Also pass through random state to spaces * Pass random state into the observation/action spaces * Make all _seed methods return the list of used seeds * Switch over to np.random where possible * Start hashing seeds, and also seed doom engine * Fixup seeding determinism in many cases * Seed before loading the ROM * Make seeding more Python3 friendly * Make the MuJoCo skipping a bit more forgiving * Remove debugging PDB calls * Make setInt argument into raw bytes * Validate and upload seeds * Skip box2d * Make seeds smaller, and change representation of seeds in upload * Handle long seeds * Fix RandomAgent example to be deterministic * Handle integer types correctly in Python2 and Python3 * Try caching pip * Try adding swap * Add df and free calls * Bump swap * Bump swap size * Try setting overcommit * Try other sysctls * Try fixing overcommit * Try just setting overcommit_memory=1 * Add explanatory comment * Add what's new section to readme * BUG: Mark ElevatorAction-ram-v0 as non-deterministic for now * Document seed * Move nondetermistic check into spec
2016-05-29 09:07:09 -07:00
self.game_path = atari_py.get_game_path(game)
2019-06-21 19:50:54 -04:00
self.game_mode = mode
self.game_difficulty = difficulty
[WIP] add support for seeding environments (#135) * Make environments seedable * Fix monitor bugs - Set monitor_id before setting the infix. This was a bug that would yield incorrect results with multiple monitors. - Remove extra pid from stats recorder filename. This should be purely cosmetic. * Start uploading seeds in episode_batch * Fix _bigint_from_bytes for python3 * Set seed explicitly in random_agent * Pass through seed argument * Also pass through random state to spaces * Pass random state into the observation/action spaces * Make all _seed methods return the list of used seeds * Switch over to np.random where possible * Start hashing seeds, and also seed doom engine * Fixup seeding determinism in many cases * Seed before loading the ROM * Make seeding more Python3 friendly * Make the MuJoCo skipping a bit more forgiving * Remove debugging PDB calls * Make setInt argument into raw bytes * Validate and upload seeds * Skip box2d * Make seeds smaller, and change representation of seeds in upload * Handle long seeds * Fix RandomAgent example to be deterministic * Handle integer types correctly in Python2 and Python3 * Try caching pip * Try adding swap * Add df and free calls * Bump swap * Bump swap size * Try setting overcommit * Try other sysctls * Try fixing overcommit * Try just setting overcommit_memory=1 * Add explanatory comment * Add what's new section to readme * BUG: Mark ElevatorAction-ram-v0 as non-deterministic for now * Document seed * Move nondetermistic check into spec
2016-05-29 09:07:09 -07:00
if not os.path.exists(self.game_path):
2021-07-29 02:26:34 +02:00
msg = "You asked for game %s but path %s does not exist"
raise IOError(msg % (game, self.game_path))
2016-04-27 08:00:58 -07:00
self._obs_type = obs_type
self.frameskip = frameskip
[WIP] add support for seeding environments (#135) * Make environments seedable * Fix monitor bugs - Set monitor_id before setting the infix. This was a bug that would yield incorrect results with multiple monitors. - Remove extra pid from stats recorder filename. This should be purely cosmetic. * Start uploading seeds in episode_batch * Fix _bigint_from_bytes for python3 * Set seed explicitly in random_agent * Pass through seed argument * Also pass through random state to spaces * Pass random state into the observation/action spaces * Make all _seed methods return the list of used seeds * Switch over to np.random where possible * Start hashing seeds, and also seed doom engine * Fixup seeding determinism in many cases * Seed before loading the ROM * Make seeding more Python3 friendly * Make the MuJoCo skipping a bit more forgiving * Remove debugging PDB calls * Make setInt argument into raw bytes * Validate and upload seeds * Skip box2d * Make seeds smaller, and change representation of seeds in upload * Handle long seeds * Fix RandomAgent example to be deterministic * Handle integer types correctly in Python2 and Python3 * Try caching pip * Try adding swap * Add df and free calls * Bump swap * Bump swap size * Try setting overcommit * Try other sysctls * Try fixing overcommit * Try just setting overcommit_memory=1 * Add explanatory comment * Add what's new section to readme * BUG: Mark ElevatorAction-ram-v0 as non-deterministic for now * Document seed * Move nondetermistic check into spec
2016-05-29 09:07:09 -07:00
self.ale = atari_py.ALEInterface()
2016-04-27 08:00:58 -07:00
self.viewer = None
# Tune (or disable) ALE's action repeat:
# https://github.com/openai/gym/issues/349
2021-07-29 12:42:48 -04:00
assert isinstance(repeat_action_probability, (float, int)), "Invalid repeat_action_probability: {!r}".format(
repeat_action_probability
2021-07-29 02:26:34 +02:00
)
2021-07-29 12:42:48 -04:00
self.ale.setFloat("repeat_action_probability".encode("utf-8"), repeat_action_probability)
self.seed()
2016-04-27 08:00:58 -07:00
2021-07-29 12:42:48 -04:00
self._action_set = self.ale.getLegalActionSet() if full_action_space else self.ale.getMinimalActionSet()
self.action_space = spaces.Discrete(len(self._action_set))
(screen_width, screen_height) = self.ale.getScreenDims()
2021-07-29 02:26:34 +02:00
if self._obs_type == "ram":
2021-07-29 12:42:48 -04:00
self.observation_space = spaces.Box(low=0, high=255, dtype=np.uint8, shape=(128,))
2021-07-29 02:26:34 +02:00
elif self._obs_type == "image":
2021-07-29 12:42:48 -04:00
self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3), dtype=np.uint8)
else:
2021-07-29 12:42:48 -04:00
raise error.Error("Unrecognized observation type: {}".format(self._obs_type))
def seed(self, seed=None):
[WIP] add support for seeding environments (#135) * Make environments seedable * Fix monitor bugs - Set monitor_id before setting the infix. This was a bug that would yield incorrect results with multiple monitors. - Remove extra pid from stats recorder filename. This should be purely cosmetic. * Start uploading seeds in episode_batch * Fix _bigint_from_bytes for python3 * Set seed explicitly in random_agent * Pass through seed argument * Also pass through random state to spaces * Pass random state into the observation/action spaces * Make all _seed methods return the list of used seeds * Switch over to np.random where possible * Start hashing seeds, and also seed doom engine * Fixup seeding determinism in many cases * Seed before loading the ROM * Make seeding more Python3 friendly * Make the MuJoCo skipping a bit more forgiving * Remove debugging PDB calls * Make setInt argument into raw bytes * Validate and upload seeds * Skip box2d * Make seeds smaller, and change representation of seeds in upload * Handle long seeds * Fix RandomAgent example to be deterministic * Handle integer types correctly in Python2 and Python3 * Try caching pip * Try adding swap * Add df and free calls * Bump swap * Bump swap size * Try setting overcommit * Try other sysctls * Try fixing overcommit * Try just setting overcommit_memory=1 * Add explanatory comment * Add what's new section to readme * BUG: Mark ElevatorAction-ram-v0 as non-deterministic for now * Document seed * Move nondetermistic check into spec
2016-05-29 09:07:09 -07:00
self.np_random, seed1 = seeding.np_random(seed)
# Derive a random seed. This gets passed as a uint, but gets
# checked as an int elsewhere, so we need to keep it below
# 2**31.
2021-07-29 02:26:34 +02:00
seed2 = seeding.hash_seed(seed1 + 1) % 2 ** 31
[WIP] add support for seeding environments (#135) * Make environments seedable * Fix monitor bugs - Set monitor_id before setting the infix. This was a bug that would yield incorrect results with multiple monitors. - Remove extra pid from stats recorder filename. This should be purely cosmetic. * Start uploading seeds in episode_batch * Fix _bigint_from_bytes for python3 * Set seed explicitly in random_agent * Pass through seed argument * Also pass through random state to spaces * Pass random state into the observation/action spaces * Make all _seed methods return the list of used seeds * Switch over to np.random where possible * Start hashing seeds, and also seed doom engine * Fixup seeding determinism in many cases * Seed before loading the ROM * Make seeding more Python3 friendly * Make the MuJoCo skipping a bit more forgiving * Remove debugging PDB calls * Make setInt argument into raw bytes * Validate and upload seeds * Skip box2d * Make seeds smaller, and change representation of seeds in upload * Handle long seeds * Fix RandomAgent example to be deterministic * Handle integer types correctly in Python2 and Python3 * Try caching pip * Try adding swap * Add df and free calls * Bump swap * Bump swap size * Try setting overcommit * Try other sysctls * Try fixing overcommit * Try just setting overcommit_memory=1 * Add explanatory comment * Add what's new section to readme * BUG: Mark ElevatorAction-ram-v0 as non-deterministic for now * Document seed * Move nondetermistic check into spec
2016-05-29 09:07:09 -07:00
# Empirically, we need to seed before loading the ROM.
2021-07-29 02:26:34 +02:00
self.ale.setInt(b"random_seed", seed2)
[WIP] add support for seeding environments (#135) * Make environments seedable * Fix monitor bugs - Set monitor_id before setting the infix. This was a bug that would yield incorrect results with multiple monitors. - Remove extra pid from stats recorder filename. This should be purely cosmetic. * Start uploading seeds in episode_batch * Fix _bigint_from_bytes for python3 * Set seed explicitly in random_agent * Pass through seed argument * Also pass through random state to spaces * Pass random state into the observation/action spaces * Make all _seed methods return the list of used seeds * Switch over to np.random where possible * Start hashing seeds, and also seed doom engine * Fixup seeding determinism in many cases * Seed before loading the ROM * Make seeding more Python3 friendly * Make the MuJoCo skipping a bit more forgiving * Remove debugging PDB calls * Make setInt argument into raw bytes * Validate and upload seeds * Skip box2d * Make seeds smaller, and change representation of seeds in upload * Handle long seeds * Fix RandomAgent example to be deterministic * Handle integer types correctly in Python2 and Python3 * Try caching pip * Try adding swap * Add df and free calls * Bump swap * Bump swap size * Try setting overcommit * Try other sysctls * Try fixing overcommit * Try just setting overcommit_memory=1 * Add explanatory comment * Add what's new section to readme * BUG: Mark ElevatorAction-ram-v0 as non-deterministic for now * Document seed * Move nondetermistic check into spec
2016-05-29 09:07:09 -07:00
self.ale.loadROM(self.game_path)
2019-06-21 19:50:54 -04:00
if self.game_mode is not None:
modes = self.ale.getAvailableModes()
2019-06-21 19:50:54 -04:00
2021-07-29 12:42:48 -04:00
assert self.game_mode in modes, ('Invalid game mode "{}" for game {}.\nAvailable modes are: {}').format(
self.game_mode, self.game, modes
)
self.ale.setMode(self.game_mode)
2019-06-21 19:50:54 -04:00
if self.game_difficulty is not None:
difficulties = self.ale.getAvailableDifficulties()
assert self.game_difficulty in difficulties, (
2021-07-29 02:26:34 +02:00
'Invalid game difficulty "{}" for game {}.\nAvailable difficulties are: {}'
).format(self.game_difficulty, self.game, difficulties)
self.ale.setDifficulty(self.game_difficulty)
2019-06-21 19:50:54 -04:00
[WIP] add support for seeding environments (#135) * Make environments seedable * Fix monitor bugs - Set monitor_id before setting the infix. This was a bug that would yield incorrect results with multiple monitors. - Remove extra pid from stats recorder filename. This should be purely cosmetic. * Start uploading seeds in episode_batch * Fix _bigint_from_bytes for python3 * Set seed explicitly in random_agent * Pass through seed argument * Also pass through random state to spaces * Pass random state into the observation/action spaces * Make all _seed methods return the list of used seeds * Switch over to np.random where possible * Start hashing seeds, and also seed doom engine * Fixup seeding determinism in many cases * Seed before loading the ROM * Make seeding more Python3 friendly * Make the MuJoCo skipping a bit more forgiving * Remove debugging PDB calls * Make setInt argument into raw bytes * Validate and upload seeds * Skip box2d * Make seeds smaller, and change representation of seeds in upload * Handle long seeds * Fix RandomAgent example to be deterministic * Handle integer types correctly in Python2 and Python3 * Try caching pip * Try adding swap * Add df and free calls * Bump swap * Bump swap size * Try setting overcommit * Try other sysctls * Try fixing overcommit * Try just setting overcommit_memory=1 * Add explanatory comment * Add what's new section to readme * BUG: Mark ElevatorAction-ram-v0 as non-deterministic for now * Document seed * Move nondetermistic check into spec
2016-05-29 09:07:09 -07:00
return [seed1, seed2]
2016-04-27 08:00:58 -07:00
def step(self, a):
2016-04-27 08:00:58 -07:00
reward = 0.0
action = self._action_set[a]
if isinstance(self.frameskip, int):
num_steps = self.frameskip
else:
num_steps = self.np_random.randint(self.frameskip[0], self.frameskip[1])
for _ in range(num_steps):
2016-04-27 08:00:58 -07:00
reward += self.ale.act(action)
ob = self._get_obs()
2017-01-13 14:08:42 -08:00
return ob, reward, self.ale.game_over(), {"ale.lives": self.ale.lives()}
2016-04-27 08:00:58 -07:00
def _get_image(self):
2017-03-28 11:35:09 -07:00
return self.ale.getScreenRGB2()
2016-04-27 08:00:58 -07:00
def _get_ram(self):
return to_ram(self.ale)
@property
def _n_actions(self):
return len(self._action_set)
def _get_obs(self):
2021-07-29 02:26:34 +02:00
if self._obs_type == "ram":
2016-04-27 08:00:58 -07:00
return self._get_ram()
2021-07-29 02:26:34 +02:00
elif self._obs_type == "image":
2016-04-27 08:00:58 -07:00
img = self._get_image()
return img
# return: (states, observations)
def reset(self):
2016-04-27 08:00:58 -07:00
self.ale.reset_game()
return self._get_obs()
2021-07-29 02:26:34 +02:00
def render(self, mode="human"):
2016-04-27 08:00:58 -07:00
img = self._get_image()
2021-07-29 02:26:34 +02:00
if mode == "rgb_array":
2016-04-27 08:00:58 -07:00
return img
2021-07-29 02:26:34 +02:00
elif mode == "human":
2016-04-27 08:00:58 -07:00
from gym.envs.classic_control import rendering
2021-07-29 02:26:34 +02:00
2016-04-27 08:00:58 -07:00
if self.viewer is None:
self.viewer = rendering.SimpleImageViewer()
self.viewer.imshow(img)
return self.viewer.isopen
def close(self):
if self.viewer is not None:
self.viewer.close()
self.viewer = None
2016-04-27 08:00:58 -07:00
def get_action_meanings(self):
return [ACTION_MEANING[i] for i in self._action_set]
def get_keys_to_action(self):
KEYWORD_TO_KEY = {
2021-07-29 02:26:34 +02:00
"UP": ord("w"),
"DOWN": ord("s"),
"LEFT": ord("a"),
"RIGHT": ord("d"),
"FIRE": ord(" "),
}
keys_to_action = {}
for action_id, action_meaning in enumerate(self.get_action_meanings()):
keys = []
for keyword, key in KEYWORD_TO_KEY.items():
if keyword in action_meaning:
keys.append(key)
keys = tuple(sorted(keys))
assert keys not in keys_to_action
keys_to_action[keys] = action_id
return keys_to_action
def clone_state(self):
"""Clone emulator state w/o system state. Restoring this state will
*not* give an identical environment. For complete cloning and restoring
of the full state, see `{clone,restore}_full_state()`."""
state_ref = self.ale.cloneState()
state = self.ale.encodeState(state_ref)
self.ale.deleteState(state_ref)
return state
def restore_state(self, state):
"""Restore emulator state w/o system state."""
state_ref = self.ale.decodeState(state)
2017-07-31 17:17:32 -07:00
self.ale.restoreState(state_ref)
self.ale.deleteState(state_ref)
def clone_full_state(self):
"""Clone emulator state w/ system state including pseudorandomness.
Restoring this state will give an identical environment."""
state_ref = self.ale.cloneSystemState()
state = self.ale.encodeState(state_ref)
self.ale.deleteState(state_ref)
return state
def restore_full_state(self, state):
"""Restore emulator state w/ system state including pseudorandomness."""
state_ref = self.ale.decodeState(state)
self.ale.restoreSystemState(state_ref)
self.ale.deleteState(state_ref)
2016-04-27 08:00:58 -07:00
2016-04-27 08:00:58 -07:00
ACTION_MEANING = {
0: "NOOP",
1: "FIRE",
2: "UP",
3: "RIGHT",
4: "LEFT",
5: "DOWN",
6: "UPRIGHT",
7: "UPLEFT",
8: "DOWNRIGHT",
9: "DOWNLEFT",
10: "UPFIRE",
11: "RIGHTFIRE",
12: "LEFTFIRE",
13: "DOWNFIRE",
14: "UPRIGHTFIRE",
15: "UPLEFTFIRE",
16: "DOWNRIGHTFIRE",
17: "DOWNLEFTFIRE",
}