import os

import numpy as np

import gym
from gym import error, spaces
from gym import utils
from gym.utils import seeding

# atari_py is an optional dependency; turn a missing install into gym's own
# DependencyNotInstalled error so the user gets an actionable hint.
try:
    import atari_py
except ImportError as e:
    raise error.DependencyNotInstalled("{}. (HINT: you can install Atari dependencies by running 'pip install gym[atari]'.)".format(e))


def to_ram(ale):
    """Return the emulator RAM of ``ale`` as a fresh ``uint8`` array.

    Args:
        ale: object exposing ``getRAMSize()`` and ``getRAM(buffer)``; the
            latter is called with a preallocated array and is expected to
            fill it in place.

    Returns:
        A 1-D ``numpy.uint8`` array of length ``ale.getRAMSize()``.
    """
    ram_size = ale.getRAMSize()
    # Allocate the destination buffer ourselves; getRAM() writes into it.
    ram = np.zeros(ram_size, dtype=np.uint8)
    ale.getRAM(ram)
    return ram


class AtariEnv(gym.Env, utils.EzPickle):
    """Gym environment driving an ``atari_py`` ALE emulator instance.

    Observations are either the emulator RAM (``obs_type='ram'``) or the
    RGB screen (``obs_type='image'``).  Each call to ``step`` repeats the
    chosen action for a number of emulator frames given by ``frameskip``
    and returns the summed reward.
    """

    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, game='pong', obs_type='ram', frameskip=(2, 5),
                 repeat_action_probability=0., full_action_space=False):
        """Create the emulator, load the ROM and build the gym spaces.

        Frameskip should be either a tuple (indicating a random range to
        choose from, with the top value excluded), or an int.

        Args:
            game: game name, resolved to a ROM path through
                ``atari_py.get_game_path``.
            obs_type: ``'ram'`` or ``'image'``.
            frameskip: int, or ``(low, high)`` tuple as described above.
            repeat_action_probability: float or int forwarded to ALE's
                ``repeat_action_probability`` setting.
            full_action_space: if true, use ``getLegalActionSet()``;
                otherwise use ``getMinimalActionSet()``.

        Raises:
            AssertionError: if ``obs_type`` or ``repeat_action_probability``
                is invalid.
            IOError: if the resolved ROM path does not exist.
        """
        # EzPickle rebuilds the environment from these constructor
        # arguments, so every argument must be recorded here.
        # full_action_space used to be omitted, which made a pickled/copied
        # environment silently fall back to the minimal action set.
        utils.EzPickle.__init__(self, game, obs_type, frameskip,
                                repeat_action_probability, full_action_space)
        assert obs_type in ('ram', 'image')

        self.game_path = atari_py.get_game_path(game)
        if not os.path.exists(self.game_path):
            raise IOError('You asked for game %s but path %s does not exist'%(game, self.game_path))
        self._obs_type = obs_type
        self.frameskip = frameskip
        self.ale = atari_py.ALEInterface()
        self.viewer = None

        # Tune (or disable) ALE's action repeat:
        # https://github.com/openai/gym/issues/349
        assert isinstance(repeat_action_probability, (float, int)), "Invalid repeat_action_probability: {!r}".format(repeat_action_probability)
        self.ale.setFloat('repeat_action_probability'.encode('utf-8'), repeat_action_probability)

        # seed() also loads the ROM, so it must run before the action set
        # and screen dimensions are queried below.
        self.seed()

        self._action_set = (self.ale.getLegalActionSet() if full_action_space
                            else self.ale.getMinimalActionSet())
        self.action_space = spaces.Discrete(len(self._action_set))

        (screen_width, screen_height) = self.ale.getScreenDims()
        if self._obs_type == 'ram':
            self.observation_space = spaces.Box(low=0, high=255, dtype=np.uint8, shape=(128,))
        elif self._obs_type == 'image':
            self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3), dtype=np.uint8)
        else:
            raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))

    def seed(self, seed=None):
        """Seed the numpy RNG and the emulator, then (re)load the ROM.

        Returns:
            ``[seed1, seed2]``: the seed used for ``self.np_random`` and the
            derived seed handed to ALE.
        """
        self.np_random, seed1 = seeding.np_random(seed)
        # Derive a random seed. This gets passed as a uint, but gets
        # checked as an int elsewhere, so we need to keep it below
        # 2**31.
        seed2 = seeding.hash_seed(seed1 + 1) % 2**31
        # Empirically, we need to seed before loading the ROM.
        self.ale.setInt(b'random_seed', seed2)
        self.ale.loadROM(self.game_path)
        return [seed1, seed2]

    def step(self, a):
        """Apply action index ``a`` for one (frame-skipped) environment step.

        Args:
            a: index into ``self._action_set``.

        Returns:
            ``(observation, reward, done, info)`` where ``reward`` is the sum
            of the per-frame rewards, ``done`` is ``ale.game_over()`` and
            ``info`` is ``{"ale.lives": ale.lives()}``.
        """
        reward = 0.0
        action = self._action_set[a]

        if isinstance(self.frameskip, int):
            num_steps = self.frameskip
        else:
            # Tuple form: draw the frame count for this step at random.
            num_steps = self.np_random.randint(self.frameskip[0], self.frameskip[1])
        for _ in range(num_steps):
            reward += self.ale.act(action)
        ob = self._get_obs()

        return ob, reward, self.ale.game_over(), {"ale.lives": self.ale.lives()}

    def _get_image(self):
        """Return the current screen as provided by ``ale.getScreenRGB2()``."""
        return self.ale.getScreenRGB2()

    def _get_ram(self):
        """Return the current emulator RAM as a ``uint8`` array."""
        return to_ram(self.ale)

    @property
    def _n_actions(self):
        """Number of actions in the active action set."""
        return len(self._action_set)

    def _get_obs(self):
        """Return the observation matching ``self._obs_type``.

        Raises:
            error.Error: if ``self._obs_type`` is neither ``'ram'`` nor
                ``'image'`` (``__init__`` already rejects such values).
        """
        if self._obs_type == 'ram':
            return self._get_ram()
        if self._obs_type == 'image':
            return self._get_image()
        raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))

    def reset(self):
        """Reset the game and return the initial observation."""
        self.ale.reset_game()
        return self._get_obs()

    def render(self, mode='human'):
        """Render the current screen.

        Args:
            mode: ``'rgb_array'`` returns the screen image; ``'human'``
                shows it in a ``SimpleImageViewer`` window and returns
                whether that window is still open.  Any other mode
                returns ``None``.
        """
        img = self._get_image()
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            # Imported lazily so the rendering module is only loaded when a
            # window is actually requested.
            from gym.envs.classic_control import rendering
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)
            return self.viewer.isopen

    def close(self):
        """Close the viewer window, if one was opened."""
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None

    def get_action_meanings(self):
        """Return the ``ACTION_MEANING`` name of each action in the action set."""
        return [ACTION_MEANING[i] for i in self._action_set]

    def get_keys_to_action(self):
        """Map sorted tuples of key codes to action indices.

        A key is attached to an action when its keyword occurs as a
        substring of the action's meaning (e.g. both ``'UP'`` and
        ``'FIRE'`` match ``'UPFIRE'``).

        Raises:
            AssertionError: if two actions map to the same key combination.
        """
        KEYWORD_TO_KEY = {
            'UP': ord('w'),
            'DOWN': ord('s'),
            'LEFT': ord('a'),
            'RIGHT': ord('d'),
            'FIRE': ord(' '),
        }

        keys_to_action = {}

        for action_id, action_meaning in enumerate(self.get_action_meanings()):
            keys = tuple(sorted(
                key for keyword, key in KEYWORD_TO_KEY.items()
                if keyword in action_meaning))

            assert keys not in keys_to_action
            keys_to_action[keys] = action_id

        return keys_to_action

    def clone_state(self):
        """Clone emulator state w/o system state. Restoring this state will
        *not* give an identical environment. For complete cloning and restoring
        of the full state, see `{clone,restore}_full_state()`."""
        state_ref = self.ale.cloneState()
        try:
            return self.ale.encodeState(state_ref)
        finally:
            # Release the state handle even if encoding fails.
            self.ale.deleteState(state_ref)

    def restore_state(self, state):
        """Restore emulator state w/o system state."""
        state_ref = self.ale.decodeState(state)
        try:
            self.ale.restoreState(state_ref)
        finally:
            # Release the state handle even if restoring fails.
            self.ale.deleteState(state_ref)

    def clone_full_state(self):
        """Clone emulator state w/ system state including pseudorandomness.
        Restoring this state will give an identical environment."""
        state_ref = self.ale.cloneSystemState()
        try:
            return self.ale.encodeState(state_ref)
        finally:
            # Release the state handle even if encoding fails.
            self.ale.deleteState(state_ref)

    def restore_full_state(self, state):
        """Restore emulator state w/ system state including pseudorandomness."""
        state_ref = self.ale.decodeState(state)
        try:
            self.ale.restoreSystemState(state_ref)
        finally:
            # Release the state handle even if restoring fails.
            self.ale.deleteState(state_ref)


# Human-readable name for each action code; looked up by
# AtariEnv.get_action_meanings() for every entry of the action set.
ACTION_MEANING = {
    0: "NOOP",
    1: "FIRE",
    2: "UP",
    3: "RIGHT",
    4: "LEFT",
    5: "DOWN",
    6: "UPRIGHT",
    7: "UPLEFT",
    8: "DOWNRIGHT",
    9: "DOWNLEFT",
    10: "UPFIRE",
    11: "RIGHTFIRE",
    12: "LEFTFIRE",
    13: "DOWNFIRE",
    14: "UPRIGHTFIRE",
    15: "UPLEFTFIRE",
    16: "DOWNRIGHTFIRE",
    17: "DOWNLEFTFIRE",
}