Doom - Same Action Space Across Environments (#157)

* Doom - Added reward_threshold and timestep_limit for all environments * Doom - Returning all available game variables * Doom - Moved _seed to doom_env to avoid repetition in every environment * Doom - Added ALT_ATTACK and made all action_space equivalent (same controls between environments). * Doom - Actions can either be a short list of allowed actions or the full list of 41 commands * Doom - Returning black observation space on error or is_finished, rather than empty list (which was triggering an error) * Doom - HighLow.sample() returns the small list. * Doom - Updated difficulty for some missions * Doom - Fixed inconsistency between controls.md and deathmatch.cfg * Doom - Issue #168 - Remove sleep statement from DoomEnv render * Doom - Only using full action space (43 keys) - Added 'normal', 'fast' and 'human' mode - Set non-deterministic to True - Set video.frames_per_second to 35 - Properly returning game variables * Replaced warnings.warn by logger.warn * Doom - Added NUM_ACTIONS and action_idx instead of x * Doom - Added NUM_ACTIONS and action_idx instead of x * Doom - reset() only calls game.new_episode() after first call * Doom is now deterministic * Doom - Partial fix for issue #167 - DoomDeathmatch environment crashes sporadically * Doom - Standardized envs, simplified _reset * Doom - Removed temporary fix for issue #167 * Doom - Added scoreboard summary and description
2025-08-23 15:04:20 +00:00 · 2016-06-14 18:57:47 -04:00
parent 5b8603066c
commit aff7a643cc
26 changed files with 803 additions and 416 deletions
--- a/gym/envs/init.py
+++ b/gym/envs/init.py
@@ -299,46 +299,64 @@ register(
 register(
    id='DoomBasic-v0',
    entry_point='gym.envs.doom:DoomBasicEnv',
    timestep_limit=10000,
    reward_threshold=10.0,
 )
 register(
    id='DoomCorridor-v0',
    entry_point='gym.envs.doom:DoomCorridorEnv',
    timestep_limit=10000,
    reward_threshold=1000.0,
 )
 register(
    id='DoomDefendCenter-v0',
    entry_point='gym.envs.doom:DoomDefendCenterEnv',
    timestep_limit=10000,
    reward_threshold=10.0,
 )
 register(
    id='DoomDefendLine-v0',
    entry_point='gym.envs.doom:DoomDefendLineEnv',
    timestep_limit=10000,
    reward_threshold=15.0,
 )
 register(
    id='DoomHealthGathering-v0',
    entry_point='gym.envs.doom:DoomHealthGatheringEnv',
    timestep_limit=10000,
    reward_threshold=1000.0,
 )
 register(
    id='DoomMyWayHome-v0',
    entry_point='gym.envs.doom:DoomMyWayHomeEnv',
    timestep_limit=10000,
    reward_threshold=0.5,
 )
 register(
    id='DoomPredictPosition-v0',
    entry_point='gym.envs.doom:DoomPredictPositionEnv',
    timestep_limit=10000,
    reward_threshold=0.5,
 )
 register(
    id='DoomTakeCover-v0',
    entry_point='gym.envs.doom:DoomTakeCoverEnv',
    timestep_limit=10000,
    reward_threshold=750.0,
 )
 register(
    id='DoomDeathmatch-v0',
    entry_point='gym.envs.doom:DoomDeathmatchEnv',
    timestep_limit=10000,
    reward_threshold=20.0,
 )
 # Debugging
--- a/gym/envs/doom/assets/basic.cfg
+++ b/gym/envs/doom/assets/basic.cfg
@@ -32,13 +32,29 @@ available_buttons =
 available_game_variables =
    {
        KILLCOUNT
        ITEMCOUNT
        SECRETCOUNT
        FRAGCOUNT
        HEALTH
        ARMOR
        DEAD
        ON_GROUND
        ATTACK_READY
        ALTATTACK_READY
        SELECTED_WEAPON
        SELECTED_WEAPON_AMMO
        AMMO1
        AMMO2
        AMMO3
        AMMO4
        AMMO5
        AMMO6
        AMMO7
        AMMO8
        AMMO9
        AMMO0
    }
 mode = PLAYER
 doom_skill = 5
 sound_enabled = false
--- a/gym/envs/doom/assets/deadly_corridor.cfg
+++ b/gym/envs/doom/assets/deadly_corridor.cfg
@@ -35,13 +35,29 @@ available_buttons =
 available_game_variables =
    {
        KILLCOUNT
        ITEMCOUNT
        SECRETCOUNT
        FRAGCOUNT
        HEALTH
        ARMOR
        DEAD
        ON_GROUND
        ATTACK_READY
        ALTATTACK_READY
        SELECTED_WEAPON
        SELECTED_WEAPON_AMMO
        AMMO1
        AMMO2
        AMMO3
        AMMO4
        AMMO5
        AMMO6
        AMMO7
        AMMO8
        AMMO9
        AMMO0
    }
 mode = PLAYER
 doom_skill = 5
 sound_enabled = false
--- a/gym/envs/doom/assets/deathmatch.cfg
+++ b/gym/envs/doom/assets/deathmatch.cfg
@@ -76,8 +76,15 @@ available_buttons =
 available_game_variables =
    {
        KILLCOUNT
        ITEMCOUNT
        SECRETCOUNT
        FRAGCOUNT
        HEALTH
        ARMOR
        DEAD
        ON_GROUND
        ATTACK_READY
        ALTATTACK_READY
        SELECTED_WEAPON
        SELECTED_WEAPON_AMMO
@@ -88,8 +95,10 @@ available_game_variables =
        AMMO4
        AMMO5
        AMMO6
        AMMO7
        AMMO8
        AMMO9
        AMMO0
    }
 mode = PLAYER
 doom_skill = 5
 sound_enabled = false
--- a/gym/envs/doom/assets/defend_the_center.cfg
+++ b/gym/envs/doom/assets/defend_the_center.cfg
@@ -32,13 +32,29 @@ available_buttons =
 available_game_variables =
    {
        KILLCOUNT
        ITEMCOUNT
        SECRETCOUNT
        FRAGCOUNT
        HEALTH
        ARMOR
        DEAD
        ON_GROUND
        ATTACK_READY
        ALTATTACK_READY
        SELECTED_WEAPON
        SELECTED_WEAPON_AMMO
        AMMO1
        AMMO2
        AMMO3
        AMMO4
        AMMO5
        AMMO6
        AMMO7
        AMMO8
        AMMO9
        AMMO0
    }
 mode = PLAYER
 doom_skill = 3
 sound_enabled = false
--- a/gym/envs/doom/assets/defend_the_line.cfg
+++ b/gym/envs/doom/assets/defend_the_line.cfg
@@ -32,13 +32,29 @@ available_buttons =
 available_game_variables =
    {
        KILLCOUNT
        ITEMCOUNT
        SECRETCOUNT
        FRAGCOUNT
        HEALTH
        ARMOR
        DEAD
        ON_GROUND
        ATTACK_READY
        ALTATTACK_READY
        SELECTED_WEAPON
        SELECTED_WEAPON_AMMO
        AMMO1
        AMMO2
        AMMO3
        AMMO4
        AMMO5
        AMMO6
        AMMO7
        AMMO8
        AMMO9
        AMMO0
    }
 mode = PLAYER
 doom_skill = 5
 sound_enabled = false
--- a/gym/envs/doom/assets/health_gathering.cfg
+++ b/gym/envs/doom/assets/health_gathering.cfg
@@ -33,14 +33,30 @@ available_buttons =
 available_game_variables =
    {
        KILLCOUNT
        ITEMCOUNT
        SECRETCOUNT
        FRAGCOUNT
        HEALTH
        ARMOR
        DEAD
        ON_GROUND
        ATTACK_READY
        ALTATTACK_READY
        SELECTED_WEAPON
        SELECTED_WEAPON_AMMO
        AMMO1
        AMMO2
        AMMO3
        AMMO4
        AMMO5
        AMMO6
        AMMO7
        AMMO8
        AMMO9
        AMMO0
    }
 mode = PLAYER
 doom_skill = 5
 sound_enabled = false
--- a/gym/envs/doom/assets/my_way_home.cfg
+++ b/gym/envs/doom/assets/my_way_home.cfg
@@ -17,8 +17,8 @@ render_particles = false
 # make episodes start after 14 tics (after unholstering the gun) (35 tics per seconds)
 episode_start_time = 14
-# Make episodes finish after 4200 tics (2 minutes)
+# Make episodes finish after 2100 tics (1 minute)
-episode_timeout = 4200
+episode_timeout = 2100
 # Available buttons
 available_buttons =
@@ -32,14 +32,29 @@ available_buttons =
 available_game_variables =
    {
        KILLCOUNT
        ITEMCOUNT
        SECRETCOUNT
        FRAGCOUNT
        HEALTH
        ARMOR
        DEAD
        ON_GROUND
        ATTACK_READY
        ALTATTACK_READY
        SELECTED_WEAPON
        SELECTED_WEAPON_AMMO
-        AMMO0
+
        AMMO1
        AMMO2
        AMMO3
        AMMO4
        AMMO5
        AMMO6
        AMMO7
        AMMO8
        AMMO9
        AMMO0
    }
 mode = PLAYER
 doom_skill = 5
 sound_enabled = false
--- a/gym/envs/doom/assets/predict_position.cfg
+++ b/gym/envs/doom/assets/predict_position.cfg
@@ -32,14 +32,30 @@ available_buttons =
 available_game_variables =
    {
        KILLCOUNT
        ITEMCOUNT
        SECRETCOUNT
        FRAGCOUNT
        HEALTH
        ARMOR
        DEAD
        ON_GROUND
        ATTACK_READY
        ALTATTACK_READY
        SELECTED_WEAPON
        SELECTED_WEAPON_AMMO
        AMMO1
        AMMO2
        AMMO3
        AMMO4
        AMMO5
        AMMO6
        AMMO7
        AMMO8
        AMMO9
        AMMO0
    }
 mode = PLAYER
 doom_skill = 3
 sound_enabled = false
--- a/gym/envs/doom/assets/take_cover.cfg
+++ b/gym/envs/doom/assets/take_cover.cfg
@@ -31,13 +31,29 @@ available_buttons =
 available_game_variables =
    {
        KILLCOUNT
        ITEMCOUNT
        SECRETCOUNT
        FRAGCOUNT
        HEALTH
        ARMOR
        DEAD
        ON_GROUND
        ATTACK_READY
        ALTATTACK_READY
        SELECTED_WEAPON
        SELECTED_WEAPON_AMMO
        AMMO1
        AMMO2
        AMMO3
        AMMO4
        AMMO5
        AMMO6
        AMMO7
        AMMO8
        AMMO9
        AMMO0
    }
 mode = PLAYER
 doom_skill = 5
 sound_enabled = false
--- a/gym/envs/doom/controls.md
+++ b/gym/envs/doom/controls.md
@@ -2,21 +2,27 @@
 Doom is usually played with a full keyboard, and multiple keys can be pressed at once.
-To replicate this, we broke down the possible actions in 40 keys. Each key can be pressed (value of 1), or unpressed (value of 0).
+To replicate this, we broke down the possible actions into 43 keys. Each key can be pressed (value of 1), or unpressed (value of 0).
-The deltas (35 to 39) indicate speed of change (values 0 to 10), where higher values will make the player move faster on an axis.
+The last 5 commands are deltas. [38] - LOOK_UP_DOWN_DELTA and [39] - TURN_LEFT_RIGHT_DELTA replicate mouse movement where values are in the
 range -10 to +10. They represent mouse movement over the x and y axis. (e.g. +5 for LOOK_UP_DOWN_DELTA will make the player look up 5 degrees)
 [40] - MOVE_FORWARD_BACKWARD_DELTA, [41] - MOVE_LEFT_RIGHT_DELTA, and [42] - MOVE_UP_DOWN_DELTA represent the speed on an axis.
 Their values range from -100 to 100, where +100 is the maximum speed in one direction, and -100 is the maximum speed in the other.
 (e.g. MOVE_FORWARD_BACKWARD_DELTA of +100 will make the player move forward at 100% of max speed, and -100 will make the player
 move backward at 100% of max speed).
 A list of values is expected to be passed as the action (e.g. [0, 1, 0, 0, 1, 0, .... ]).
-Each map is restricted on what actions can be performed, but the mapping is the same across all maps.
+Each mission is restricted on what actions can be performed, but the mapping is the same across all missions.
-For example, if we want to [0] - ATTACK, [2] - JUMP, and [12] - MOVE_FORWARD at the same time, we would submit the following action:
+For example, if we want to [0] - ATTACK, [2] - JUMP, and [13] - MOVE_FORWARD at the same time, we would submit the following action:
 ```python
-action = [0] * 40
+action = [0] * 43
 action[0] = 1
 action[2] = 1
-action[12] = 1
+action[13] = 1
 ```
 The full list of possible actions is:
@@ -26,38 +32,52 @@ The full list of possible actions is:
 * [2]  - JUMP                             - Jump - Values 0 or 1
 * [3]  - CROUCH                           - Crouch - Values 0 or 1
 * [4]  - TURN180                          - Perform 180 turn - Values 0 or 1
-* [5]  - RELOAD                           - Reload weapon - Values 0 or 1
+* [5]  - ALT_ATTACK                       - Perform alternate attack - Values 0 or 1
-* [6]  - ZOOM                             - Toggle zoom in/out - Values 0 or 1
+* [6]  - RELOAD                           - Reload weapon - Values 0 or 1
-* [7]  - SPEED                            - Run faster - Values 0 or 1
+* [7]  - ZOOM                             - Toggle zoom in/out - Values 0 or 1
-* [8]  - STRAFE                           - Strafe (moving sideways in a circle) - Values 0 or 1
+* [8]  - SPEED                            - Run faster - Values 0 or 1
-* [9]  - MOVE_RIGHT                       - Move to the right - Values 0 or 1
+* [9]  - STRAFE                           - Strafe (moving sideways in a circle) - Values 0 or 1
-* [10] - MOVE_LEFT                        - Move to the left - Values 0 or 1
+* [10] - MOVE_RIGHT                       - Move to the right - Values 0 or 1
-* [11] - MOVE_BACKWARD                    - Move backward - Values 0 or 1
+* [11] - MOVE_LEFT                        - Move to the left - Values 0 or 1
-* [12] - MOVE_FORWARD                     - Move forward - Values 0 or 1
+* [12] - MOVE_BACKWARD                    - Move backward - Values 0 or 1
-* [13] - TURN_RIGHT                       - Turn right - Values 0 or 1
+* [13] - MOVE_FORWARD                     - Move forward - Values 0 or 1
-* [14] - TURN_LEFT                        - Turn left - Values 0 or 1
+* [14] - TURN_RIGHT                       - Turn right - Values 0 or 1
-* [15] - LOOK_UP                          - Look up - Values 0 or 1
+* [15] - TURN_LEFT                        - Turn left - Values 0 or 1
-* [16] - LOOK_DOWN                        - Look down - Values 0 or 1
+* [16] - LOOK_UP                          - Look up - Values 0 or 1
-* [17] - LAND                             - Land (e.g. drop from ladder) - Values 0 or 1
+* [17] - LOOK_DOWN                        - Look down - Values 0 or 1
-* [18] - SELECT_WEAPON1                   - Select weapon 1 - Values 0 or 1
+* [18] - MOVE_UP                          - Move up - Values 0 or 1
-* [19] - SELECT_WEAPON2                   - Select weapon 2 - Values 0 or 1
+* [19] - MOVE_DOWN                        - Move down - Values 0 or 1
-* [20] - SELECT_WEAPON3                   - Select weapon 3 - Values 0 or 1
+* [20] - LAND                             - Land (e.g. drop from ladder) - Values 0 or 1
-* [21] - SELECT_WEAPON4                   - Select weapon 4 - Values 0 or 1
+* [21] - SELECT_WEAPON1                   - Select weapon 1 - Values 0 or 1
-* [22] - SELECT_WEAPON5                   - Select weapon 5 - Values 0 or 1
+* [22] - SELECT_WEAPON2                   - Select weapon 2 - Values 0 or 1
-* [23] - SELECT_WEAPON6                   - Select weapon 6 - Values 0 or 1
+* [23] - SELECT_WEAPON3                   - Select weapon 3 - Values 0 or 1
-* [24] - SELECT_WEAPON7                   - Select weapon 7 - Values 0 or 1
+* [24] - SELECT_WEAPON4                   - Select weapon 4 - Values 0 or 1
-* [25] - SELECT_WEAPON8                   - Select weapon 8 - Values 0 or 1
+* [25] - SELECT_WEAPON5                   - Select weapon 5 - Values 0 or 1
-* [26] - SELECT_WEAPON9                   - Select weapon 9 - Values 0 or 1
+* [26] - SELECT_WEAPON6                   - Select weapon 6 - Values 0 or 1
-* [27] - SELECT_WEAPON0                   - Select weapon 0 - Values 0 or 1
+* [27] - SELECT_WEAPON7                   - Select weapon 7 - Values 0 or 1
-* [28] - SELECT_NEXT_WEAPON               - Select next weapon - Values 0 or 1
+* [28] - SELECT_WEAPON8                   - Select weapon 8 - Values 0 or 1
-* [29] - SELECT_PREV_WEAPON               - Select previous weapon - Values 0 or 1
+* [29] - SELECT_WEAPON9                   - Select weapon 9 - Values 0 or 1
-* [30] - DROP_SELECTED_WEAPON             - Drop selected weapon - Values 0 or 1
+* [30] - SELECT_WEAPON0                   - Select weapon 0 - Values 0 or 1
-* [31] - ACTIVATE_SELECTED_WEAPON         - Activate selected weapon - Values 0 or 1
+* [31] - SELECT_NEXT_WEAPON               - Select next weapon - Values 0 or 1
-* [32] - SELECT_NEXT_ITEM                 - Select next item - Values 0 or 1
+* [32] - SELECT_PREV_WEAPON               - Select previous weapon - Values 0 or 1
-* [33] - SELECT_PREV_ITEM                 - Select previous item - Values 0 or 1
+* [33] - DROP_SELECTED_WEAPON             - Drop selected weapon - Values 0 or 1
-* [34] - DROP_SELECTED_ITEM               - Drop selected item - Values 0 or 1
+* [34] - ACTIVATE_SELECTED_WEAPON         - Activate selected weapon - Values 0 or 1
-* [35] - LOOK_UP_DOWN_DELTA               - Look Up - Values 0 to 10 (Higher value increases speed)
+* [35] - SELECT_NEXT_ITEM                 - Select next item - Values 0 or 1
-* [36] - TURN_LEFT_RIGHT_DELTA            - Turn left/right - Values 0 to 10 (Higher value increases speed)
+* [36] - SELECT_PREV_ITEM                 - Select previous item - Values 0 or 1
-* [37] - MOVE_FORWARD_BACKWARD_DELTA      - Move forward/backward - Values 0 to 10 (Higher value increases speed)
+* [37] - DROP_SELECTED_ITEM               - Drop selected item - Values 0 or 1
-* [38] - MOVE_LEFT_RIGHT_DELTA            - Move left/right - Values 0 to 10 (Higher value increases speed)
+* [38] - LOOK_UP_DOWN_DELTA               - Look Up/Down - Range of -10 to 10 (integer).
-* [39] - MOVE_UP_DOWN_DELTA               - Move up/down - Values 0 to 10 (Higher value increases speed)
+                                          - Value is the angle - +5 will look up 5 degrees, -5 will look down 5 degrees
 * [39] - TURN_LEFT_RIGHT_DELTA            - Turn Left/Right - Range of -10 to 10 (integer).
                                          - Value is the angle - +5 will turn right 5 degrees, -5 will turn left 5 degrees
 * [40] - MOVE_FORWARD_BACKWARD_DELTA      - Speed of forward/backward movement - Range -100 to 100 (integer).
                                          - +100 is max speed forward, -100 is max speed backward, 0 is no movement
 * [41] - MOVE_LEFT_RIGHT_DELTA            - Speed of left/right movement - Range -100 to 100 (integer).
                                          - +100 is max speed right, -100 is max speed left, 0 is no movement
 * [42] - MOVE_UP_DOWN_DELTA               - Speed of up/down movement - Range -100 to 100 (integer).
                                          - +100 is max speed up, -100 is max speed down, 0 is no movement
 To control the player in 'human' mode, the following keys should work:
 * Arrow Keys for MOVE_FORWARD, MOVE_BACKWARD, TURN_LEFT, TURN_RIGHT
 * '<' and '>' for MOVE_LEFT and MOVE_RIGHT
 * Ctrl (or left mouse click) for ATTACK
--- a/gym/envs/doom/doom_basic.py
+++ b/gym/envs/doom/doom_basic.py
@@ -1,12 +1,5 @@
 import logging
 import os
 import numpy as np
 from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
 from gym import error, spaces
 from gym.envs.doom import doom_env
 from gym.utils import seeding
 logger = logging.getLogger(__name__)
@@ -20,48 +13,35 @@ class DoomBasicEnv(doom_env.DoomEnv):
    Allowed actions:
        [0]  - ATTACK                           - Shoot weapon - Values 0 or 1
-        [9]  - MOVE_RIGHT                       - Move to the right - Values 0 or 1
+        [10] - MOVE_RIGHT                       - Move to the right - Values 0 or 1
-        [10] - MOVE_LEFT                        - Move to the left - Values 0 or 1
+        [11] - MOVE_LEFT                        - Move to the left - Values 0 or 1
    Note: see controls.md for details
    Rewards:
        +101    - Killing the monster
        -  5    - Missing a shot
-        -  1    - Several times per second - Kill the monster faster!
+        -  1    - 35 times per second - Kill the monster faster!
    Goal: 10 points
        Kill the monster in 3 secs with 1 shot
    Mode:
        - env.mode can be 'fast', 'normal' or 'human' (e.g. env.mode = 'fast')
        - 'fast' (default) will run as fast as possible (~75 fps) (best for simulation)
        - 'normal' will run at roughly 35 fps (easier for human to watch)
        - 'human' will let you play the game (keyboard only: Arrow Keys, '<', '>' and Ctrl)
    Ends when:
        - Monster is dead
        - Player is dead
        - Timeout (10 seconds - 350 frames)
    Actions:
        actions = [0] * 43
        actions[0] = 0       # ATTACK
        actions[10] = 1      # MOVE_RIGHT
        actions[11] = 0      # MOVE_LEFT
    -----------------------------------------------------
    """
    def __init__(self):
-        super(DoomBasicEnv, self).__init__()
+        super(DoomBasicEnv, self).__init__(0)
        package_directory = os.path.dirname(os.path.abspath(__file__))
        self.loader = Loader()
        self.game = DoomGame()
        self.game.load_config(os.path.join(package_directory, 'assets/basic.cfg'))
        self.game.set_vizdoom_path(self.loader.get_vizdoom_path())
        self.game.set_doom_game_path(self.loader.get_freedoom_path())
        self.game.set_doom_scenario_path(self.loader.get_scenario_path('basic.wad'))
        self.game.set_doom_map('map01')
        self.screen_height = 480                    # Must match .cfg file
        self.screen_width = 640                     # Must match .cfg file
        self.game.set_window_visible(False)
        self.viewer = None
        self.game.init()
        self.game.new_episode()
        # 3 allowed actions [0, 9, 10] (must match .cfg file)
        self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
        self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
        self._seed()
    def _seed(self, seed=None):
        seed = seeding.hash_seed(seed) % 2**32
        self.game.set_seed(seed)
        return [seed]
--- a/gym/envs/doom/doom_corridor.py
+++ b/gym/envs/doom/doom_corridor.py
@@ -1,12 +1,5 @@
 import logging
 import os
 import numpy as np
 from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
 from gym import error, spaces
 from gym.envs.doom import doom_env
 from gym.utils import seeding
 logger = logging.getLogger(__name__)
@@ -19,11 +12,11 @@ class DoomCorridorEnv(doom_env.DoomEnv):
    Allowed actions:
        [0]  - ATTACK                           - Shoot weapon - Values 0 or 1
-        [9]  - MOVE_RIGHT                       - Move to the right - Values 0 or 1
+        [10] - MOVE_RIGHT                       - Move to the right - Values 0 or 1
-        [10] - MOVE_LEFT                        - Move to the left - Values 0 or 1
+        [11] - MOVE_LEFT                        - Move to the left - Values 0 or 1
-        [12] - MOVE_FORWARD                     - Move forward - Values 0 or 1
+        [13] - MOVE_FORWARD                     - Move forward - Values 0 or 1
-        [13] - TURN_RIGHT                       - Turn right - Values 0 or 1
+        [14] - TURN_RIGHT                       - Turn right - Values 0 or 1
-        [14] - TURN_LEFT                        - Turn left - Values 0 or 1
+        [15] - TURN_LEFT                        - Turn left - Values 0 or 1
    Note: see controls.md for details
    Rewards:
@@ -31,38 +24,29 @@ class DoomCorridorEnv(doom_env.DoomEnv):
        - dX    - For getting further from the vest
        -100    - Penalty for being killed
-    Goal: 1,270 points
+    Goal: 1,000 points
-     Reach the vest (try also killing guards, rather than just running)
+        Reach the vest (or at least get past the guards in the 3rd group)
    Mode:
        - env.mode can be 'fast', 'normal' or 'human' (e.g. env.mode = 'fast')
        - 'fast' (default) will run as fast as possible (~75 fps) (best for simulation)
        - 'normal' will run at roughly 35 fps (easier for human to watch)
        - 'human' will let you play the game (keyboard only: Arrow Keys, '<', '>' and Ctrl)
    Ends when:
        - Player touches vest
        - Player is dead
        - Timeout (1 minute - 2,100 frames)
    Actions:
        actions = [0] * 43
        actions[0] = 0       # ATTACK
        actions[10] = 1      # MOVE_RIGHT
        actions[11] = 0      # MOVE_LEFT
        actions[13] = 0      # MOVE_FORWARD
        actions[14] = 0      # TURN_RIGHT
        actions[15] = 0      # TURN_LEFT
    -----------------------------------------------------
    """
    def __init__(self):
-        super(DoomCorridorEnv, self).__init__()
+        super(DoomCorridorEnv, self).__init__(1)
        package_directory = os.path.dirname(os.path.abspath(__file__))
        self.loader = Loader()
        self.game = DoomGame()
        self.game.load_config(os.path.join(package_directory, 'assets/deadly_corridor.cfg'))
        self.game.set_vizdoom_path(self.loader.get_vizdoom_path())
        self.game.set_doom_game_path(self.loader.get_freedoom_path())
        self.game.set_doom_scenario_path(self.loader.get_scenario_path('deadly_corridor.wad'))
        self.screen_height = 480                    # Must match .cfg file
        self.screen_width = 640                     # Must match .cfg file
        self.game.set_window_visible(False)
        self.viewer = None
        self.game.init()
        self.game.new_episode()
        # action indexes are [0, 9, 10, 12, 13, 14]
        self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 6))
        self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
        self._seed()
    def _seed(self, seed=None):
        seed = seeding.hash_seed(seed) % 2**32
        self.game.set_seed(seed)
        return [seed]
--- a/gym/envs/doom/doom_deathmatch.py
+++ b/gym/envs/doom/doom_deathmatch.py
@@ -1,11 +1,4 @@
 import logging
 import os
 import numpy as np
 from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
 from gym import error, spaces
 from gym.utils import seeding
 from gym.envs.doom import doom_env
 logger = logging.getLogger(__name__)
@@ -22,37 +15,27 @@ class DoomDeathmatchEnv(doom_env.DoomEnv):
    Rewards:
        +1      - Killing a monster
-    Goal: 25 points
+    Goal: 20 points
-        Kill 25 monsters without being killed
+        Kill 20 monsters
    Mode:
        - env.mode can be 'fast', 'normal' or 'human' (e.g. env.mode = 'fast')
        - 'fast' (default) will run as fast as possible (~75 fps) (best for simulation)
        - 'normal' will run at roughly 35 fps (easier for human to watch)
        - 'human' will let you play the game (mouse and full keyboard)
    Ends when:
        - Player is dead
        - Timeout (3 minutes - 6,300 frames)
    Actions:
        actions = [0] * 43
        actions[0] = 0       # ATTACK
        actions[1] = 0       # USE
        [...]
        actions[42] = 0      # MOVE_UP_DOWN_DELTA
        A full list of possible actions is available in controls.md
    -----------------------------------------------------
    """
    def __init__(self):
-        super(DoomDeathmatchEnv, self).__init__()
+        super(DoomDeathmatchEnv, self).__init__(8)
        package_directory = os.path.dirname(os.path.abspath(__file__))
        self.loader = Loader()
        self.game = DoomGame()
        self.game.load_config(os.path.join(package_directory, 'assets/deathmatch.cfg'))
        self.game.set_vizdoom_path(self.loader.get_vizdoom_path())
        self.game.set_doom_game_path(self.loader.get_freedoom_path())
        self.game.set_doom_scenario_path(self.loader.get_scenario_path('deathmatch.wad'))
        self.screen_height = 480                    # Must match .cfg file
        self.screen_width = 640                     # Must match .cfg file
        self.game.set_window_visible(False)
        self.viewer = None
        self.game.init()
        self.game.new_episode()
        # 41 allowed actions (must match .cfg file)
        self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 39 + [[0, 10, 0]] * 5))
        self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
        self._seed()
    def _seed(self, seed=None):
        seed = seeding.hash_seed(seed) % 2**32
        self.game.set_seed(seed)
        return [seed]
--- a/gym/envs/doom/doom_defend_center.py
+++ b/gym/envs/doom/doom_defend_center.py
@@ -1,12 +1,5 @@
 import logging
 import os
 import numpy as np
 from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
 from gym import error, spaces
 from gym.envs.doom import doom_env
 from gym.utils import seeding
 logger = logging.getLogger(__name__)
@@ -17,51 +10,39 @@ class DoomDefendCenterEnv(doom_env.DoomEnv):
    You will also need to keep an eye on your ammunition level. You are only
    rewarded for kills, so figure out how to stay alive.
-    The map is a circle with monsters in the middle. Monsters will
+    The map is a circle with monsters. You are in the middle. Monsters will
    respawn with additional health when killed. Kill as many as you can
    before you run out of ammo.
    Allowed actions:
        [0]  - ATTACK                           - Shoot weapon - Values 0 or 1
-        [13] - TURN_RIGHT                       - Turn right - Values 0 or 1
+        [14] - TURN_RIGHT                       - Turn right - Values 0 or 1
-        [14] - TURN_LEFT                        - Turn left - Values 0 or 1
+        [15] - TURN_LEFT                        - Turn left - Values 0 or 1
    Note: see controls.md for details
    Rewards:
-        +  1    - Killing the monster
+        +  1    - Killing a monster
        -  1    - Penalty for being killed
    Goal: 10 points
-        Kill 10 monsters (you have 26 ammo)
+        Kill 11 monsters (you have 26 ammo)
    Mode:
        - env.mode can be 'fast', 'normal' or 'human' (e.g. env.mode = 'fast')
        - 'fast' (default) will run as fast as possible (~75 fps) (best for simulation)
        - 'normal' will run at roughly 35 fps (easier for human to watch)
        - 'human' will let you play the game (keyboard only: Arrow Keys, '<', '>' and Ctrl)
    Ends when:
        - Player is dead
        - Timeout (60 seconds - 2100 frames)
    Actions:
        actions = [0] * 43
        actions[0] = 0       # ATTACK
        actions[14] = 1      # TURN_RIGHT
        actions[15] = 0      # TURN_LEFT
    -----------------------------------------------------
    """
    def __init__(self):
-        super(DoomDefendCenterEnv, self).__init__()
+        super(DoomDefendCenterEnv, self).__init__(2)
        package_directory = os.path.dirname(os.path.abspath(__file__))
        self.loader = Loader()
        self.game = DoomGame()
        self.game.load_config(os.path.join(package_directory, 'assets/defend_the_center.cfg'))
        self.game.set_vizdoom_path(self.loader.get_vizdoom_path())
        self.game.set_doom_game_path(self.loader.get_freedoom_path())
        self.game.set_doom_scenario_path(self.loader.get_scenario_path('defend_the_center.wad'))
        self.screen_height = 480                    # Must match .cfg file
        self.screen_width = 640                     # Must match .cfg file
        self.game.set_window_visible(False)
        self.viewer = None
        self.game.init()
        self.game.new_episode()
        # 3 allowed actions [0, 13, 14] (must match .cfg file)
        self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
        self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
        self._seed()
    def _seed(self, seed=None):
        seed = seeding.hash_seed(seed) % 2**32
        self.game.set_seed(seed)
        return [seed]
--- a/gym/envs/doom/doom_defend_line.py
+++ b/gym/envs/doom/doom_defend_line.py
@@ -1,12 +1,5 @@
 import logging
 import os
 import numpy as np
 from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
 from gym import error, spaces
 from gym.envs.doom import doom_env
 from gym.utils import seeding
 logger = logging.getLogger(__name__)
@@ -17,49 +10,39 @@ class DoomDefendLineEnv(doom_env.DoomEnv):
    Your ammo will automatically replenish. You are only rewarded for kills,
    so figure out how to stay alive.
-    The map is a rectangle with monsters in the middle. Monsters will
+    The map is a rectangle with monsters on the other side. Monsters will
    respawn with additional health when killed. Kill as many as you can
    before they kill you. This map is harder than the previous.
    Allowed actions:
        [0]  - ATTACK                           - Shoot weapon - Values 0 or 1
-        [13] - TURN_RIGHT                       - Turn right - Values 0 or 1
+        [14] - TURN_RIGHT                       - Turn right - Values 0 or 1
-        [14] - TURN_LEFT                        - Turn left - Values 0 or 1
+        [15] - TURN_LEFT                        - Turn left - Values 0 or 1
    Note: see controls.md for details
    Rewards:
-        +  1    - Killing the monster
+        +  1    - Killing a monster
        -  1    - Penalty for being killed
-    Goal: 25 points
+    Goal: 15 points
-        Kill 25 monsters
+        Kill 16 monsters
    Mode:
        - env.mode can be 'fast', 'normal' or 'human' (e.g. env.mode = 'fast')
        - 'fast' (default) will run as fast as possible (~75 fps) (best for simulation)
        - 'normal' will run at roughly 35 fps (easier for human to watch)
        - 'human' will let you play the game (keyboard only: Arrow Keys, '<', '>' and Ctrl)
    Ends when:
        - Player is dead
        - Timeout (60 seconds - 2100 frames)
    Actions:
        actions = [0] * 43
        actions[0] = 0       # ATTACK
        actions[14] = 1      # TURN_RIGHT
        actions[15] = 0      # TURN_LEFT
    -----------------------------------------------------
    """
    def __init__(self):
-        super(DoomDefendLineEnv, self).__init__()
+        super(DoomDefendLineEnv, self).__init__(3)
        package_directory = os.path.dirname(os.path.abspath(__file__))
        self.loader = Loader()
        self.game = DoomGame()
        self.game.load_config(os.path.join(package_directory, 'assets/defend_the_line.cfg'))
        self.game.set_vizdoom_path(self.loader.get_vizdoom_path())
        self.game.set_doom_game_path(self.loader.get_freedoom_path())
        self.game.set_doom_scenario_path(self.loader.get_scenario_path('defend_the_line.wad'))
        self.screen_height = 480                    # Must match .cfg file
        self.screen_width = 640                     # Must match .cfg file
        self.game.set_window_visible(False)
        self.viewer = None
        # 3 allowed actions [0, 13, 14] (must match .cfg file)
        self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
        self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
        self._seed()
        self.game.init()
        self.game.new_episode()
    def _seed(self, seed=None):
        seed = seeding.hash_seed(seed) % 2**32
        self.game.set_seed(seed)
        return [seed]
--- a/gym/envs/doom/doom_env.py
+++ b/gym/envs/doom/doom_env.py
@@ -1,57 +1,184 @@
-import logging
+import logging, os
 from time import sleep
-import numpy
+import numpy as np
 import gym
-from gym import utils
+from gym import utils, spaces
 from gym.utils import seeding
 try:
    import doom_py
    from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
 except ImportError as e:
    raise gym.error.DependencyNotInstalled("{}. (HINT: you can install Doom dependencies with 'pip install gym[doom].)'".format(e))
 logger = logging.getLogger(__name__)
-class DoomEnv(gym.Env, utils.EzPickle):
+# Constants
-    metadata = {'render.modes': ['human', 'rgb_array']}
+NUM_ACTIONS = 43
 NUM_LEVELS = 9
 CONFIG = 0
 SCENARIO = 1
 MAP = 2
 DIFFICULTY = 3
 ACTIONS = 4
 MIN_SCORE = 5
 TARGET_SCORE = 6
-    def __init__(self):
+# Format (config, scenario, map, difficulty, actions, min, target)
 DOOM_SETTINGS = [
    ['basic.cfg', 'basic.wad', 'map01', 5, [0, 10, 11], -485, 10],                                  # 0 - Basic
    ['deadly_corridor.cfg', 'deadly_corridor.wad', '', 1, [0, 10, 11, 13, 14, 15], -120, 1000],     # 1 - Corridor
    ['defend_the_center.cfg', 'defend_the_center.wad', '', 5, [0, 14, 15], -1, 10],                 # 2 - DefendCenter
    ['defend_the_line.cfg', 'defend_the_line.wad', '', 5, [0, 14, 15], -1, 15],                     # 3 - DefendLine
    ['health_gathering.cfg', 'health_gathering.wad', 'map01', 5, [13, 14, 15], 0, 1000],            # 4 - HealthGathering
    ['my_way_home.cfg', 'my_way_home.wad', '', 5, [13, 14, 15], -0.22, 0.5],                        # 5 - MyWayHome
    ['predict_position.cfg', 'predict_position.wad', 'map01', 3, [0, 14, 15], -0.075, 0.5],         # 6 - PredictPosition
    ['take_cover.cfg', 'take_cover.wad', 'map01', 5, [10, 11], 0, 750],                             # 7 - TakeCover
    ['deathmatch.cfg', 'deathmatch.wad', '', 5, list(range(NUM_ACTIONS)), 0, 20]                    # 8 - Deathmatch
 ]
 class DoomEnv(gym.Env, utils.EzPickle):
    metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 35}
    def __init__(self, level):
        """Prepare the bookkeeping for one Doom mission.

        Nothing is launched here: the engine is only configured and started
        by _load_level(), which _reset() calls when the environment is not
        yet initialized.

        Args:
            level: stored as self.level; _load_level() uses it as the row
                index into DOOM_SETTINGS.
        """
        utils.EzPickle.__init__(self)

        # Level tracking
        self.previous_level = -1
        self.level = level
        self.find_new_level = False                 # Indicates that we need a level change

        # Engine handles and asset lookup
        self.game = DoomGame()
        self.loader = Loader()
        self.doom_dir = os.path.dirname(os.path.abspath(__file__))

        # Runtime / rendering state
        self.mode = 'fast'                          # 'human', 'fast' or 'normal'
        self.no_render = False                      # To disable double rendering in human mode
        self.viewer = None
        self.is_initialized = False                 # Indicates that reset() has been called
        self.curr_seed = 0

        # Frame size used to build the observation space
        self.screen_height = 480
        self.screen_width = 640

        # 38 + 2 + 3 = 43 rows, one per entry of the full action list.
        # NOTE(review): columns are presumably (low, high, precision) - confirm against spaces.HighLow
        unit_rows = [[0, 1, 0]] * 38
        ten_rows = [[-10, 10, 0]] * 2
        hundred_rows = [[-100, 100, 0]] * 3
        action_bounds = np.matrix(unit_rows + ten_rows + hundred_rows, dtype=np.int8)
        self.action_space = spaces.HighLow(action_bounds)

        frame_shape = (self.screen_height, self.screen_width, 3)
        self.observation_space = spaces.Box(low=0, high=255, shape=frame_shape)

        # Every action index is allowed until _load_level() narrows the list
        self.allowed_actions = list(range(NUM_ACTIONS))
    def _load_level(self):
        """(Re)configure the engine for self.level and start its first episode.

        A running game is closed and replaced by a fresh DoomGame before the
        configuration for the level is applied from DOOM_SETTINGS.

        Returns:
            In 'human' mode: an all-zero uint8 frame of observation_space.shape,
            returned after the interactive session in _play_human_mode() ends.
            Otherwise: a copy of the image buffer of the first game state.
        """
        # Closing if is_initialized
        if self.is_initialized:
            self.is_initialized = False
            self.game.close()
            self.game = DoomGame()

        # Loading Paths (is_initialized is always False at this point)
        self.game.set_vizdoom_path(self.loader.get_vizdoom_path())
        self.game.set_doom_game_path(self.loader.get_freedoom_path())

        # Common settings
        level_settings = DOOM_SETTINGS[self.level]
        self._closed = False
        self.game.load_config(os.path.join(self.doom_dir, 'assets/%s' % level_settings[CONFIG]))
        self.game.set_doom_scenario_path(self.loader.get_scenario_path(level_settings[SCENARIO]))
        map_name = level_settings[MAP]
        if map_name != '':
            self.game.set_doom_map(map_name)
        self.game.set_doom_skill(level_settings[DIFFICULTY])
        self.previous_level = self.level
        self.allowed_actions = level_settings[ACTIONS]

        # Mode-specific window / input setup
        human_mode = 'human' == self.mode
        if human_mode:
            self.game.add_game_args('+freelook 1')
        self.game.set_window_visible(human_mode)
        self.game.set_mode(Mode.SPECTATOR if human_mode else Mode.PLAYER)
        # The game window already shows the frames in human mode
        self.no_render = human_mode

        # Launch the engine and the first episode
        self.game.init()
        self._start_episode()
        self.is_initialized = True

        if human_mode:
            # Blocks until the human-played episode is over
            self._play_human_mode()
            return np.zeros(shape=self.observation_space.shape, dtype=np.uint8)
        return self.game.get_state().image_buffer.copy()
    def _start_episode(self):
        """Begin a new episode, applying self.curr_seed first when it is positive."""
        seed = self.curr_seed
        # A seed of 0 (the value set in __init__) is never pushed to the game
        if seed > 0:
            self.game.set_seed(seed)
        self.game.new_episode()
    def _play_human_mode(self):
        """Advance the game frame by frame until the episode ends, printing a
        status report (state number, last action, rewards, game variables)
        after every frame.
        """
        separator = '==============================='
        game = self.game
        while not game.is_episode_finished():
            game.advance_action()
            state = game.get_state()
            total_reward = game.get_total_reward()
            info = self._get_game_variables(state.game_variables)
            info["TOTAL_REWARD"] = round(total_reward, 4)
            report = [
                separator,
                'State: #' + str(state.number),
                'Action: \t' + str(game.get_last_action()) + '\t (=> only allowed actions)',
                'Reward: \t' + str(game.get_last_reward()),
                'Total Reward: \t' + str(total_reward),
                'Variables: \n' + str(info),
            ]
            for line in report:
                print(line)
            sleep(0.02857)  # 35 fps = 0.02857 sleep between frames
        print(separator)
        print('Done')
    def _step(self, action):
-        # action is a np array but DoomGame.make_action expects a list of ints
+        if NUM_ACTIONS != len(action):
-        list_action = [int(x) for x in action]
+            logger.warn('Doom action list must contain %d items. Padding missing items with 0' % NUM_ACTIONS)
            old_action = action
            action = [0] * NUM_ACTIONS
            for i in range(len(old_action)):
                action[i] = old_action[i]
        # action is a list of numbers but DoomGame.make_action expects a list of ints
        if len(self.allowed_actions) > 0:
            list_action = [int(action[action_idx]) for action_idx in self.allowed_actions]
        else:
            list_action = [int(x) for x in action]
        try:
            state = self.game.get_state()
            reward = self.game.make_action(list_action)
            state = self.game.get_state()
            info = self._get_game_variables(state.game_variables)
            info["TOTAL_REWARD"] = round(self.game.get_total_reward(), 4)
            if self.game.is_episode_finished():
                is_finished = True
                return np.zeros(shape=self.observation_space.shape, dtype=np.uint8), reward, is_finished, info
            else:
                is_finished = False
-            return state.image_buffer.copy(), reward, is_finished, {}
+                return state.image_buffer.copy(), reward, is_finished, info
        except doom_py.vizdoom.ViZDoomIsNotRunningException:
-            return [], 0, True, {}
+            return np.zeros(shape=self.observation_space.shape, dtype=np.uint8), 0, True, {}
    def _reset(self):
-        self.game.new_episode()
+        if self.is_initialized and not self._closed:
-        return self.game.get_state().image_buffer.copy()
+            self._start_episode()
            return self.game.get_state().image_buffer.copy()
        else:
            return self._load_level()
    def _render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
-                # If we don't None out this reference pyglet becomes unhappy
+                self.viewer = None      # If we don't None out this reference pyglet becomes unhappy
                self.viewer = None
            return
        try:
            if 'human' == mode and self.no_render: return
            state = self.game.get_state()
            img = state.image_buffer
            # VizDoom returns None if the episode is finished, let's make it
            # an empty image so the recorder doesn't stop
            if img is None:
-                img = numpy.zeros((self.screen_height, self.screen_width, 3), dtype=numpy.uint8)
+                img = np.zeros(shape=self.observation_space.shape, dtype=np.uint8)
            if mode == 'rgb_array':
                return img
            elif mode is 'human':
@@ -59,9 +186,42 @@ class DoomEnv(gym.Env, utils.EzPickle):
                if self.viewer is None:
                    self.viewer = rendering.SimpleImageViewer()
                self.viewer.imshow(img)
-                sleep(0.02857)  # 35 fps = 0.02857 sleep between frames
+                if 'normal' == self.mode:
                    sleep(0.02857)  # 35 fps = 0.02857 sleep between frames
        except doom_py.vizdoom.ViZDoomIsNotRunningException:
            pass # Doom has been closed
    def _close(self):
        """Close the underlying Doom game held in self.game."""
        self.game.close()
    def _seed(self, seed=None):
        """Hash `seed`, reduce it modulo 2**32, remember it in self.curr_seed
        and return it wrapped in a one-element list.

        The stored value is handed to the game by _start_episode().
        """
        hashed = seeding.hash_seed(seed)
        self.curr_seed = hashed % 2 ** 32
        return [self.curr_seed]
    def _get_game_variables(self, state_variables):
        info = {}
        info["LEVEL"] = self.level
        if state_variables is None: return info
        info['KILLCOUNT'] = state_variables[0]
        info['ITEMCOUNT'] = state_variables[1]
        info['SECRETCOUNT'] = state_variables[2]
        info['FRAGCOUNT'] = state_variables[3]
        info['HEALTH'] = state_variables[4]
        info['ARMOR'] = state_variables[5]
        info['DEAD'] = state_variables[6]
        info['ON_GROUND'] = state_variables[7]
        info['ATTACK_READY'] = state_variables[8]
        info['ALTATTACK_READY'] = state_variables[9]
        info['SELECTED_WEAPON'] = state_variables[10]
        info['SELECTED_WEAPON_AMMO'] = state_variables[11]
        info['AMMO1'] = state_variables[12]
        info['AMMO2'] = state_variables[13]
        info['AMMO3'] = state_variables[14]
        info['AMMO4'] = state_variables[15]
        info['AMMO5'] = state_variables[16]
        info['AMMO6'] = state_variables[17]
        info['AMMO7'] = state_variables[18]
        info['AMMO8'] = state_variables[19]
        info['AMMO9'] = state_variables[20]
        info['AMMO0'] = state_variables[21]
        return info
--- a/gym/envs/doom/doom_health_gathering.py
+++ b/gym/envs/doom/doom_health_gathering.py
@@ -1,12 +1,5 @@
 import logging
 import os
 import numpy as np
 from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
 from gym import error, spaces
 from gym.envs.doom import doom_env
 from gym.utils import seeding
 logger = logging.getLogger(__name__)
@@ -19,47 +12,34 @@ class DoomHealthGatheringEnv(doom_env.DoomEnv):
    additional kits will spawn at intervals.
    Allowed actions:
-        [12] - MOVE_FORWARD                     - Move forward - Values 0 or 1
+        [13] - MOVE_FORWARD                     - Move forward - Values 0 or 1
-        [13] - TURN_RIGHT                       - Turn right - Values 0 or 1
+        [14] - TURN_RIGHT                       - Turn right - Values 0 or 1
-        [14] - TURN_LEFT                        - Turn left - Values 0 or 1
+        [15] - TURN_LEFT                        - Turn left - Values 0 or 1
    Note: see controls.md for details
    Rewards:
-        +  1    - Several times per second - Survive as long as possible
+        +  1    - 35 times per second - Survive as long as possible
        -100    - Death penalty
    Goal: 1000 points
        Stay alive long enough to reach 1,000 points (~ 30 secs)
    Mode:
        - env.mode can be 'fast', 'normal' or 'human' (e.g. env.mode = 'fast')
        - 'fast' (default) will run as fast as possible (~75 fps) (best for simulation)
        - 'normal' will run at roughly 35 fps (easier for human to watch)
        - 'human' will let you play the game (keyboard only: Arrow Keys, '<', '>' and Ctrl)
    Ends when:
        - Player is dead
        - Timeout (60 seconds - 2,100 frames)
    Actions:
        actions = [0] * 43
        actions[13] = 0      # MOVE_FORWARD
        actions[14] = 1      # TURN_RIGHT
        actions[15] = 0      # TURN_LEFT
    -----------------------------------------------------
    """
    def __init__(self):
-        super(DoomHealthGatheringEnv, self).__init__()
+        super(DoomHealthGatheringEnv, self).__init__(4)
        package_directory = os.path.dirname(os.path.abspath(__file__))
        self.loader = Loader()
        self.game = DoomGame()
        self.game.load_config(os.path.join(package_directory, 'assets/health_gathering.cfg'))
        self.game.set_vizdoom_path(self.loader.get_vizdoom_path())
        self.game.set_doom_game_path(self.loader.get_freedoom_path())
        self.game.set_doom_scenario_path(self.loader.get_scenario_path('health_gathering.wad'))
        self.game.set_doom_map('map01')
        self.screen_height = 480                    # Must match .cfg file
        self.screen_width = 640                     # Must match .cfg file
        self.game.set_window_visible(False)
        self.viewer = None
        self.game.init()
        self.game.new_episode()
        # 3 allowed actions [12, 13, 14] (must match .cfg file)
        self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
        self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
        self._seed()
    def _seed(self, seed=None):
        seed = seeding.hash_seed(seed) % 2**32
        self.game.set_seed(seed)
        return [seed]
--- a/gym/envs/doom/doom_my_way_home.py
+++ b/gym/envs/doom/doom_my_way_home.py
@@ -1,12 +1,5 @@
 import logging
 import os
 import numpy as np
 from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
 from gym import error, spaces
 from gym.envs.doom import doom_env
 from gym.utils import seeding
 logger = logging.getLogger(__name__)
@@ -19,46 +12,34 @@ class DoomMyWayHomeEnv(doom_env.DoomEnv):
    The vest is always in the same room. Player must find the vest.
    Allowed actions:
-        [12] - MOVE_FORWARD                     - Move forward - Values 0 or 1
+        [13] - MOVE_FORWARD                     - Move forward - Values 0 or 1
-        [13] - TURN_RIGHT                       - Turn right - Values 0 or 1
+        [14] - TURN_RIGHT                       - Turn right - Values 0 or 1
-        [14] - TURN_LEFT                        - Turn left - Values 0 or 1
+        [15] - TURN_LEFT                        - Turn left - Values 0 or 1
    Note: see controls.md for details
    Rewards:
        +  1    - Finding the vest
-        -0.0001 - Several times per second - Find the vest quick!
+        -0.0001 - 35 times per second - Find the vest quick!
    Goal: 0.50 point
        Find the vest
    Mode:
        - env.mode can be 'fast', 'normal' or 'human' (e.g. env.mode = 'fast')
        - 'fast' (default) will run as fast as possible (~75 fps) (best for simulation)
        - 'normal' will run at roughly 35 fps (easier for human to watch)
        - 'human' will let you play the game (keyboard only: Arrow Keys, '<', '>' and Ctrl)
    Ends when:
        - Vest is found
-        - Timeout (2 minutes - 4,200 frames)
+        - Timeout (1 minute - 2,100 frames)
    Actions:
        actions = [0] * 43
        actions[13] = 0      # MOVE_FORWARD
        actions[14] = 1      # TURN_RIGHT
        actions[15] = 0      # TURN_LEFT
    -----------------------------------------------------
    """
    def __init__(self):
-        super(DoomMyWayHomeEnv, self).__init__()
+        super(DoomMyWayHomeEnv, self).__init__(5)
        package_directory = os.path.dirname(os.path.abspath(__file__))
        self.loader = Loader()
        self.game = DoomGame()
        self.game.load_config(os.path.join(package_directory, 'assets/my_way_home.cfg'))
        self.game.set_vizdoom_path(self.loader.get_vizdoom_path())
        self.game.set_doom_game_path(self.loader.get_freedoom_path())
        self.game.set_doom_scenario_path(self.loader.get_scenario_path('my_way_home.wad'))
        self.screen_height = 480                    # Must match .cfg file
        self.screen_width = 640                     # Must match .cfg file
        self.game.set_window_visible(False)
        self.viewer = None
        self.game.init()
        self.game.new_episode()
        # 3 allowed actions [12, 13, 14] (must match .cfg file)
        self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
        self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
        self._seed()
    def _seed(self, seed=None):
        seed = seeding.hash_seed(seed) % 2**32
        self.game.set_seed(seed)
        return [seed]
--- a/gym/envs/doom/doom_predict_position.py
+++ b/gym/envs/doom/doom_predict_position.py
@@ -1,12 +1,5 @@
 import logging
 import os
 import numpy as np
 from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
 from gym import error, spaces
 from gym.envs.doom import doom_env
 from gym.utils import seeding
 logger = logging.getLogger(__name__)
@@ -21,13 +14,13 @@ class DoomPredictPositionEnv(doom_env.DoomEnv):
    Allowed actions:
        [0]  - ATTACK                           - Shoot weapon - Values 0 or 1
-        [13] - TURN_RIGHT                       - Turn right - Values 0 or 1
+        [14] - TURN_RIGHT                       - Turn right - Values 0 or 1
-        [14] - TURN_LEFT                        - Turn left - Values 0 or 1
+        [15] - TURN_LEFT                        - Turn left - Values 0 or 1
    Note: see controls.md for details
    Rewards:
        +  1    - Killing the monster
-        -0.0001 - Several times per second - Kill the monster faster!
+        -0.0001 - 35 times per second - Kill the monster faster!
    Goal: 0.5 point
        Kill the monster
@@ -35,36 +28,23 @@ class DoomPredictPositionEnv(doom_env.DoomEnv):
    Hint: Missile launcher takes longer to load. You must wait a good second after the game starts
        before trying to fire it.
    Mode:
        - env.mode can be 'fast', 'normal' or 'human' (e.g. env.mode = 'fast')
        - 'fast' (default) will run as fast as possible (~75 fps) (best for simulation)
        - 'normal' will run at roughly 35 fps (easier for human to watch)
        - 'human' will let you play the game (keyboard only: Arrow Keys, '<', '>' and Ctrl)
    Ends when:
        - Monster is dead
        - Out of missile (you only have one)
        - Timeout (20 seconds - 700 frames)
    Actions:
        actions = [0] * 43
        actions[0] = 0       # ATTACK
        actions[14] = 1      # TURN_RIGHT
        actions[15] = 0      # TURN_LEFT
    -----------------------------------------------------
    """
    def __init__(self):
-        package_directory = os.path.dirname(os.path.abspath(__file__))
+        super(DoomPredictPositionEnv, self).__init__(6)
        self.loader = Loader()
        self.game = DoomGame()
        self.game.load_config(os.path.join(package_directory, 'assets/predict_position.cfg'))
        self.game.set_vizdoom_path(self.loader.get_vizdoom_path())
        self.game.set_doom_game_path(self.loader.get_freedoom_path())
        self.game.set_doom_scenario_path(self.loader.get_scenario_path('predict_position.wad'))
        self.game.set_doom_map('map01')
        self.screen_height = 480                    # Must match .cfg file
        self.screen_width = 640                     # Must match .cfg file
        self.game.set_window_visible(False)
        self.viewer = None
        self.game.init()
        self.game.new_episode()
        # 3 allowed actions [0, 13, 14] (must match .cfg file)
        self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
        self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
        self._seed()
    def _seed(self, seed=None):
        # Derive a random seed.
        seed = seeding.hash_seed(seed) % 2**32
        self.game.set_seed(seed)
        return [seed]
--- a/gym/envs/doom/doom_take_cover.py
+++ b/gym/envs/doom/doom_take_cover.py
@@ -1,12 +1,5 @@
 import logging
 import os
 import numpy as np
 from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
 from gym import error, spaces
 from gym.envs.doom import doom_env
 from gym.utils import seeding
 logger = logging.getLogger(__name__)
@@ -18,45 +11,31 @@ class DoomTakeCoverEnv(doom_env.DoomEnv):
    at you. You need to survive as long as possible.
    Allowed actions:
-        [9]  - MOVE_RIGHT                       - Move to the right - Values 0 or 1
+        [10] - MOVE_RIGHT                       - Move to the right - Values 0 or 1
-        [10] - MOVE_LEFT                        - Move to the left - Values 0 or 1
+        [11] - MOVE_LEFT                        - Move to the left - Values 0 or 1
    Note: see controls.md for details
    Rewards:
-        +  1    - Several times per second - Survive as long as possible
+        +  1    - 35 times per second - Survive as long as possible
    Goal: 750 points
        Survive for ~ 20 seconds
    Mode:
        - env.mode can be 'fast', 'normal' or 'human' (e.g. env.mode = 'fast')
        - 'fast' (default) will run as fast as possible (~75 fps) (best for simulation)
        - 'normal' will run at roughly 35 fps (easier for human to watch)
        - 'human' will let you play the game (keyboard only: Arrow Keys, '<', '>' and Ctrl)
    Ends when:
        - Player is dead (one or two fireballs should be enough to kill you)
        - Timeout (60 seconds - 2,100 frames)
    Actions:
        actions = [0] * 43
        actions[10] = 0      # MOVE_RIGHT
        actions[11] = 1      # MOVE_LEFT
    -----------------------------------------------------
    """
    def __init__(self):
-        super(DoomTakeCoverEnv, self).__init__()
+        super(DoomTakeCoverEnv, self).__init__(7)
        package_directory = os.path.dirname(os.path.abspath(__file__))
        self.loader = Loader()
        self.game = DoomGame()
        self.game.load_config(os.path.join(package_directory, 'assets/take_cover.cfg'))
        self.game.set_vizdoom_path(self.loader.get_vizdoom_path())
        self.game.set_doom_game_path(self.loader.get_freedoom_path())
        self.game.set_doom_scenario_path(self.loader.get_scenario_path('take_cover.wad'))
        self.game.set_doom_map('map01')
        self.screen_height = 480                    # Must match .cfg file
        self.screen_width = 640                     # Must match .cfg file
        self.game.set_window_visible(False)
        self.viewer = None
        self.game.init()
        self.game.new_episode()
        # 2 allowed actions [9, 10] (must match .cfg file)
        self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 2))
        self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
        self._seed()
    def _seed(self, seed=None):
        seed = seeding.hash_seed(seed) % 2**32
        self.game.set_seed(seed)
        return [seed]
--- a/gym/envs/tests/test_envs.py
+++ b/gym/envs/tests/test_envs.py
@@ -22,6 +22,11 @@ def should_skip_env_spec_for_tests(spec):
        logger.warn("Skipping tests for box2d env {}".format(spec._entry_point))
        return True
    # TODO: Issue #167 - Re-enable these tests after fixing DoomDeathmatch crash
    if spec._entry_point.startswith('gym.envs.doom:DoomDeathmatchEnv'):
        logger.warn("Skipping tests for DoomDeathmatchEnv {}".format(spec._entry_point))
        return True
    # Skip ConvergenceControl tests (the only env in parameter_tuning) according to pull #104
    if spec._entry_point.startswith('gym.envs.parameter_tuning:'):
        logger.warn("Skipping tests for parameter_tuning env {}".format(spec._entry_point))
--- a/gym/scoreboard/init.py
+++ b/gym/scoreboard/init.py
@@ -640,6 +640,31 @@ add_task(
    group='doom',
    experimental=True,
    contributor='ppaquette',
    summary='Mission #1 - Kill a single monster using your pistol.',
    description="""
 This map is rectangular with gray walls, ceiling and floor.
 You are spawned in the center of the longer wall, and a red
 circular monster is spawned randomly on the opposite wall.
 You need to kill the monster (one bullet is enough).
 Allowed actions:
    [0]  - ATTACK                           - Shoot weapon - Values 0 or 1
    [10] - MOVE_RIGHT                       - Move to the right - Values 0 or 1
    [11] - MOVE_LEFT                        - Move to the left - Values 0 or 1
 Rewards:
    +101    - Killing the monster
    -  5    - Missing a shot
    -  1    - 35 times per second - Kill the monster faster!
 Goal: 10 points
    Kill the monster in 3 secs with 1 shot
 Ends when:
    - Monster is dead
    - Player is dead
    - Timeout (10 seconds - 350 frames)
 """
 )
 add_task(
@@ -647,6 +672,33 @@ add_task(
    group='doom',
    experimental=True,
    contributor='ppaquette',
    summary='Mission #2 - Run as fast as possible to grab a vest.',
    description="""
 This map is designed to improve your navigation. There is a vest
 at the end of the corridor, with 6 enemies (3 groups of 2). Your goal
 is to get to the vest as soon as possible, without being killed.
 Allowed actions:
    [0]  - ATTACK                           - Shoot weapon - Values 0 or 1
    [10] - MOVE_RIGHT                       - Move to the right - Values 0 or 1
    [11] - MOVE_LEFT                        - Move to the left - Values 0 or 1
    [13] - MOVE_FORWARD                     - Move forward - Values 0 or 1
    [14] - TURN_RIGHT                       - Turn right - Values 0 or 1
    [15] - TURN_LEFT                        - Turn left - Values 0 or 1
 Rewards:
    + dX    - For getting closer to the vest
    - dX    - For getting further from the vest
    -100    - Penalty for being killed
 Goal: 1,000 points
    Reach the vest (or at least get past the guards in the 3rd group)
 Ends when:
    - Player touches vest
    - Player is dead
    - Timeout (1 minute - 2,100 frames)
 """
 )
 add_task(
@@ -654,6 +706,32 @@ add_task(
    group='doom',
    experimental=True,
    contributor='ppaquette',
    summary='Mission #3 - Kill enemies coming at you from all sides.',
    description="""
 This map is designed to teach you how to kill and how to stay alive.
 You will also need to keep an eye on your ammunition level. You are only
 rewarded for kills, so figure out how to stay alive.
 The map is a circle with monsters. You are in the middle. Monsters will
 respawn with additional health when killed. Kill as many as you can
 before you run out of ammo.
 Allowed actions:
    [0]  - ATTACK                           - Shoot weapon - Values 0 or 1
    [14] - TURN_RIGHT                       - Turn right - Values 0 or 1
    [15] - TURN_LEFT                        - Turn left - Values 0 or 1
 Rewards:
    +  1    - Killing a monster
    -  1    - Penalty for being killed
 Goal: 10 points
    Kill 11 monsters (you have 26 ammo)
 Ends when:
    - Player is dead
    - Timeout (60 seconds - 2100 frames)
 """
 )
 add_task(
@@ -661,6 +739,32 @@ add_task(
    group='doom',
    experimental=True,
    contributor='ppaquette',
    summary='Mission #4 - Kill enemies on the other side of the room.',
    description="""
 This map is designed to teach you how to kill and how to stay alive.
 Your ammo will automatically replenish. You are only rewarded for kills,
 so figure out how to stay alive.
 The map is a rectangle with monsters on the other side. Monsters will
 respawn with additional health when killed. Kill as many as you can
 before they kill you. This map is harder than the previous.
 Allowed actions:
    [0]  - ATTACK                           - Shoot weapon - Values 0 or 1
    [14] - TURN_RIGHT                       - Turn right - Values 0 or 1
    [15] - TURN_LEFT                        - Turn left - Values 0 or 1
 Rewards:
    +  1    - Killing a monster
    -  1    - Penalty for being killed
 Goal: 15 points
    Kill 16 monsters
 Ends when:
    - Player is dead
    - Timeout (60 seconds - 2100 frames)
 """
 )
 add_task(
@@ -668,6 +772,29 @@ add_task(
    group='doom',
    experimental=True,
    contributor='ppaquette',
    summary='Mission #5 - Learn to grab medkits to survive as long as possible.',
    description="""
 This map is a guide on how to survive by collecting health packs.
 It is a rectangle with green, acidic floor which hurts the player
 periodically. There are also medkits spread around the map, and
 additional kits will spawn at intervals.
 Allowed actions:
    [13] - MOVE_FORWARD                     - Move forward - Values 0 or 1
    [14] - TURN_RIGHT                       - Turn right - Values 0 or 1
    [15] - TURN_LEFT                        - Turn left - Values 0 or 1
 Rewards:
    +  1    - 35 times per second - Survive as long as possible
    -100    - Death penalty
 Goal: 1000 points
    Stay alive long enough to reach 1,000 points (~ 30 secs)
 Ends when:
    - Player is dead
    - Timeout (60 seconds - 2,100 frames)
 """
 )
 add_task(
@@ -675,6 +802,29 @@ add_task(
    group='doom',
    experimental=True,
    contributor='ppaquette',
    summary='Mission #6 - Find the vest in one of the 4 rooms.',
    description="""
 This map is designed to improve navigational skills. It is a series of
 interconnected rooms and 1 corridor with a dead end. Each room
 has a separate color. There is a green vest in one of the rooms.
 The vest is always in the same room. Player must find the vest.
 Allowed actions:
    [13] - MOVE_FORWARD                     - Move forward - Values 0 or 1
    [14] - TURN_RIGHT                       - Turn right - Values 0 or 1
    [15] - TURN_LEFT                        - Turn left - Values 0 or 1
 Rewards:
    +  1    - Finding the vest
    -0.0001 - 35 times per second - Find the vest quick!
 Goal: 0.50 point
    Find the vest
 Ends when:
    - Vest is found
    - Timeout (1 minute - 2,100 frames)
 """
 )
 add_task(
@@ -682,6 +832,34 @@ add_task(
    group='doom',
    experimental=True,
    contributor='ppaquette',
    summary='Mission #7 - Learn how to kill an enemy with a rocket launcher.',
    description="""
 This map is designed to train you on using a rocket launcher.
 It is a rectangular map with a monster on the opposite side. You need
 to use your rocket launcher to kill it. The rocket adds a delay between
 the moment it is fired and the moment it reaches the other side of the room.
 You need to predict the position of the monster to kill it.
 Allowed actions:
    [0]  - ATTACK                           - Shoot weapon - Values 0 or 1
    [14] - TURN_RIGHT                       - Turn right - Values 0 or 1
    [15] - TURN_LEFT                        - Turn left - Values 0 or 1
 Rewards:
    +  1    - Killing the monster
    -0.0001 - 35 times per second - Kill the monster faster!
 Goal: 0.5 point
    Kill the monster
 Hint: Missile launcher takes longer to load. You must wait a good second after the game starts
    before trying to fire it.
 Ends when:
    - Monster is dead
    - Out of missiles (you only have one)
    - Timeout (20 seconds - 700 frames)
 """
 )
 add_task(
@@ -689,6 +867,26 @@ add_task(
    group='doom',
    experimental=True,
    contributor='ppaquette',
    summary='Mission #8 - Survive as long as possible with enemies shooting at you.',
    description="""
 This map is to train you on the damage of incoming missiles.
 It is a rectangular map with monsters firing missiles and fireballs
 at you. You need to survive as long as possible.
 Allowed actions:
    [10] - MOVE_RIGHT                       - Move to the right - Values 0 or 1
    [11] - MOVE_LEFT                        - Move to the left - Values 0 or 1
 Rewards:
    +  1    - 35 times per second - Survive as long as possible
 Goal: 750 points
    Survive for ~ 20 seconds
 Ends when:
    - Player is dead (one or two fireballs should be enough to kill you)
    - Timeout (60 seconds - 2,100 frames)
 """
 )
 add_task(
@@ -696,6 +894,23 @@ add_task(
    group='doom',
    experimental=True,
    contributor='ppaquette',
    summary='Mission #9 - Kill as many enemies as possible without being killed.',
    description="""
 Kill as many monsters as possible without being killed.
 Allowed actions:
    ALL
 Rewards:
    +1      - Killing a monster
 Goal: 20 points
    Kill 20 monsters
 Ends when:
    - Player is dead
    - Timeout (3 minutes - 6,300 frames)
 """
 )
--- a/gym/spaces/discrete.py
+++ b/gym/spaces/discrete.py
@@ -1,6 +1,6 @@
 import numpy as np
-import gym
+import gym, time
 from gym.spaces import prng
 class Discrete(gym.Space):
--- a/gym/spaces/high_low.py
+++ b/gym/spaces/high_low.py
@@ -8,9 +8,9 @@ class HighLow(gym.Space):
    A matrix of dimensions n x 3, where
    - n is the number of options in the space (e.g. buttons that can be pressed simultaneously)
-    - u[1] (the first column) is the minimum value (inclusive) that the option can have
+    - u[0] (the first column) is the minimum value (inclusive) that the option can have
-    - u[2] (the second column) is the maximum value (inclusive) that the option can have
+    - u[1] (the second column) is the maximum value (inclusive) that the option can have
-    - u[3] (the third column) is the precision (0 = rounded to integer, 2 = rounded to 2 decimals)
+    - u[2] (the third column) is the precision (0 = rounded to integer, 2 = rounded to 2 decimals)
    e.g. if the space is composed of ATTACK (values: 0-100), MOVE_LEFT(0-1), MOVE_RIGHT(0,1)
    the space would be [ [0.0, 100.0, 2], [0, 1, 0], [0, 1, 0] ]
@@ -30,15 +30,17 @@ class HighLow(gym.Space):
    def sample(self):
        # For each row: round(random .* (max - min) + min, precision)
-        max_minus_min = self.matrix[:, 1] - self.matrix[:, 0]
+        max_minus_min = np.zeros(shape=(self.matrix.shape[0], 1), dtype=np.int32)
        for i in range(self.matrix.shape[0]):                   # Must use this conversion to avoid overflows
            max_minus_min[i] = int(self.matrix[i, 1]) - int(self.matrix[i, 0])
        random_matrix = np.multiply(max_minus_min, prng.np_random.rand(self.num_rows, 1)) + self.matrix[:, 0]
-        rounded_matrix = np.zeros(self.num_rows)
+        rounded_matrix = np.zeros(self.num_rows, dtype=np.int32)
        for i in range(self.num_rows):
            rounded_matrix[i] = round(random_matrix[i, 0], int(self.matrix[i, 2]))
-        return rounded_matrix
+        return rounded_matrix.tolist()
    def contains(self, x):
-        if x.shape[0] != self.num_rows:
+        if len(x) != self.num_rows:
            return False
        for i in range(self.num_rows):
            if not (self.matrix[i, 0] <= x[i] <= self.matrix[i, 1]):
@@ -52,7 +54,7 @@ class HighLow(gym.Space):
    @property
    def shape(self):
-        return self.matrix.shape
+        return self.matrix.shape[0]
    def __repr__(self):
        return "High-Low" + str(self.shape)
    def __eq__(self, other):
--- a/gym/spaces/tests/init.py
+++ b/gym/spaces/tests/init.py