diff --git a/gym/envs/__init__.py b/gym/envs/__init__.py index 076d18a2e..393b117e6 100644 --- a/gym/envs/__init__.py +++ b/gym/envs/__init__.py @@ -254,3 +254,51 @@ register( 'board_size': 9, }, ) + +# Doom +# ---------------------------------------- + +register( + id='DoomBasic-v0', + entry_point='gym.envs.doom:DoomBasicEnv', +) + +register( + id='DoomCorridor-v0', + entry_point='gym.envs.doom:DoomCorridorEnv', +) + +register( + id='DoomDefendCenter-v0', + entry_point='gym.envs.doom:DoomDefendCenterEnv', +) + +register( + id='DoomDefendLine-v0', + entry_point='gym.envs.doom:DoomDefendLineEnv', +) + +register( + id='DoomHealthGathering-v0', + entry_point='gym.envs.doom:DoomHealthGatheringEnv', +) + +register( + id='DoomMyWayHome-v0', + entry_point='gym.envs.doom:DoomMyWayHomeEnv', +) + +register( + id='DoomPredictPosition-v0', + entry_point='gym.envs.doom:DoomPredictPositionEnv', +) + +register( + id='DoomTakeCover-v0', + entry_point='gym.envs.doom:DoomTakeCoverEnv', +) + +register( + id='DoomDeathmatch-v0', + entry_point='gym.envs.doom:DoomDeathmatchEnv', +) diff --git a/gym/envs/doom/__init__.py b/gym/envs/doom/__init__.py new file mode 100644 index 000000000..ca568fd3d --- /dev/null +++ b/gym/envs/doom/__init__.py @@ -0,0 +1,10 @@ +from gym.envs.doom.doom_env import DoomEnv +from gym.envs.doom.doom_basic import DoomBasicEnv +from gym.envs.doom.doom_corridor import DoomCorridorEnv +from gym.envs.doom.doom_defend_center import DoomDefendCenterEnv +from gym.envs.doom.doom_defend_line import DoomDefendLineEnv +from gym.envs.doom.doom_health_gathering import DoomHealthGatheringEnv +from gym.envs.doom.doom_my_way_home import DoomMyWayHomeEnv +from gym.envs.doom.doom_predict_position import DoomPredictPositionEnv +from gym.envs.doom.doom_take_cover import DoomTakeCoverEnv +from gym.envs.doom.doom_deathmatch import DoomDeathmatchEnv diff --git a/gym/envs/doom/assets/basic.cfg b/gym/envs/doom/assets/basic.cfg new file mode 100644 index 000000000..463773c41 --- /dev/null 
+++ b/gym/envs/doom/assets/basic.cfg @@ -0,0 +1,44 @@ +# Lines starting with # are treated as comments (or with whitespaces+#). +# It doesn't matter if you use capital letters or not. +# It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout. + +# Rewards (Negative living reward means you lose points for staying alive, and need to finish asap) +living_reward = -1 + +# Rendering options +screen_resolution = RES_640x480 +screen_format = BGR24 +render_hud = True +render_crosshair = false +render_weapon = true +render_decals = false +render_particles = false + +# make episodes start after 14 tics (after unholstering the gun) (35 tics per second) +episode_start_time = 14 + +# make episodes finish after 350 tics (10 seconds) +episode_timeout = 350 + +# Available buttons +available_buttons = + { + ATTACK + MOVE_RIGHT + MOVE_LEFT + } + +# Game variables that will be in the state +available_game_variables = + { + KILLCOUNT + HEALTH + ARMOR + SELECTED_WEAPON + SELECTED_WEAPON_AMMO + AMMO2 + } + +mode = PLAYER +doom_skill = 5 +sound_enabled = false diff --git a/gym/envs/doom/assets/deadly_corridor.cfg b/gym/envs/doom/assets/deadly_corridor.cfg new file mode 100644 index 000000000..34c696881 --- /dev/null +++ b/gym/envs/doom/assets/deadly_corridor.cfg @@ -0,0 +1,47 @@ +# Lines starting with # are treated as comments (or with whitespaces+#). +# It doesn't matter if you use capital letters or not. +# It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout. 
+ +# Rewards (Large penalty for being killed) +death_penalty = 100 + +# Rendering options +screen_resolution = RES_640x480 +screen_format = BGR24 +render_hud = true +render_crosshair = false +render_weapon = true +render_decals = false +render_particles = false + +# Episode Start Time (Immediate) +episode_start_time = 0 + +# Make episodes finish after 2100 tics (1 minutes) +episode_timeout = 2100 + +# Available buttons +available_buttons = + { + ATTACK + MOVE_RIGHT + MOVE_LEFT + MOVE_FORWARD + TURN_RIGHT + TURN_LEFT + } + +# Game variables that will be in the state +available_game_variables = + { + KILLCOUNT + HEALTH + ARMOR + SELECTED_WEAPON + SELECTED_WEAPON_AMMO + AMMO2 + } + +mode = PLAYER +doom_skill = 5 +sound_enabled = false diff --git a/gym/envs/doom/assets/deathmatch.cfg b/gym/envs/doom/assets/deathmatch.cfg new file mode 100644 index 000000000..fc453bad8 --- /dev/null +++ b/gym/envs/doom/assets/deathmatch.cfg @@ -0,0 +1,92 @@ +# Lines starting with # are treated as comments (or with whitespaces+#). +# It doesn't matter if you use capital letters or not. +# It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout. 
+ +# Rendering options +screen_resolution = RES_640x480 +screen_format = BGR24 +render_hud = true +render_crosshair = false +render_weapon = true +render_decals = false +render_particles = false + +# Immediate Start +episode_start_time = 0 + +# Make episodes finish after 3 minutes (6300 ticks) +episode_timeout = 6300 + +# Available buttons +available_buttons = + { + ATTACK + USE + JUMP + CROUCH + TURN180 + + RELOAD + ZOOM + SPEED + STRAFE + + MOVE_RIGHT + MOVE_LEFT + MOVE_BACKWARD + MOVE_FORWARD + TURN_RIGHT + + TURN_LEFT + LOOK_UP + LOOK_DOWN + LAND + SELECT_WEAPON1 + + SELECT_WEAPON2 + SELECT_WEAPON3 + SELECT_WEAPON4 + SELECT_WEAPON5 + SELECT_WEAPON6 + + SELECT_WEAPON7 + SELECT_WEAPON8 + SELECT_WEAPON9 + SELECT_WEAPON0 + SELECT_NEXT_WEAPON + + SELECT_PREV_WEAPON + DROP_SELECTED_WEAPON + ACTIVATE_SELECTED_WEAPON + SELECT_NEXT_ITEM + SELECT_PREV_ITEM + + DROP_SELECTED_ITEM + LOOK_UP_DOWN_DELTA + TURN_LEFT_RIGHT_DELTA + MOVE_FORWARD_BACKWARD_DELTA + MOVE_LEFT_RIGHT_DELTA + MOVE_UP_DOWN_DELTA + } + +# Game variables that will be in the state +available_game_variables = + { + KILLCOUNT + HEALTH + ARMOR + + SELECTED_WEAPON + SELECTED_WEAPON_AMMO + + AMMO1 + AMMO2 + AMMO3 + AMMO4 + AMMO5 + AMMO6 + } + +mode = PLAYER +doom_skill = 5 +sound_enabled = false diff --git a/gym/envs/doom/assets/defend_the_center.cfg b/gym/envs/doom/assets/defend_the_center.cfg new file mode 100644 index 000000000..c6b8f0848 --- /dev/null +++ b/gym/envs/doom/assets/defend_the_center.cfg @@ -0,0 +1,44 @@ +# Lines starting with # are treated as comments (or with whitespaces+#). +# It doesn't matter if you use capital letters or not. +# It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout. 
+ +# Rewards +death_penalty = 1 + +# Rendering options +screen_resolution = RES_640x480 +screen_format = BGR24 +render_hud = true +render_crosshair = false +render_weapon = true +render_decals = false +render_particles = false + +# make episodes start after 14 tics (after unholstering the gun) (35 tics per seconds) +episode_start_time = 14 + +# Make episodes finish after 2100 tics (1 minutes) +episode_timeout = 2100 + +# Available buttons +available_buttons = + { + ATTACK + TURN_RIGHT + TURN_LEFT + } + +# Game variables that will be in the state +available_game_variables = + { + KILLCOUNT + HEALTH + ARMOR + SELECTED_WEAPON + SELECTED_WEAPON_AMMO + AMMO2 + } + +mode = PLAYER +doom_skill = 3 +sound_enabled = false \ No newline at end of file diff --git a/gym/envs/doom/assets/defend_the_line.cfg b/gym/envs/doom/assets/defend_the_line.cfg new file mode 100644 index 000000000..6061f4514 --- /dev/null +++ b/gym/envs/doom/assets/defend_the_line.cfg @@ -0,0 +1,44 @@ +# Lines starting with # are treated as comments (or with whitespaces+#). +# It doesn't matter if you use capital letters or not. +# It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout. 
+ +# Rewards +death_penalty = 1 + +# Rendering options +screen_resolution = RES_640x480 +screen_format = BGR24 +render_hud = true +render_crosshair = false +render_weapon = true +render_decals = false +render_particles = false + +# make episodes start after 14 tics (after unholstering the gun) (35 tics per seconds) +episode_start_time = 14 + +# Make episodes finish after 2100 tics (1 minutes) +episode_timeout = 2100 + +# Available buttons +available_buttons = + { + ATTACK + TURN_RIGHT + TURN_LEFT + } + +# Game variables that will be in the state +available_game_variables = + { + KILLCOUNT + HEALTH + ARMOR + SELECTED_WEAPON + SELECTED_WEAPON_AMMO + AMMO2 + } + +mode = PLAYER +doom_skill = 5 +sound_enabled = false diff --git a/gym/envs/doom/assets/health_gathering.cfg b/gym/envs/doom/assets/health_gathering.cfg new file mode 100644 index 000000000..136386634 --- /dev/null +++ b/gym/envs/doom/assets/health_gathering.cfg @@ -0,0 +1,46 @@ +# Lines starting with # are treated as comments (or with whitespaces+#). +# It doesn't matter if you use capital letters or not. +# It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout. 
+ +# Rewards (Bonus for staying alive, large penalty for being killed) +living_reward = 1 +death_penalty = 100 + +# Rendering options +screen_resolution = RES_640x480 +screen_format = BGR24 +render_hud = true +render_crosshair = false +render_weapon = false +render_decals = false +render_particles = false + +# make episodes start after 14 tics (after unholstering the gun) (35 tics per seconds) +episode_start_time = 14 + +# Make episodes finish after 2100 tics (1 minutes) +episode_timeout = 2100 + +# Available buttons +available_buttons = + { + MOVE_FORWARD + TURN_RIGHT + TURN_LEFT + } + +# Game variables that will be in the state +available_game_variables = + { + KILLCOUNT + HEALTH + ARMOR + SELECTED_WEAPON + SELECTED_WEAPON_AMMO + AMMO2 + } + +mode = PLAYER +doom_skill = 5 +sound_enabled = false + diff --git a/gym/envs/doom/assets/my_way_home.cfg b/gym/envs/doom/assets/my_way_home.cfg new file mode 100644 index 000000000..c90556ed4 --- /dev/null +++ b/gym/envs/doom/assets/my_way_home.cfg @@ -0,0 +1,45 @@ +# Lines starting with # are treated as comments (or with whitespaces+#). +# It doesn't matter if you use capital letters or not. +# It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout. 
+ +# Rewards +living_reward = -0.0001 + +# Rendering options +screen_resolution = RES_640x480 +screen_format = BGR24 +render_hud = true +render_crosshair = false +render_weapon = false +render_decals = false +render_particles = false + +# make episodes start after 14 tics (after unholstering the gun) (35 tics per seconds) +episode_start_time = 14 + +# Make episodes finish after 4200 tics (2 minutes) +episode_timeout = 4200 + +# Available buttons +available_buttons = + { + MOVE_FORWARD + TURN_RIGHT + TURN_LEFT + } + +# Game variables that will be in the state +available_game_variables = + { + KILLCOUNT + HEALTH + ARMOR + SELECTED_WEAPON + SELECTED_WEAPON_AMMO + AMMO0 + AMMO2 + } + +mode = PLAYER +doom_skill = 5 +sound_enabled = false diff --git a/gym/envs/doom/assets/predict_position.cfg b/gym/envs/doom/assets/predict_position.cfg new file mode 100644 index 000000000..85e63a224 --- /dev/null +++ b/gym/envs/doom/assets/predict_position.cfg @@ -0,0 +1,45 @@ +# Lines starting with # are treated as comments (or with whitespaces+#). +# It doesn't matter if you use capital letters or not. +# It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout. 
+ +# Rewards +living_reward = -0.0001 + +# Rendering options +screen_resolution = RES_640x480 +screen_format = BGR24 +render_hud = true +render_crosshair = false +render_weapon = true +render_decals = false +render_particles = false + +# make episodes start after 16 tics (after unholstering the rocket launcher) (35 tics per seconds) +episode_start_time = 16 + +# Make episodes finish after 700 tics (20 seconds) +episode_timeout = 700 + +# Available buttons +available_buttons = + { + ATTACK + TURN_RIGHT + TURN_LEFT + } + +# Game variables that will be in the state +available_game_variables = + { + KILLCOUNT + HEALTH + ARMOR + SELECTED_WEAPON + SELECTED_WEAPON_AMMO + AMMO2 + } + +mode = PLAYER +doom_skill = 3 +sound_enabled = false + diff --git a/gym/envs/doom/assets/take_cover.cfg b/gym/envs/doom/assets/take_cover.cfg new file mode 100644 index 000000000..425573a6e --- /dev/null +++ b/gym/envs/doom/assets/take_cover.cfg @@ -0,0 +1,43 @@ +# Lines starting with # are treated as comments (or with whitespaces+#). +# It doesn't matter if you use capital letters or not. +# It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout. 
+ +# Rewards +living_reward = 1 + +# Rendering options +screen_resolution = RES_640x480 +screen_format = BGR24 +render_hud = true +render_crosshair = false +render_weapon = false +render_decals = false +render_particles = false + +# make episodes start after 14 tics (after unholstering the gun) (35 tics per second) +episode_start_time = 14 + +# Make episodes finish after 2100 tics (1 minute) +episode_timeout = 2100 + +# Available buttons +available_buttons = + { + MOVE_RIGHT + MOVE_LEFT + } + +# Game variables that will be in the state +available_game_variables = + { + KILLCOUNT + HEALTH + ARMOR + SELECTED_WEAPON + SELECTED_WEAPON_AMMO + AMMO2 + } + +mode = PLAYER +doom_skill = 5 +sound_enabled = false diff --git a/gym/envs/doom/controls.md b/gym/envs/doom/controls.md new file mode 100644 index 000000000..c4faa2f92 --- /dev/null +++ b/gym/envs/doom/controls.md @@ -0,0 +1,63 @@ +### Controls + +Doom is usually played with a full keyboard, and multiple keys can be pressed at once. + +To replicate this, we broke down the possible actions into 40 keys. Each key can be pressed (value of 1), or unpressed (value of 0). + +The deltas (35 to 39) indicate speed of change (values 0 to 10), where higher values will make the player move faster on an axis. + +A list of values is expected to be passed as the action (e.g. [0, 1, 0, 0, 1, 0, ...]). + +Each map is restricted on what actions can be performed, but the mapping is the same across all maps. 
+ +For example, if we want to [0] - ATTACK, [2] - JUMP, and [12] - MOVE_FORWARD at the same time, we would submit the following action: + +```python +action = [0] * 40 +action[0] = 1 +action[2] = 1 +action[12] = 1 +``` + +The full list of possible actions is: + +* [0] - ATTACK - Shoot weapon - Values 0 or 1 +* [1] - USE - Use item - Values 0 or 1 +* [2] - JUMP - Jump - Values 0 or 1 +* [3] - CROUCH - Crouch - Values 0 or 1 +* [4] - TURN180 - Perform 180 turn - Values 0 or 1 +* [5] - RELOAD - Reload weapon - Values 0 or 1 +* [6] - ZOOM - Toggle zoom in/out - Values 0 or 1 +* [7] - SPEED - Run faster - Values 0 or 1 +* [8] - STRAFE - Strafe (moving sideways in a circle) - Values 0 or 1 +* [9] - MOVE_RIGHT - Move to the right - Values 0 or 1 +* [10] - MOVE_LEFT - Move to the left - Values 0 or 1 +* [11] - MOVE_BACKWARD - Move backward - Values 0 or 1 +* [12] - MOVE_FORWARD - Move forward - Values 0 or 1 +* [13] - TURN_RIGHT - Turn right - Values 0 or 1 +* [14] - TURN_LEFT - Turn left - Values 0 or 1 +* [15] - LOOK_UP - Look up - Values 0 or 1 +* [16] - LOOK_DOWN - Look down - Values 0 or 1 +* [17] - LAND - Land (e.g. 
drop from ladder) - Values 0 or 1 +* [18] - SELECT_WEAPON1 - Select weapon 1 - Values 0 or 1 +* [19] - SELECT_WEAPON2 - Select weapon 2 - Values 0 or 1 +* [20] - SELECT_WEAPON3 - Select weapon 3 - Values 0 or 1 +* [21] - SELECT_WEAPON4 - Select weapon 4 - Values 0 or 1 +* [22] - SELECT_WEAPON5 - Select weapon 5 - Values 0 or 1 +* [23] - SELECT_WEAPON6 - Select weapon 6 - Values 0 or 1 +* [24] - SELECT_WEAPON7 - Select weapon 7 - Values 0 or 1 +* [25] - SELECT_WEAPON8 - Select weapon 8 - Values 0 or 1 +* [26] - SELECT_WEAPON9 - Select weapon 9 - Values 0 or 1 +* [27] - SELECT_WEAPON0 - Select weapon 0 - Values 0 or 1 +* [28] - SELECT_NEXT_WEAPON - Select next weapon - Values 0 or 1 +* [29] - SELECT_PREV_WEAPON - Select previous weapon - Values 0 or 1 +* [30] - DROP_SELECTED_WEAPON - Drop selected weapon - Values 0 or 1 +* [31] - ACTIVATE_SELECTED_WEAPON - Activate selected weapon - Values 0 or 1 +* [32] - SELECT_NEXT_ITEM - Select next item - Values 0 or 1 +* [33] - SELECT_PREV_ITEM - Select previous item - Values 0 or 1 +* [34] - DROP_SELECTED_ITEM - Drop selected item - Values 0 or 1 +* [35] - LOOK_UP_DOWN_DELTA - Look up/down - Values 0 to 10 (Higher value increases speed) +* [36] - TURN_LEFT_RIGHT_DELTA - Turn left/right - Values 0 to 10 (Higher value increases speed) +* [37] - MOVE_FORWARD_BACKWARD_DELTA - Move forward/backward - Values 0 to 10 (Higher value increases speed) +* [38] - MOVE_LEFT_RIGHT_DELTA - Move left/right - Values 0 to 10 (Higher value increases speed) +* [39] - MOVE_UP_DOWN_DELTA - Move up/down - Values 0 to 10 (Higher value increases speed) diff --git a/gym/envs/doom/doom_basic.py b/gym/envs/doom/doom_basic.py new file mode 100644 index 000000000..dce719b6f --- /dev/null +++ b/gym/envs/doom/doom_basic.py @@ -0,0 +1,57 @@ +import logging +import os + +import numpy as np + +from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader +from gym import error, spaces +from gym.envs.doom import doom_env + +logger = 
logging.getLogger(__name__)
+
+class DoomBasicEnv(doom_env.DoomEnv):
+    """
+    ------------ Training Mission 1 - Basic ------------
+    A rectangular room with gray walls, ceiling and floor.
+    You start at the center of the longer wall, while a red
+    circular monster appears at a random spot on the opposite wall.
+    Your job is to kill the monster (a single bullet is enough).
+
+    Allowed actions:
+        [0]  - ATTACK           - Shoot weapon - Values 0 or 1
+        [9]  - MOVE_RIGHT       - Move to the right - Values 0 or 1
+        [10] - MOVE_LEFT        - Move to the left - Values 0 or 1
+    Note: see controls.md for details
+
+    Rewards:
+        +101    - Killing the monster
+        -  5    - Missing a shot
+        -  1    - Several times per second - Kill the monster faster!
+
+    Goal: 10 points
+        Kill the monster in 3 secs with 1 shot
+
+    Ends when:
+        - Monster is dead
+        - Player is dead
+        - Timeout (10 seconds - 350 frames)
+    -----------------------------------------------------
+    """
+    def __init__(self):
+        # Frame size and the 3 buttons [0, 9, 10] must match assets/basic.cfg
+        self.screen_height, self.screen_width = 480, 640
+        self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
+        self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
+        self.viewer = None
+        here = os.path.dirname(os.path.abspath(__file__))
+        assets = self.loader = Loader()
+        doom = self.game = DoomGame()
+        doom.load_config(os.path.join(here, 'assets/basic.cfg'))
+        doom.set_vizdoom_path(assets.get_vizdoom_path())
+        doom.set_doom_game_path(assets.get_freedoom_path())
+        doom.set_doom_scenario_path(assets.get_scenario_path('basic.wad'))
+        doom.set_doom_map('map01')
+        doom.set_window_visible(False)
+        # Initialise the game, then start the first episode
+        doom.init()
+        doom.new_episode()
diff --git a/gym/envs/doom/doom_corridor.py b/gym/envs/doom/doom_corridor.py
new file mode 100644
index 000000000..9a3183d3c
--- /dev/null
+++ b/gym/envs/doom/doom_corridor.py
@@ -0,0 +1,58 @@
+import logging
+import os
+
+import numpy as np
+
+from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
+from gym import error, spaces
+from gym.envs.doom import doom_env
+
+logger = logging.getLogger(__name__)
+
+class DoomCorridorEnv(doom_env.DoomEnv):
+    """
+    ------------ Training Mission 2 - Corridor ------------
+    A navigation exercise: a vest lies at the far end of the corridor,
+    guarded by 6 enemies (3 groups of 2). Your goal is to get to the
+    vest as soon as possible, without being killed.
+
+    Allowed actions:
+        [0]  - ATTACK           - Shoot weapon - Values 0 or 1
+        [9]  - MOVE_RIGHT       - Move to the right - Values 0 or 1
+        [10] - MOVE_LEFT        - Move to the left - Values 0 or 1
+        [12] - MOVE_FORWARD     - Move forward - Values 0 or 1
+        [13] - TURN_RIGHT       - Turn right - Values 0 or 1
+        [14] - TURN_LEFT        - Turn left - Values 0 or 1
+    Note: see controls.md for details
+
+    Rewards:
+        + dX    - For getting closer to the vest
+        - dX    - For getting further from the vest
+        -100    - Penalty for being killed
+
+    Goal: 1,270 points
+        Reach the vest (try also killing guards, rather than just running)
+
+    Ends when:
+        - Player touches vest
+        - Player is dead
+        - Timeout (1 minute - 2,100 frames)
+    -----------------------------------------------------
+    """
+    def __init__(self):
+        # Frame size and the 6 buttons [0, 9, 10, 12, 13, 14] must match assets/deadly_corridor.cfg
+        self.screen_height, self.screen_width = 480, 640
+        self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
+        self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 6))
+        self.viewer = None
+        here = os.path.dirname(os.path.abspath(__file__))
+        assets = self.loader = Loader()
+        doom = self.game = DoomGame()
+        doom.load_config(os.path.join(here, 'assets/deadly_corridor.cfg'))
+        doom.set_vizdoom_path(assets.get_vizdoom_path())
+        doom.set_doom_game_path(assets.get_freedoom_path())
+        doom.set_doom_scenario_path(assets.get_scenario_path('deadly_corridor.wad'))
+        doom.set_window_visible(False)
+        # Initialise the game, then start the first episode
+        doom.init()
+        doom.new_episode()
diff --git a/gym/envs/doom/doom_deathmatch.py b/gym/envs/doom/doom_deathmatch.py
new file mode 100644
index 000000000..4b0a44453
--- /dev/null
+++ b/gym/envs/doom/doom_deathmatch.py
@@ -0,0 +1,48 @@
+import logging
+import os
+
+import numpy as np
+
+from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
+from gym import error, spaces
+from gym.envs.doom import doom_env
+
+logger = logging.getLogger(__name__)
+
+class DoomDeathmatchEnv(doom_env.DoomEnv):
+    """
+    ------------ Final Mission - Deathmatch ------------
+    Kill as many monsters as you can while staying alive.
+
+    Allowed actions:
+        ALL (40 buttons: 35 keys with values 0 or 1, then 5 deltas with values 0 to 10)
+    Note: see controls.md for details
+
+    Rewards:
+        +1      - Killing a monster
+
+    Goal: 25 points
+        Kill 25 monsters without being killed
+
+    Ends when:
+        - Player is dead
+        - Timeout (3 minutes - 6,300 frames)
+    -----------------------------------------------------
+    """
+    def __init__(self):
+        # Frame size and the 40 buttons must match assets/deathmatch.cfg
+        self.screen_height, self.screen_width = 480, 640
+        self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
+        # One slot per button listed in the .cfg: 35 on/off keys [0-34], then 5 deltas [35-39]
+        self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 35 + [[0, 10, 0]] * 5))
+        self.viewer = None
+        here = os.path.dirname(os.path.abspath(__file__))
+        assets = self.loader = Loader()
+        doom = self.game = DoomGame()
+        doom.load_config(os.path.join(here, 'assets/deathmatch.cfg'))
+        doom.set_vizdoom_path(assets.get_vizdoom_path())
+        doom.set_doom_game_path(assets.get_freedoom_path())
+        doom.set_doom_scenario_path(assets.get_scenario_path('deathmatch.wad'))
+        doom.set_window_visible(False)
+        doom.init()
+        doom.new_episode()
diff --git
a/gym/envs/doom/doom_defend_center.py b/gym/envs/doom/doom_defend_center.py
new file mode 100644
index 000000000..728e20e16
--- /dev/null
+++ b/gym/envs/doom/doom_defend_center.py
@@ -0,0 +1,57 @@
+import logging
+import os
+
+import numpy as np
+
+from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
+from gym import error, spaces
+from gym.envs.doom import doom_env
+
+logger = logging.getLogger(__name__)
+
+class DoomDefendCenterEnv(doom_env.DoomEnv):
+    """
+    ------------ Training Mission 3 - Defend the Center ------------
+    This map teaches you how to kill and how to stay alive, while
+    keeping an eye on your ammunition level. Only kills are rewarded,
+    so it is up to you to figure out how to stay alive.
+
+    The map is a circle with monsters in the middle. Monsters will
+    respawn with additional health when killed. Kill as many as you can
+    before you run out of ammo.
+
+    Allowed actions:
+        [0]  - ATTACK           - Shoot weapon - Values 0 or 1
+        [13] - TURN_RIGHT       - Turn right - Values 0 or 1
+        [14] - TURN_LEFT        - Turn left - Values 0 or 1
+    Note: see controls.md for details
+
+    Rewards:
+        +  1    - Killing the monster
+        -  1    - Penalty for being killed
+
+    Goal: 10 points
+        Kill 10 monsters (you have 26 ammo)
+
+    Ends when:
+        - Player is dead
+        - Timeout (60 seconds - 2100 frames)
+    -----------------------------------------------------
+    """
+    def __init__(self):
+        # Frame size and the 3 buttons [0, 13, 14] must match assets/defend_the_center.cfg
+        self.screen_height, self.screen_width = 480, 640
+        self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
+        self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
+        self.viewer = None
+        here = os.path.dirname(os.path.abspath(__file__))
+        assets = self.loader = Loader()
+        doom = self.game = DoomGame()
+        doom.load_config(os.path.join(here, 'assets/defend_the_center.cfg'))
+        doom.set_vizdoom_path(assets.get_vizdoom_path())
+        doom.set_doom_game_path(assets.get_freedoom_path())
+        doom.set_doom_scenario_path(assets.get_scenario_path('defend_the_center.wad'))
+        doom.set_window_visible(False)
+        # Initialise the game, then start the first episode
+        doom.init()
+        doom.new_episode()
diff --git a/gym/envs/doom/doom_defend_line.py b/gym/envs/doom/doom_defend_line.py
new file mode 100644
index 000000000..cd9e0ef03
--- /dev/null
+++ b/gym/envs/doom/doom_defend_line.py
@@ -0,0 +1,57 @@
+import logging
+import os
+
+import numpy as np
+
+from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
+from gym import error, spaces
+from gym.envs.doom import doom_env
+
+logger = logging.getLogger(__name__)
+
+class DoomDefendLineEnv(doom_env.DoomEnv):
+    """
+    ------------ Training Mission 4 - Defend the Line ------------
+    This map teaches you how to kill and how to stay alive.
+    Your ammo will automatically replenish. Only kills are rewarded,
+    so it is up to you to figure out how to stay alive.
+
+    The map is a rectangle with monsters in the middle. Monsters will
+    respawn with additional health when killed. Kill as many as you can
+    before they kill you. This map is harder than the previous.
+
+    Allowed actions:
+        [0]  - ATTACK           - Shoot weapon - Values 0 or 1
+        [13] - TURN_RIGHT       - Turn right - Values 0 or 1
+        [14] - TURN_LEFT        - Turn left - Values 0 or 1
+    Note: see controls.md for details
+
+    Rewards:
+        +  1    - Killing the monster
+        -  1    - Penalty for being killed
+
+    Goal: 25 points
+        Kill 25 monsters
+
+    Ends when:
+        - Player is dead
+        - Timeout (60 seconds - 2100 frames)
+    -----------------------------------------------------
+    """
+    def __init__(self):
+        # Frame size and the 3 buttons [0, 13, 14] must match assets/defend_the_line.cfg
+        self.screen_height, self.screen_width = 480, 640
+        self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
+        self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
+        self.viewer = None
+        here = os.path.dirname(os.path.abspath(__file__))
+        assets = self.loader = Loader()
+        doom = self.game = DoomGame()
+        doom.load_config(os.path.join(here, 'assets/defend_the_line.cfg'))
+        doom.set_vizdoom_path(assets.get_vizdoom_path())
+        doom.set_doom_game_path(assets.get_freedoom_path())
+        doom.set_doom_scenario_path(assets.get_scenario_path('defend_the_line.wad'))
+        doom.set_window_visible(False)
+        # Initialise the game, then start the first episode
+        doom.init()
+        doom.new_episode()
diff --git a/gym/envs/doom/doom_env.py b/gym/envs/doom/doom_env.py
new file mode 100644
index 000000000..06c1f7581
--- /dev/null
+++ b/gym/envs/doom/doom_env.py
@@ -0,0 +1,61 @@
+import logging
+from time import sleep
+
+import numpy
+
+import gym
+
+try:
+    import doom_py
+except ImportError as e:
+    raise gym.error.DependencyNotInstalled("{}. 
(HINT: you can install Doom dependencies with 'pip install gym[doom]'.)".format(e))
+
+logger = logging.getLogger(__name__)
+
+class DoomEnv(gym.Env):
+    """Base class of the Doom envs; subclasses set game, viewer and screen_height/screen_width."""
+    metadata = {'render.modes': ['human', 'rgb_array']}
+
+    def _blank_frame(self):
+        """Return the black frame substituted when VizDoom has no image (None once the episode is finished)."""
+        return numpy.zeros((self.screen_height, self.screen_width, 3), dtype=numpy.uint8)
+
+    def _step(self, action):
+        """Perform ``action``; return the (observation, reward, is_finished, info) seen after it."""
+        # action is a np array but DoomGame.make_action expects a list of ints
+        list_action = [int(x) for x in action]
+        try:
+            reward = self.game.make_action(list_action)
+            img = self.game.get_state().image_buffer  # read after acting, not before
+            is_finished = self.game.is_episode_finished()
+        except doom_py.vizdoom.doom_is_not_running_exception:
+            return self._blank_frame(), 0, True, {}
+        return (self._blank_frame() if img is None else img.copy()), reward, is_finished, {}
+
+    def _reset(self):
+        """Start a new episode and return a copy of its first frame."""
+        self.game.new_episode()
+        return self.game.get_state().image_buffer.copy()
+
+    def _render(self, mode='human', close=False):
+        """Show the current frame ('human'), return it ('rgb_array'), or close the viewer."""
+        if close:
+            if self.viewer is not None:
+                self.viewer.close()
+            return
+        try:
+            img = self.game.get_state().image_buffer
+        except doom_py.vizdoom.doom_is_not_running_exception:
+            return  # Doom has been closed
+        img = self._blank_frame() if img is None else img  # keep recorder/viewer going after the end
+        if mode == 'rgb_array':
+            return img
+        if mode == 'human':  # compare by value: 'is' on a str literal is unreliable
+            from gym.envs.classic_control import rendering
+            if self.viewer is None:
+                self.viewer = rendering.SimpleImageViewer()
+            self.viewer.imshow(img)
+            sleep(0.02857)  # 35 fps = 0.02857 sleep between frames
+
+    def _close(self):
+        self.game.close()
diff --git a/gym/envs/doom/doom_health_gathering.py b/gym/envs/doom/doom_health_gathering.py
new file mode 100644
index 000000000..393be0581
--- /dev/null
+++ b/gym/envs/doom/doom_health_gathering.py
@@ -0,0 +1,55 @@
+import logging
+import os
+
+import numpy as np
+
+from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
+from gym import error, spaces
+from gym.envs.doom import doom_env
+
+logger = logging.getLogger(__name__)
+
+class DoomHealthGatheringEnv(doom_env.DoomEnv):
+    """
+    ------------ Training Mission 5 - Health Gathering ------------
+    This map teaches you to survive by collecting health packs.
+    It is a rectangle whose green, acidic floor hurts the player
+    periodically. Medkits are spread around the map, and
+    additional kits will spawn at intervals.
+
+    Allowed actions:
+        [12] - MOVE_FORWARD     - Move forward - Values 0 or 1
+        [13] - TURN_RIGHT       - Turn right - Values 0 or 1
+        [14] - TURN_LEFT        - Turn left - Values 0 or 1
+    Note: see controls.md for details
+
+    Rewards:
+        +  1    - Several times per second - Survive as long as possible
+        -100    - Death penalty
+
+    Goal: 1000 points
+        Stay alive long enough to reach 1,000 points (~ 30 secs)
+
+    Ends when:
+        - Player is dead
+        - Timeout (60 seconds - 2,100 frames)
+    -----------------------------------------------------
+    """
+    def __init__(self):
+        # Frame size and the 3 buttons [12, 13, 14] must match assets/health_gathering.cfg
+        self.screen_height, self.screen_width = 480, 640
+        self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
+        self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
+        self.viewer = None
+        here = os.path.dirname(os.path.abspath(__file__))
+        assets = self.loader = Loader()
+        doom = self.game = DoomGame()
+        doom.load_config(os.path.join(here, 'assets/health_gathering.cfg'))
+        doom.set_vizdoom_path(assets.get_vizdoom_path())
+        doom.set_doom_game_path(assets.get_freedoom_path())
+        doom.set_doom_scenario_path(assets.get_scenario_path('health_gathering.wad'))
+        doom.set_doom_map('map01')
+        doom.set_window_visible(False)
+        # Initialise the game, then start the first episode
+        doom.init()
+        doom.new_episode()
diff --git a/gym/envs/doom/doom_my_way_home.py b/gym/envs/doom/doom_my_way_home.py
new file mode 100644
index 000000000..ef3d6d87c
--- /dev/null
+++ 
class DoomMyWayHomeEnv(doom_env.DoomEnv):
    """
    ------------ Training Mission 6 - My Way Home ------------
    A navigation exercise: the map is a series of interconnected rooms plus
    1 corridor with a dead end. Each room has its own color. A green vest
    lies in one of the rooms (always the same one) and the player must
    find it.

    Allowed actions:
        [12] - MOVE_FORWARD - Move forward - Values 0 or 1
        [13] - TURN_RIGHT   - Turn right   - Values 0 or 1
        [14] - TURN_LEFT    - Turn left    - Values 0 or 1
    Note: see controls.md for details

    Rewards:
        + 1      - Finding the vest
        - 0.0001 - Several times per second - Find the vest quick!

    Goal: 0.50 point
        Find the vest

    Ends when:
        - Vest is found
        - Timeout (2 minutes - 4,200 frames)
    -----------------------------------------------------
    """
    def __init__(self):
        here = os.path.dirname(os.path.abspath(__file__))

        loader = self.loader = Loader()
        game = self.game = DoomGame()

        # Scenario configuration and asset locations.
        game.load_config(os.path.join(here, 'assets/my_way_home.cfg'))
        game.set_vizdoom_path(loader.get_vizdoom_path())
        game.set_doom_game_path(loader.get_freedoom_path())
        game.set_doom_scenario_path(loader.get_scenario_path('my_way_home.wad'))

        # Resolution must match the .cfg file.
        self.screen_height, self.screen_width = 480, 640

        # 3 allowed actions [12, 13, 14] (must match .cfg file)
        button_rows = [[0, 1, 0] for _ in range(3)]
        self.action_space = spaces.HighLow(np.matrix(button_rows))
        frame_shape = (self.screen_height, self.screen_width, 3)
        self.observation_space = spaces.Box(low=0, high=255, shape=frame_shape)

        game.set_window_visible(False)
        self.viewer = None
        game.init()
        game.new_episode()


class DoomPredictPositionEnv(doom_env.DoomEnv):
    """
    ------------ Training Mission 7 - Predict Position ------------
    A rocket-launcher exercise: the map is rectangular with a monster on the
    opposite side, which must be killed with the rocket launcher. A rocket
    takes time to travel across the room, so the player has to predict where
    the monster will be when it arrives.

    Allowed actions:
        [0]  - ATTACK     - Shoot weapon - Values 0 or 1
        [13] - TURN_RIGHT - Turn right   - Values 0 or 1
        [14] - TURN_LEFT  - Turn left    - Values 0 or 1
    Note: see controls.md for details

    Rewards:
        + 1      - Killing the monster
        - 0.0001 - Several times per second - Kill the monster faster!

    Goal: 0.5 point
        Kill the monster

    Hint: Missile launcher takes longer to load. You must wait a good second
          after the game starts before trying to fire it.

    Ends when:
        - Monster is dead
        - Out of missile (you only have one)
        - Timeout (20 seconds - 700 frames)
    -----------------------------------------------------
    """
    def __init__(self):
        here = os.path.dirname(os.path.abspath(__file__))

        loader = self.loader = Loader()
        game = self.game = DoomGame()

        # Scenario configuration and asset locations.
        game.load_config(os.path.join(here, 'assets/predict_position.cfg'))
        game.set_vizdoom_path(loader.get_vizdoom_path())
        game.set_doom_game_path(loader.get_freedoom_path())
        game.set_doom_scenario_path(loader.get_scenario_path('predict_position.wad'))
        game.set_doom_map('map01')

        # Resolution must match the .cfg file.
        self.screen_height, self.screen_width = 480, 640

        # 3 allowed actions [0, 13, 14] (must match .cfg file)
        button_rows = [[0, 1, 0] for _ in range(3)]
        self.action_space = spaces.HighLow(np.matrix(button_rows))
        frame_shape = (self.screen_height, self.screen_width, 3)
        self.observation_space = spaces.Box(low=0, high=255, shape=frame_shape)

        game.set_window_visible(False)
        self.viewer = None
        game.init()
        game.new_episode()


class DoomTakeCoverEnv(doom_env.DoomEnv):
    """
    ------------ Training Mission 8 - Take Cover ------------
    An exercise in avoiding incoming missiles: the map is rectangular, with
    monsters firing missiles and fireballs at the player, who needs to
    survive as long as possible.

    Allowed actions:
        [9]  - MOVE_RIGHT - Move to the right - Values 0 or 1
        [10] - MOVE_LEFT  - Move to the left  - Values 0 or 1
    Note: see controls.md for details

    Rewards:
        + 1 - Several times per second - Survive as long as possible

    Goal: 750 points
        Survive for ~ 20 seconds

    Ends when:
        - Player is dead (one or two fireballs should be enough to kill you)
        - Timeout (60 seconds - 2,100 frames)
    -----------------------------------------------------
    """
    def __init__(self):
        here = os.path.dirname(os.path.abspath(__file__))

        loader = self.loader = Loader()
        game = self.game = DoomGame()

        # Scenario configuration and asset locations.
        game.load_config(os.path.join(here, 'assets/take_cover.cfg'))
        game.set_vizdoom_path(loader.get_vizdoom_path())
        game.set_doom_game_path(loader.get_freedoom_path())
        game.set_doom_scenario_path(loader.get_scenario_path('take_cover.wad'))
        game.set_doom_map('map01')

        # Resolution must match the .cfg file.
        self.screen_height, self.screen_width = 480, 640

        # 2 allowed actions [9, 10] (must match .cfg file)
        button_rows = [[0, 1, 0] for _ in range(2)]
        self.action_space = spaces.HighLow(np.matrix(button_rows))
        frame_shape = (self.screen_height, self.screen_width, 3)
        self.observation_space = spaces.Box(low=0, high=255, shape=frame_shape)

        game.set_window_visible(False)
        self.viewer = None
        game.init()
        game.new_episode()
class HighLow(Space):
    """
    A space described by a matrix of dimensions n x 3, where

    - n is the number of options in the space (e.g. buttons that can be pressed simultaneously)
    - the first column is the minimum value (inclusive) that the option can have
    - the second column is the maximum value (inclusive) that the option can have
    - the third column is the precision (0 = rounded to integer, 2 = rounded to 2 decimals)

    e.g. if the space is composed of ATTACK (values: 0-100), MOVE_LEFT(0-1), MOVE_RIGHT(0,1)
         the space would be [ [0.0, 100.0, 2], [0, 1, 0], [0, 1, 0] ]
    """
    def __init__(self, matrix):
        """
        Build the space from a matrix of shape (n, 3), where the first column
        is the minimum (inclusive), the second column is the maximum
        (inclusive), and the third column is the precision (number of
        decimals to keep).

        e.g. np.matrix([[0, 1, 0], [0, 1, 0], [0.0, 100.0, 2]])
        """
        (num_rows, num_cols) = matrix.shape
        assert num_rows >= 1, "HighLow needs at least one row"
        assert num_cols == 3, "HighLow rows must be [min, max, precision]"
        self.matrix = matrix
        self.num_rows = num_rows

    def _bounds(self):
        """Return the bounds as a plain 2-D float array (n x 3)."""
        # Working on an ndarray keeps column slices 1-D whether the bounds
        # were supplied as np.matrix or np.ndarray.
        return np.asarray(self.matrix, dtype=float)

    def sample(self):
        """Draw one value per row, uniformly in [min, max], rounded to the row's precision.

        Returns a 1-D float array of length n.
        """
        bounds = self._bounds()
        lows, highs, precisions = bounds[:, 0], bounds[:, 1], bounds[:, 2]
        # For each row: round(random * (max - min) + min, precision)
        values = np.multiply(highs - lows, np.random.rand(self.num_rows)) + lows
        return np.array([round(value, int(precision))
                         for value, precision in zip(values, precisions)])

    def contains(self, x):
        """Return True if ``x`` holds one value per row, each within that row's bounds.

        ``x`` may be any array-like of n numbers (list, tuple, 1-D array or
        an n x 1 matrix); anything else is reported as not contained.
        """
        try:
            values = np.asarray(x, dtype=float)
        except (TypeError, ValueError):
            return False
        if values.ndim == 0 or values.shape[0] != self.num_rows or values.size != self.num_rows:
            return False
        values = values.reshape(self.num_rows)
        bounds = self._bounds()
        return bool(np.all((bounds[:, 0] <= values) & (values <= bounds[:, 1])))

    def to_jsonable(self, sample_n):
        """Convert a batch of samples into nested plain lists."""
        return np.array(sample_n).tolist()

    def from_jsonable(self, sample_n):
        """Convert a batch of JSON-decoded samples back into arrays."""
        return [np.asarray(sample) for sample in sample_n]

    @property
    def shape(self):
        """Shape of the bounds matrix, i.e. (n, 3)."""
        return self.matrix.shape

    def __repr__(self):
        return "High-Low" + str(self.shape)

    def __eq__(self, other):
        """Two HighLow spaces are equal when their bounds matrices match exactly."""
        if not isinstance(other, HighLow):
            return False
        # array_equal yields a single boolean (and copes with differing
        # shapes) instead of an element-wise comparison matrix.
        return bool(np.array_equal(np.asarray(self.matrix), np.asarray(other.matrix)))

    def __ne__(self, other):
        return not self.__eq__(other)
Tuple([Discrete(5), Discrete(10)]), Tuple([Discrete(5), Box(np.array([0,0]),np.array([1,5]))]), - Tuple((Discrete(5), Discrete(2), Discrete(2))) + Tuple((Discrete(5), Discrete(2), Discrete(2))), + HighLow(np.matrix([[0, 1, 0], [0, 1, 0], [0.0, 100.0, 2]])), ) def test_roundtripping(space): sample_1 = space.sample() diff --git a/setup.py b/setup.py index 59424d8d9..4950a6eba 100644 --- a/setup.py +++ b/setup.py @@ -22,6 +22,7 @@ setup(name='gym', 'all': ['atari_py>=0.0.17', 'Pillow', 'PyOpenGL', 'pachi-py>=0.0.19', 'box2d-py', + 'doom-py', 'mujoco_py>=0.4.3', 'imageio'], # Environment-specific dependencies. Keep these in sync with @@ -30,8 +31,9 @@ setup(name='gym', 'board_game' : ['pachi-py>=0.0.19'], 'box2d': ['box2d-py'], 'classic_control': ['PyOpenGL'], + 'doom': ['doom-py'], 'mujoco': ['mujoco_py>=0.4.3', 'imageio'], }, - package_data={'gym': ['envs/mujoco/assets/*.xml', 'envs/classic_control/assets/*.png']}, + package_data={'gym': ['envs/mujoco/assets/*.xml', 'envs/classic_control/assets/*.png', 'envs/doom/assets/*.cfg']}, tests_require=['nose2', 'mock'], )