Add Doom envs (group key doom)

This commit is contained in:
Jie Tang
2016-05-17 00:46:03 -07:00
parent 2d45775e38
commit 4950a7e061
27 changed files with 1256 additions and 4 deletions

View File

@@ -254,3 +254,51 @@ register(
'board_size': 9,
},
)
# Doom
# ----------------------------------------
# One registration per ViZDoom scenario; each entry point names a class
# exported from the gym.envs.doom package.
register(id='DoomBasic-v0', entry_point='gym.envs.doom:DoomBasicEnv')
register(id='DoomCorridor-v0', entry_point='gym.envs.doom:DoomCorridorEnv')
register(id='DoomDefendCenter-v0', entry_point='gym.envs.doom:DoomDefendCenterEnv')
register(id='DoomDefendLine-v0', entry_point='gym.envs.doom:DoomDefendLineEnv')
register(id='DoomHealthGathering-v0', entry_point='gym.envs.doom:DoomHealthGatheringEnv')
register(id='DoomMyWayHome-v0', entry_point='gym.envs.doom:DoomMyWayHomeEnv')
register(id='DoomPredictPosition-v0', entry_point='gym.envs.doom:DoomPredictPositionEnv')
register(id='DoomTakeCover-v0', entry_point='gym.envs.doom:DoomTakeCoverEnv')
register(id='DoomDeathmatch-v0', entry_point='gym.envs.doom:DoomDeathmatchEnv')

10
gym/envs/doom/__init__.py Normal file
View File

@@ -0,0 +1,10 @@
from gym.envs.doom.doom_env import DoomEnv
from gym.envs.doom.doom_basic import DoomBasicEnv
from gym.envs.doom.doom_corridor import DoomCorridorEnv
from gym.envs.doom.doom_defend_center import DoomDefendCenterEnv
from gym.envs.doom.doom_defend_line import DoomDefendLineEnv
from gym.envs.doom.doom_health_gathering import DoomHealthGatheringEnv
from gym.envs.doom.doom_my_way_home import DoomMyWayHomeEnv
from gym.envs.doom.doom_predict_position import DoomPredictPositionEnv
from gym.envs.doom.doom_take_cover import DoomTakeCoverEnv
from gym.envs.doom.doom_deathmatch import DoomDeathmatchEnv

View File

@@ -0,0 +1,44 @@
# Lines starting with # are treated as comments (or with whitespaces+#).
# It doesn't matter if you use capital letters or not.
# It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout.
# Rewards (Negative living reward means you lose points for staying alive, and need to finish asap)
living_reward = -1
# Rendering options
screen_resolution = RES_640x480
screen_format = BGR24
render_hud = True
render_crosshair = false
render_weapon = true
render_decals = false
render_particles = false
# make episodes start after 14 tics (after unholstering the gun) (35 tics per second)
episode_start_time = 14
# make episodes finish after 350 tics (10 seconds)
episode_timeout = 350
# Available buttons
available_buttons =
{
ATTACK
MOVE_RIGHT
MOVE_LEFT
}
# Game variables that will be in the state
available_game_variables =
{
KILLCOUNT
HEALTH
ARMOR
SELECTED_WEAPON
SELECTED_WEAPON_AMMO
AMMO2
}
mode = PLAYER
doom_skill = 5
sound_enabled = false

View File

@@ -0,0 +1,47 @@
# Lines starting with # are treated as comments (or with whitespaces+#).
# It doesn't matter if you use capital letters or not.
# It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout.
# Rewards (Large penalty for being killed)
death_penalty = 100
# Rendering options
screen_resolution = RES_640x480
screen_format = BGR24
render_hud = true
render_crosshair = false
render_weapon = true
render_decals = false
render_particles = false
# Episode Start Time (Immediate)
episode_start_time = 0
# Make episodes finish after 2100 tics (1 minute)
episode_timeout = 2100
# Available buttons
available_buttons =
{
ATTACK
MOVE_RIGHT
MOVE_LEFT
MOVE_FORWARD
TURN_RIGHT
TURN_LEFT
}
# Game variables that will be in the state
available_game_variables =
{
KILLCOUNT
HEALTH
ARMOR
SELECTED_WEAPON
SELECTED_WEAPON_AMMO
AMMO2
}
mode = PLAYER
doom_skill = 5
sound_enabled = false

View File

@@ -0,0 +1,92 @@
# Lines starting with # are treated as comments (or with whitespaces+#).
# It doesn't matter if you use capital letters or not.
# It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout.
# Rendering options
screen_resolution = RES_640x480
screen_format = BGR24
render_hud = true
render_crosshair = false
render_weapon = true
render_decals = false
render_particles = false
# Immediate Start
episode_start_time = 0
# Make episodes finish after 3 minutes (6300 ticks)
episode_timeout = 6300
# Available buttons
available_buttons =
{
ATTACK
USE
JUMP
CROUCH
TURN180
RELOAD
ZOOM
SPEED
STRAFE
MOVE_RIGHT
MOVE_LEFT
MOVE_BACKWARD
MOVE_FORWARD
TURN_RIGHT
TURN_LEFT
LOOK_UP
LOOK_DOWN
LAND
SELECT_WEAPON1
SELECT_WEAPON2
SELECT_WEAPON3
SELECT_WEAPON4
SELECT_WEAPON5
SELECT_WEAPON6
SELECT_WEAPON7
SELECT_WEAPON8
SELECT_WEAPON9
SELECT_WEAPON0
SELECT_NEXT_WEAPON
SELECT_PREV_WEAPON
DROP_SELECTED_WEAPON
ACTIVATE_SELECTED_WEAPON
SELECT_NEXT_ITEM
SELECT_PREV_ITEM
DROP_SELECTED_ITEM
LOOK_UP_DOWN_DELTA
TURN_LEFT_RIGHT_DELTA
MOVE_FORWARD_BACKWARD_DELTA
MOVE_LEFT_RIGHT_DELTA
MOVE_UP_DOWN_DELTA
}
# Game variables that will be in the state
available_game_variables =
{
KILLCOUNT
HEALTH
ARMOR
SELECTED_WEAPON
SELECTED_WEAPON_AMMO
AMMO1
AMMO2
AMMO3
AMMO4
AMMO5
AMMO6
}
mode = PLAYER
doom_skill = 5
sound_enabled = false

View File

@@ -0,0 +1,44 @@
# Lines starting with # are treated as comments (or with whitespaces+#).
# It doesn't matter if you use capital letters or not.
# It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout.
# Rewards
death_penalty = 1
# Rendering options
screen_resolution = RES_640x480
screen_format = BGR24
render_hud = true
render_crosshair = false
render_weapon = true
render_decals = false
render_particles = false
# make episodes start after 14 tics (after unholstering the gun) (35 tics per second)
episode_start_time = 14
# Make episodes finish after 2100 tics (1 minute)
episode_timeout = 2100
# Available buttons
available_buttons =
{
ATTACK
TURN_RIGHT
TURN_LEFT
}
# Game variables that will be in the state
available_game_variables =
{
KILLCOUNT
HEALTH
ARMOR
SELECTED_WEAPON
SELECTED_WEAPON_AMMO
AMMO2
}
mode = PLAYER
doom_skill = 3
sound_enabled = false

View File

@@ -0,0 +1,44 @@
# Lines starting with # are treated as comments (or with whitespaces+#).
# It doesn't matter if you use capital letters or not.
# It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout.
# Rewards
death_penalty = 1
# Rendering options
screen_resolution = RES_640x480
screen_format = BGR24
render_hud = true
render_crosshair = false
render_weapon = true
render_decals = false
render_particles = false
# make episodes start after 14 tics (after unholstering the gun) (35 tics per second)
episode_start_time = 14
# Make episodes finish after 2100 tics (1 minute)
episode_timeout = 2100
# Available buttons
available_buttons =
{
ATTACK
TURN_RIGHT
TURN_LEFT
}
# Game variables that will be in the state
available_game_variables =
{
KILLCOUNT
HEALTH
ARMOR
SELECTED_WEAPON
SELECTED_WEAPON_AMMO
AMMO2
}
mode = PLAYER
doom_skill = 5
sound_enabled = false

View File

@@ -0,0 +1,46 @@
# Lines starting with # are treated as comments (or with whitespaces+#).
# It doesn't matter if you use capital letters or not.
# It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout.
# Rewards (Bonus for staying alive, large penalty for being killed)
living_reward = 1
death_penalty = 100
# Rendering options
screen_resolution = RES_640x480
screen_format = BGR24
render_hud = true
render_crosshair = false
render_weapon = false
render_decals = false
render_particles = false
# make episodes start after 14 tics (after unholstering the gun) (35 tics per second)
episode_start_time = 14
# Make episodes finish after 2100 tics (1 minute)
episode_timeout = 2100
# Available buttons
available_buttons =
{
MOVE_FORWARD
TURN_RIGHT
TURN_LEFT
}
# Game variables that will be in the state
available_game_variables =
{
KILLCOUNT
HEALTH
ARMOR
SELECTED_WEAPON
SELECTED_WEAPON_AMMO
AMMO2
}
mode = PLAYER
doom_skill = 5
sound_enabled = false

View File

@@ -0,0 +1,45 @@
# Lines starting with # are treated as comments (or with whitespaces+#).
# It doesn't matter if you use capital letters or not.
# It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout.
# Rewards
living_reward = -0.0001
# Rendering options
screen_resolution = RES_640x480
screen_format = BGR24
render_hud = true
render_crosshair = false
render_weapon = false
render_decals = false
render_particles = false
# make episodes start after 14 tics (after unholstering the gun) (35 tics per second)
episode_start_time = 14
# Make episodes finish after 4200 tics (2 minutes)
episode_timeout = 4200
# Available buttons
available_buttons =
{
MOVE_FORWARD
TURN_RIGHT
TURN_LEFT
}
# Game variables that will be in the state
available_game_variables =
{
KILLCOUNT
HEALTH
ARMOR
SELECTED_WEAPON
SELECTED_WEAPON_AMMO
AMMO0
AMMO2
}
mode = PLAYER
doom_skill = 5
sound_enabled = false

View File

@@ -0,0 +1,45 @@
# Lines starting with # are treated as comments (or with whitespaces+#).
# It doesn't matter if you use capital letters or not.
# It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout.
# Rewards
living_reward = -0.0001
# Rendering options
screen_resolution = RES_640x480
screen_format = BGR24
render_hud = true
render_crosshair = false
render_weapon = true
render_decals = false
render_particles = false
# make episodes start after 16 tics (after unholstering the rocket launcher) (35 tics per second)
episode_start_time = 16
# Make episodes finish after 700 tics (20 seconds)
episode_timeout = 700
# Available buttons
available_buttons =
{
ATTACK
TURN_RIGHT
TURN_LEFT
}
# Game variables that will be in the state
available_game_variables =
{
KILLCOUNT
HEALTH
ARMOR
SELECTED_WEAPON
SELECTED_WEAPON_AMMO
AMMO2
}
mode = PLAYER
doom_skill = 3
sound_enabled = false

View File

@@ -0,0 +1,43 @@
# Lines starting with # are treated as comments (or with whitespaces+#).
# It doesn't matter if you use capital letters or not.
# It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout.
# Rewards
living_reward = 1
# Rendering options
screen_resolution = RES_640x480
screen_format = BGR24
render_hud = true
render_crosshair = false
render_weapon = false
render_decals = false
render_particles = false
# make episodes start after 14 tics (after unholstering the gun) (35 tics per second)
episode_start_time = 14
# Make episodes finish after 2100 tics (1 minute)
episode_timeout = 2100
# Available buttons
available_buttons =
{
MOVE_RIGHT
MOVE_LEFT
}
# Game variables that will be in the state
available_game_variables =
{
KILLCOUNT
HEALTH
ARMOR
SELECTED_WEAPON
SELECTED_WEAPON_AMMO
AMMO2
}
mode = PLAYER
doom_skill = 5
sound_enabled = false

63
gym/envs/doom/controls.md Normal file
View File

@@ -0,0 +1,63 @@
### Controls
Doom is usually played with a full keyboard, and multiple keys can be pressed at once.
To replicate this, we broke down the possible actions into 40 keys. Each key can be pressed (value of 1), or unpressed (value of 0).
The deltas (35 to 39) indicate speed of change (values 0 to 10), where higher values will make the player move faster on an axis.
A list of values is expected to be passed as the action (e.g. [0, 1, 0, 0, 1, 0, .... ]).
Each map is restricted on what actions can be performed, but the mapping is the same across all maps.
For example, if we want to [0] - ATTACK, [2] - JUMP, and [12] - MOVE_FORWARD at the same time, we would submit the following action:
```python
action = [0] * 40
action[0] = 1
action[2] = 1
action[12] = 1
```
The full list of possible actions is:
* [0] - ATTACK - Shoot weapon - Values 0 or 1
* [1] - USE - Use item - Values 0 or 1
* [2] - JUMP - Jump - Values 0 or 1
* [3] - CROUCH - Crouch - Values 0 or 1
* [4] - TURN180 - Perform 180 turn - Values 0 or 1
* [5] - RELOAD - Reload weapon - Values 0 or 1
* [6] - ZOOM - Toggle zoom in/out - Values 0 or 1
* [7] - SPEED - Run faster - Values 0 or 1
* [8] - STRAFE - Strafe (moving sideways in a circle) - Values 0 or 1
* [9] - MOVE_RIGHT - Move to the right - Values 0 or 1
* [10] - MOVE_LEFT - Move to the left - Values 0 or 1
* [11] - MOVE_BACKWARD - Move backward - Values 0 or 1
* [12] - MOVE_FORWARD - Move forward - Values 0 or 1
* [13] - TURN_RIGHT - Turn right - Values 0 or 1
* [14] - TURN_LEFT - Turn left - Values 0 or 1
* [15] - LOOK_UP - Look up - Values 0 or 1
* [16] - LOOK_DOWN - Look down - Values 0 or 1
* [17] - LAND - Land (e.g. drop from ladder) - Values 0 or 1
* [18] - SELECT_WEAPON1 - Select weapon 1 - Values 0 or 1
* [19] - SELECT_WEAPON2 - Select weapon 2 - Values 0 or 1
* [20] - SELECT_WEAPON3 - Select weapon 3 - Values 0 or 1
* [21] - SELECT_WEAPON4 - Select weapon 4 - Values 0 or 1
* [22] - SELECT_WEAPON5 - Select weapon 5 - Values 0 or 1
* [23] - SELECT_WEAPON6 - Select weapon 6 - Values 0 or 1
* [24] - SELECT_WEAPON7 - Select weapon 7 - Values 0 or 1
* [25] - SELECT_WEAPON8 - Select weapon 8 - Values 0 or 1
* [26] - SELECT_WEAPON9 - Select weapon 9 - Values 0 or 1
* [27] - SELECT_WEAPON0 - Select weapon 0 - Values 0 or 1
* [28] - SELECT_NEXT_WEAPON - Select next weapon - Values 0 or 1
* [29] - SELECT_PREV_WEAPON - Select previous weapon - Values 0 or 1
* [30] - DROP_SELECTED_WEAPON - Drop selected weapon - Values 0 or 1
* [31] - ACTIVATE_SELECTED_WEAPON - Activate selected weapon - Values 0 or 1
* [32] - SELECT_NEXT_ITEM - Select next item - Values 0 or 1
* [33] - SELECT_PREV_ITEM - Select previous item - Values 0 or 1
* [34] - DROP_SELECTED_ITEM - Drop selected item - Values 0 or 1
* [35] - LOOK_UP_DOWN_DELTA - Look up/down - Values 0 to 10 (Higher value increases speed)
* [36] - TURN_LEFT_RIGHT_DELTA - Turn left/right - Values 0 to 10 (Higher value increases speed)
* [37] - MOVE_FORWARD_BACKWARD_DELTA - Move forward/backward - Values 0 to 10 (Higher value increases speed)
* [38] - MOVE_LEFT_RIGHT_DELTA - Move left/right - Values 0 to 10 (Higher value increases speed)
* [39] - MOVE_UP_DOWN_DELTA - Move up/down - Values 0 to 10 (Higher value increases speed)

View File

@@ -0,0 +1,57 @@
import logging
import os
import numpy as np
from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
from gym import error, spaces
from gym.envs.doom import doom_env
logger = logging.getLogger(__name__)
class DoomBasicEnv(doom_env.DoomEnv):
    """
    ------------ Training Mission 1 - Basic ------------
    This map is rectangular with gray walls, ceiling and floor.
    You are spawned in the center of the longer wall, and a red
    circular monster is spawned randomly on the opposite wall.
    You need to kill the monster (one bullet is enough).

    Allowed actions:
        [0]  - ATTACK     - Shoot weapon      - Values 0 or 1
        [9]  - MOVE_RIGHT - Move to the right - Values 0 or 1
        [10] - MOVE_LEFT  - Move to the left  - Values 0 or 1
    Note: see controls.md for details

    Rewards:
        +101 - Killing the monster
        -  5 - Missing a shot
        -  1 - Several times per second - Kill the monster faster!

    Goal: 10 points
        Kill the monster in 3 secs with 1 shot

    Ends when:
        - Monster is dead
        - Player is dead
        - Timeout (10 seconds - 350 frames)
    -----------------------------------------------------
    """

    def __init__(self):
        """Load the 'basic' scenario and start its first episode."""
        # The scenario config ships next to this module.
        here = os.path.dirname(os.path.abspath(__file__))
        config_path = os.path.join(here, 'assets/basic.cfg')

        self.loader = Loader()
        self.game = DoomGame()
        game, loader = self.game, self.loader
        game.load_config(config_path)
        game.set_vizdoom_path(loader.get_vizdoom_path())
        game.set_doom_game_path(loader.get_freedoom_path())
        game.set_doom_scenario_path(loader.get_scenario_path('basic.wad'))
        game.set_doom_map('map01')

        # Must match the screen_resolution in the .cfg file
        self.screen_height = 480
        self.screen_width = 640
        frame_shape = (self.screen_height, self.screen_width, 3)

        # 3 allowed actions [0, 9, 10] (must match .cfg file)
        self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
        self.observation_space = spaces.Box(low=0, high=255, shape=frame_shape)

        game.set_window_visible(False)
        self.viewer = None
        game.init()
        game.new_episode()

View File

@@ -0,0 +1,58 @@
import logging
import os
import numpy as np
from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
from gym import error, spaces
from gym.envs.doom import doom_env
logger = logging.getLogger(__name__)
class DoomCorridorEnv(doom_env.DoomEnv):
    """
    ------------ Training Mission 2 - Corridor ------------
    This map is designed to improve your navigation. There is a vest
    at the end of the corridor, with 6 enemies (3 groups of 2). Your goal
    is to get to the vest as soon as possible, without being killed.

    Allowed actions:
        [0]  - ATTACK       - Shoot weapon      - Values 0 or 1
        [9]  - MOVE_RIGHT   - Move to the right - Values 0 or 1
        [10] - MOVE_LEFT    - Move to the left  - Values 0 or 1
        [12] - MOVE_FORWARD - Move forward      - Values 0 or 1
        [13] - TURN_RIGHT   - Turn right        - Values 0 or 1
        [14] - TURN_LEFT    - Turn left         - Values 0 or 1
    Note: see controls.md for details

    Rewards:
        + dX  - For getting closer to the vest
        - dX  - For getting further from the vest
        -100  - Penalty for being killed

    Goal: 1,270 points
        Reach the vest (try also killing guards, rather than just running)

    Ends when:
        - Player touches vest
        - Player is dead
        - Timeout (1 minute - 2,100 frames)
    -----------------------------------------------------
    """

    def __init__(self):
        """Load the 'deadly_corridor' scenario and start its first episode."""
        # The scenario config ships next to this module.
        here = os.path.dirname(os.path.abspath(__file__))
        config_path = os.path.join(here, 'assets/deadly_corridor.cfg')

        self.loader = Loader()
        self.game = DoomGame()
        game, loader = self.game, self.loader
        game.load_config(config_path)
        game.set_vizdoom_path(loader.get_vizdoom_path())
        game.set_doom_game_path(loader.get_freedoom_path())
        game.set_doom_scenario_path(loader.get_scenario_path('deadly_corridor.wad'))

        # Must match the screen_resolution in the .cfg file
        self.screen_height = 480
        self.screen_width = 640
        frame_shape = (self.screen_height, self.screen_width, 3)

        # action indexes are [0, 9, 10, 12, 13, 14]
        self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 6))
        self.observation_space = spaces.Box(low=0, high=255, shape=frame_shape)

        game.set_window_visible(False)
        self.viewer = None
        game.init()
        game.new_episode()

View File

@@ -0,0 +1,48 @@
import logging
import os
import numpy as np
from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
from gym import error, spaces
from gym.envs.doom import doom_env
logger = logging.getLogger(__name__)
class DoomDeathmatchEnv(doom_env.DoomEnv):
    """
    ------------ Final Mission - Deathmatch ------------
    Kill as many monsters as possible without being killed.

    Allowed actions:
        ALL
    Note: see controls.md for details

    Rewards:
        +1 - Killing a monster

    Goal: 25 points
        Kill 25 monsters without being killed

    Ends when:
        - Player is dead
        - Timeout (3 minutes - 6,300 frames)
    -----------------------------------------------------
    """

    def __init__(self):
        """Load the 'deathmatch' scenario and start its first episode."""
        package_directory = os.path.dirname(os.path.abspath(__file__))
        self.loader = Loader()
        self.game = DoomGame()
        self.game.load_config(os.path.join(package_directory, 'assets/deathmatch.cfg'))
        self.game.set_vizdoom_path(self.loader.get_vizdoom_path())
        self.game.set_doom_game_path(self.loader.get_freedoom_path())
        self.game.set_doom_scenario_path(self.loader.get_scenario_path('deathmatch.wad'))
        self.screen_height = 480  # Must match .cfg file
        self.screen_width = 640  # Must match .cfg file
        # 40 allowed actions (must match .cfg file): deathmatch.cfg lists
        # 35 on/off buttons (indexes 0-34) followed by 5 delta buttons
        # (indexes 35-39, values 0 to 10), as documented in controls.md.
        self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 35 + [[0, 10, 0]] * 5))
        self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))
        self.game.set_window_visible(False)
        self.viewer = None
        self.game.init()
        self.game.new_episode()

View File

@@ -0,0 +1,57 @@
import logging
import os
import numpy as np
from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
from gym import error, spaces
from gym.envs.doom import doom_env
logger = logging.getLogger(__name__)
class DoomDefendCenterEnv(doom_env.DoomEnv):
    """
    ------------ Training Mission 3 - Defend the Center ------------
    This map is designed to teach you how to kill and how to stay alive.
    You will also need to keep an eye on your ammunition level. You are only
    rewarded for kills, so figure out how to stay alive.

    The map is a circle with monsters in the middle. Monsters will
    respawn with additional health when killed. Kill as many as you can
    before you run out of ammo.

    Allowed actions:
        [0]  - ATTACK     - Shoot weapon - Values 0 or 1
        [13] - TURN_RIGHT - Turn right   - Values 0 or 1
        [14] - TURN_LEFT  - Turn left    - Values 0 or 1
    Note: see controls.md for details

    Rewards:
        + 1 - Killing the monster
        - 1 - Penalty for being killed

    Goal: 10 points
        Kill 10 monsters (you have 26 ammo)

    Ends when:
        - Player is dead
        - Timeout (60 seconds - 2100 frames)
    -----------------------------------------------------
    """

    def __init__(self):
        """Load the 'defend_the_center' scenario and start its first episode."""
        # The scenario config ships next to this module.
        here = os.path.dirname(os.path.abspath(__file__))
        config_path = os.path.join(here, 'assets/defend_the_center.cfg')

        self.loader = Loader()
        self.game = DoomGame()
        game, loader = self.game, self.loader
        game.load_config(config_path)
        game.set_vizdoom_path(loader.get_vizdoom_path())
        game.set_doom_game_path(loader.get_freedoom_path())
        game.set_doom_scenario_path(loader.get_scenario_path('defend_the_center.wad'))

        # Must match the screen_resolution in the .cfg file
        self.screen_height = 480
        self.screen_width = 640
        frame_shape = (self.screen_height, self.screen_width, 3)

        # 3 allowed actions [0, 13, 14] (must match .cfg file)
        self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
        self.observation_space = spaces.Box(low=0, high=255, shape=frame_shape)

        game.set_window_visible(False)
        self.viewer = None
        game.init()
        game.new_episode()

View File

@@ -0,0 +1,57 @@
import logging
import os
import numpy as np
from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
from gym import error, spaces
from gym.envs.doom import doom_env
logger = logging.getLogger(__name__)
class DoomDefendLineEnv(doom_env.DoomEnv):
    """
    ------------ Training Mission 4 - Defend the Line ------------
    This map is designed to teach you how to kill and how to stay alive.
    Your ammo will automatically replenish. You are only rewarded for kills,
    so figure out how to stay alive.

    The map is a rectangle with monsters in the middle. Monsters will
    respawn with additional health when killed. Kill as many as you can
    before they kill you. This map is harder than the previous.

    Allowed actions:
        [0]  - ATTACK     - Shoot weapon - Values 0 or 1
        [13] - TURN_RIGHT - Turn right   - Values 0 or 1
        [14] - TURN_LEFT  - Turn left    - Values 0 or 1
    Note: see controls.md for details

    Rewards:
        + 1 - Killing the monster
        - 1 - Penalty for being killed

    Goal: 25 points
        Kill 25 monsters

    Ends when:
        - Player is dead
        - Timeout (60 seconds - 2100 frames)
    -----------------------------------------------------
    """

    def __init__(self):
        """Load the 'defend_the_line' scenario and start its first episode."""
        # The scenario config ships next to this module.
        here = os.path.dirname(os.path.abspath(__file__))
        config_path = os.path.join(here, 'assets/defend_the_line.cfg')

        self.loader = Loader()
        self.game = DoomGame()
        game, loader = self.game, self.loader
        game.load_config(config_path)
        game.set_vizdoom_path(loader.get_vizdoom_path())
        game.set_doom_game_path(loader.get_freedoom_path())
        game.set_doom_scenario_path(loader.get_scenario_path('defend_the_line.wad'))

        # Must match the screen_resolution in the .cfg file
        self.screen_height = 480
        self.screen_width = 640
        frame_shape = (self.screen_height, self.screen_width, 3)

        # 3 allowed actions [0, 13, 14] (must match .cfg file)
        self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
        self.observation_space = spaces.Box(low=0, high=255, shape=frame_shape)

        game.set_window_visible(False)
        self.viewer = None
        game.init()
        game.new_episode()

61
gym/envs/doom/doom_env.py Normal file
View File

@@ -0,0 +1,61 @@
import logging
from time import sleep
import numpy
import gym
try:
    import doom_py
except ImportError as e:
    # Re-raise as gym's dependency error so the message tells the user how to
    # install the optional Doom dependencies.
    raise gym.error.DependencyNotInstalled("{}. (HINT: you can install Doom dependencies with 'pip install gym[doom]'.)".format(e))
logger = logging.getLogger(__name__)
class DoomEnv(gym.Env):
    """Shared step/reset/render/close logic for the Doom environments.

    Subclasses are expected to set, before any method here is called:
    ``self.game`` (the DoomGame instance), ``self.viewer`` (initially
    ``None``), and ``self.screen_height`` / ``self.screen_width``.
    """

    metadata = {'render.modes': ['human', 'rgb_array']}

    def _step(self, action):
        """Apply ``action`` and return ``(observation, reward, done, info)``.

        The observation is a copy of the image buffer of the state read
        just before the action is applied. If Doom is no longer running,
        ``([], 0, True, {})`` is returned.
        """
        # action is a np array but DoomGame.make_action expects a list of ints
        list_action = [int(x) for x in action]
        try:
            state = self.game.get_state()
            reward = self.game.make_action(list_action)
            is_finished = bool(self.game.is_episode_finished())
            return state.image_buffer.copy(), reward, is_finished, {}
        except doom_py.vizdoom.doom_is_not_running_exception:
            return [], 0, True, {}

    def _reset(self):
        """Start a new episode and return a copy of its first image buffer."""
        self.game.new_episode()
        return self.game.get_state().image_buffer.copy()

    def _render(self, mode='human', close=False):
        """Render the current frame.

        With ``close=True`` the viewer (if any) is closed and nothing is
        rendered. In ``'rgb_array'`` mode the current image is returned;
        in ``'human'`` mode it is shown in a SimpleImageViewer window.
        """
        if close:
            if self.viewer is not None:
                self.viewer.close()
                # Drop the reference so a later render creates a fresh
                # viewer instead of reusing the closed one.
                self.viewer = None
            return
        try:
            state = self.game.get_state()
            img = state.image_buffer
            # VizDoom returns None if the episode is finished, let's make it
            # an empty image so the recorder (and the viewer) don't stop
            if img is None:
                img = numpy.zeros((self.screen_height, self.screen_width, 3), dtype=numpy.uint8)
            if mode == 'rgb_array':
                return img
            elif mode == 'human':  # compare by value; `is` tests identity
                from gym.envs.classic_control import rendering
                if self.viewer is None:
                    self.viewer = rendering.SimpleImageViewer()
                self.viewer.imshow(img)
                sleep(0.02857)  # 35 fps = 0.02857 sleep between frames
        except doom_py.vizdoom.doom_is_not_running_exception:
            pass  # Doom has been closed

    def _close(self):
        """Shut down the underlying Doom game."""
        self.game.close()

View File

@@ -0,0 +1,55 @@
import logging
import os
import numpy as np
from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
from gym import error, spaces
from gym.envs.doom import doom_env
logger = logging.getLogger(__name__)
class DoomHealthGatheringEnv(doom_env.DoomEnv):
    """
    ------------ Training Mission 5 - Health Gathering ------------
    This map is a guide on how to survive by collecting health packs.
    It is a rectangle with green, acidic floor which hurts the player
    periodically. There are also medkits spread around the map, and
    additional kits will spawn at interval.

    Allowed actions:
        [12] - MOVE_FORWARD - Move forward - Values 0 or 1
        [13] - TURN_RIGHT   - Turn right   - Values 0 or 1
        [14] - TURN_LEFT    - Turn left    - Values 0 or 1
    Note: see controls.md for details

    Rewards:
        +  1 - Several times per second - Survive as long as possible
        -100 - Death penalty

    Goal: 1000 points
        Stay alive long enough to reach 1,000 points (~ 30 secs)

    Ends when:
        - Player is dead
        - Timeout (60 seconds - 2,100 frames)
    -----------------------------------------------------
    """

    def __init__(self):
        """Load the 'health_gathering' scenario and start its first episode."""
        # The scenario config ships next to this module.
        here = os.path.dirname(os.path.abspath(__file__))
        config_path = os.path.join(here, 'assets/health_gathering.cfg')

        self.loader = Loader()
        self.game = DoomGame()
        game, loader = self.game, self.loader
        game.load_config(config_path)
        game.set_vizdoom_path(loader.get_vizdoom_path())
        game.set_doom_game_path(loader.get_freedoom_path())
        game.set_doom_scenario_path(loader.get_scenario_path('health_gathering.wad'))
        game.set_doom_map('map01')

        # Must match the screen_resolution in the .cfg file
        self.screen_height = 480
        self.screen_width = 640
        frame_shape = (self.screen_height, self.screen_width, 3)

        # 3 allowed actions [12, 13, 14] (must match .cfg file)
        self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
        self.observation_space = spaces.Box(low=0, high=255, shape=frame_shape)

        game.set_window_visible(False)
        self.viewer = None
        game.init()
        game.new_episode()

View File

@@ -0,0 +1,54 @@
import logging
import os
import numpy as np
from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
from gym import error, spaces
from gym.envs.doom import doom_env
logger = logging.getLogger(__name__)
class DoomMyWayHomeEnv(doom_env.DoomEnv):
    """
    ------------ Training Mission 6 - My Way Home ------------
    This map is designed to improve navigational skills. It is a series of
    interconnected rooms and 1 corridor with a dead end. Each room
    has a separate color. There is a green vest in one of the rooms.
    The vest is always in the same room. Player must find the vest.

    Allowed actions:
        [12] - MOVE_FORWARD - Move forward - Values 0 or 1
        [13] - TURN_RIGHT   - Turn right   - Values 0 or 1
        [14] - TURN_LEFT    - Turn left    - Values 0 or 1
    Note: see controls.md for details

    Rewards:
        + 1      - Finding the vest
        - 0.0001 - Several times per second - Find the vest quick!

    Goal: 0.50 point
        Find the vest

    Ends when:
        - Vest is found
        - Timeout (2 minutes - 4,200 frames)
    -----------------------------------------------------
    """

    def __init__(self):
        """Load the 'my_way_home' scenario and start its first episode."""
        # The scenario config ships next to this module.
        here = os.path.dirname(os.path.abspath(__file__))
        config_path = os.path.join(here, 'assets/my_way_home.cfg')

        self.loader = Loader()
        self.game = DoomGame()
        game, loader = self.game, self.loader
        game.load_config(config_path)
        game.set_vizdoom_path(loader.get_vizdoom_path())
        game.set_doom_game_path(loader.get_freedoom_path())
        game.set_doom_scenario_path(loader.get_scenario_path('my_way_home.wad'))

        # Must match the screen_resolution in the .cfg file
        self.screen_height = 480
        self.screen_width = 640
        frame_shape = (self.screen_height, self.screen_width, 3)

        # 3 allowed actions [12, 13, 14] (must match .cfg file)
        self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
        self.observation_space = spaces.Box(low=0, high=255, shape=frame_shape)

        game.set_window_visible(False)
        self.viewer = None
        game.init()
        game.new_episode()

View File

@@ -0,0 +1,60 @@
import logging
import os
import numpy as np
from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
from gym import error, spaces
from gym.envs.doom import doom_env
logger = logging.getLogger(__name__)
class DoomPredictPositionEnv(doom_env.DoomEnv):
    """
    ------------ Training Mission 7 - Predict Position ------------
    This map is designed to train you on using a rocket launcher.
    It is a rectangular map with a monster on the opposite side. You need
    to use your rocket launcher to kill it. The rocket adds a delay between
    the moment it is fired and the moment it reaches the other side of the room.
    You need to predict the position of the monster to kill it.

    Allowed actions:
        [0]  - ATTACK     - Shoot weapon - Values 0 or 1
        [13] - TURN_RIGHT - Turn right   - Values 0 or 1
        [14] - TURN_LEFT  - Turn left    - Values 0 or 1
    Note: see controls.md for details

    Rewards:
        + 1      - Killing the monster
        - 0.0001 - Several times per second - Kill the monster faster!

    Goal: 0.5 point
        Kill the monster

    Hint: Missile launcher takes longer to load. You must wait a good second
    after the game starts before trying to fire it.

    Ends when:
        - Monster is dead
        - Out of missile (you only have one)
        - Timeout (20 seconds - 700 frames)
    -----------------------------------------------------
    """

    def __init__(self):
        """Load the 'predict_position' scenario and start its first episode."""
        # The scenario config ships next to this module.
        here = os.path.dirname(os.path.abspath(__file__))
        config_path = os.path.join(here, 'assets/predict_position.cfg')

        self.loader = Loader()
        self.game = DoomGame()
        game, loader = self.game, self.loader
        game.load_config(config_path)
        game.set_vizdoom_path(loader.get_vizdoom_path())
        game.set_doom_game_path(loader.get_freedoom_path())
        game.set_doom_scenario_path(loader.get_scenario_path('predict_position.wad'))
        game.set_doom_map('map01')

        # Must match the screen_resolution in the .cfg file
        self.screen_height = 480
        self.screen_width = 640
        frame_shape = (self.screen_height, self.screen_width, 3)

        # 3 allowed actions [0, 13, 14] (must match .cfg file)
        self.action_space = spaces.HighLow(np.matrix([[0, 1, 0]] * 3))
        self.observation_space = spaces.Box(low=0, high=255, shape=frame_shape)

        game.set_window_visible(False)
        self.viewer = None
        game.init()
        game.new_episode()

View File

@@ -0,0 +1,52 @@
import logging
import os
import numpy as np
from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
from gym import error, spaces
from gym.envs.doom import doom_env
logger = logging.getLogger(__name__)
class DoomTakeCoverEnv(doom_env.DoomEnv):
    """
    ------------ Training Mission 8 - Take Cover ------------
    This map trains you to cope with the damage of incoming missiles.
    It is a rectangular map in which monsters fire missiles and fireballs
    at you. You need to survive as long as possible.

    Allowed actions:
        [9]  - MOVE_RIGHT - Move to the right - Values 0 or 1
        [10] - MOVE_LEFT  - Move to the left  - Values 0 or 1
    Note: see controls.md for details

    Rewards:
        + 1 - Several times per second - Survive as long as possible

    Goal: 750 points
        Survive for ~ 20 seconds

    Ends when:
        - Player is dead (one or two fireballs should be enough to kill you)
        - Timeout (60 seconds - 2,100 frames)
    -----------------------------------------------------
    """

    def __init__(self):
        """
        Configure and start the ViZDoom game for the 'take cover' scenario.

        Loads the scenario's .cfg file shipped next to this module, points
        the game at the paths reported by the doom_py Loader, declares the
        action and observation spaces, then initializes the game and starts
        the first episode.
        """
        here = os.path.dirname(os.path.abspath(__file__))
        config_file = os.path.join(here, 'assets/take_cover.cfg')

        self.loader = Loader()
        self.game = DoomGame()
        loader, game = self.loader, self.game

        # The configuration is loaded first; the explicit paths are set afterwards.
        game.load_config(config_file)
        game.set_vizdoom_path(loader.get_vizdoom_path())
        game.set_doom_game_path(loader.get_freedoom_path())
        game.set_doom_scenario_path(loader.get_scenario_path('take_cover.wad'))
        game.set_doom_map('map01')

        # Screen size: must match .cfg file
        self.screen_height, self.screen_width = 480, 640

        # 2 allowed actions [9, 10] (must match .cfg file); each row is
        # [min, max, precision] for one button.
        button_rows = [[0, 1, 0], [0, 1, 0]]
        self.action_space = spaces.HighLow(np.matrix(button_rows))

        # RGB frame: height x width x 3 channels, values 0..255.
        frame_shape = (self.screen_height, self.screen_width, 3)
        self.observation_space = spaces.Box(low=0, high=255, shape=frame_shape)

        game.set_window_visible(False)
        self.viewer = None

        game.init()
        game.new_episode()

View File

@@ -57,6 +57,12 @@ add_group(
description='Simple text environments to get you started.'
)
# Scoreboard group that the Doom tasks are registered under (group='doom').
add_group(
    id='doom',
    name='Doom',
    description='Doom environments based on VizDoom.',
)
# classic control
add_task(
@@ -456,6 +462,61 @@ The game is simulated through the Arcade Learning Environment [ALE]_, which uses
""",
)
# doom
# One scoreboard entry per Doom environment id; all of them belong to the
# 'doom' group and are flagged experimental.
add_task(id='DoomBasic-v0', group='doom', experimental=True)
add_task(id='DoomCorridor-v0', group='doom', experimental=True)
add_task(id='DoomDefendCenter-v0', group='doom', experimental=True)
add_task(id='DoomDefendLine-v0', group='doom', experimental=True)
add_task(id='DoomHealthGathering-v0', group='doom', experimental=True)
add_task(id='DoomMyWayHome-v0', group='doom', experimental=True)
add_task(id='DoomPredictPosition-v0', group='doom', experimental=True)
add_task(id='DoomTakeCover-v0', group='doom', experimental=True)
add_task(id='DoomDeathmatch-v0', group='doom', experimental=True)
# Deprecated
# MuJoCo

View File

@@ -1,5 +1,6 @@
from .box import Box
from .discrete import Discrete
from .high_low import HighLow
from .tuple_space import Tuple
__all__ = ["Box", "Discrete", "Tuple"]
__all__ = ["Box", "Discrete", "HighLow", "Tuple"]

57
gym/spaces/high_low.py Normal file
View File

@@ -0,0 +1,57 @@
from gym import Space
import numpy as np
class HighLow(Space):
    """
    A space of n independent bounded options, described by a matrix of shape (n, 3), where
    - n is the number of options in the space (e.g. buttons that can be pressed simultaneously)
    - the first column is the minimum value (inclusive) that the option can have
    - the second column is the maximum value (inclusive) that the option can have
    - the third column is the precision (0 = rounded to integer, 2 = rounded to 2 decimals)

    e.g. if the space is composed of ATTACK (values: 0-100), MOVE_LEFT(0-1), MOVE_RIGHT(0,1)
    the space would be [ [0.0, 100.0, 2], [0, 1, 0], [0, 1, 0] ]
    """

    def __init__(self, matrix):
        """
        Args:
            matrix: a 2-D numpy matrix/array of shape (n, 3), where the first column is the
                minimum (inclusive), the second column is the maximum (inclusive), and the
                third column is the precision (number of decimals to keep),
                e.g. np.matrix([[0, 1, 0], [0, 1, 0], [0.0, 100.0, 2]])

        Raises:
            AssertionError: if the matrix has no rows or does not have exactly 3 columns.
        """
        (num_rows, num_cols) = matrix.shape
        assert num_rows >= 1
        assert num_cols == 3
        self.matrix = matrix
        self.num_rows = num_rows

    def sample(self):
        """
        Draw one random value per option.

        Returns:
            A 1-D float array of length n; entry i is uniformly drawn between the
            option's min and max and rounded to the option's precision.
        """
        # Work on a plain float ndarray so the arithmetic is element-wise whether
        # self.matrix is an np.matrix or a regular 2-D array.
        bounds = np.asarray(self.matrix, dtype=float)
        lows = bounds[:, 0]
        spans = bounds[:, 1] - lows
        raw = lows + spans * np.random.rand(self.num_rows)
        # Rounding is per option because each row carries its own precision.
        return np.array([round(value, int(precision))
                         for value, precision in zip(raw, bounds[:, 2])])

    def contains(self, x):
        """
        Check that x has one entry per option and that every entry lies within
        the option's [min, max] range. The precision column is not checked.

        Args:
            x: array-like with n entries (lists and tuples are accepted).

        Returns:
            True if x belongs to the space, False otherwise.
        """
        # Accept plain sequences (e.g. the output of to_jsonable), not only ndarrays.
        x = np.asarray(x)
        if x.ndim == 0 or x.shape[0] != self.num_rows:
            return False
        for i in range(self.num_rows):
            if not (self.matrix[i, 0] <= x[i] <= self.matrix[i, 1]):
                return False
        return True

    def to_jsonable(self, sample_n):
        """Convert a batch of samples into nested lists suitable for JSON encoding."""
        return np.array(sample_n).tolist()

    def from_jsonable(self, sample_n):
        """Convert a JSON-decoded batch back into a list of numpy arrays."""
        return [np.asarray(sample) for sample in sample_n]

    @property
    def shape(self):
        """Shape of the underlying definition matrix, i.e. (n, 3)."""
        return self.matrix.shape

    def __repr__(self):
        return "High-Low" + str(self.shape)

    def __eq__(self, other):
        """
        Two HighLow spaces are equal when their definition matrices have the same
        shape and the same entries. Always returns a plain truth value (an
        element-wise comparison result would be ambiguous in a boolean context).
        """
        if not isinstance(other, HighLow):
            return False
        return bool(np.array_equal(self.matrix, other.matrix))

    def __ne__(self, other):
        # Needed explicitly on Python 2, where != does not fall back to __eq__.
        return not self.__eq__(other)

View File

@@ -3,12 +3,13 @@ import json # note: ujson fails this test due to float equality
import numpy as np
from nose2 import tools
from gym.spaces import Tuple, Box, Discrete
from gym.spaces import Tuple, Box, Discrete, HighLow
@tools.params(Discrete(3),
Tuple([Discrete(5), Discrete(10)]),
Tuple([Discrete(5), Box(np.array([0,0]),np.array([1,5]))]),
Tuple((Discrete(5), Discrete(2), Discrete(2)))
Tuple((Discrete(5), Discrete(2), Discrete(2))),
HighLow(np.matrix([[0, 1, 0], [0, 1, 0], [0.0, 100.0, 2]])),
)
def test_roundtripping(space):
sample_1 = space.sample()

View File

@@ -22,6 +22,7 @@ setup(name='gym',
'all': ['atari_py>=0.0.17', 'Pillow', 'PyOpenGL',
'pachi-py>=0.0.19',
'box2d-py',
'doom-py',
'mujoco_py>=0.4.3', 'imageio'],
# Environment-specific dependencies. Keep these in sync with
@@ -30,8 +31,9 @@ setup(name='gym',
'board_game' : ['pachi-py>=0.0.19'],
'box2d': ['box2d-py'],
'classic_control': ['PyOpenGL'],
'doom': ['doom-py'],
'mujoco': ['mujoco_py>=0.4.3', 'imageio'],
},
package_data={'gym': ['envs/mujoco/assets/*.xml', 'envs/classic_control/assets/*.png']},
package_data={'gym': ['envs/mujoco/assets/*.xml', 'envs/classic_control/assets/*.png', 'envs/doom/assets/*.cfg']},
tests_require=['nose2', 'mock'],
)