Moved doom to gym_doom (#319)

This commit is contained in:
Philip Paquette
2016-08-24 11:24:10 -04:00
committed by Greg Brockman
parent 3d29fb541b
commit 4b9984c00e
28 changed files with 3 additions and 2046 deletions

View File

@@ -212,18 +212,6 @@ These are a variety of classic control tasks, which would appear in a typical re
env.reset() env.reset()
env.render() env.render()
Doom
---------------
These tasks take place inside a Doom game (via the VizDoom project). If you didn't do the full install, you will need to run ``pip install -e '.[doom]'``. You can get started with them via:
.. code:: python
import gym
env = gym.make('DoomBasic-v0')
env.reset()
env.render()
MuJoCo MuJoCo
------ ------

View File

@@ -311,84 +311,6 @@ register(
}, },
) )
# Doom
# ----------------------------------------
# Meta environment backed by MetaDoomEnv. The kwargs below use the same names
# as the MetaDoomEnv constructor parameters (average_over, passing_grade,
# min_tries_for_avg) -- presumably forwarded to it by the registry; confirm.
register(
id='meta-Doom-v0',
entry_point='gym.envs.doom:MetaDoomEnv',
timestep_limit=999999,
reward_threshold=9000.0,
kwargs={
'average_over': 3,
'passing_grade': 600,
'min_tries_for_avg': 3
},
)
# One registration per individual mission. Every mission shares the same
# timestep_limit (10000); only the id, entry point and reward_threshold differ.
# Mission 1 - Basic
register(
id='DoomBasic-v0',
entry_point='gym.envs.doom:DoomBasicEnv',
timestep_limit=10000,
reward_threshold=10.0,
)
# Mission 2 - Corridor
register(
id='DoomCorridor-v0',
entry_point='gym.envs.doom:DoomCorridorEnv',
timestep_limit=10000,
reward_threshold=1000.0,
)
# Mission 3 - Defend the Center
register(
id='DoomDefendCenter-v0',
entry_point='gym.envs.doom:DoomDefendCenterEnv',
timestep_limit=10000,
reward_threshold=10.0,
)
# Mission 4 - Defend the Line
register(
id='DoomDefendLine-v0',
entry_point='gym.envs.doom:DoomDefendLineEnv',
timestep_limit=10000,
reward_threshold=15.0,
)
# Health Gathering
register(
id='DoomHealthGathering-v0',
entry_point='gym.envs.doom:DoomHealthGatheringEnv',
timestep_limit=10000,
reward_threshold=1000.0,
)
# My Way Home
register(
id='DoomMyWayHome-v0',
entry_point='gym.envs.doom:DoomMyWayHomeEnv',
timestep_limit=10000,
reward_threshold=0.5,
)
# Predict Position
register(
id='DoomPredictPosition-v0',
entry_point='gym.envs.doom:DoomPredictPositionEnv',
timestep_limit=10000,
reward_threshold=0.5,
)
# Take Cover
register(
id='DoomTakeCover-v0',
entry_point='gym.envs.doom:DoomTakeCoverEnv',
timestep_limit=10000,
reward_threshold=750.0,
)
# Final mission - Deathmatch
register(
id='DoomDeathmatch-v0',
entry_point='gym.envs.doom:DoomDeathmatchEnv',
timestep_limit=10000,
reward_threshold=20.0,
)
# Debugging # Debugging
# ---------------------------------------- # ----------------------------------------

View File

@@ -1,10 +0,0 @@
from gym.envs.doom.doom_env import DoomEnv, MetaDoomEnv
from gym.envs.doom.doom_basic import DoomBasicEnv
from gym.envs.doom.doom_corridor import DoomCorridorEnv
from gym.envs.doom.doom_defend_center import DoomDefendCenterEnv
from gym.envs.doom.doom_defend_line import DoomDefendLineEnv
from gym.envs.doom.doom_health_gathering import DoomHealthGatheringEnv
from gym.envs.doom.doom_my_way_home import DoomMyWayHomeEnv
from gym.envs.doom.doom_predict_position import DoomPredictPositionEnv
from gym.envs.doom.doom_take_cover import DoomTakeCoverEnv
from gym.envs.doom.doom_deathmatch import DoomDeathmatchEnv

View File

@@ -1,59 +0,0 @@
# Lines starting with # are treated as comments (or with whitespaces+#).
# It doesn't matter if you use capital letters or not.
# It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout.
# Rewards (Negative living reward means you lose points for staying alive, and need to finish asap)
living_reward = -1
# Rendering options
screen_format = BGR24
render_hud = True
render_crosshair = false
render_weapon = true
render_decals = false
render_particles = false
# make episodes start after 14 tics (after unholstering the gun) (35 tics per second)
episode_start_time = 14
# make episodes finish after 350 tics (10 seconds)
episode_timeout = 350
# Available buttons
available_buttons =
{
ATTACK
MOVE_RIGHT
MOVE_LEFT
}
# Game variables that will be in the state
available_game_variables =
{
KILLCOUNT
ITEMCOUNT
SECRETCOUNT
FRAGCOUNT
HEALTH
ARMOR
DEAD
ON_GROUND
ATTACK_READY
ALTATTACK_READY
SELECTED_WEAPON
SELECTED_WEAPON_AMMO
AMMO1
AMMO2
AMMO3
AMMO4
AMMO5
AMMO6
AMMO7
AMMO8
AMMO9
AMMO0
}
sound_enabled = false

View File

@@ -1,62 +0,0 @@
# Lines starting with # are treated as comments (or with whitespaces+#).
# It doesn't matter if you use capital letters or not.
# It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout.
# Rewards (Large penalty for being killed)
death_penalty = 100
# Rendering options
screen_format = BGR24
render_hud = true
render_crosshair = false
render_weapon = true
render_decals = false
render_particles = false
# Episode Start Time (Immediate)
episode_start_time = 0
# Make episodes finish after 2100 tics (1 minute)
episode_timeout = 2100
# Available buttons
available_buttons =
{
ATTACK
MOVE_RIGHT
MOVE_LEFT
MOVE_FORWARD
TURN_RIGHT
TURN_LEFT
}
# Game variables that will be in the state
available_game_variables =
{
KILLCOUNT
ITEMCOUNT
SECRETCOUNT
FRAGCOUNT
HEALTH
ARMOR
DEAD
ON_GROUND
ATTACK_READY
ALTATTACK_READY
SELECTED_WEAPON
SELECTED_WEAPON_AMMO
AMMO1
AMMO2
AMMO3
AMMO4
AMMO5
AMMO6
AMMO7
AMMO8
AMMO9
AMMO0
}
sound_enabled = false

View File

@@ -1,103 +0,0 @@
# Lines starting with # are treated as comments (or with whitespaces+#).
# It doesn't matter if you use capital letters or not.
# It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout.
# Rendering options
screen_format = BGR24
render_hud = true
render_crosshair = false
render_weapon = true
render_decals = false
render_particles = false
# Immediate Start
episode_start_time = 0
# Make episodes finish after 3 minutes (6300 ticks)
episode_timeout = 6300
# Available buttons
# Currently disabled: [33] - DROP_SELECTED_WEAPON
available_buttons =
{
ATTACK
USE
JUMP
CROUCH
TURN180
# N. B. this is misspelled in vizdoom
ALATTACK
RELOAD
ZOOM
SPEED
STRAFE
MOVE_RIGHT
MOVE_LEFT
MOVE_BACKWARD
MOVE_FORWARD
TURN_RIGHT
TURN_LEFT
LOOK_UP
LOOK_DOWN
MOVE_UP
MOVE_DOWN
LAND
SELECT_WEAPON1
SELECT_WEAPON2
SELECT_WEAPON3
SELECT_WEAPON4
SELECT_WEAPON5
SELECT_WEAPON6
SELECT_WEAPON7
SELECT_WEAPON8
SELECT_WEAPON9
SELECT_WEAPON0
SELECT_NEXT_WEAPON
SELECT_PREV_WEAPON
ACTIVATE_SELECTED_WEAPON
SELECT_NEXT_ITEM
SELECT_PREV_ITEM
DROP_SELECTED_ITEM
LOOK_UP_DOWN_DELTA
TURN_LEFT_RIGHT_DELTA
MOVE_FORWARD_BACKWARD_DELTA
MOVE_LEFT_RIGHT_DELTA
MOVE_UP_DOWN_DELTA
}
# Game variables that will be in the state
available_game_variables =
{
KILLCOUNT
ITEMCOUNT
SECRETCOUNT
FRAGCOUNT
HEALTH
ARMOR
DEAD
ON_GROUND
ATTACK_READY
ALTATTACK_READY
SELECTED_WEAPON
SELECTED_WEAPON_AMMO
AMMO1
AMMO2
AMMO3
AMMO4
AMMO5
AMMO6
AMMO7
AMMO8
AMMO9
AMMO0
}
sound_enabled = false

View File

@@ -1,59 +0,0 @@
# Lines starting with # are treated as comments (or with whitespaces+#).
# It doesn't matter if you use capital letters or not.
# It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout.
# Rewards
death_penalty = 1
# Rendering options
screen_format = BGR24
render_hud = true
render_crosshair = false
render_weapon = true
render_decals = false
render_particles = false
# make episodes start after 14 tics (after unholstering the gun) (35 tics per second)
episode_start_time = 14
# Make episodes finish after 2100 tics (1 minute)
episode_timeout = 2100
# Available buttons
available_buttons =
{
ATTACK
TURN_RIGHT
TURN_LEFT
}
# Game variables that will be in the state
available_game_variables =
{
KILLCOUNT
ITEMCOUNT
SECRETCOUNT
FRAGCOUNT
HEALTH
ARMOR
DEAD
ON_GROUND
ATTACK_READY
ALTATTACK_READY
SELECTED_WEAPON
SELECTED_WEAPON_AMMO
AMMO1
AMMO2
AMMO3
AMMO4
AMMO5
AMMO6
AMMO7
AMMO8
AMMO9
AMMO0
}
sound_enabled = false

View File

@@ -1,59 +0,0 @@
# Lines starting with # are treated as comments (or with whitespaces+#).
# It doesn't matter if you use capital letters or not.
# It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout.
# Rewards
death_penalty = 1
# Rendering options
screen_format = BGR24
render_hud = true
render_crosshair = false
render_weapon = true
render_decals = false
render_particles = false
# make episodes start after 14 tics (after unholstering the gun) (35 tics per second)
episode_start_time = 14
# Make episodes finish after 2100 tics (1 minute)
episode_timeout = 2100
# Available buttons
available_buttons =
{
ATTACK
TURN_RIGHT
TURN_LEFT
}
# Game variables that will be in the state
available_game_variables =
{
KILLCOUNT
ITEMCOUNT
SECRETCOUNT
FRAGCOUNT
HEALTH
ARMOR
DEAD
ON_GROUND
ATTACK_READY
ALTATTACK_READY
SELECTED_WEAPON
SELECTED_WEAPON_AMMO
AMMO1
AMMO2
AMMO3
AMMO4
AMMO5
AMMO6
AMMO7
AMMO8
AMMO9
AMMO0
}
sound_enabled = false

View File

@@ -1,60 +0,0 @@
# Lines starting with # are treated as comments (or with whitespaces+#).
# It doesn't matter if you use capital letters or not.
# It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout.
# Rewards (Bonus for staying alive, large penalty for being killed)
living_reward = 1
death_penalty = 100
# Rendering options
screen_format = BGR24
render_hud = true
render_crosshair = false
render_weapon = false
render_decals = false
render_particles = false
# make episodes start after 14 tics (after unholstering the gun) (35 tics per second)
episode_start_time = 14
# Make episodes finish after 2100 tics (1 minute)
episode_timeout = 2100
# Available buttons
available_buttons =
{
MOVE_FORWARD
TURN_RIGHT
TURN_LEFT
}
# Game variables that will be in the state
available_game_variables =
{
KILLCOUNT
ITEMCOUNT
SECRETCOUNT
FRAGCOUNT
HEALTH
ARMOR
DEAD
ON_GROUND
ATTACK_READY
ALTATTACK_READY
SELECTED_WEAPON
SELECTED_WEAPON_AMMO
AMMO1
AMMO2
AMMO3
AMMO4
AMMO5
AMMO6
AMMO7
AMMO8
AMMO9
AMMO0
}
sound_enabled = false

View File

@@ -1,59 +0,0 @@
# Lines starting with # are treated as comments (or with whitespaces+#).
# It doesn't matter if you use capital letters or not.
# It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout.
# Rewards
living_reward = -0.0001
# Rendering options
screen_format = BGR24
render_hud = true
render_crosshair = false
render_weapon = false
render_decals = false
render_particles = false
# make episodes start after 14 tics (after unholstering the gun) (35 tics per second)
episode_start_time = 14
# Make episodes finish after 2100 tics (1 minute)
episode_timeout = 2100
# Available buttons
available_buttons =
{
MOVE_FORWARD
TURN_RIGHT
TURN_LEFT
}
# Game variables that will be in the state
available_game_variables =
{
KILLCOUNT
ITEMCOUNT
SECRETCOUNT
FRAGCOUNT
HEALTH
ARMOR
DEAD
ON_GROUND
ATTACK_READY
ALTATTACK_READY
SELECTED_WEAPON
SELECTED_WEAPON_AMMO
AMMO1
AMMO2
AMMO3
AMMO4
AMMO5
AMMO6
AMMO7
AMMO8
AMMO9
AMMO0
}
sound_enabled = false

View File

@@ -1,59 +0,0 @@
# Lines starting with # are treated as comments (or with whitespaces+#).
# It doesn't matter if you use capital letters or not.
# It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout.
# Rewards
living_reward = -0.0001
# Rendering options
screen_format = BGR24
render_hud = true
render_crosshair = false
render_weapon = true
render_decals = false
render_particles = false
# make episodes start after 16 tics (after unholstering the rocket launcher) (35 tics per second)
episode_start_time = 16
# Make episodes finish after 700 tics (20 seconds)
episode_timeout = 700
# Available buttons
available_buttons =
{
ATTACK
TURN_RIGHT
TURN_LEFT
}
# Game variables that will be in the state
available_game_variables =
{
KILLCOUNT
ITEMCOUNT
SECRETCOUNT
FRAGCOUNT
HEALTH
ARMOR
DEAD
ON_GROUND
ATTACK_READY
ALTATTACK_READY
SELECTED_WEAPON
SELECTED_WEAPON_AMMO
AMMO1
AMMO2
AMMO3
AMMO4
AMMO5
AMMO6
AMMO7
AMMO8
AMMO9
AMMO0
}
sound_enabled = false

View File

@@ -1,58 +0,0 @@
# Lines starting with # are treated as comments (or with whitespaces+#).
# It doesn't matter if you use capital letters or not.
# It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout.
# Rewards
living_reward = 1
# Rendering options
screen_format = BGR24
render_hud = true
render_crosshair = false
render_weapon = false
render_decals = false
render_particles = false
# make episodes start after 14 tics (after unholstering the gun) (35 tics per second)
episode_start_time = 14
# Make episodes finish after 2100 tics (1 minute)
episode_timeout = 2100
# Available buttons
available_buttons =
{
MOVE_RIGHT
MOVE_LEFT
}
# Game variables that will be in the state
available_game_variables =
{
KILLCOUNT
ITEMCOUNT
SECRETCOUNT
FRAGCOUNT
HEALTH
ARMOR
DEAD
ON_GROUND
ATTACK_READY
ALTATTACK_READY
SELECTED_WEAPON
SELECTED_WEAPON_AMMO
AMMO1
AMMO2
AMMO3
AMMO4
AMMO5
AMMO6
AMMO7
AMMO8
AMMO9
AMMO0
}
sound_enabled = false

View File

@@ -1,83 +0,0 @@
### Controls
Doom is usually played with a full keyboard, and multiple keys can be pressed at once.
To replicate this, we broke down the possible actions into 43 keys. Each key can be pressed (value of 1), or unpressed (value of 0).
The last 5 commands are deltas. [38] - LOOK_UP_DOWN_DELTA and [39] - TURN_LEFT_RIGHT_DELTA replicate mouse movement where values are in the
range -10 to +10. They represent mouse movement over the x and y axis. (e.g. +5 for LOOK_UP_DOWN_DELTA will make the player look up 5 degrees)
[40] - MOVE_FORWARD_BACKWARD_DELTA, [41] - MOVE_LEFT_RIGHT_DELTA, and [42] - MOVE_UP_DOWN_DELTA represent the speed on an axis.
Their values range from -100 to 100, where +100 is the maximum speed in one direction, and -100 is the maximum speed in the other.
(e.g. MOVE_FORWARD_BACKWARD_DELTA of +100 will make the player move forward at 100% of max speed, and -100 will make the player
move backward at 100% of max speed).
A list of values is expected to be passed as the action (e.g. [0, 1, 0, 0, 1, 0, .... ]).
Each mission is restricted on what actions can be performed, but the mapping is the same across all missions.
For example, if we want to [0] - ATTACK, [2] - JUMP, and [13] - MOVE_FORWARD at the same time, we would submit the following action:
```python
action = [0] * 43
action[0] = 1
action[2] = 1
action[13] = 1
```
The full list of possible actions is:
* [0] - ATTACK - Shoot weapon - Values 0 or 1
* [1] - USE - Use item - Values 0 or 1
* [2] - JUMP - Jump - Values 0 or 1
* [3] - CROUCH - Crouch - Values 0 or 1
* [4] - TURN180 - Perform 180 turn - Values 0 or 1
* [5] - ALT_ATTACK - Perform alternate attack - Values 0 or 1
* [6] - RELOAD - Reload weapon - Values 0 or 1
* [7] - ZOOM - Toggle zoom in/out - Values 0 or 1
* [8] - SPEED - Run faster - Values 0 or 1
* [9] - STRAFE - Strafe (moving sideways in a circle) - Values 0 or 1
* [10] - MOVE_RIGHT - Move to the right - Values 0 or 1
* [11] - MOVE_LEFT - Move to the left - Values 0 or 1
* [12] - MOVE_BACKWARD - Move backward - Values 0 or 1
* [13] - MOVE_FORWARD - Move forward - Values 0 or 1
* [14] - TURN_RIGHT - Turn right - Values 0 or 1
* [15] - TURN_LEFT - Turn left - Values 0 or 1
* [16] - LOOK_UP - Look up - Values 0 or 1
* [17] - LOOK_DOWN - Look down - Values 0 or 1
* [18] - MOVE_UP - Move up - Values 0 or 1
* [19] - MOVE_DOWN - Move down - Values 0 or 1
* [20] - LAND - Land (e.g. drop from ladder) - Values 0 or 1
* [21] - SELECT_WEAPON1 - Select weapon 1 - Values 0 or 1
* [22] - SELECT_WEAPON2 - Select weapon 2 - Values 0 or 1
* [23] - SELECT_WEAPON3 - Select weapon 3 - Values 0 or 1
* [24] - SELECT_WEAPON4 - Select weapon 4 - Values 0 or 1
* [25] - SELECT_WEAPON5 - Select weapon 5 - Values 0 or 1
* [26] - SELECT_WEAPON6 - Select weapon 6 - Values 0 or 1
* [27] - SELECT_WEAPON7 - Select weapon 7 - Values 0 or 1
* [28] - SELECT_WEAPON8 - Select weapon 8 - Values 0 or 1
* [29] - SELECT_WEAPON9 - Select weapon 9 - Values 0 or 1
* [30] - SELECT_WEAPON0 - Select weapon 0 - Values 0 or 1
* [31] - SELECT_NEXT_WEAPON - Select next weapon - Values 0 or 1
* [32] - SELECT_PREV_WEAPON - Select previous weapon - Values 0 or 1
* [33] - DROP_SELECTED_WEAPON - Drop selected weapon - Values 0 or 1
* [34] - ACTIVATE_SELECTED_WEAPON - Activate selected weapon - Values 0 or 1
* [35] - SELECT_NEXT_ITEM - Select next item - Values 0 or 1
* [36] - SELECT_PREV_ITEM - Select previous item - Values 0 or 1
* [37] - DROP_SELECTED_ITEM - Drop selected item - Values 0 or 1
* [38] - LOOK_UP_DOWN_DELTA - Look Up/Down - Range of -10 to 10 (integer).
- Value is the angle - +5 will look up 5 degrees, -5 will look down 5 degrees
* [39] - TURN_LEFT_RIGHT_DELTA - Turn Left/Right - Range of -10 to 10 (integer).
- Value is the angle - +5 will turn right 5 degrees, -5 will turn left 5 degrees
* [40] - MOVE_FORWARD_BACKWARD_DELTA - Speed of forward/backward movement - Range -100 to 100 (integer).
- +100 is max speed forward, -100 is max speed backward, 0 is no movement
* [41] - MOVE_LEFT_RIGHT_DELTA - Speed of left/right movement - Range -100 to 100 (integer).
- +100 is max speed right, -100 is max speed left, 0 is no movement
* [42] - MOVE_UP_DOWN_DELTA - Speed of up/down movement - Range -100 to 100 (integer).
- +100 is max speed up, -100 is max speed down, 0 is no movement
To control the player in 'human' mode, the following keys should work:
* Arrow Keys for MOVE_FORWARD, MOVE_BACKWARD, TURN_LEFT, TURN_RIGHT
* '<' and '>' for MOVE_LEFT and MOVE_RIGHT
* Ctrl (or left mouse click) for ATTACK

View File

@@ -1,48 +0,0 @@
import logging
from gym.envs.doom import doom_env
logger = logging.getLogger(__name__)
class DoomBasicEnv(doom_env.DoomEnv):
    """
    ------------ Training Mission 1 - Basic ------------
    A rectangular room with gray walls, ceiling and floor. The player
    spawns at the center of one of the longer walls, and a red circular
    monster spawns at a random position along the opposite wall.
    The monster must be killed (a single bullet is enough).

    Allowed actions:
        [0]  - ATTACK     - Shoot weapon      - Values 0 or 1
        [10] - MOVE_RIGHT - Move to the right - Values 0 or 1
        [11] - MOVE_LEFT  - Move to the left  - Values 0 or 1
    Note: see controls.md for details

    Rewards:
        +101 - Killing the monster
        -  5 - Missing a shot
        -  1 - 35 times per second - Kill the monster faster!

    Goal: 10 points
        Kill the monster in 3 secs with 1 shot

    Mode:
        - env.mode can be 'fast', 'normal' or 'human' (e.g. env.mode = 'fast')
        - 'fast' (default) will run as fast as possible (~75 fps) (best for simulation)
        - 'normal' will run at roughly 35 fps (easier for human to watch)
        - 'human' will let you play the game (keyboard only: Arrow Keys, '<', '>' and Ctrl)

    Ends when:
        - Monster is dead
        - Player is dead
        - Timeout (10 seconds - 350 frames)

    Actions:
        actions = [0] * 43
        actions[0] = 0       # ATTACK
        actions[10] = 1      # MOVE_RIGHT
        actions[11] = 0      # MOVE_LEFT
    -----------------------------------------------------
    """

    def __init__(self):
        # Level 0 is the "Basic" mission.
        super(DoomBasicEnv, self).__init__(level=0)

View File

@@ -1,53 +0,0 @@
import logging
from gym.envs.doom import doom_env
logger = logging.getLogger(__name__)
class DoomCorridorEnv(doom_env.DoomEnv):
    """
    ------------ Training Mission 2 - Corridor ------------
    A navigation exercise. A vest lies at the far end of a corridor that
    is guarded by 6 enemies (3 groups of 2). The objective is to reach the
    vest as quickly as possible without being killed.

    Allowed actions:
        [0]  - ATTACK       - Shoot weapon      - Values 0 or 1
        [10] - MOVE_RIGHT   - Move to the right - Values 0 or 1
        [11] - MOVE_LEFT    - Move to the left  - Values 0 or 1
        [13] - MOVE_FORWARD - Move forward      - Values 0 or 1
        [14] - TURN_RIGHT   - Turn right        - Values 0 or 1
        [15] - TURN_LEFT    - Turn left         - Values 0 or 1
    Note: see controls.md for details

    Rewards:
        + dX - For getting closer to the vest
        - dX - For getting further from the vest
        -100 - Penalty for being killed

    Goal: 1,000 points
        Reach the vest (or at least get past the guards in the 3rd group)

    Mode:
        - env.mode can be 'fast', 'normal' or 'human' (e.g. env.mode = 'fast')
        - 'fast' (default) will run as fast as possible (~75 fps) (best for simulation)
        - 'normal' will run at roughly 35 fps (easier for human to watch)
        - 'human' will let you play the game (keyboard only: Arrow Keys, '<', '>' and Ctrl)

    Ends when:
        - Player touches vest
        - Player is dead
        - Timeout (1 minute - 2,100 frames)

    Actions:
        actions = [0] * 43
        actions[0] = 0       # ATTACK
        actions[10] = 1      # MOVE_RIGHT
        actions[11] = 0      # MOVE_LEFT
        actions[13] = 0      # MOVE_FORWARD
        actions[14] = 0      # TURN_RIGHT
        actions[15] = 0      # TURN_LEFT
    -----------------------------------------------------
    """

    def __init__(self):
        # Level 1 is the "Corridor" mission.
        super(DoomCorridorEnv, self).__init__(level=1)

View File

@@ -1,45 +0,0 @@
import logging
from gym.envs.doom import doom_env
logger = logging.getLogger(__name__)
class DoomDeathmatchEnv(doom_env.DoomEnv):
    """
    ------------ Final Mission - Deathmatch ------------
    Kill as many monsters as you can without getting killed yourself.

    Allowed actions:
        ALL
    Note: see controls.md for details

    Rewards:
        +1 - Killing a monster

    Goal: 20 points
        Kill 20 monsters

    Mode:
        - env.mode can be 'fast', 'normal' or 'human' (e.g. env.mode = 'fast')
        - 'fast' (default) will run as fast as possible (~75 fps) (best for simulation)
        - 'normal' will run at roughly 35 fps (easier for human to watch)
        - 'human' will let you play the game (mouse and full keyboard)

    Ends when:
        - Player is dead
        - Timeout (3 minutes - 6,300 frames)

    Actions:
        actions = [0] * 43
        actions[0] = 0       # ATTACK
        actions[1] = 0       # USE
        [...]
        actions[42] = 0      # MOVE_UP_DOWN_DELTA
    A full list of possible actions is available in controls.md

    Note:
        actions[33] (DROP_SELECTED_WEAPON) is currently disabled, because it causes VizDoom to crash
    -----------------------------------------------------
    """

    def __init__(self):
        # Level 8 is the "Deathmatch" mission.
        super(DoomDeathmatchEnv, self).__init__(level=8)

View File

@@ -1,49 +0,0 @@
import logging
from gym.envs.doom import doom_env
logger = logging.getLogger(__name__)
class DoomDefendCenterEnv(doom_env.DoomEnv):
    """
    ------------ Training Mission 3 - Defend the Center ------------
    This map teaches both killing and surviving, and requires keeping an
    eye on the ammunition level. Only kills are rewarded, so you have to
    work out how to stay alive.

    The map is a circle with monsters and you stand in the middle. Killed
    monsters respawn with additional health. Kill as many as you can
    before you run out of ammo.

    Allowed actions:
        [0]  - ATTACK     - Shoot weapon - Values 0 or 1
        [14] - TURN_RIGHT - Turn right   - Values 0 or 1
        [15] - TURN_LEFT  - Turn left    - Values 0 or 1
    Note: see controls.md for details

    Rewards:
        +  1 - Killing a monster
        -  1 - Penalty for being killed

    Goal: 10 points
        Kill 11 monsters (you have 26 ammo)

    Mode:
        - env.mode can be 'fast', 'normal' or 'human' (e.g. env.mode = 'fast')
        - 'fast' (default) will run as fast as possible (~75 fps) (best for simulation)
        - 'normal' will run at roughly 35 fps (easier for human to watch)
        - 'human' will let you play the game (keyboard only: Arrow Keys, '<', '>' and Ctrl)

    Ends when:
        - Player is dead
        - Timeout (60 seconds - 2100 frames)

    Actions:
        actions = [0] * 43
        actions[0] = 0       # ATTACK
        actions[14] = 1      # TURN_RIGHT
        actions[15] = 0      # TURN_LEFT
    -----------------------------------------------------
    """

    def __init__(self):
        # Level 2 is the "Defend the Center" mission.
        super(DoomDefendCenterEnv, self).__init__(level=2)

View File

@@ -1,49 +0,0 @@
import logging
from gym.envs.doom import doom_env
logger = logging.getLogger(__name__)
class DoomDefendLineEnv(doom_env.DoomEnv):
    """
    ------------ Training Mission 4 - Defend the Line ------------
    This map teaches both killing and surviving. Ammo replenishes
    automatically. Only kills are rewarded, so you have to work out how
    to stay alive.

    The map is a rectangle with monsters on the far side. Killed monsters
    respawn with additional health. Kill as many as you can before they
    kill you. This map is harder than the previous one.

    Allowed actions:
        [0]  - ATTACK     - Shoot weapon - Values 0 or 1
        [14] - TURN_RIGHT - Turn right   - Values 0 or 1
        [15] - TURN_LEFT  - Turn left    - Values 0 or 1
    Note: see controls.md for details

    Rewards:
        +  1 - Killing a monster
        -  1 - Penalty for being killed

    Goal: 15 points
        Kill 16 monsters

    Mode:
        - env.mode can be 'fast', 'normal' or 'human' (e.g. env.mode = 'fast')
        - 'fast' (default) will run as fast as possible (~75 fps) (best for simulation)
        - 'normal' will run at roughly 35 fps (easier for human to watch)
        - 'human' will let you play the game (keyboard only: Arrow Keys, '<', '>' and Ctrl)

    Ends when:
        - Player is dead
        - Timeout (60 seconds - 2100 frames)

    Actions:
        actions = [0] * 43
        actions[0] = 0       # ATTACK
        actions[14] = 1      # TURN_RIGHT
        actions[15] = 0      # TURN_LEFT
    -----------------------------------------------------
    """

    def __init__(self):
        # Level 3 is the "Defend the Line" mission.
        super(DoomDefendLineEnv, self).__init__(level=3)

View File

@@ -1,415 +0,0 @@
import logging
import os
from time import sleep
import numpy as np
import gym
from gym import utils, spaces
from gym.utils import seeding
# doom_py is an optional dependency: turn a bare ImportError into gym's
# DependencyNotInstalled error carrying an installation hint.
try:
    import doom_py
    from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
except ImportError as e:
    # The two literals are joined by implicit concatenation *before* .format()
    # is applied. With the previous explicit "+", .format() bound only to the
    # second literal, so the "{}" placeholder was shown verbatim and the
    # original ImportError text was lost.
    raise gym.error.DependencyNotInstalled(
        "{}. (HINT: you can install Doom dependencies "
        "with 'pip install gym[doom]'.)".format(e))
logger = logging.getLogger(__name__)
# Constants
# Length of a full action vector (one slot per button listed in controls.md)
NUM_ACTIONS = 43
# Number of missions, i.e. the number of rows in DOOM_SETTINGS below
NUM_LEVELS = 9
# Column indexes into a DOOM_SETTINGS row (see the "Format" comment below)
CONFIG = 0
SCENARIO = 1
MAP = 2
DIFFICULTY = 3
ACTIONS = 4
MIN_SCORE = 5
TARGET_SCORE = 6
# Format (config, scenario, map, difficulty, actions, min, target)
# - config:     .cfg file name, loaded from the 'assets' directory next to this module
# - scenario:   .wad file name, resolved through the doom_py Loader
# - map:        map name; an empty string means no map is set explicitly
# - difficulty: value passed to DoomGame.set_doom_skill
# - actions:    indexes (into the 43-slot action vector) that the mission accepts
# - min/target: score bounds for the mission (not read by DoomEnv itself)
DOOM_SETTINGS = [
['basic.cfg', 'basic.wad', 'map01', 5, [0, 10, 11], -485, 10], # 0 - Basic
['deadly_corridor.cfg', 'deadly_corridor.wad', '', 1, [0, 10, 11, 13, 14, 15], -120, 1000], # 1 - Corridor
['defend_the_center.cfg', 'defend_the_center.wad', '', 5, [0, 14, 15], -1, 10], # 2 - DefendCenter
['defend_the_line.cfg', 'defend_the_line.wad', '', 5, [0, 14, 15], -1, 15], # 3 - DefendLine
['health_gathering.cfg', 'health_gathering.wad', 'map01', 5, [13, 14, 15], 0, 1000], # 4 - HealthGathering
['my_way_home.cfg', 'my_way_home.wad', '', 5, [13, 14, 15], -0.22, 0.5], # 5 - MyWayHome
['predict_position.cfg', 'predict_position.wad', 'map01', 3, [0, 14, 15], -0.075, 0.5], # 6 - PredictPosition
['take_cover.cfg', 'take_cover.wad', 'map01', 5, [10, 11], 0, 750], # 7 - TakeCover
# Deathmatch allows every slot except 33 (DROP_SELECTED_WEAPON)
['deathmatch.cfg', 'deathmatch.wad', '', 5, [x for x in range(NUM_ACTIONS) if x != 33], 0, 20] # 8 - Deathmatch
]
class DoomEnv(gym.Env, utils.EzPickle):
    """Gym environment wrapping a single ViZDoom mission.

    The mission is selected by ``level``, an index into ``DOOM_SETTINGS`` that
    supplies the config file, scenario, map, difficulty and the subset of the
    ``NUM_ACTIONS`` action slots the mission accepts.

    ``self.mode`` selects how the game runs and may be changed before
    ``reset()``: 'fast' (default), 'normal' (sleeps between rendered frames)
    or 'human' (visible window, spectator mode, played from the keyboard).
    """

    metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 35}

    # Keys of the info dict, in the order the game variables are read from the
    # state (same order as ``available_game_variables`` in the .cfg assets).
    _GAME_VARIABLE_NAMES = (
        'KILLCOUNT', 'ITEMCOUNT', 'SECRETCOUNT', 'FRAGCOUNT',
        'HEALTH', 'ARMOR', 'DEAD', 'ON_GROUND',
        'ATTACK_READY', 'ALTATTACK_READY',
        'SELECTED_WEAPON', 'SELECTED_WEAPON_AMMO',
        'AMMO1', 'AMMO2', 'AMMO3', 'AMMO4', 'AMMO5',
        'AMMO6', 'AMMO7', 'AMMO8', 'AMMO9', 'AMMO0',
    )

    def __init__(self, level):
        """Create the environment for mission ``level`` (index into DOOM_SETTINGS).

        The game itself is not started here; that happens on the first reset.
        """
        utils.EzPickle.__init__(self)
        self.previous_level = -1
        self.level = level
        self.game = DoomGame()
        self.loader = Loader()
        self.doom_dir = os.path.dirname(os.path.abspath(__file__))
        self.mode = 'fast'                          # 'human', 'fast' or 'normal'
        self.no_render = False                      # To disable double rendering in human mode
        self.viewer = None
        self.is_initialized = False                 # Indicates that reset() has been called
        self.curr_seed = 0
        # 38 on/off buttons, 2 look/turn deltas and 3 movement-speed deltas
        self.action_space = spaces.MultiDiscrete([[0, 1]] * 38 + [[-10, 10]] * 2 + [[-100, 100]] * 3)
        self.allowed_actions = list(range(NUM_ACTIONS))
        self._seed()
        self._configure()

    def _configure(self, screen_resolution=ScreenResolution.RES_640X480):
        """Select the screen resolution and rebuild ``observation_space``.

        Only RES_640X480 and RES_160X120 are recognised; any other value leaves
        the previously configured resolution in place.
        """
        # Often agents end up downsampling the observations. Configuring Doom to
        # return a smaller image yields significant (~10x) speedups
        if screen_resolution == ScreenResolution.RES_640X480:
            self.screen_height = 480
            self.screen_width = 640
            self.screen_resolution = ScreenResolution.RES_640X480
        elif screen_resolution == ScreenResolution.RES_160X120:
            self.screen_height = 120
            self.screen_width = 160
            self.screen_resolution = ScreenResolution.RES_160X120

        self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))

    def _load_level(self):
        """(Re)start the Doom game for ``self.level`` and return the first observation.

        In 'human' mode the whole episode is played interactively before this
        method returns, and a blank frame is returned.
        """
        # A running game must be closed and replaced before it is reconfigured
        if self.is_initialized:
            self.is_initialized = False
            self.game.close()
            self.game = DoomGame()

        # Loading Paths (is_initialized is always False at this point)
        self.game.set_vizdoom_path(self.loader.get_vizdoom_path())
        self.game.set_doom_game_path(self.loader.get_freedoom_path())

        # Common settings
        settings = DOOM_SETTINGS[self.level]
        self._closed = False
        self.game.load_config(os.path.join(self.doom_dir, 'assets/%s' % settings[CONFIG]))
        self.game.set_doom_scenario_path(self.loader.get_scenario_path(settings[SCENARIO]))
        if settings[MAP] != '':
            self.game.set_doom_map(settings[MAP])
        self.game.set_doom_skill(settings[DIFFICULTY])
        self.previous_level = self.level
        self.allowed_actions = settings[ACTIONS]
        self.game.set_screen_resolution(self.screen_resolution)

        # Human mode: visible window in spectator mode, with gym-side rendering
        # disabled. Algo mode: hidden window in player mode.
        human_mode = 'human' == self.mode
        if human_mode:
            self.game.add_game_args('+freelook 1')
        self.game.set_window_visible(human_mode)
        self.game.set_mode(Mode.SPECTATOR if human_mode else Mode.PLAYER)
        self.no_render = human_mode
        self.game.init()
        self._start_episode()
        self.is_initialized = True

        if human_mode:
            self._play_human_mode()
            return np.zeros(shape=self.observation_space.shape, dtype=np.uint8)
        return self.game.get_state().image_buffer.copy()

    def _start_episode(self):
        """Start a new episode, applying a pending seed (each seed is used once)."""
        if self.curr_seed > 0:
            self.game.set_seed(self.curr_seed)
            self.curr_seed = 0
        self.game.new_episode()

    def _play_human_mode(self):
        """Let a human play the episode, printing the game state after every tic."""
        while not self.game.is_episode_finished():
            self.game.advance_action()
            state = self.game.get_state()
            total_reward = self.game.get_total_reward()
            info = self._get_game_variables(state.game_variables)
            info["TOTAL_REWARD"] = round(total_reward, 4)
            print('===============================')
            print('State: #' + str(state.number))
            print('Action: \t' + str(self.game.get_last_action()) + '\t (=> only allowed actions)')
            print('Reward: \t' + str(self.game.get_last_reward()))
            print('Total Reward: \t' + str(total_reward))
            print('Variables: \n' + str(info))
            sleep(0.02857)   # 35 fps = 0.02857 sleep between frames
        print('===============================')
        print('Done')

    def _step(self, action):
        """Apply ``action`` and return ``(observation, reward, done, info)``.

        ``action`` is a sequence of NUM_ACTIONS numbers. A sequence of a
        different length is padded with zeros (or truncated) to NUM_ACTIONS and
        a warning is logged. Only the slots in ``self.allowed_actions`` are
        forwarded to the game.

        A blank frame is returned as the observation once the episode has
        finished or when the Doom process is no longer running.
        """
        if NUM_ACTIONS != len(action):
            logger.warning('Doom action list must contain %d items. Padding missing items with 0', NUM_ACTIONS)
            # Slicing also copes with over-long lists, which previously raised
            # an IndexError while being copied into the padded list.
            supplied = list(action)[:NUM_ACTIONS]
            action = supplied + [0] * (NUM_ACTIONS - len(supplied))

        # action is a list of numbers but DoomGame.make_action expects a list of ints
        if self.allowed_actions:
            list_action = [int(action[action_idx]) for action_idx in self.allowed_actions]
        else:
            list_action = [int(x) for x in action]

        try:
            reward = self.game.make_action(list_action)
            state = self.game.get_state()
            info = self._get_game_variables(state.game_variables)
            info["TOTAL_REWARD"] = round(self.game.get_total_reward(), 4)

            if self.game.is_episode_finished():
                return np.zeros(shape=self.observation_space.shape, dtype=np.uint8), reward, True, info
            return state.image_buffer.copy(), reward, False, info

        except doom_py.vizdoom.ViZDoomIsNotRunningException:
            return np.zeros(shape=self.observation_space.shape, dtype=np.uint8), 0, True, {}

    def _reset(self):
        """Start a new episode (loading the level first if needed); return the first observation."""
        if self.is_initialized and not self._closed:
            self._start_episode()
            return self.game.get_state().image_buffer.copy()
        return self._load_level()

    def _render(self, mode='human', close=False):
        """Render the current frame.

        With ``close=True`` the viewer (if any) is closed. In 'rgb_array' mode
        the current image is returned. In 'human' mode it is shown in a
        SimpleImageViewer, unless the Doom window itself is already visible
        (``self.no_render``).
        """
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None      # If we don't None out this reference pyglet becomes unhappy
            return
        try:
            if 'human' == mode and self.no_render:
                return
            state = self.game.get_state()
            img = state.image_buffer
            # VizDoom returns None if the episode is finished, let's make it
            # an empty image so the recorder doesn't stop
            if img is None:
                img = np.zeros(shape=self.observation_space.shape, dtype=np.uint8)
            if mode == 'rgb_array':
                return img
            # Compare by value: "is" tests object identity and only worked by
            # accident of string interning.
            elif mode == 'human':
                from gym.envs.classic_control import rendering
                if self.viewer is None:
                    self.viewer = rendering.SimpleImageViewer()
                self.viewer.imshow(img)
                if 'normal' == self.mode:
                    sleep(0.02857)  # 35 fps = 0.02857 sleep between frames
        except doom_py.vizdoom.ViZDoomIsNotRunningException:
            pass  # Doom has been closed

    def _close(self):
        """Shut down the Doom game."""
        self.game.close()

    def _seed(self, seed=None):
        """Derive a 32-bit seed from ``seed``; it is applied when the next episode starts."""
        self.curr_seed = seeding.hash_seed(seed) % 2 ** 32
        return [self.curr_seed]

    def _get_game_variables(self, state_variables):
        """Build the info dict from the game variables of a state.

        Always contains "LEVEL". When ``state_variables`` is not None, its
        first 22 entries are stored under the names in ``_GAME_VARIABLE_NAMES``.
        """
        info = {
            "LEVEL": self.level
        }
        if state_variables is None:
            return info
        # Index explicitly so that a too-short variable list still raises
        # IndexError instead of being silently truncated.
        for index, name in enumerate(self._GAME_VARIABLE_NAMES):
            info[name] = state_variables[index]
        return info
class MetaDoomEnv(DoomEnv):
    """Doom environment that plays through all levels and scores them together.

    Each level keeps a short history of standardized scores (0 to 1,000,
    newest first). A level's score is the average of its most recent tries,
    the total reward is the sum of the level scores (plus a bonus when every
    level averages at least 990), and a level is unlocked once the level
    before it averages at least ``passing_grade``.
    """

    def __init__(self, average_over=10, passing_grade=600, min_tries_for_avg=5):
        """Create the meta environment, starting on level 0.

        Args:
            average_over: maximum number of most recent tries averaged into
                a level score.
            passing_grade: level average required to unlock the next level.
            min_tries_for_avg: length the per-level score history is padded
                (with zeros) and trimmed to.
        """
        super(MetaDoomEnv, self).__init__(0)
        self.average_over = average_over
        self.passing_grade = passing_grade
        self.min_tries_for_avg = min_tries_for_avg  # Need to use at least this number of tries to calc avg
        # One independent list per level; `[[]] * NUM_LEVELS` would make every
        # slot refer to the same list object.
        self.scores = [[] for _ in range(NUM_LEVELS)]
        self.locked_levels = [True] * NUM_LEVELS  # Locking all levels but the first
        self.locked_levels[0] = False
        self.total_reward = 0
        self.find_new_level = False  # Indicates that we need a level change
        # Read by _step(); set to True by _start_episode(). Defined here so the
        # attribute exists even before the first episode has been started.
        self.is_new_episode = False
        self._unlock_levels()

    def _play_human_mode(self):
        """Advance the game until the episode ends, printing progress each tick."""
        while not self.game.is_episode_finished():
            self.game.advance_action()
            state = self.game.get_state()
            episode_reward = self.game.get_total_reward()
            (reward, self.total_reward) = self._calculate_reward(episode_reward, self.total_reward)
            info = self._get_game_variables(state.game_variables)
            info["SCORES"] = self.get_scores()
            info["TOTAL_REWARD"] = round(self.total_reward, 4)
            info["LOCKED_LEVELS"] = self.locked_levels
            print('===============================')
            print('State: #' + str(state.number))
            print('Action: \t' + str(self.game.get_last_action()) + '\t (=> only allowed actions)')
            print('Reward: \t' + str(reward))
            print('Total Reward: \t' + str(self.total_reward))
            print('Variables: \n' + str(info))
            sleep(0.02857)  # 35 fps = 0.02857 sleep between frames
        print('===============================')
        print('Done')
        return

    def _get_next_level(self):
        """Return the index of the unlocked level with the lowest average.

        Ties go to the lowest index; level 0 is returned when nothing beats
        the initial threshold.
        """
        averages = self.get_scores()
        lowest_level = 0  # Defaulting to first level
        # Standardized scores are capped at 1,000, so 1001 is beaten by the
        # first unlocked level that is examined.
        lowest_score = 1001
        for i in range(NUM_LEVELS):
            if not self.locked_levels[i]:
                if averages[i] < lowest_score:
                    lowest_level = i
                    lowest_score = averages[i]
        return lowest_level

    def _unlock_levels(self):
        """Unlock every level whose preceding level averages >= passing_grade."""
        averages = self.get_scores()
        for i in range(NUM_LEVELS - 2, -1, -1):
            if self.locked_levels[i + 1] and averages[i] >= self.passing_grade:
                self.locked_levels[i + 1] = False
        return

    def _start_episode(self):
        """Open a new score slot for the current level and start the episode.

        The history is newest-first: a zero is pushed at index 0 (to be
        overwritten by _calculate_reward) and the list is kept at
        ``min_tries_for_avg`` entries. An empty history is zero-filled.
        """
        if 0 == len(self.scores[self.level]):
            self.scores[self.level] = [0] * self.min_tries_for_avg
        else:
            self.scores[self.level].insert(0, 0)
            self.scores[self.level] = self.scores[self.level][:self.min_tries_for_avg]
        self.is_new_episode = True
        return super(MetaDoomEnv, self)._start_episode()

    def change_level(self, new_level=None):
        """Switch to ``new_level`` if it is unlocked, else pick one automatically.

        Args:
            new_level: index of the requested level, or ``None`` to let
                _get_next_level() choose. A locked level is ignored and the
                automatic choice is used instead.
        """
        use_requested = new_level is not None and not self.locked_levels[new_level]
        self.find_new_level = False
        self.level = new_level if use_requested else self._get_next_level()
        self.reset()
        return

    def _get_standard_reward(self, episode_reward):
        """Return the episode reward rescaled to the 0 - 1,000 range."""
        # Returns a standardized reward for an episode (i.e. between 0 and 1,000)
        min_score = float(DOOM_SETTINGS[self.level][MIN_SCORE])
        target_score = float(DOOM_SETTINGS[self.level][TARGET_SCORE])
        max_score = min_score + (target_score - min_score) / 0.99  # Target is 99th percentile (Scale 0-1000)
        std_reward = round(1000 * (episode_reward - min_score) / (max_score - min_score), 4)
        std_reward = min(1000, std_reward)  # Cannot be more than 1,000
        std_reward = max(0, std_reward)  # Cannot be less than 0
        return std_reward

    def _level_average(self, level):
        """Return the unrounded average of the newest tries of ``level``.

        At most ``average_over`` entries are averaged. Returns ``None`` when
        the level has no recorded tries.
        """
        tries = self.scores[level]
        if not tries:
            return None
        count = min(len(tries), self.average_over)
        total = sum(tries[j] for j in range(count))
        # float() keeps true division even when every score is an int
        # (zero placeholders, or the 0 / 1000 clamps) under Python 2.
        return total / float(count)

    def get_total_reward(self):
        """Return the sum of all level averages, plus the all-levels bonus."""
        total_score = 0
        passed_levels = 0
        for level in range(NUM_LEVELS):
            level_average = self._level_average(level)
            if level_average is None:
                continue
            if level_average >= 990:
                passed_levels += 1
            total_score += level_average
        # Bonus for passing all levels (50 * num of levels)
        if NUM_LEVELS == passed_levels:
            total_score += NUM_LEVELS * 50
        return round(total_score, 4)

    def _calculate_reward(self, episode_reward, prev_total_reward):
        """Record the episode's standardized score and return the reward delta.

        Overwrites the newest score slot of the current level, so an episode
        must have been started for that level first.

        Returns:
            ``(reward, total_reward)`` where ``reward`` is the change in
            total reward relative to ``prev_total_reward``.
        """
        std_reward = self._get_standard_reward(episode_reward)
        self.scores[self.level][0] = std_reward
        total_reward = self.get_total_reward()
        reward = total_reward - prev_total_reward
        return reward, total_reward

    def get_scores(self):
        """Return a list with the rounded average per level (0 when untried)."""
        averages = [0] * NUM_LEVELS
        for level in range(NUM_LEVELS):
            level_average = self._level_average(level)
            if level_average is not None:
                averages[level] = round(level_average, 4)
        return averages

    def _reset(self):
        """Start a new episode, reloading the game when the level changed.

        Returns ``None`` without doing anything while a level change is
        pending (``find_new_level`` is set).
        """
        # Reset is called on first step() after level is finished
        # or when change_level() is called. Returning if neither have been called to
        # avoid resetting the level twice
        if self.find_new_level:
            return
        if self.is_initialized and not self._closed and self.previous_level == self.level:
            self._start_episode()
            return self.game.get_state().image_buffer.copy()
        else:
            return self._load_level()

    def _step(self, action):
        """Run one step and return ``(obs, reward, is_finished, info)``.

        A pending level change is performed first. In 'human' mode the whole
        episode is played interactively and a blank observation is returned;
        otherwise the parent step is used and the reward is the change in
        total reward.
        """
        # Changing level
        if self.find_new_level:
            self.change_level()

        if 'human' == self.mode:
            self._play_human_mode()
            obs = np.zeros(shape=self.observation_space.shape, dtype=np.uint8)
            reward = 0
            is_finished = True
            info = self._get_game_variables(None)
        else:
            # The per-step reward of the parent is replaced by the meta reward.
            obs, _, is_finished, info = super(MetaDoomEnv, self)._step(action)
            reward, self.total_reward = self._calculate_reward(self.game.get_total_reward(), self.total_reward)
            # First step() after new episode returns the entire total reward
            # because stats_recorder resets the episode score to 0 after reset() is called
            if self.is_new_episode:
                reward = self.total_reward

        self.is_new_episode = False
        info["SCORES"] = self.get_scores()
        info["TOTAL_REWARD"] = round(self.total_reward, 4)
        info["LOCKED_LEVELS"] = self.locked_levels

        # Indicating new level required
        if is_finished:
            self._unlock_levels()
            self.find_new_level = True

        return obs, reward, is_finished, info

View File

@@ -1,46 +0,0 @@
import logging
from gym.envs.doom import doom_env
logger = logging.getLogger(__name__)
class DoomHealthGatheringEnv(doom_env.DoomEnv):
    """
    ------------ Training Mission 5 - Health Gathering ------------
    This map is a guide on how to survive by collecting health packs.
    It is a rectangle with green, acidic floor which hurts the player
    periodically. There are also medkits spread around the map, and
    additional kits will spawn at intervals.

    Allowed actions:
        [13] - MOVE_FORWARD - Move forward - Values 0 or 1
        [14] - TURN_RIGHT   - Turn right   - Values 0 or 1
        [15] - TURN_LEFT    - Turn left    - Values 0 or 1
    Note: see controls.md for details

    Rewards:
        +  1 - 35 times per second - Survive as long as possible
        -100 - Death penalty

    Goal: 1000 points
        Stay alive long enough to reach 1,000 points (~ 30 secs)

    Mode:
        - env.mode can be 'fast', 'normal' or 'human' (e.g. env.mode = 'fast')
        - 'fast' (default) will run as fast as possible (~75 fps) (best for simulation)
        - 'normal' will run at roughly 35 fps (easier for human to watch)
        - 'human' will let you play the game (keyboard only: Arrow Keys, '<', '>' and Ctrl)

    Ends when:
        - Player is dead
        - Timeout (60 seconds - 2,100 frames)

    Actions:
        actions = [0] * 43
        actions[13] = 0  # MOVE_FORWARD
        actions[14] = 1  # TURN_RIGHT
        actions[15] = 0  # TURN_LEFT
    -----------------------------------------------------
    """

    def __init__(self):
        mission_level = 4  # Training Mission 5 is passed to DoomEnv as level 4
        super(DoomHealthGatheringEnv, self).__init__(mission_level)

View File

@@ -1,46 +0,0 @@
import logging
from gym.envs.doom import doom_env
logger = logging.getLogger(__name__)
class DoomMyWayHomeEnv(doom_env.DoomEnv):
    """
    ------------ Training Mission 6 - My Way Home ------------
    This map is designed to improve navigational skills. It is a series of
    interconnected rooms and 1 corridor with a dead end. Each room
    has a separate color. There is a green vest in one of the rooms.
    The vest is always in the same room. Player must find the vest.

    Allowed actions:
        [13] - MOVE_FORWARD - Move forward - Values 0 or 1
        [14] - TURN_RIGHT   - Turn right   - Values 0 or 1
        [15] - TURN_LEFT    - Turn left    - Values 0 or 1
    Note: see controls.md for details

    Rewards:
        +  1    - Finding the vest
        -0.0001 - 35 times per second - Find the vest quick!

    Goal: 0.50 point
        Find the vest

    Mode:
        - env.mode can be 'fast', 'normal' or 'human' (e.g. env.mode = 'fast')
        - 'fast' (default) will run as fast as possible (~75 fps) (best for simulation)
        - 'normal' will run at roughly 35 fps (easier for human to watch)
        - 'human' will let you play the game (keyboard only: Arrow Keys, '<', '>' and Ctrl)

    Ends when:
        - Vest is found
        - Timeout (1 minute - 2,100 frames)

    Actions:
        actions = [0] * 43
        actions[13] = 0  # MOVE_FORWARD
        actions[14] = 1  # TURN_RIGHT
        actions[15] = 0  # TURN_LEFT
    -----------------------------------------------------
    """

    def __init__(self):
        mission_level = 5  # Training Mission 6 is passed to DoomEnv as level 5
        super(DoomMyWayHomeEnv, self).__init__(mission_level)

View File

@@ -1,51 +0,0 @@
import logging
from gym.envs.doom import doom_env
logger = logging.getLogger(__name__)
class DoomPredictPositionEnv(doom_env.DoomEnv):
    """
    ------------ Training Mission 7 - Predict Position ------------
    This map is designed to train you on using a rocket launcher.
    It is a rectangular map with a monster on the opposite side. You need
    to use your rocket launcher to kill it. The rocket adds a delay between
    the moment it is fired and the moment it reaches the other side of the room.
    You need to predict the position of the monster to kill it.

    Allowed actions:
        [0]  - ATTACK     - Shoot weapon - Values 0 or 1
        [14] - TURN_RIGHT - Turn right   - Values 0 or 1
        [15] - TURN_LEFT  - Turn left    - Values 0 or 1
    Note: see controls.md for details

    Rewards:
        +  1    - Killing the monster
        -0.0001 - 35 times per second - Kill the monster faster!

    Goal: 0.5 point
        Kill the monster

    Hint: Missile launcher takes longer to load. You must wait a good second after the game starts
        before trying to fire it.

    Mode:
        - env.mode can be 'fast', 'normal' or 'human' (e.g. env.mode = 'fast')
        - 'fast' (default) will run as fast as possible (~75 fps) (best for simulation)
        - 'normal' will run at roughly 35 fps (easier for human to watch)
        - 'human' will let you play the game (keyboard only: Arrow Keys, '<', '>' and Ctrl)

    Ends when:
        - Monster is dead
        - Out of missile (you only have one)
        - Timeout (20 seconds - 700 frames)

    Actions:
        actions = [0] * 43
        actions[0] = 0   # ATTACK
        actions[14] = 1  # TURN_RIGHT
        actions[15] = 0  # TURN_LEFT
    -----------------------------------------------------
    """

    def __init__(self):
        mission_level = 6  # Training Mission 7 is passed to DoomEnv as level 6
        super(DoomPredictPositionEnv, self).__init__(mission_level)

View File

@@ -1,42 +0,0 @@
import logging
from gym.envs.doom import doom_env
logger = logging.getLogger(__name__)
class DoomTakeCoverEnv(doom_env.DoomEnv):
    """
    ------------ Training Mission 8 - Take Cover ------------
    This map is to train you on the damage of incoming missiles.
    It is a rectangular map with monsters firing missiles and fireballs
    at you. You need to survive as long as possible.

    Allowed actions:
        [10] - MOVE_RIGHT - Move to the right - Values 0 or 1
        [11] - MOVE_LEFT  - Move to the left  - Values 0 or 1
    Note: see controls.md for details

    Rewards:
        + 1 - 35 times per second - Survive as long as possible

    Goal: 750 points
        Survive for ~ 20 seconds

    Mode:
        - env.mode can be 'fast', 'normal' or 'human' (e.g. env.mode = 'fast')
        - 'fast' (default) will run as fast as possible (~75 fps) (best for simulation)
        - 'normal' will run at roughly 35 fps (easier for human to watch)
        - 'human' will let you play the game (keyboard only: Arrow Keys, '<', '>' and Ctrl)

    Ends when:
        - Player is dead (one or two fireballs should be enough to kill you)
        - Timeout (60 seconds - 2,100 frames)

    Actions:
        actions = [0] * 43
        actions[10] = 0  # MOVE_RIGHT
        actions[11] = 1  # MOVE_LEFT
    -----------------------------------------------------
    """

    def __init__(self):
        mission_level = 7  # Training Mission 8 is passed to DoomEnv as level 7
        super(DoomTakeCoverEnv, self).__init__(mission_level)

View File

@@ -1,116 +0,0 @@
"""
------------ Meta - Doom ------------
This is a meta map that combines all 9 Doom levels.
Levels:
0 - Doom Basic
1 - Doom Corridor
2 - Doom DefendCenter
3 - Doom DefendLine
4 - Doom HealthGathering
5 - Doom MyWayHome
6 - Doom PredictPosition
7 - Doom TakeCover
8 - Doom Deathmatch
Goal: 9,000 points
- Pass all levels
Scoring:
- Each level score has been standardized on a scale of 0 to 1,000
- The passing score for a level is 990 (99th percentile)
- A bonus of 450 (50 * 9 levels) is given if all levels are passed
- The score for a level is the average of the last 3 tries
- If there have been fewer than 3 tries for a level, the missing tries will have a score of 0
(e.g. if you score 1,000 on the first level on your first try, your level score will be (1,000+0+0)/ 3 = 333.33)
- The total score is the sum of the level scores, plus the bonus if you passed all levels.
e.g. List of tries:
- Level 0: 500
- Level 0: 750
- Level 0: 800
- Level 0: 1,000
- Level 1: 100
- Level 1: 200
Level score for level 0 = [1,000 + 800 + 750] / 3 = 850 (Average of last 3 tries)
Level score for level 1 = [200 + 100 + 0] / 3 = 100 (Tries not completed have a score of 0)
Level score for levels 2 to 8 = 0
Bonus score for passing all levels = 0
------------------------
Total score = 850 + 100 + 0 + 0 = 950
Changing Level:
- To unlock the next level, you must achieve a level score (avg of last 3 tries) of at least 600
(i.e. passing 60% of the last level)
- There are 2 ways to change level:
1) Manual method
- obs, reward, is_finished, info = env.step(action)
- if is_finished is true, you can call env.change_level(level_number) to change to an unlocked level
- you can see
the current level with info["LEVEL"]
the list of level scores with info["SCORES"],
the list of locked levels with info["LOCKED_LEVELS"]
your total score with info["TOTAL_REWARD"]
e.g.
import gym
env = gym.make('meta-Doom-v0')
env.reset()
total_score = 0
while total_score < 9000:
action = [0] * 43
obs, reward, is_finished, info = env.step(action)
env.render()
total_score = info["TOTAL_REWARD"]
if is_finished:
env.change_level(level_you_want)
2) Automatic change
- if you don't call change_level() and the level is finished, the system will automatically select the
unlocked level with the lowest level score (which is likely to be the last unlocked level)
e.g.
import gym
env = gym.make('meta-Doom-v0')
env.reset()
total_score = 0
while total_score < 9000:
action = [0] * 43
obs, reward, is_finished, info = env.step(action)
env.render()
total_score = info["TOTAL_REWARD"]
Allowed actions:
- Each level has its own allowed actions, see each level for details
Mode:
- env.mode can be 'fast', 'normal' or 'human' (e.g. env.mode = 'fast')
- 'fast' (default) will run as fast as possible (~75 fps) (best for simulation)
- 'normal' will run at roughly 35 fps (easier for human to watch)
- 'human' will let you play the game (keyboard: Arrow Keys, '<', '>' and Ctrl, mouse available for Doom Deathmatch)
e.g. to start in human mode:
import gym
env = gym.make('meta-Doom-v0')
env.mode='human'
env.reset()
num_episodes = 10
for i in range(num_episodes):
env.step([0] * 43)
Actions:
actions = [0] * 43
actions[0] = 0 # ATTACK
actions[1] = 0 # USE
[...]
actions[42] = 0 # MOVE_UP_DOWN_DELTA
A full list of possible actions is available in controls.md
-----------------------------------------------------
"""

View File

@@ -22,11 +22,6 @@ def should_skip_env_spec_for_tests(spec):
logger.warn("Skipping tests for box2d env {}".format(spec._entry_point)) logger.warn("Skipping tests for box2d env {}".format(spec._entry_point))
return True return True
# TODO: Issue #167 - Re-enable these tests after fixing DoomDeathmatch crash
if spec._entry_point.startswith('gym.envs.doom:DoomDeathmatchEnv'):
logger.warn("Skipping tests for DoomDeathmatchEnv {}".format(spec._entry_point))
return True
# Skip ConvergenceControl tests (the only env in parameter_tuning) according to pull #104 # Skip ConvergenceControl tests (the only env in parameter_tuning) according to pull #104
if spec._entry_point.startswith('gym.envs.parameter_tuning:'): if spec._entry_point.startswith('gym.envs.parameter_tuning:'):
logger.warn("Skipping tests for parameter_tuning env {}".format(spec._entry_point)) logger.warn("Skipping tests for parameter_tuning env {}".format(spec._entry_point))

View File

@@ -71,12 +71,6 @@ add_group(
description='Simple text environments to get you started.' description='Simple text environments to get you started.'
) )
# Scoreboard group under which every Doom task below is registered (group='doom').
add_group(
id='doom',
name='Doom',
description='Doom environments based on VizDoom.'
)
add_group( add_group(
id='safety', id='safety',
name='Safety', name='Safety',
@@ -864,316 +858,6 @@ In WaterWorld, the agent, a blue circle, must navigate around the world capturin
""" """
) )
# doom
# Scoreboard entry for the meta environment that combines all Doom levels.
add_task(
id='meta-Doom-v0',
group='doom',
experimental=True,
contributor='ppaquette',
summary='Mission #1 to #9 - Beat all 9 Doom missions.',
description="""
This is a meta map that combines all 9 Doom levels.
Levels:
- #0 Doom Basic
- #1 Doom Corridor
- #2 Doom DefendCenter
- #3 Doom DefendLine
- #4 Doom HealthGathering
- #5 Doom MyWayHome
- #6 Doom PredictPosition
- #7 Doom TakeCover
- #8 Doom Deathmatch
Goal: 9,000 points
- Pass all levels
Scoring:
- Each level score has been standardized on a scale of 0 to 1,000
- The passing score for a level is 990 (99th percentile)
- A bonus of 450 (50 * 9 levels) is given if all levels are passed
- The score for a level is the average of the last 3 tries
"""
)
# Scoreboard entry for DoomBasic-v0 (Mission #1).
add_task(
id='DoomBasic-v0',
group='doom',
experimental=True,
contributor='ppaquette',
summary='Mission #1 - Kill a single monster using your pistol.',
description="""
This map is rectangular with gray walls, ceiling and floor.
You are spawned in the center of the longer wall, and a red
circular monster is spawned randomly on the opposite wall.
You need to kill the monster (one bullet is enough).
Goal: 10 points
- Kill the monster in 3 secs with 1 shot
Rewards:
- Plus 101 pts for killing the monster
- Minus 5 pts for missing a shot
- Minus 1 pts every 0.028 secs
Ends when:
- Monster is dead
- Player is dead
- Timeout (10 seconds - 350 frames)
Allowed actions:
- ATTACK
- MOVE_RIGHT
- MOVE_LEFT
"""
)
# Scoreboard entry for DoomCorridor-v0 (Mission #2).
add_task(
id='DoomCorridor-v0',
group='doom',
experimental=True,
contributor='ppaquette',
summary='Mission #2 - Run as fast as possible to grab a vest.',
description="""
This map is designed to improve your navigation. There is a vest
at the end of the corridor, with 6 enemies (3 groups of 2). Your goal
is to get to the vest as soon as possible, without being killed.
Goal: 1,000 points
- Reach the vest (or get very close to it)
Rewards:
- Plus distance for getting closer to the vest
- Minus distance for getting further from the vest
- Minus 100 pts for getting killed
Ends when:
- Player touches vest
- Player is dead
- Timeout (1 minutes - 2,100 frames)
Allowed actions:
- ATTACK
- MOVE_RIGHT
- MOVE_LEFT
- MOVE_FORWARD
- TURN_RIGHT
- TURN_LEFT
"""
)
# Scoreboard entry for DoomDefendCenter-v0 (Mission #3).
# The summary previously read "coming at your from all sides".
add_task(
    id='DoomDefendCenter-v0',
    group='doom',
    experimental=True,
    contributor='ppaquette',
    summary='Mission #3 - Kill enemies coming at you from all sides.',
    description="""
This map is designed to teach you how to kill and how to stay alive.
You will also need to keep an eye on your ammunition level. You are only
rewarded for kills, so figure out how to stay alive.
The map is a circle with monsters. You are in the middle. Monsters will
respawn with additional health when killed. Kill as many as you can
before you run out of ammo.
Goal: 10 points
- Kill 11 monsters (you have 26 ammo)
Rewards:
- Plus 1 point for killing a monster
- Minus 1 point for getting killed
Ends when:
- Player is dead
- Timeout (60 seconds - 2100 frames)
Allowed actions:
- ATTACK
- TURN_RIGHT
- TURN_LEFT
"""
)
# Scoreboard entry for DoomDefendLine-v0 (Mission #4).
add_task(
id='DoomDefendLine-v0',
group='doom',
experimental=True,
contributor='ppaquette',
summary='Mission #4 - Kill enemies on the other side of the room.',
description="""
This map is designed to teach you how to kill and how to stay alive.
Your ammo will automatically replenish. You are only rewarded for kills,
so figure out how to stay alive.
The map is a rectangle with monsters on the other side. Monsters will
respawn with additional health when killed. Kill as many as you can
before they kill you. This map is harder than the previous.
Goal: 15 points
- Kill 16 monsters
Rewards:
- Plus 1 point for killing a monster
- Minus 1 point for getting killed
Ends when:
- Player is dead
- Timeout (60 seconds - 2100 frames)
Allowed actions:
- ATTACK
- TURN_RIGHT
- TURN_LEFT
"""
)
# Scoreboard entry for DoomHealthGathering-v0 (Mission #5).
# The summary previously read "Learn to grad medkits".
add_task(
    id='DoomHealthGathering-v0',
    group='doom',
    experimental=True,
    contributor='ppaquette',
    summary='Mission #5 - Learn to grab medkits to survive as long as possible.',
    description="""
This map is a guide on how to survive by collecting health packs.
It is a rectangle with green, acidic floor which hurts the player
periodically. There are also medkits spread around the map, and
additional kits will spawn at interval.
Goal: 1000 points
- Stay alive long enough for approx. 30 secs
Rewards:
- Plus 1 point every 0.028 secs
- Minus 100 pts for dying
Ends when:
- Player is dead
- Timeout (60 seconds - 2,100 frames)
Allowed actions:
- MOVE_FORWARD
- TURN_RIGHT
- TURN_LEFT
"""
)
# Scoreboard entry for DoomMyWayHome-v0 (Mission #6).
# Fixes two wording slips: "in one the 4 rooms" and "one of the room".
add_task(
    id='DoomMyWayHome-v0',
    group='doom',
    experimental=True,
    contributor='ppaquette',
    summary='Mission #6 - Find the vest in one of the 4 rooms.',
    description="""
This map is designed to improve navigational skills. It is a series of
interconnected rooms and 1 corridor with a dead end. Each room
has a separate color. There is a green vest in one of the rooms.
The vest is always in the same room. Player must find the vest.
Goal: 0.50 point
- Find the vest
Rewards:
- Plus 1 point for finding the vest
- Minus 0.0001 point every 0.028 secs
Ends when:
- Vest is found
- Timeout (1 minutes - 2,100 frames)
Allowed actions:
- MOVE_FORWARD
- TURN_RIGHT
- TURN_LEFT
"""
)
# Scoreboard entry for DoomPredictPosition-v0 (Mission #7).
add_task(
id='DoomPredictPosition-v0',
group='doom',
experimental=True,
contributor='ppaquette',
summary='Mission #7 - Learn how to kill an enemy with a rocket launcher.',
description="""
This map is designed to train you on using a rocket launcher.
It is a rectangular map with a monster on the opposite side. You need
to use your rocket launcher to kill it. The rocket adds a delay between
the moment it is fired and the moment it reaches the other side of the room.
You need to predict the position of the monster to kill it.
Goal: 0.5 point
- Kill the monster
Rewards:
- Plus 1 point for killing the monster
- Minus 0.0001 point every 0.028 secs
Ends when:
- Monster is dead
- Out of missile (you only have one)
- Timeout (20 seconds - 700 frames)
Hint: Wait 1 sec for the missile launcher to load.
Allowed actions:
- ATTACK
- TURN_RIGHT
- TURN_LEFT
"""
)
# Scoreboard entry for DoomTakeCover-v0 (Mission #8).
add_task(
id='DoomTakeCover-v0',
group='doom',
experimental=True,
contributor='ppaquette',
summary='Mission #8 - Survive as long as possible with enemies shooting at you.',
description="""
This map is to train you on the damage of incoming missiles.
It is a rectangular map with monsters firing missiles and fireballs
at you. You need to survive as long as possible.
Goal: 750 points
- Survive for approx. 20 seconds
Rewards:
- Plus 1 point every 0.028 secs
Ends when:
- Player is dead (1 or 2 fireballs is enough)
- Timeout (60 seconds - 2,100 frames)
Allowed actions:
- MOVE_RIGHT
- MOVE_LEFT
"""
)
# Scoreboard entry for DoomDeathmatch-v0 (Mission #9).
add_task(
id='DoomDeathmatch-v0',
group='doom',
experimental=True,
contributor='ppaquette',
summary='Mission #9 - Kill as many enemies as possible without being killed.',
description="""
Kill as many monsters as possible without being killed.
Goal: 20 points
- Kill 20 monsters
Rewards:
- Plus 1 point for killing a monster
Ends when:
- Player is dead
- Timeout (3 minutes - 6,300 frames)
Allowed actions:
- ALL
"""
)
# Safety # Safety
# interpretability envs # interpretability envs

View File

@@ -33,8 +33,8 @@ def create_rollout(spec):
Returns a bool which indicates whether the new rollout was added to the json file. Returns a bool which indicates whether the new rollout was added to the json file.
""" """
# Skip platform-dependent Doom environments # Skip platform-dependent
if should_skip_env_spec_for_tests(spec) or 'Doom' in spec.id: if should_skip_env_spec_for_tests(spec):
logger.warn("Skipping tests for {}".format(spec.id)) logger.warn("Skipping tests for {}".format(spec.id))
return False return False

View File

@@ -11,7 +11,6 @@ extras = {
'board_game' : ['pachi-py>=0.0.19'], 'board_game' : ['pachi-py>=0.0.19'],
'box2d': ['box2d-py'], 'box2d': ['box2d-py'],
'classic_control': ['PyOpenGL'], 'classic_control': ['PyOpenGL'],
'doom': ['doom_py>=0.0.11'],
'mujoco': ['mujoco_py>=0.4.3', 'imageio'], 'mujoco': ['mujoco_py>=0.4.3', 'imageio'],
'parameter_tuning': ['keras', 'theano'], 'parameter_tuning': ['keras', 'theano'],
} }
@@ -36,6 +35,6 @@ setup(name='gym',
'numpy>=1.10.4', 'requests>=2.0', 'six', 'pyglet>=1.2.0', 'numpy>=1.10.4', 'requests>=2.0', 'six', 'pyglet>=1.2.0',
], ],
extras_require=extras, extras_require=extras,
package_data={'gym': ['envs/mujoco/assets/*.xml', 'envs/classic_control/assets/*.png', 'envs/doom/assets/*.cfg']}, package_data={'gym': ['envs/mujoco/assets/*.xml', 'envs/classic_control/assets/*.png']},
tests_require=['nose2', 'mock'], tests_require=['nose2', 'mock'],
) )