mirror of
https://github.com/Farama-Foundation/Gymnasium.git
synced 2025-08-17 20:39:12 +00:00
Moved doom to gym_doom (#319)
This commit is contained in:
committed by
Greg Brockman
parent
3d29fb541b
commit
4b9984c00e
12
README.rst
12
README.rst
@@ -212,18 +212,6 @@ These are a variety of classic control tasks, which would appear in a typical re
|
||||
env.reset()
|
||||
env.render()
|
||||
|
||||
Doom
|
||||
---------------
|
||||
|
||||
These tasks take place inside a Doom game (via the VizDoom project). If you didn't do the full install, you will need to run ``pip install -e '.[doom]'``. You can get started with them via:
|
||||
|
||||
.. code:: python
|
||||
|
||||
import gym
|
||||
env = gym.make('DoomBasic-v0')
|
||||
env.reset()
|
||||
env.render()
|
||||
|
||||
MuJoCo
|
||||
------
|
||||
|
||||
|
@@ -311,84 +311,6 @@ register(
|
||||
},
|
||||
)
|
||||
|
||||
# Doom
|
||||
# ----------------------------------------
|
||||
|
||||
register(
|
||||
id='meta-Doom-v0',
|
||||
entry_point='gym.envs.doom:MetaDoomEnv',
|
||||
timestep_limit=999999,
|
||||
reward_threshold=9000.0,
|
||||
kwargs={
|
||||
'average_over': 3,
|
||||
'passing_grade': 600,
|
||||
'min_tries_for_avg': 3
|
||||
},
|
||||
)
|
||||
|
||||
register(
|
||||
id='DoomBasic-v0',
|
||||
entry_point='gym.envs.doom:DoomBasicEnv',
|
||||
timestep_limit=10000,
|
||||
reward_threshold=10.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='DoomCorridor-v0',
|
||||
entry_point='gym.envs.doom:DoomCorridorEnv',
|
||||
timestep_limit=10000,
|
||||
reward_threshold=1000.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='DoomDefendCenter-v0',
|
||||
entry_point='gym.envs.doom:DoomDefendCenterEnv',
|
||||
timestep_limit=10000,
|
||||
reward_threshold=10.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='DoomDefendLine-v0',
|
||||
entry_point='gym.envs.doom:DoomDefendLineEnv',
|
||||
timestep_limit=10000,
|
||||
reward_threshold=15.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='DoomHealthGathering-v0',
|
||||
entry_point='gym.envs.doom:DoomHealthGatheringEnv',
|
||||
timestep_limit=10000,
|
||||
reward_threshold=1000.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='DoomMyWayHome-v0',
|
||||
entry_point='gym.envs.doom:DoomMyWayHomeEnv',
|
||||
timestep_limit=10000,
|
||||
reward_threshold=0.5,
|
||||
)
|
||||
|
||||
register(
|
||||
id='DoomPredictPosition-v0',
|
||||
entry_point='gym.envs.doom:DoomPredictPositionEnv',
|
||||
timestep_limit=10000,
|
||||
reward_threshold=0.5,
|
||||
)
|
||||
|
||||
register(
|
||||
id='DoomTakeCover-v0',
|
||||
entry_point='gym.envs.doom:DoomTakeCoverEnv',
|
||||
timestep_limit=10000,
|
||||
reward_threshold=750.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='DoomDeathmatch-v0',
|
||||
entry_point='gym.envs.doom:DoomDeathmatchEnv',
|
||||
timestep_limit=10000,
|
||||
reward_threshold=20.0,
|
||||
)
|
||||
|
||||
# Debugging
|
||||
# ----------------------------------------
|
||||
|
||||
|
@@ -1,10 +0,0 @@
|
||||
from gym.envs.doom.doom_env import DoomEnv, MetaDoomEnv
|
||||
from gym.envs.doom.doom_basic import DoomBasicEnv
|
||||
from gym.envs.doom.doom_corridor import DoomCorridorEnv
|
||||
from gym.envs.doom.doom_defend_center import DoomDefendCenterEnv
|
||||
from gym.envs.doom.doom_defend_line import DoomDefendLineEnv
|
||||
from gym.envs.doom.doom_health_gathering import DoomHealthGatheringEnv
|
||||
from gym.envs.doom.doom_my_way_home import DoomMyWayHomeEnv
|
||||
from gym.envs.doom.doom_predict_position import DoomPredictPositionEnv
|
||||
from gym.envs.doom.doom_take_cover import DoomTakeCoverEnv
|
||||
from gym.envs.doom.doom_deathmatch import DoomDeathmatchEnv
|
@@ -1,59 +0,0 @@
|
||||
# Lines starting with # are treated as comments (or with whitespaces+#).
|
||||
# It doesn't matter if you use capital letters or not.
|
||||
# It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout.
|
||||
|
||||
# Rewards (Negative living reward means you lose points for staying alive, and need to finish asap)
|
||||
living_reward = -1
|
||||
|
||||
# Rendering options
|
||||
screen_format = BGR24
|
||||
render_hud = True
|
||||
render_crosshair = false
|
||||
render_weapon = true
|
||||
render_decals = false
|
||||
render_particles = false
|
||||
|
||||
# make episodes start after 14 tics (after unholstering the gun) (35 tics per second)
|
||||
episode_start_time = 14
|
||||
|
||||
# make episodes finish after 350 tics (10 seconds)
|
||||
episode_timeout = 350
|
||||
|
||||
# Available buttons
|
||||
available_buttons =
|
||||
{
|
||||
ATTACK
|
||||
MOVE_RIGHT
|
||||
MOVE_LEFT
|
||||
}
|
||||
|
||||
# Game variables that will be in the state
|
||||
available_game_variables =
|
||||
{
|
||||
KILLCOUNT
|
||||
ITEMCOUNT
|
||||
SECRETCOUNT
|
||||
FRAGCOUNT
|
||||
HEALTH
|
||||
ARMOR
|
||||
DEAD
|
||||
ON_GROUND
|
||||
ATTACK_READY
|
||||
ALTATTACK_READY
|
||||
|
||||
SELECTED_WEAPON
|
||||
SELECTED_WEAPON_AMMO
|
||||
|
||||
AMMO1
|
||||
AMMO2
|
||||
AMMO3
|
||||
AMMO4
|
||||
AMMO5
|
||||
AMMO6
|
||||
AMMO7
|
||||
AMMO8
|
||||
AMMO9
|
||||
AMMO0
|
||||
}
|
||||
|
||||
sound_enabled = false
|
@@ -1,62 +0,0 @@
|
||||
# Lines starting with # are treated as comments (or with whitespaces+#).
|
||||
# It doesn't matter if you use capital letters or not.
|
||||
# It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout.
|
||||
|
||||
# Rewards (Large penalty for being killed)
|
||||
death_penalty = 100
|
||||
|
||||
# Rendering options
|
||||
screen_format = BGR24
|
||||
render_hud = true
|
||||
render_crosshair = false
|
||||
render_weapon = true
|
||||
render_decals = false
|
||||
render_particles = false
|
||||
|
||||
# Episode Start Time (Immediate)
|
||||
episode_start_time = 0
|
||||
|
||||
# Make episodes finish after 2100 tics (1 minute)
|
||||
episode_timeout = 2100
|
||||
|
||||
# Available buttons
|
||||
available_buttons =
|
||||
{
|
||||
ATTACK
|
||||
MOVE_RIGHT
|
||||
MOVE_LEFT
|
||||
MOVE_FORWARD
|
||||
TURN_RIGHT
|
||||
TURN_LEFT
|
||||
}
|
||||
|
||||
# Game variables that will be in the state
|
||||
available_game_variables =
|
||||
{
|
||||
KILLCOUNT
|
||||
ITEMCOUNT
|
||||
SECRETCOUNT
|
||||
FRAGCOUNT
|
||||
HEALTH
|
||||
ARMOR
|
||||
DEAD
|
||||
ON_GROUND
|
||||
ATTACK_READY
|
||||
ALTATTACK_READY
|
||||
|
||||
SELECTED_WEAPON
|
||||
SELECTED_WEAPON_AMMO
|
||||
|
||||
AMMO1
|
||||
AMMO2
|
||||
AMMO3
|
||||
AMMO4
|
||||
AMMO5
|
||||
AMMO6
|
||||
AMMO7
|
||||
AMMO8
|
||||
AMMO9
|
||||
AMMO0
|
||||
}
|
||||
|
||||
sound_enabled = false
|
@@ -1,103 +0,0 @@
|
||||
# Lines starting with # are treated as comments (or with whitespaces+#).
|
||||
# It doesn't matter if you use capital letters or not.
|
||||
# It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout.
|
||||
|
||||
# Rendering options
|
||||
screen_format = BGR24
|
||||
render_hud = true
|
||||
render_crosshair = false
|
||||
render_weapon = true
|
||||
render_decals = false
|
||||
render_particles = false
|
||||
|
||||
# Immediate Start
|
||||
episode_start_time = 0
|
||||
|
||||
# Make episodes finish after 3 minutes (6300 ticks)
|
||||
episode_timeout = 6300
|
||||
|
||||
# Available buttons
|
||||
# Currently disabled: [33] - DROP_SELECTED_WEAPON
|
||||
available_buttons =
|
||||
{
|
||||
ATTACK
|
||||
USE
|
||||
JUMP
|
||||
CROUCH
|
||||
TURN180
|
||||
# N. B. this is misspelled in vizdoom
|
||||
ALATTACK
|
||||
RELOAD
|
||||
ZOOM
|
||||
|
||||
SPEED
|
||||
STRAFE
|
||||
|
||||
MOVE_RIGHT
|
||||
MOVE_LEFT
|
||||
MOVE_BACKWARD
|
||||
MOVE_FORWARD
|
||||
TURN_RIGHT
|
||||
TURN_LEFT
|
||||
LOOK_UP
|
||||
LOOK_DOWN
|
||||
MOVE_UP
|
||||
MOVE_DOWN
|
||||
LAND
|
||||
|
||||
SELECT_WEAPON1
|
||||
SELECT_WEAPON2
|
||||
SELECT_WEAPON3
|
||||
SELECT_WEAPON4
|
||||
SELECT_WEAPON5
|
||||
SELECT_WEAPON6
|
||||
SELECT_WEAPON7
|
||||
SELECT_WEAPON8
|
||||
SELECT_WEAPON9
|
||||
SELECT_WEAPON0
|
||||
|
||||
SELECT_NEXT_WEAPON
|
||||
SELECT_PREV_WEAPON
|
||||
|
||||
ACTIVATE_SELECTED_WEAPON
|
||||
SELECT_NEXT_ITEM
|
||||
SELECT_PREV_ITEM
|
||||
DROP_SELECTED_ITEM
|
||||
|
||||
LOOK_UP_DOWN_DELTA
|
||||
TURN_LEFT_RIGHT_DELTA
|
||||
MOVE_FORWARD_BACKWARD_DELTA
|
||||
MOVE_LEFT_RIGHT_DELTA
|
||||
MOVE_UP_DOWN_DELTA
|
||||
}
|
||||
|
||||
# Game variables that will be in the state
|
||||
available_game_variables =
|
||||
{
|
||||
KILLCOUNT
|
||||
ITEMCOUNT
|
||||
SECRETCOUNT
|
||||
FRAGCOUNT
|
||||
HEALTH
|
||||
ARMOR
|
||||
DEAD
|
||||
ON_GROUND
|
||||
ATTACK_READY
|
||||
ALTATTACK_READY
|
||||
|
||||
SELECTED_WEAPON
|
||||
SELECTED_WEAPON_AMMO
|
||||
|
||||
AMMO1
|
||||
AMMO2
|
||||
AMMO3
|
||||
AMMO4
|
||||
AMMO5
|
||||
AMMO6
|
||||
AMMO7
|
||||
AMMO8
|
||||
AMMO9
|
||||
AMMO0
|
||||
}
|
||||
|
||||
sound_enabled = false
|
@@ -1,59 +0,0 @@
|
||||
# Lines starting with # are treated as comments (or with whitespaces+#).
|
||||
# It doesn't matter if you use capital letters or not.
|
||||
# It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout.
|
||||
|
||||
# Rewards
|
||||
death_penalty = 1
|
||||
|
||||
# Rendering options
|
||||
screen_format = BGR24
|
||||
render_hud = true
|
||||
render_crosshair = false
|
||||
render_weapon = true
|
||||
render_decals = false
|
||||
render_particles = false
|
||||
|
||||
# make episodes start after 14 tics (after unholstering the gun) (35 tics per second)
|
||||
episode_start_time = 14
|
||||
|
||||
# Make episodes finish after 2100 tics (1 minute)
|
||||
episode_timeout = 2100
|
||||
|
||||
# Available buttons
|
||||
available_buttons =
|
||||
{
|
||||
ATTACK
|
||||
TURN_RIGHT
|
||||
TURN_LEFT
|
||||
}
|
||||
|
||||
# Game variables that will be in the state
|
||||
available_game_variables =
|
||||
{
|
||||
KILLCOUNT
|
||||
ITEMCOUNT
|
||||
SECRETCOUNT
|
||||
FRAGCOUNT
|
||||
HEALTH
|
||||
ARMOR
|
||||
DEAD
|
||||
ON_GROUND
|
||||
ATTACK_READY
|
||||
ALTATTACK_READY
|
||||
|
||||
SELECTED_WEAPON
|
||||
SELECTED_WEAPON_AMMO
|
||||
|
||||
AMMO1
|
||||
AMMO2
|
||||
AMMO3
|
||||
AMMO4
|
||||
AMMO5
|
||||
AMMO6
|
||||
AMMO7
|
||||
AMMO8
|
||||
AMMO9
|
||||
AMMO0
|
||||
}
|
||||
|
||||
sound_enabled = false
|
@@ -1,59 +0,0 @@
|
||||
# Lines starting with # are treated as comments (or with whitespaces+#).
|
||||
# It doesn't matter if you use capital letters or not.
|
||||
# It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout.
|
||||
|
||||
# Rewards
|
||||
death_penalty = 1
|
||||
|
||||
# Rendering options
|
||||
screen_format = BGR24
|
||||
render_hud = true
|
||||
render_crosshair = false
|
||||
render_weapon = true
|
||||
render_decals = false
|
||||
render_particles = false
|
||||
|
||||
# make episodes start after 14 tics (after unholstering the gun) (35 tics per second)
|
||||
episode_start_time = 14
|
||||
|
||||
# Make episodes finish after 2100 tics (1 minute)
|
||||
episode_timeout = 2100
|
||||
|
||||
# Available buttons
|
||||
available_buttons =
|
||||
{
|
||||
ATTACK
|
||||
TURN_RIGHT
|
||||
TURN_LEFT
|
||||
}
|
||||
|
||||
# Game variables that will be in the state
|
||||
available_game_variables =
|
||||
{
|
||||
KILLCOUNT
|
||||
ITEMCOUNT
|
||||
SECRETCOUNT
|
||||
FRAGCOUNT
|
||||
HEALTH
|
||||
ARMOR
|
||||
DEAD
|
||||
ON_GROUND
|
||||
ATTACK_READY
|
||||
ALTATTACK_READY
|
||||
|
||||
SELECTED_WEAPON
|
||||
SELECTED_WEAPON_AMMO
|
||||
|
||||
AMMO1
|
||||
AMMO2
|
||||
AMMO3
|
||||
AMMO4
|
||||
AMMO5
|
||||
AMMO6
|
||||
AMMO7
|
||||
AMMO8
|
||||
AMMO9
|
||||
AMMO0
|
||||
}
|
||||
|
||||
sound_enabled = false
|
@@ -1,60 +0,0 @@
|
||||
# Lines starting with # are treated as comments (or with whitespaces+#).
|
||||
# It doesn't matter if you use capital letters or not.
|
||||
# It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout.
|
||||
|
||||
# Rewards (Bonus for staying alive, large penalty for being killed)
|
||||
living_reward = 1
|
||||
death_penalty = 100
|
||||
|
||||
# Rendering options
|
||||
screen_format = BGR24
|
||||
render_hud = true
|
||||
render_crosshair = false
|
||||
render_weapon = false
|
||||
render_decals = false
|
||||
render_particles = false
|
||||
|
||||
# make episodes start after 14 tics (after unholstering the gun) (35 tics per second)
|
||||
episode_start_time = 14
|
||||
|
||||
# Make episodes finish after 2100 tics (1 minute)
|
||||
episode_timeout = 2100
|
||||
|
||||
# Available buttons
|
||||
available_buttons =
|
||||
{
|
||||
MOVE_FORWARD
|
||||
TURN_RIGHT
|
||||
TURN_LEFT
|
||||
}
|
||||
|
||||
# Game variables that will be in the state
|
||||
available_game_variables =
|
||||
{
|
||||
KILLCOUNT
|
||||
ITEMCOUNT
|
||||
SECRETCOUNT
|
||||
FRAGCOUNT
|
||||
HEALTH
|
||||
ARMOR
|
||||
DEAD
|
||||
ON_GROUND
|
||||
ATTACK_READY
|
||||
ALTATTACK_READY
|
||||
|
||||
SELECTED_WEAPON
|
||||
SELECTED_WEAPON_AMMO
|
||||
|
||||
AMMO1
|
||||
AMMO2
|
||||
AMMO3
|
||||
AMMO4
|
||||
AMMO5
|
||||
AMMO6
|
||||
AMMO7
|
||||
AMMO8
|
||||
AMMO9
|
||||
AMMO0
|
||||
}
|
||||
|
||||
sound_enabled = false
|
@@ -1,59 +0,0 @@
|
||||
# Lines starting with # are treated as comments (or with whitespaces+#).
|
||||
# It doesn't matter if you use capital letters or not.
|
||||
# It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout.
|
||||
|
||||
# Rewards
|
||||
living_reward = -0.0001
|
||||
|
||||
# Rendering options
|
||||
screen_format = BGR24
|
||||
render_hud = true
|
||||
render_crosshair = false
|
||||
render_weapon = false
|
||||
render_decals = false
|
||||
render_particles = false
|
||||
|
||||
# make episodes start after 14 tics (after unholstering the gun) (35 tics per second)
|
||||
episode_start_time = 14
|
||||
|
||||
# Make episodes finish after 2100 tics (1 minute)
|
||||
episode_timeout = 2100
|
||||
|
||||
# Available buttons
|
||||
available_buttons =
|
||||
{
|
||||
MOVE_FORWARD
|
||||
TURN_RIGHT
|
||||
TURN_LEFT
|
||||
}
|
||||
|
||||
# Game variables that will be in the state
|
||||
available_game_variables =
|
||||
{
|
||||
KILLCOUNT
|
||||
ITEMCOUNT
|
||||
SECRETCOUNT
|
||||
FRAGCOUNT
|
||||
HEALTH
|
||||
ARMOR
|
||||
DEAD
|
||||
ON_GROUND
|
||||
ATTACK_READY
|
||||
ALTATTACK_READY
|
||||
|
||||
SELECTED_WEAPON
|
||||
SELECTED_WEAPON_AMMO
|
||||
|
||||
AMMO1
|
||||
AMMO2
|
||||
AMMO3
|
||||
AMMO4
|
||||
AMMO5
|
||||
AMMO6
|
||||
AMMO7
|
||||
AMMO8
|
||||
AMMO9
|
||||
AMMO0
|
||||
}
|
||||
|
||||
sound_enabled = false
|
@@ -1,59 +0,0 @@
|
||||
# Lines starting with # are treated as comments (or with whitespaces+#).
|
||||
# It doesn't matter if you use capital letters or not.
|
||||
# It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout.
|
||||
|
||||
# Rewards
|
||||
living_reward = -0.0001
|
||||
|
||||
# Rendering options
|
||||
screen_format = BGR24
|
||||
render_hud = true
|
||||
render_crosshair = false
|
||||
render_weapon = true
|
||||
render_decals = false
|
||||
render_particles = false
|
||||
|
||||
# make episodes start after 16 tics (after unholstering the rocket launcher) (35 tics per second)
|
||||
episode_start_time = 16
|
||||
|
||||
# Make episodes finish after 700 tics (20 seconds)
|
||||
episode_timeout = 700
|
||||
|
||||
# Available buttons
|
||||
available_buttons =
|
||||
{
|
||||
ATTACK
|
||||
TURN_RIGHT
|
||||
TURN_LEFT
|
||||
}
|
||||
|
||||
# Game variables that will be in the state
|
||||
available_game_variables =
|
||||
{
|
||||
KILLCOUNT
|
||||
ITEMCOUNT
|
||||
SECRETCOUNT
|
||||
FRAGCOUNT
|
||||
HEALTH
|
||||
ARMOR
|
||||
DEAD
|
||||
ON_GROUND
|
||||
ATTACK_READY
|
||||
ALTATTACK_READY
|
||||
|
||||
SELECTED_WEAPON
|
||||
SELECTED_WEAPON_AMMO
|
||||
|
||||
AMMO1
|
||||
AMMO2
|
||||
AMMO3
|
||||
AMMO4
|
||||
AMMO5
|
||||
AMMO6
|
||||
AMMO7
|
||||
AMMO8
|
||||
AMMO9
|
||||
AMMO0
|
||||
}
|
||||
|
||||
sound_enabled = false
|
@@ -1,58 +0,0 @@
|
||||
# Lines starting with # are treated as comments (or with whitespaces+#).
|
||||
# It doesn't matter if you use capital letters or not.
|
||||
# It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout.
|
||||
|
||||
# Rewards
|
||||
living_reward = 1
|
||||
|
||||
# Rendering options
|
||||
screen_format = BGR24
|
||||
render_hud = true
|
||||
render_crosshair = false
|
||||
render_weapon = false
|
||||
render_decals = false
|
||||
render_particles = false
|
||||
|
||||
# make episodes start after 14 tics (after unholstering the gun) (35 tics per second)
|
||||
episode_start_time = 14
|
||||
|
||||
# Make episodes finish after 2100 tics (1 minute)
|
||||
episode_timeout = 2100
|
||||
|
||||
# Available buttons
|
||||
available_buttons =
|
||||
{
|
||||
MOVE_RIGHT
|
||||
MOVE_LEFT
|
||||
}
|
||||
|
||||
# Game variables that will be in the state
|
||||
available_game_variables =
|
||||
{
|
||||
KILLCOUNT
|
||||
ITEMCOUNT
|
||||
SECRETCOUNT
|
||||
FRAGCOUNT
|
||||
HEALTH
|
||||
ARMOR
|
||||
DEAD
|
||||
ON_GROUND
|
||||
ATTACK_READY
|
||||
ALTATTACK_READY
|
||||
|
||||
SELECTED_WEAPON
|
||||
SELECTED_WEAPON_AMMO
|
||||
|
||||
AMMO1
|
||||
AMMO2
|
||||
AMMO3
|
||||
AMMO4
|
||||
AMMO5
|
||||
AMMO6
|
||||
AMMO7
|
||||
AMMO8
|
||||
AMMO9
|
||||
AMMO0
|
||||
}
|
||||
|
||||
sound_enabled = false
|
@@ -1,83 +0,0 @@
|
||||
### Controls
|
||||
|
||||
Doom is usually played with a full keyboard, and multiple keys can be pressed at once.
|
||||
|
||||
To replicate this, we broke down the possible actions into 43 keys. Each key can be pressed (value of 1), or unpressed (value of 0).
|
||||
|
||||
The last 5 commands are deltas. [38] - LOOK_UP_DOWN_DELTA and [39] - TURN_LEFT_RIGHT_DELTA replicate mouse movement where values are in the
|
||||
range -10 to +10. They represent mouse movement over the x and y axes. (e.g. +5 for LOOK_UP_DOWN_DELTA will make the player look up 5 degrees)
|
||||
|
||||
[40] - MOVE_FORWARD_BACKWARD_DELTA, [41] - MOVE_LEFT_RIGHT_DELTA, and [42] - MOVE_UP_DOWN_DELTA represent the speed on an axis.
|
||||
Their values range from -100 to 100, where +100 is the maximum speed in one direction, and -100 is the maximum speed in the other.
|
||||
(e.g. MOVE_FORWARD_BACKWARD_DELTA of +100 will make the player move forward at 100% of max speed, and -100 will make the player
|
||||
move backward at 100% of max speed).
|
||||
|
||||
A list of values is expected to be passed as the action (e.g. [0, 1, 0, 0, 1, 0, .... ]).
|
||||
|
||||
Each mission is restricted on what actions can be performed, but the mapping is the same across all missions.
|
||||
|
||||
For example, if we want to [0] - ATTACK, [2] - JUMP, and [13] - MOVE_FORWARD at the same time, we would submit the following action:
|
||||
|
||||
```python
|
||||
action = [0] * 43
|
||||
action[0] = 1
|
||||
action[2] = 1
|
||||
action[13] = 1
|
||||
```
|
||||
|
||||
The full list of possible actions is:
|
||||
|
||||
* [0] - ATTACK - Shoot weapon - Values 0 or 1
|
||||
* [1] - USE - Use item - Values 0 or 1
|
||||
* [2] - JUMP - Jump - Values 0 or 1
|
||||
* [3] - CROUCH - Crouch - Values 0 or 1
|
||||
* [4] - TURN180 - Perform 180 turn - Values 0 or 1
|
||||
* [5] - ALT_ATTACK - Perform alternate attack - Values 0 or 1
|
||||
* [6] - RELOAD - Reload weapon - Values 0 or 1
|
||||
* [7] - ZOOM - Toggle zoom in/out - Values 0 or 1
|
||||
* [8] - SPEED - Run faster - Values 0 or 1
|
||||
* [9] - STRAFE - Strafe (moving sideways in a circle) - Values 0 or 1
|
||||
* [10] - MOVE_RIGHT - Move to the right - Values 0 or 1
|
||||
* [11] - MOVE_LEFT - Move to the left - Values 0 or 1
|
||||
* [12] - MOVE_BACKWARD - Move backward - Values 0 or 1
|
||||
* [13] - MOVE_FORWARD - Move forward - Values 0 or 1
|
||||
* [14] - TURN_RIGHT - Turn right - Values 0 or 1
|
||||
* [15] - TURN_LEFT - Turn left - Values 0 or 1
|
||||
* [16] - LOOK_UP - Look up - Values 0 or 1
|
||||
* [17] - LOOK_DOWN - Look down - Values 0 or 1
|
||||
* [18] - MOVE_UP - Move up - Values 0 or 1
|
||||
* [19] - MOVE_DOWN - Move down - Values 0 or 1
|
||||
* [20] - LAND - Land (e.g. drop from ladder) - Values 0 or 1
|
||||
* [21] - SELECT_WEAPON1 - Select weapon 1 - Values 0 or 1
|
||||
* [22] - SELECT_WEAPON2 - Select weapon 2 - Values 0 or 1
|
||||
* [23] - SELECT_WEAPON3 - Select weapon 3 - Values 0 or 1
|
||||
* [24] - SELECT_WEAPON4 - Select weapon 4 - Values 0 or 1
|
||||
* [25] - SELECT_WEAPON5 - Select weapon 5 - Values 0 or 1
|
||||
* [26] - SELECT_WEAPON6 - Select weapon 6 - Values 0 or 1
|
||||
* [27] - SELECT_WEAPON7 - Select weapon 7 - Values 0 or 1
|
||||
* [28] - SELECT_WEAPON8 - Select weapon 8 - Values 0 or 1
|
||||
* [29] - SELECT_WEAPON9 - Select weapon 9 - Values 0 or 1
|
||||
* [30] - SELECT_WEAPON0 - Select weapon 0 - Values 0 or 1
|
||||
* [31] - SELECT_NEXT_WEAPON - Select next weapon - Values 0 or 1
|
||||
* [32] - SELECT_PREV_WEAPON - Select previous weapon - Values 0 or 1
|
||||
* [33] - DROP_SELECTED_WEAPON - Drop selected weapon - Values 0 or 1
|
||||
* [34] - ACTIVATE_SELECTED_WEAPON - Activate selected weapon - Values 0 or 1
|
||||
* [35] - SELECT_NEXT_ITEM - Select next item - Values 0 or 1
|
||||
* [36] - SELECT_PREV_ITEM - Select previous item - Values 0 or 1
|
||||
* [37] - DROP_SELECTED_ITEM - Drop selected item - Values 0 or 1
|
||||
* [38] - LOOK_UP_DOWN_DELTA - Look Up/Down - Range of -10 to 10 (integer).
|
||||
- Value is the angle - +5 will look up 5 degrees, -5 will look down 5 degrees
|
||||
* [39] - TURN_LEFT_RIGHT_DELTA - Turn Left/Right - Range of -10 to 10 (integer).
|
||||
- Value is the angle - +5 will turn right 5 degrees, -5 will turn left 5 degrees
|
||||
* [40] - MOVE_FORWARD_BACKWARD_DELTA - Speed of forward/backward movement - Range -100 to 100 (integer).
|
||||
- +100 is max speed forward, -100 is max speed backward, 0 is no movement
|
||||
* [41] - MOVE_LEFT_RIGHT_DELTA - Speed of left/right movement - Range -100 to 100 (integer).
|
||||
- +100 is max speed right, -100 is max speed left, 0 is no movement
|
||||
* [42] - MOVE_UP_DOWN_DELTA - Speed of up/down movement - Range -100 to 100 (integer).
|
||||
- +100 is max speed up, -100 is max speed down, 0 is no movement
|
||||
|
||||
To control the player in 'human' mode, the following keys should work:
|
||||
|
||||
* Arrow Keys for MOVE_FORWARD, MOVE_BACKWARD, TURN_LEFT, TURN_RIGHT
|
||||
* '<' and '>' for MOVE_RIGHT and MOVE_LEFT
|
||||
* Ctrl (or left mouse click) for ATTACK
|
@@ -1,48 +0,0 @@
|
||||
import logging
|
||||
from gym.envs.doom import doom_env
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DoomBasicEnv(doom_env.DoomEnv):
|
||||
"""
|
||||
------------ Training Mission 1 - Basic ------------
|
||||
This map is rectangular with gray walls, ceiling and floor.
|
||||
You are spawned in the center of the longer wall, and a red
|
||||
circular monster is spawned randomly on the opposite wall.
|
||||
You need to kill the monster (one bullet is enough).
|
||||
|
||||
Allowed actions:
|
||||
[0] - ATTACK - Shoot weapon - Values 0 or 1
|
||||
[10] - MOVE_RIGHT - Move to the right - Values 0 or 1
|
||||
[11] - MOVE_LEFT - Move to the left - Values 0 or 1
|
||||
Note: see controls.md for details
|
||||
|
||||
Rewards:
|
||||
+101 - Killing the monster
|
||||
- 5 - Missing a shot
|
||||
- 1 - 35 times per second - Kill the monster faster!
|
||||
|
||||
Goal: 10 points
|
||||
Kill the monster in 3 secs with 1 shot
|
||||
|
||||
Mode:
|
||||
- env.mode can be 'fast', 'normal' or 'human' (e.g. env.mode = 'fast')
|
||||
- 'fast' (default) will run as fast as possible (~75 fps) (best for simulation)
|
||||
- 'normal' will run at roughly 35 fps (easier for human to watch)
|
||||
- 'human' will let you play the game (keyboard only: Arrow Keys, '<', '>' and Ctrl)
|
||||
|
||||
Ends when:
|
||||
- Monster is dead
|
||||
- Player is dead
|
||||
- Timeout (10 seconds - 350 frames)
|
||||
|
||||
Actions:
|
||||
actions = [0] * 43
|
||||
actions[0] = 0 # ATTACK
|
||||
actions[10] = 1 # MOVE_RIGHT
|
||||
actions[11] = 0 # MOVE_LEFT
|
||||
-----------------------------------------------------
|
||||
"""
|
||||
def __init__(self):
|
||||
super(DoomBasicEnv, self).__init__(0)
|
@@ -1,53 +0,0 @@
|
||||
import logging
|
||||
from gym.envs.doom import doom_env
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DoomCorridorEnv(doom_env.DoomEnv):
|
||||
"""
|
||||
------------ Training Mission 2 - Corridor ------------
|
||||
This map is designed to improve your navigation. There is a vest
|
||||
at the end of the corridor, with 6 enemies (3 groups of 2). Your goal
|
||||
is to get to the vest as soon as possible, without being killed.
|
||||
|
||||
Allowed actions:
|
||||
[0] - ATTACK - Shoot weapon - Values 0 or 1
|
||||
[10] - MOVE_RIGHT - Move to the right - Values 0 or 1
|
||||
[11] - MOVE_LEFT - Move to the left - Values 0 or 1
|
||||
[13] - MOVE_FORWARD - Move forward - Values 0 or 1
|
||||
[14] - TURN_RIGHT - Turn right - Values 0 or 1
|
||||
[15] - TURN_LEFT - Turn left - Values 0 or 1
|
||||
Note: see controls.md for details
|
||||
|
||||
Rewards:
|
||||
+ dX - For getting closer to the vest
|
||||
- dX - For getting further from the vest
|
||||
-100 - Penalty for being killed
|
||||
|
||||
Goal: 1,000 points
|
||||
Reach the vest (or at least get past the guards in the 3rd group)
|
||||
|
||||
Mode:
|
||||
- env.mode can be 'fast', 'normal' or 'human' (e.g. env.mode = 'fast')
|
||||
- 'fast' (default) will run as fast as possible (~75 fps) (best for simulation)
|
||||
- 'normal' will run at roughly 35 fps (easier for human to watch)
|
||||
- 'human' will let you play the game (keyboard only: Arrow Keys, '<', '>' and Ctrl)
|
||||
|
||||
Ends when:
|
||||
- Player touches vest
|
||||
- Player is dead
|
||||
- Timeout (1 minute - 2,100 frames)
|
||||
|
||||
Actions:
|
||||
actions = [0] * 43
|
||||
actions[0] = 0 # ATTACK
|
||||
actions[10] = 1 # MOVE_RIGHT
|
||||
actions[11] = 0 # MOVE_LEFT
|
||||
actions[13] = 0 # MOVE_FORWARD
|
||||
actions[14] = 0 # TURN_RIGHT
|
||||
actions[15] = 0 # TURN_LEFT
|
||||
-----------------------------------------------------
|
||||
"""
|
||||
def __init__(self):
|
||||
super(DoomCorridorEnv, self).__init__(1)
|
@@ -1,45 +0,0 @@
|
||||
import logging
|
||||
from gym.envs.doom import doom_env
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DoomDeathmatchEnv(doom_env.DoomEnv):
|
||||
"""
|
||||
------------ Final Mission - Deathmatch ------------
|
||||
Kill as many monsters as possible without being killed.
|
||||
|
||||
Allowed actions:
|
||||
ALL
|
||||
Note: see controls.md for details
|
||||
|
||||
Rewards:
|
||||
+1 - Killing a monster
|
||||
|
||||
Goal: 20 points
|
||||
Kill 20 monsters
|
||||
|
||||
Mode:
|
||||
- env.mode can be 'fast', 'normal' or 'human' (e.g. env.mode = 'fast')
|
||||
- 'fast' (default) will run as fast as possible (~75 fps) (best for simulation)
|
||||
- 'normal' will run at roughly 35 fps (easier for human to watch)
|
||||
- 'human' will let you play the game (mouse and full keyboard)
|
||||
|
||||
Ends when:
|
||||
- Player is dead
|
||||
- Timeout (3 minutes - 6,300 frames)
|
||||
|
||||
Actions:
|
||||
actions = [0] * 43
|
||||
actions[0] = 0 # ATTACK
|
||||
actions[1] = 0 # USE
|
||||
[...]
|
||||
actions[42] = 0 # MOVE_UP_DOWN_DELTA
|
||||
A full list of possible actions is available in controls.md
|
||||
|
||||
Note:
|
||||
actions[33] (DROP_SELECTED_WEAPON) is currently disabled, because it causes VizDoom to crash
|
||||
-----------------------------------------------------
|
||||
"""
|
||||
def __init__(self):
|
||||
super(DoomDeathmatchEnv, self).__init__(8)
|
@@ -1,49 +0,0 @@
|
||||
import logging
|
||||
from gym.envs.doom import doom_env
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DoomDefendCenterEnv(doom_env.DoomEnv):
|
||||
"""
|
||||
------------ Training Mission 3 - Defend the Center ------------
|
||||
This map is designed to teach you how to kill and how to stay alive.
|
||||
You will also need to keep an eye on your ammunition level. You are only
|
||||
rewarded for kills, so figure out how to stay alive.
|
||||
|
||||
The map is a circle with monsters. You are in the middle. Monsters will
|
||||
respawn with additional health when killed. Kill as many as you can
|
||||
before you run out of ammo.
|
||||
|
||||
Allowed actions:
|
||||
[0] - ATTACK - Shoot weapon - Values 0 or 1
|
||||
[14] - TURN_RIGHT - Turn right - Values 0 or 1
|
||||
[15] - TURN_LEFT - Turn left - Values 0 or 1
|
||||
Note: see controls.md for details
|
||||
|
||||
Rewards:
|
||||
+ 1 - Killing a monster
|
||||
- 1 - Penalty for being killed
|
||||
|
||||
Goal: 10 points
|
||||
Kill 11 monsters (you have 26 ammo)
|
||||
|
||||
Mode:
|
||||
- env.mode can be 'fast', 'normal' or 'human' (e.g. env.mode = 'fast')
|
||||
- 'fast' (default) will run as fast as possible (~75 fps) (best for simulation)
|
||||
- 'normal' will run at roughly 35 fps (easier for human to watch)
|
||||
- 'human' will let you play the game (keyboard only: Arrow Keys, '<', '>' and Ctrl)
|
||||
|
||||
Ends when:
|
||||
- Player is dead
|
||||
- Timeout (60 seconds - 2100 frames)
|
||||
|
||||
Actions:
|
||||
actions = [0] * 43
|
||||
actions[0] = 0 # ATTACK
|
||||
actions[14] = 1 # TURN_RIGHT
|
||||
actions[15] = 0 # TURN_LEFT
|
||||
-----------------------------------------------------
|
||||
"""
|
||||
def __init__(self):
|
||||
super(DoomDefendCenterEnv, self).__init__(2)
|
@@ -1,49 +0,0 @@
|
||||
import logging
|
||||
from gym.envs.doom import doom_env
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DoomDefendLineEnv(doom_env.DoomEnv):
|
||||
"""
|
||||
------------ Training Mission 4 - Defend the Line ------------
|
||||
This map is designed to teach you how to kill and how to stay alive.
|
||||
Your ammo will automatically replenish. You are only rewarded for kills,
|
||||
so figure out how to stay alive.
|
||||
|
||||
The map is a rectangle with monsters on the other side. Monsters will
|
||||
respawn with additional health when killed. Kill as many as you can
|
||||
before they kill you. This map is harder than the previous.
|
||||
|
||||
Allowed actions:
|
||||
[0] - ATTACK - Shoot weapon - Values 0 or 1
|
||||
[14] - TURN_RIGHT - Turn right - Values 0 or 1
|
||||
[15] - TURN_LEFT - Turn left - Values 0 or 1
|
||||
Note: see controls.md for details
|
||||
|
||||
Rewards:
|
||||
+ 1 - Killing a monster
|
||||
- 1 - Penalty for being killed
|
||||
|
||||
Goal: 15 points
|
||||
Kill 16 monsters
|
||||
|
||||
Mode:
|
||||
- env.mode can be 'fast', 'normal' or 'human' (e.g. env.mode = 'fast')
|
||||
- 'fast' (default) will run as fast as possible (~75 fps) (best for simulation)
|
||||
- 'normal' will run at roughly 35 fps (easier for human to watch)
|
||||
- 'human' will let you play the game (keyboard only: Arrow Keys, '<', '>' and Ctrl)
|
||||
|
||||
Ends when:
|
||||
- Player is dead
|
||||
- Timeout (60 seconds - 2100 frames)
|
||||
|
||||
Actions:
|
||||
actions = [0] * 43
|
||||
actions[0] = 0 # ATTACK
|
||||
actions[14] = 1 # TURN_RIGHT
|
||||
actions[15] = 0 # TURN_LEFT
|
||||
-----------------------------------------------------
|
||||
"""
|
||||
def __init__(self):
|
||||
super(DoomDefendLineEnv, self).__init__(3)
|
@@ -1,415 +0,0 @@
|
||||
import logging
|
||||
import os
|
||||
from time import sleep
|
||||
|
||||
import numpy as np
|
||||
|
||||
import gym
|
||||
from gym import utils, spaces
|
||||
from gym.utils import seeding
|
||||
|
||||
# Import the optional Doom bindings, turning a missing install into gym's
# DependencyNotInstalled error with an installation hint.
try:
    import doom_py
    from doom_py import DoomGame, Mode, Button, GameVariable, ScreenFormat, ScreenResolution, Loader
except ImportError as e:
    # The two literals are joined by implicit concatenation (no `+`) so that
    # .format() applies to the whole message. With `+`, the method call binds
    # tighter than the operator and only the second literal is formatted,
    # leaving a raw "{}" in place of the ImportError text.
    raise gym.error.DependencyNotInstalled(
        "{}. (HINT: you can install Doom dependencies "
        "with 'pip install gym[doom]'.)".format(e))
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Constants
|
||||
NUM_ACTIONS = 43
|
||||
NUM_LEVELS = 9
|
||||
CONFIG = 0
|
||||
SCENARIO = 1
|
||||
MAP = 2
|
||||
DIFFICULTY = 3
|
||||
ACTIONS = 4
|
||||
MIN_SCORE = 5
|
||||
TARGET_SCORE = 6
|
||||
|
||||
# Format (config, scenario, map, difficulty, actions, min, target)
|
||||
DOOM_SETTINGS = [
|
||||
['basic.cfg', 'basic.wad', 'map01', 5, [0, 10, 11], -485, 10], # 0 - Basic
|
||||
['deadly_corridor.cfg', 'deadly_corridor.wad', '', 1, [0, 10, 11, 13, 14, 15], -120, 1000], # 1 - Corridor
|
||||
['defend_the_center.cfg', 'defend_the_center.wad', '', 5, [0, 14, 15], -1, 10], # 2 - DefendCenter
|
||||
['defend_the_line.cfg', 'defend_the_line.wad', '', 5, [0, 14, 15], -1, 15], # 3 - DefendLine
|
||||
['health_gathering.cfg', 'health_gathering.wad', 'map01', 5, [13, 14, 15], 0, 1000], # 4 - HealthGathering
|
||||
['my_way_home.cfg', 'my_way_home.wad', '', 5, [13, 14, 15], -0.22, 0.5], # 5 - MyWayHome
|
||||
['predict_position.cfg', 'predict_position.wad', 'map01', 3, [0, 14, 15], -0.075, 0.5], # 6 - PredictPosition
|
||||
['take_cover.cfg', 'take_cover.wad', 'map01', 5, [10, 11], 0, 750], # 7 - TakeCover
|
||||
['deathmatch.cfg', 'deathmatch.wad', '', 5, [x for x in range(NUM_ACTIONS) if x != 33], 0, 20] # 8 - Deathmatch
|
||||
]
|
||||
|
||||
|
||||
class DoomEnv(gym.Env, utils.EzPickle):
    """Gym environment for a single Doom level, driven through ``doom_py``.

    ``level`` is an index into ``DOOM_SETTINGS``; the selected row provides
    the config file, scenario, map, difficulty and the indices of the
    buttons the level accepts.  The game is only configured and started by
    ``_load_level``, which ``_reset`` calls when the game is not initialized.

    ``self.mode`` can be 'fast' (default), 'normal' or 'human'.  In 'human'
    mode the game window is shown in spectator mode and ``_load_level``
    plays a whole episode interactively before returning.
    """

    metadata = {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 35}

    # Keys written to the ``info`` dict; position i is read from
    # state.game_variables[i].
    _GAME_VARIABLE_NAMES = (
        'KILLCOUNT', 'ITEMCOUNT', 'SECRETCOUNT', 'FRAGCOUNT', 'HEALTH',
        'ARMOR', 'DEAD', 'ON_GROUND', 'ATTACK_READY', 'ALTATTACK_READY',
        'SELECTED_WEAPON', 'SELECTED_WEAPON_AMMO',
        'AMMO1', 'AMMO2', 'AMMO3', 'AMMO4', 'AMMO5',
        'AMMO6', 'AMMO7', 'AMMO8', 'AMMO9', 'AMMO0',
    )

    def __init__(self, level):
        """Create the environment for ``level`` (an index into DOOM_SETTINGS)."""
        utils.EzPickle.__init__(self)
        self.previous_level = -1
        self.level = level
        self.game = DoomGame()
        self.loader = Loader()
        self.doom_dir = os.path.dirname(os.path.abspath(__file__))
        self.mode = 'fast'                          # 'human', 'fast' or 'normal'
        self.no_render = False                      # To disable double rendering in human mode
        self.viewer = None
        self.is_initialized = False                 # Indicates that reset() has been called
        self.curr_seed = 0
        self.action_space = spaces.MultiDiscrete([[0, 1]] * 38 + [[-10, 10]] * 2 + [[-100, 100]] * 3)
        self.allowed_actions = list(range(NUM_ACTIONS))
        self._seed()
        self._configure()

    def _configure(self, screen_resolution=ScreenResolution.RES_640X480):
        """Select the screen resolution and rebuild ``observation_space``.

        Only RES_640X480 and RES_160X120 are recognised; any other value
        leaves the size attributes as they were before the call.
        """
        # Often agents end up downsampling the observations. Configuring Doom to
        # return a smaller image yields significant (~10x) speedups
        if screen_resolution == ScreenResolution.RES_640X480:
            self.screen_height = 480
            self.screen_width = 640
            self.screen_resolution = ScreenResolution.RES_640X480
        elif screen_resolution == ScreenResolution.RES_160X120:
            self.screen_height = 120
            self.screen_width = 160
            self.screen_resolution = ScreenResolution.RES_160X120

        self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_height, self.screen_width, 3))

    def _load_level(self):
        """(Re)create the game for ``self.level``, start an episode and return the first observation.

        In 'human' mode the whole episode is played interactively and an
        all-zero image is returned instead of a frame.
        """
        # A running game has to be closed and replaced before it can be reconfigured
        if self.is_initialized:
            self.is_initialized = False
            self.game.close()
            self.game = DoomGame()

        # Loading Paths (is_initialized is always False at this point, so
        # the paths are set on every load)
        self.game.set_vizdoom_path(self.loader.get_vizdoom_path())
        self.game.set_doom_game_path(self.loader.get_freedoom_path())

        # Common settings
        settings = DOOM_SETTINGS[self.level]
        self._closed = False
        self.game.load_config(os.path.join(self.doom_dir, 'assets/%s' % settings[CONFIG]))
        self.game.set_doom_scenario_path(self.loader.get_scenario_path(settings[SCENARIO]))
        if settings[MAP] != '':
            self.game.set_doom_map(settings[MAP])
        self.game.set_doom_skill(settings[DIFFICULTY])
        self.previous_level = self.level
        self.allowed_actions = settings[ACTIONS]
        self.game.set_screen_resolution(self.screen_resolution)

        human_mode = 'human' == self.mode
        if human_mode:
            # Human mode: visible window, the player drives the game
            self.game.add_game_args('+freelook 1')
            self.game.set_window_visible(True)
            self.game.set_mode(Mode.SPECTATOR)
        else:
            # Algo mode: hidden window, actions come from step()
            self.game.set_window_visible(False)
            self.game.set_mode(Mode.PLAYER)
        self.no_render = human_mode

        self.game.init()
        self._start_episode()
        self.is_initialized = True

        if human_mode:
            self._play_human_mode()
            return np.zeros(shape=self.observation_space.shape, dtype=np.uint8)
        return self.game.get_state().image_buffer.copy()

    def _start_episode(self):
        """Start a new episode, applying a pending seed (if any) exactly once."""
        if self.curr_seed > 0:
            self.game.set_seed(self.curr_seed)
            self.curr_seed = 0
        self.game.new_episode()
        return

    def _play_human_mode(self):
        """Advance the game until the episode ends, printing the state after every frame."""
        while not self.game.is_episode_finished():
            self.game.advance_action()
            state = self.game.get_state()
            total_reward = self.game.get_total_reward()
            info = self._get_game_variables(state.game_variables)
            info["TOTAL_REWARD"] = round(total_reward, 4)
            print('===============================')
            print('State: #' + str(state.number))
            print('Action: \t' + str(self.game.get_last_action()) + '\t (=> only allowed actions)')
            print('Reward: \t' + str(self.game.get_last_reward()))
            print('Total Reward: \t' + str(total_reward))
            print('Variables: \n' + str(info))
            sleep(0.02857)  # 35 fps = 0.02857 sleep between frames
        print('===============================')
        print('Done')
        return

    def _step(self, action):
        """Apply ``action`` and return ``(observation, reward, is_finished, info)``.

        ``action`` should hold NUM_ACTIONS values.  A shorter sequence is
        padded with zeros and a longer one is truncated (a warning is logged
        in both cases).  Only the entries listed in ``self.allowed_actions``
        are forwarded to the game.  If the game is no longer running, an
        all-zero observation, zero reward, ``True`` and an empty info dict
        are returned.
        """
        if NUM_ACTIONS != len(action):
            supplied = list(action)
            if len(supplied) < NUM_ACTIONS:
                logger.warning('Doom action list must contain %d items. Padding missing items with 0' % NUM_ACTIONS)
            else:
                # Previously an over-long action raised IndexError while being copied
                logger.warning('Doom action list must contain %d items. Ignoring extra items' % NUM_ACTIONS)
            action = (supplied + [0] * NUM_ACTIONS)[:NUM_ACTIONS]

        # action is a list of numbers but DoomGame.make_action expects a list of ints
        if len(self.allowed_actions) > 0:
            list_action = [int(action[action_idx]) for action_idx in self.allowed_actions]
        else:
            list_action = [int(x) for x in action]

        try:
            reward = self.game.make_action(list_action)
            state = self.game.get_state()
            info = self._get_game_variables(state.game_variables)
            info["TOTAL_REWARD"] = round(self.game.get_total_reward(), 4)

            if self.game.is_episode_finished():
                is_finished = True
                return np.zeros(shape=self.observation_space.shape, dtype=np.uint8), reward, is_finished, info
            else:
                is_finished = False
                return state.image_buffer.copy(), reward, is_finished, info

        except doom_py.vizdoom.ViZDoomIsNotRunningException:
            return np.zeros(shape=self.observation_space.shape, dtype=np.uint8), 0, True, {}

    def _reset(self):
        """Start a new episode, loading the level first when the game is not running."""
        if self.is_initialized and not self._closed:
            self._start_episode()
            return self.game.get_state().image_buffer.copy()
        else:
            return self._load_level()

    def _render(self, mode='human', close=False):
        """Render the current frame.

        With ``close=True`` the viewer (if any) is closed.  In 'rgb_array'
        mode the current image is returned; in 'human' mode it is shown in a
        SimpleImageViewer unless the game window is already visible
        (``self.no_render``).
        """
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None      # If we don't None out this reference pyglet becomes unhappy
            return
        try:
            if 'human' == mode and self.no_render:
                return
            state = self.game.get_state()
            img = state.image_buffer
            # VizDoom returns None if the episode is finished, let's make it
            # an empty image so the recorder doesn't stop
            if img is None:
                img = np.zeros(shape=self.observation_space.shape, dtype=np.uint8)
            if mode == 'rgb_array':
                return img
            elif mode == 'human':
                # Compare by value: "is" only matched when the caller's string
                # happened to be the very same interned object.
                from gym.envs.classic_control import rendering
                if self.viewer is None:
                    self.viewer = rendering.SimpleImageViewer()
                self.viewer.imshow(img)
                if 'normal' == self.mode:
                    sleep(0.02857)  # 35 fps = 0.02857 sleep between frames
        except doom_py.vizdoom.ViZDoomIsNotRunningException:
            pass  # Doom has been closed

    def _close(self):
        """Close the underlying game."""
        self.game.close()

    def _seed(self, seed=None):
        """Derive a 32-bit seed from ``seed``; it is applied at the next episode start."""
        self.curr_seed = seeding.hash_seed(seed) % 2 ** 32
        return [self.curr_seed]

    def _get_game_variables(self, state_variables):
        """Build the ``info`` dict from the game variables of a state.

        The dict always contains "LEVEL".  When ``state_variables`` is not
        None, one entry per name in ``_GAME_VARIABLE_NAMES`` is added, read
        by position (an IndexError is raised if the sequence is too short).
        """
        info = {
            "LEVEL": self.level
        }
        if state_variables is None:
            return info
        for position, name in enumerate(self._GAME_VARIABLE_NAMES):
            info[name] = state_variables[position]
        return info
|
||||
|
||||
|
||||
class MetaDoomEnv(DoomEnv):
    """Meta environment that chains all NUM_LEVELS Doom levels.

    Every episode reward is standardized to a 0-1,000 scale and stored per
    level; the environment reward is the change in the sum of the per-level
    averages.  Levels start locked (except level 0) and are unlocked when
    the preceding level's average reaches ``passing_grade``.
    """

    def __init__(self, average_over=10, passing_grade=600, min_tries_for_avg=5):
        """
        Args:
            average_over: maximum number of most recent scores averaged per level.
            passing_grade: level average required to unlock the next level.
            min_tries_for_avg: length of the per-level score history; a new
                history is padded with zeros up to this length.
        """
        super(MetaDoomEnv, self).__init__(0)
        self.average_over = average_over
        self.passing_grade = passing_grade
        self.min_tries_for_avg = min_tries_for_avg  # Need to use at least this number of tries to calc avg
        # One independent history per level ([[]] * n would alias a single list)
        self.scores = [[] for _ in range(NUM_LEVELS)]
        self.locked_levels = [True] * NUM_LEVELS    # Locking all levels but the first
        self.locked_levels[0] = False
        self.total_reward = 0
        self.find_new_level = False                 # Indicates that we need a level change
        self.is_new_episode = False                 # Set to True by _start_episode()
        self._unlock_levels()

    def _play_human_mode(self):
        """Advance the game until the episode ends, printing the meta scores after every frame."""
        while not self.game.is_episode_finished():
            self.game.advance_action()
            state = self.game.get_state()
            episode_reward = self.game.get_total_reward()
            (reward, self.total_reward) = self._calculate_reward(episode_reward, self.total_reward)
            info = self._get_game_variables(state.game_variables)
            info["SCORES"] = self.get_scores()
            info["TOTAL_REWARD"] = round(self.total_reward, 4)
            info["LOCKED_LEVELS"] = self.locked_levels
            print('===============================')
            print('State: #' + str(state.number))
            print('Action: \t' + str(self.game.get_last_action()) + '\t (=> only allowed actions)')
            print('Reward: \t' + str(reward))
            print('Total Reward: \t' + str(self.total_reward))
            print('Variables: \n' + str(info))
            sleep(0.02857)  # 35 fps = 0.02857 sleep between frames
        print('===============================')
        print('Done')
        return

    def _get_next_level(self):
        """Return the unlocked level with the lowest average (the first one on ties)."""
        # Finds the unlocked level with the lowest average
        averages = self.get_scores()
        lowest_level = 0    # Defaulting to first level
        lowest_score = 1001
        for i in range(NUM_LEVELS):
            if not self.locked_levels[i] and averages[i] < lowest_score:
                lowest_level = i
                lowest_score = averages[i]
        return lowest_level

    def _unlock_levels(self):
        """Unlock level i + 1 for every level i whose average reached ``passing_grade``."""
        averages = self.get_scores()
        for i in range(NUM_LEVELS - 2, -1, -1):
            if self.locked_levels[i + 1] and averages[i] >= self.passing_grade:
                self.locked_levels[i + 1] = False
        return

    def _start_episode(self):
        """Open a new zero-score slot at the front of the current level's history and start the episode."""
        # NOTE(review): the history is capped at min_tries_for_avg entries, so an
        # average_over larger than min_tries_for_avg has no effect - confirm intended.
        if 0 == len(self.scores[self.level]):
            self.scores[self.level] = [0] * self.min_tries_for_avg
        else:
            self.scores[self.level].insert(0, 0)
            self.scores[self.level] = self.scores[self.level][:self.min_tries_for_avg]
        self.is_new_episode = True
        return super(MetaDoomEnv, self)._start_episode()

    def change_level(self, new_level=None):
        """Switch to ``new_level`` and reset.

        When ``new_level`` is None, out of range or still locked, the
        unlocked level with the lowest average is selected instead.
        """
        can_use_requested = (
            new_level is not None
            and 0 <= new_level < NUM_LEVELS
            and not self.locked_levels[new_level]
        )
        self.find_new_level = False
        if can_use_requested:
            self.level = new_level
        else:
            self.level = self._get_next_level()
        self.reset()
        return

    def _get_standard_reward(self, episode_reward):
        """Map an episode reward of the current level onto the 0-1,000 scale."""
        # Returns a standardized reward for an episode (i.e. between 0 and 1,000)
        min_score = float(DOOM_SETTINGS[self.level][MIN_SCORE])
        target_score = float(DOOM_SETTINGS[self.level][TARGET_SCORE])
        max_score = min_score + (target_score - min_score) / 0.99       # Target is 99th percentile (Scale 0-1000)
        std_reward = round(1000 * (episode_reward - min_score) / (max_score - min_score), 4)
        std_reward = min(1000, std_reward)                              # Cannot be more than 1,000
        std_reward = max(0, std_reward)                                 # Cannot be less than 0
        return std_reward

    def _level_average(self, level):
        """Return the unrounded mean of the most recent scores of ``level``, or None without history."""
        history = self.scores[level]
        if len(history) == 0:
            return None
        level_count = min(len(history), self.average_over)
        # float() keeps true division under Python 2, where the scores can be ints
        return sum(history[:level_count]) / float(level_count)

    def get_total_reward(self):
        """Return the sum of all level averages, plus 50 per level when every level averages >= 990."""
        # Returns the sum of the average of all levels
        total_score = 0
        passed_levels = 0
        for i in range(NUM_LEVELS):
            level_average = self._level_average(i)
            if level_average is None:
                continue
            if level_average >= 990:
                passed_levels += 1
            total_score += level_average
        # Bonus for passing all levels (50 * num of levels)
        if NUM_LEVELS == passed_levels:
            total_score += NUM_LEVELS * 50
        return round(total_score, 4)

    def _calculate_reward(self, episode_reward, prev_total_reward):
        """Record the running episode score and return ``(reward, new_total_reward)``."""
        # Calculates the action reward and the new total reward
        std_reward = self._get_standard_reward(episode_reward)
        self.scores[self.level][0] = std_reward
        total_reward = self.get_total_reward()
        reward = total_reward - prev_total_reward
        return reward, total_reward

    def get_scores(self):
        """Return the average score of every level, rounded to 4 decimals (0 without history)."""
        # Returns a list with the averages per level
        averages = [0] * NUM_LEVELS
        for i in range(NUM_LEVELS):
            level_average = self._level_average(i)
            if level_average is not None:
                averages[i] = round(level_average, 4)
        return averages

    def _reset(self):
        """Start a new episode, reloading the game when the level changed.

        Returns None without doing anything while a level change is pending.
        """
        # Reset is called on first step() after level is finished
        # or when change_level() is called. Returning if neither have been called to
        # avoid resetting the level twice
        if self.find_new_level:
            return

        if self.is_initialized and not self._closed and self.previous_level == self.level:
            self._start_episode()
            return self.game.get_state().image_buffer.copy()
        else:
            return self._load_level()

    def _step(self, action):
        """Apply ``action`` and return ``(observation, reward, is_finished, info)``.

        A pending level change is performed first.  In 'human' mode a whole
        episode is played interactively and reported as finished.
        """
        # Changing level
        if self.find_new_level:
            self.change_level()

        if 'human' == self.mode:
            self._play_human_mode()
            obs = np.zeros(shape=self.observation_space.shape, dtype=np.uint8)
            reward = 0
            is_finished = True
            info = self._get_game_variables(None)
        else:
            obs, step_reward, is_finished, info = super(MetaDoomEnv, self)._step(action)
            reward, self.total_reward = self._calculate_reward(self.game.get_total_reward(), self.total_reward)
            # First step() after new episode returns the entire total reward
            # because stats_recorder resets the episode score to 0 after reset() is called
            if self.is_new_episode:
                reward = self.total_reward

        self.is_new_episode = False
        info["SCORES"] = self.get_scores()
        info["TOTAL_REWARD"] = round(self.total_reward, 4)
        info["LOCKED_LEVELS"] = self.locked_levels

        # Indicating new level required
        if is_finished:
            self._unlock_levels()
            self.find_new_level = True

        return obs, reward, is_finished, info
|
@@ -1,46 +0,0 @@
|
||||
import logging
|
||||
from gym.envs.doom import doom_env
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DoomHealthGatheringEnv(doom_env.DoomEnv):
    """
    ------------ Training Mission 5 - Health Gathering ------------
    A map for learning to survive by collecting health packs. It is a
    rectangle whose green, acidic floor periodically hurts the player.
    Medkits are spread around the map and additional kits spawn at
    intervals.

    Allowed actions:
        [13] - MOVE_FORWARD - Move forward - Values 0 or 1
        [14] - TURN_RIGHT   - Turn right   - Values 0 or 1
        [15] - TURN_LEFT    - Turn left    - Values 0 or 1
    Note: see controls.md for details

    Rewards:
        +  1 - 35 times per second - Survive as long as possible
        -100 - Death penalty

    Goal: 1000 points
        Stay alive long enough to reach 1,000 points (~ 30 secs)

    Mode:
        - env.mode can be 'fast', 'normal' or 'human' (e.g. env.mode = 'fast')
        - 'fast' (default) will run as fast as possible (~75 fps) (best for simulation)
        - 'normal' will run at roughly 35 fps (easier for human to watch)
        - 'human' will let you play the game (keyboard only: Arrow Keys, '<', '>' and Ctrl)

    Ends when:
        - Player is dead
        - Timeout (60 seconds - 2,100 frames)

    Actions:
        actions = [0] * 43
        actions[13] = 0      # MOVE_FORWARD
        actions[14] = 1      # TURN_RIGHT
        actions[15] = 0      # TURN_LEFT
    -----------------------------------------------------
    """

    def __init__(self):
        # Level 4 is the "HealthGathering" row of doom_env.DOOM_SETTINGS.
        super(DoomHealthGatheringEnv, self).__init__(level=4)
|
@@ -1,46 +0,0 @@
|
||||
import logging
|
||||
from gym.envs.doom import doom_env
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DoomMyWayHomeEnv(doom_env.DoomEnv):
    """
    ------------ Training Mission 6 - My Way Home ------------
    A map for improving navigational skills. It is a series of
    interconnected rooms and one corridor with a dead end. Each room has
    its own color. A green vest lies in one of the rooms, always the
    same one, and the player must find it.

    Allowed actions:
        [13] - MOVE_FORWARD - Move forward - Values 0 or 1
        [14] - TURN_RIGHT   - Turn right   - Values 0 or 1
        [15] - TURN_LEFT    - Turn left    - Values 0 or 1
    Note: see controls.md for details

    Rewards:
        + 1      - Finding the vest
        - 0.0001 - 35 times per second - Find the vest quick!

    Goal: 0.50 point
        Find the vest

    Mode:
        - env.mode can be 'fast', 'normal' or 'human' (e.g. env.mode = 'fast')
        - 'fast' (default) will run as fast as possible (~75 fps) (best for simulation)
        - 'normal' will run at roughly 35 fps (easier for human to watch)
        - 'human' will let you play the game (keyboard only: Arrow Keys, '<', '>' and Ctrl)

    Ends when:
        - Vest is found
        - Timeout (1 minute - 2,100 frames)

    Actions:
        actions = [0] * 43
        actions[13] = 0      # MOVE_FORWARD
        actions[14] = 1      # TURN_RIGHT
        actions[15] = 0      # TURN_LEFT
    -----------------------------------------------------
    """

    def __init__(self):
        # Level 5 is the "MyWayHome" row of doom_env.DOOM_SETTINGS.
        super(DoomMyWayHomeEnv, self).__init__(level=5)
|
@@ -1,51 +0,0 @@
|
||||
import logging
|
||||
from gym.envs.doom import doom_env
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DoomPredictPositionEnv(doom_env.DoomEnv):
    """
    ------------ Training Mission 7 - Predict Position ------------
    A map for learning to use the rocket launcher. It is a rectangular
    map with a monster on the opposite side, which has to be killed with
    the rocket launcher. A rocket takes time to travel from the moment
    it is fired to the moment it reaches the other side of the room, so
    the position of the monster has to be predicted to kill it.

    Allowed actions:
        [0]  - ATTACK      - Shoot weapon - Values 0 or 1
        [14] - TURN_RIGHT  - Turn right   - Values 0 or 1
        [15] - TURN_LEFT   - Turn left    - Values 0 or 1
    Note: see controls.md for details

    Rewards:
        + 1      - Killing the monster
        - 0.0001 - 35 times per second - Kill the monster faster!

    Goal: 0.5 point
        Kill the monster

    Hint: Missile launcher takes longer to load. You must wait a good second after the game starts
        before trying to fire it.

    Mode:
        - env.mode can be 'fast', 'normal' or 'human' (e.g. env.mode = 'fast')
        - 'fast' (default) will run as fast as possible (~75 fps) (best for simulation)
        - 'normal' will run at roughly 35 fps (easier for human to watch)
        - 'human' will let you play the game (keyboard only: Arrow Keys, '<', '>' and Ctrl)

    Ends when:
        - Monster is dead
        - Out of missile (you only have one)
        - Timeout (20 seconds - 700 frames)

    Actions:
        actions = [0] * 43
        actions[0] = 0       # ATTACK
        actions[14] = 1      # TURN_RIGHT
        actions[15] = 0      # TURN_LEFT
    -----------------------------------------------------
    """

    def __init__(self):
        # Level 6 is the "PredictPosition" row of doom_env.DOOM_SETTINGS.
        super(DoomPredictPositionEnv, self).__init__(level=6)
|
@@ -1,42 +0,0 @@
|
||||
import logging
|
||||
from gym.envs.doom import doom_env
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DoomTakeCoverEnv(doom_env.DoomEnv):
    """
    ------------ Training Mission 8 - Take Cover ------------
    A map for learning about the damage of incoming missiles. It is a
    rectangular map with monsters firing missiles and fireballs at the
    player, who needs to survive as long as possible.

    Allowed actions:
        [10] - MOVE_RIGHT - Move to the right - Values 0 or 1
        [11] - MOVE_LEFT  - Move to the left  - Values 0 or 1
    Note: see controls.md for details

    Rewards:
        + 1 - 35 times per second - Survive as long as possible

    Goal: 750 points
        Survive for ~ 20 seconds

    Mode:
        - env.mode can be 'fast', 'normal' or 'human' (e.g. env.mode = 'fast')
        - 'fast' (default) will run as fast as possible (~75 fps) (best for simulation)
        - 'normal' will run at roughly 35 fps (easier for human to watch)
        - 'human' will let you play the game (keyboard only: Arrow Keys, '<', '>' and Ctrl)

    Ends when:
        - Player is dead (one or two fireballs should be enough to kill you)
        - Timeout (60 seconds - 2,100 frames)

    Actions:
        actions = [0] * 43
        actions[10] = 0      # MOVE_RIGHT
        actions[11] = 1      # MOVE_LEFT
    -----------------------------------------------------
    """

    def __init__(self):
        # Level 7 is the "TakeCover" row of doom_env.DOOM_SETTINGS.
        super(DoomTakeCoverEnv, self).__init__(level=7)
|
@@ -1,116 +0,0 @@
|
||||
"""
|
||||
------------ Meta - Doom ------------
|
||||
This is a meta map that combines all 9 Doom levels.
|
||||
|
||||
Levels:
|
||||
|
||||
0 - Doom Basic
|
||||
1 - Doom Corridor
|
||||
2 - Doom DefendCenter
|
||||
3 - Doom DefendLine
|
||||
4 - Doom HealthGathering
|
||||
5 - Doom MyWayHome
|
||||
6 - Doom PredictPosition
|
||||
7 - Doom TakeCover
|
||||
8 - Doom Deathmatch
|
||||
|
||||
Goal: 9,000 points
|
||||
- Pass all levels
|
||||
|
||||
Scoring:
|
||||
- Each level score has been standardized on a scale of 0 to 1,000
|
||||
- The passing score for a level is 990 (99th percentile)
|
||||
- A bonus of 450 (50 * 9 levels) is given if all levels are passed
|
||||
- The score for a level is the average of the last 3 tries
|
||||
- If there have been fewer than 3 tries for a level, the missing tries will have a score of 0
|
||||
(e.g. if you score 1,000 on the first level on your first try, your level score will be (1,000+0+0)/ 3 = 333.33)
|
||||
- The total score is the sum of the level scores, plus the bonus if you passed all levels.
|
||||
|
||||
e.g. List of tries:
|
||||
|
||||
- Level 0: 500
|
||||
- Level 0: 750
|
||||
- Level 0: 800
|
||||
- Level 0: 1,000
|
||||
- Level 1: 100
|
||||
- Level 1: 200
|
||||
|
||||
Level score for level 0 = [1,000 + 800 + 750] / 3 = 850 (Average of last 3 tries)
|
||||
Level score for level 1 = [200 + 100 + 0] / 3 = 100 (Tries not completed have a score of 0)
|
||||
Level score for levels 2 to 8 = 0
|
||||
Bonus score for passing all levels = 0
|
||||
------------------------
|
||||
Total score = 850 + 100 + 0 + 0 = 950
|
||||
|
||||
Changing Level:
|
||||
- To unlock the next level, you must achieve a level score (avg of last 3 tries) of at least 600
|
||||
(i.e. passing 60% of the last level)
|
||||
- There are 2 ways to change level:
|
||||
|
||||
1) Manual method
|
||||
|
||||
- obs, reward, is_finished, info = env.step(action)
|
||||
- if is_finished is true, you can call env.change_level(level_number) to change to an unlocked level
|
||||
- you can see
|
||||
the current level with info["LEVEL"]
|
||||
the list of level score with info["SCORES"],
|
||||
the list of locked levels with info["LOCKED_LEVELS"]
|
||||
your total score with info["TOTAL_REWARD"]
|
||||
|
||||
e.g.
|
||||
import gym
|
||||
env = gym.make('meta-Doom-v0')
|
||||
env.reset()
|
||||
total_score = 0
|
||||
while total_score < 9000:
|
||||
action = [0] * 43
|
||||
obs, reward, is_finished, info = env.step(action)
|
||||
env.render()
|
||||
total_score = info["TOTAL_REWARD"]
|
||||
if is_finished:
|
||||
env.change_level(level_you_want)
|
||||
|
||||
2) Automatic change
|
||||
|
||||
- if you don't call change_level() and the level is finished, the system will automatically select the
|
||||
unlocked level with the lowest level score (which is likely to be the last unlocked level)
|
||||
|
||||
e.g.
|
||||
import gym
|
||||
env = gym.make('meta-Doom-v0')
|
||||
env.reset()
|
||||
total_score = 0
|
||||
while total_score < 9000:
|
||||
action = [0] * 43
|
||||
obs, reward, is_finished, info = env.step(action)
|
||||
env.render()
|
||||
total_score = info["TOTAL_REWARD"]
|
||||
|
||||
Allowed actions:
|
||||
- Each level has its own allowed actions, see each level for details
|
||||
|
||||
Mode:
|
||||
- env.mode can be 'fast', 'normal' or 'human' (e.g. env.mode = 'fast')
|
||||
- 'fast' (default) will run as fast as possible (~75 fps) (best for simulation)
|
||||
- 'normal' will run at roughly 35 fps (easier for human to watch)
|
||||
- 'human' will let you play the game (keyboard: Arrow Keys, '<', '>' and Ctrl, mouse available for Doom Deathmatch)
|
||||
|
||||
e.g. to start in human mode:
|
||||
|
||||
import gym
|
||||
env = gym.make('meta-Doom-v0')
|
||||
env.mode='human'
|
||||
env.reset()
|
||||
num_episodes = 10
|
||||
for i in range(num_episodes):
|
||||
env.step([0] * 43)
|
||||
|
||||
Actions:
|
||||
actions = [0] * 43
|
||||
actions[0] = 0 # ATTACK
|
||||
actions[1] = 0 # USE
|
||||
[...]
|
||||
actions[42] = 0 # MOVE_UP_DOWN_DELTA
|
||||
A full list of possible actions is available in controls.md
|
||||
-----------------------------------------------------
|
||||
"""
|
@@ -22,11 +22,6 @@ def should_skip_env_spec_for_tests(spec):
|
||||
logger.warn("Skipping tests for box2d env {}".format(spec._entry_point))
|
||||
return True
|
||||
|
||||
# TODO: Issue #167 - Re-enable these tests after fixing DoomDeathmatch crash
|
||||
if spec._entry_point.startswith('gym.envs.doom:DoomDeathmatchEnv'):
|
||||
logger.warn("Skipping tests for DoomDeathmatchEnv {}".format(spec._entry_point))
|
||||
return True
|
||||
|
||||
# Skip ConvergenceControl tests (the only env in parameter_tuning) according to pull #104
|
||||
if spec._entry_point.startswith('gym.envs.parameter_tuning:'):
|
||||
logger.warn("Skipping tests for parameter_tuning env {}".format(spec._entry_point))
|
||||
|
@@ -71,12 +71,6 @@ add_group(
|
||||
description='Simple text environments to get you started.'
|
||||
)
|
||||
|
||||
add_group(
|
||||
id='doom',
|
||||
name='Doom',
|
||||
description='Doom environments based on VizDoom.'
|
||||
)
|
||||
|
||||
add_group(
|
||||
id='safety',
|
||||
name='Safety',
|
||||
@@ -864,316 +858,6 @@ In WaterWorld, the agent, a blue circle, must navigate around the world capturin
|
||||
"""
|
||||
)
|
||||
|
||||
# doom
|
||||
add_task(
|
||||
id='meta-Doom-v0',
|
||||
group='doom',
|
||||
experimental=True,
|
||||
contributor='ppaquette',
|
||||
summary='Mission #1 to #9 - Beat all 9 Doom missions.',
|
||||
description="""
|
||||
This is a meta map that combines all 9 Doom levels.
|
||||
|
||||
Levels:
|
||||
- #0 Doom Basic
|
||||
- #1 Doom Corridor
|
||||
- #2 Doom DefendCenter
|
||||
- #3 Doom DefendLine
|
||||
- #4 Doom HealthGathering
|
||||
- #5 Doom MyWayHome
|
||||
- #6 Doom PredictPosition
|
||||
- #7 Doom TakeCover
|
||||
- #8 Doom Deathmatch
|
||||
|
||||
Goal: 9,000 points
|
||||
- Pass all levels
|
||||
|
||||
Scoring:
|
||||
- Each level score has been standardized on a scale of 0 to 1,000
|
||||
- The passing score for a level is 990 (99th percentile)
|
||||
- A bonus of 450 (50 * 9 levels) is given if all levels are passed
|
||||
- The score for a level is the average of the last 3 tries
|
||||
"""
|
||||
)
|
||||
|
||||
add_task(
|
||||
id='DoomBasic-v0',
|
||||
group='doom',
|
||||
experimental=True,
|
||||
contributor='ppaquette',
|
||||
summary='Mission #1 - Kill a single monster using your pistol.',
|
||||
description="""
|
||||
This map is rectangular with gray walls, ceiling and floor.
|
||||
You are spawned in the center of the longer wall, and a red
|
||||
circular monster is spawned randomly on the opposite wall.
|
||||
You need to kill the monster (one bullet is enough).
|
||||
|
||||
Goal: 10 points
|
||||
- Kill the monster in 3 secs with 1 shot
|
||||
|
||||
Rewards:
|
||||
- Plus 101 pts for killing the monster
|
||||
- Minus 5 pts for missing a shot
|
||||
- Minus 1 pts every 0.028 secs
|
||||
|
||||
Ends when:
|
||||
- Monster is dead
|
||||
- Player is dead
|
||||
- Timeout (10 seconds - 350 frames)
|
||||
|
||||
Allowed actions:
|
||||
- ATTACK
|
||||
- MOVE_RIGHT
|
||||
- MOVE_LEFT
|
||||
"""
|
||||
)
|
||||
|
||||
add_task(
|
||||
id='DoomCorridor-v0',
|
||||
group='doom',
|
||||
experimental=True,
|
||||
contributor='ppaquette',
|
||||
summary='Mission #2 - Run as fast as possible to grab a vest.',
|
||||
description="""
|
||||
This map is designed to improve your navigation. There is a vest
|
||||
at the end of the corridor, with 6 enemies (3 groups of 2). Your goal
|
||||
is to get to the vest as soon as possible, without being killed.
|
||||
|
||||
Goal: 1,000 points
|
||||
- Reach the vest (or get very close to it)
|
||||
|
||||
Rewards:
|
||||
- Plus distance for getting closer to the vest
|
||||
- Minus distance for getting further from the vest
|
||||
- Minus 100 pts for getting killed
|
||||
|
||||
Ends when:
|
||||
- Player touches vest
|
||||
- Player is dead
|
||||
- Timeout (1 minute - 2,100 frames)
|
||||
|
||||
Allowed actions:
|
||||
- ATTACK
|
||||
- MOVE_RIGHT
|
||||
- MOVE_LEFT
|
||||
- MOVE_FORWARD
|
||||
- TURN_RIGHT
|
||||
- TURN_LEFT
|
||||
"""
|
||||
)
|
||||
|
||||
add_task(
|
||||
id='DoomDefendCenter-v0',
|
||||
group='doom',
|
||||
experimental=True,
|
||||
contributor='ppaquette',
|
||||
summary='Mission #3 - Kill enemies coming at you from all sides.',
|
||||
description="""
|
||||
This map is designed to teach you how to kill and how to stay alive.
|
||||
You will also need to keep an eye on your ammunition level. You are only
|
||||
rewarded for kills, so figure out how to stay alive.
|
||||
|
||||
The map is a circle with monsters. You are in the middle. Monsters will
|
||||
respawn with additional health when killed. Kill as many as you can
|
||||
before you run out of ammo.
|
||||
|
||||
Goal: 10 points
|
||||
- Kill 11 monsters (you have 26 ammo)
|
||||
|
||||
Rewards:
|
||||
- Plus 1 point for killing a monster
|
||||
- Minus 1 point for getting killed
|
||||
|
||||
Ends when:
|
||||
- Player is dead
|
||||
- Timeout (60 seconds - 2100 frames)
|
||||
|
||||
Allowed actions:
|
||||
- ATTACK
|
||||
- TURN_RIGHT
|
||||
- TURN_LEFT
|
||||
"""
|
||||
)
|
||||
|
||||
add_task(
|
||||
id='DoomDefendLine-v0',
|
||||
group='doom',
|
||||
experimental=True,
|
||||
contributor='ppaquette',
|
||||
summary='Mission #4 - Kill enemies on the other side of the room.',
|
||||
description="""
|
||||
This map is designed to teach you how to kill and how to stay alive.
|
||||
Your ammo will automatically replenish. You are only rewarded for kills,
|
||||
so figure out how to stay alive.
|
||||
|
||||
The map is a rectangle with monsters on the other side. Monsters will
|
||||
respawn with additional health when killed. Kill as many as you can
|
||||
before they kill you. This map is harder than the previous.
|
||||
|
||||
Goal: 15 points
|
||||
- Kill 16 monsters
|
||||
|
||||
Rewards:
|
||||
- Plus 1 point for killing a monster
|
||||
- Minus 1 point for getting killed
|
||||
|
||||
Ends when:
|
||||
- Player is dead
|
||||
- Timeout (60 seconds - 2100 frames)
|
||||
|
||||
Allowed actions:
|
||||
- ATTACK
|
||||
- TURN_RIGHT
|
||||
- TURN_LEFT
|
||||
"""
|
||||
)
|
||||
|
||||
add_task(
|
||||
id='DoomHealthGathering-v0',
|
||||
group='doom',
|
||||
experimental=True,
|
||||
contributor='ppaquette',
|
||||
summary='Mission #5 - Learn to grab medkits to survive as long as possible.',
|
||||
description="""
|
||||
This map is a guide on how to survive by collecting health packs.
|
||||
It is a rectangle with green, acidic floor which hurts the player
|
||||
periodically. There are also medkits spread around the map, and
|
||||
additional kits will spawn at intervals.
|
||||
|
||||
Goal: 1000 points
|
||||
- Stay alive long enough for approx. 30 secs
|
||||
|
||||
Rewards:
|
||||
- Plus 1 point every 0.028 secs
|
||||
- Minus 100 pts for dying
|
||||
|
||||
Ends when:
|
||||
- Player is dead
|
||||
- Timeout (60 seconds - 2,100 frames)
|
||||
|
||||
Allowed actions:
|
||||
- MOVE_FORWARD
|
||||
- TURN_RIGHT
|
||||
- TURN_LEFT
|
||||
"""
|
||||
)
|
||||
|
||||
add_task(
|
||||
id='DoomMyWayHome-v0',
|
||||
group='doom',
|
||||
experimental=True,
|
||||
contributor='ppaquette',
|
||||
summary='Mission #6 - Find the vest in one of the 4 rooms.',
|
||||
description="""
|
||||
This map is designed to improve navigational skills. It is a series of
|
||||
interconnected rooms and 1 corridor with a dead end. Each room
|
||||
has a separate color. There is a green vest in one of the rooms.
|
||||
The vest is always in the same room. Player must find the vest.
|
||||
|
||||
Goal: 0.50 point
|
||||
- Find the vest
|
||||
|
||||
Rewards:
|
||||
- Plus 1 point for finding the vest
|
||||
- Minus 0.0001 point every 0.028 secs
|
||||
|
||||
Ends when:
|
||||
- Vest is found
|
||||
- Timeout (1 minute - 2,100 frames)
|
||||
|
||||
Allowed actions:
|
||||
- MOVE_FORWARD
|
||||
- TURN_RIGHT
|
||||
- TURN_LEFT
|
||||
"""
|
||||
)
|
||||
|
||||
add_task(
|
||||
id='DoomPredictPosition-v0',
|
||||
group='doom',
|
||||
experimental=True,
|
||||
contributor='ppaquette',
|
||||
summary='Mission #7 - Learn how to kill an enemy with a rocket launcher.',
|
||||
description="""
|
||||
This map is designed to train you on using a rocket launcher.
|
||||
It is a rectangular map with a monster on the opposite side. You need
|
||||
to use your rocket launcher to kill it. The rocket adds a delay between
|
||||
the moment it is fired and the moment it reaches the other side of the room.
|
||||
You need to predict the position of the monster to kill it.
|
||||
|
||||
Goal: 0.5 point
|
||||
- Kill the monster
|
||||
|
||||
Rewards:
|
||||
- Plus 1 point for killing the monster
|
||||
- Minus 0.0001 point every 0.028 secs
|
||||
|
||||
Ends when:
|
||||
- Monster is dead
|
||||
- Out of missile (you only have one)
|
||||
- Timeout (20 seconds - 700 frames)
|
||||
|
||||
Hint: Wait 1 sec for the missile launcher to load.
|
||||
|
||||
Allowed actions:
|
||||
- ATTACK
|
||||
- TURN_RIGHT
|
||||
- TURN_LEFT
|
||||
"""
|
||||
)
|
||||
|
||||
add_task(
|
||||
id='DoomTakeCover-v0',
|
||||
group='doom',
|
||||
experimental=True,
|
||||
contributor='ppaquette',
|
||||
summary='Mission #8 - Survive as long as possible with enemies shooting at you.',
|
||||
description="""
|
||||
This map is to train you on the damage of incoming missiles.
|
||||
It is a rectangular map with monsters firing missiles and fireballs
|
||||
at you. You need to survive as long as possible.
|
||||
|
||||
Goal: 750 points
|
||||
- Survive for approx. 20 seconds
|
||||
|
||||
Rewards:
|
||||
- Plus 1 point every 0.028 secs
|
||||
|
||||
Ends when:
|
||||
- Player is dead (1 or 2 fireballs is enough)
|
||||
- Timeout (60 seconds - 2,100 frames)
|
||||
|
||||
Allowed actions:
|
||||
- MOVE_RIGHT
|
||||
- MOVE_LEFT
|
||||
"""
|
||||
)
|
||||
|
||||
add_task(
|
||||
id='DoomDeathmatch-v0',
|
||||
group='doom',
|
||||
experimental=True,
|
||||
contributor='ppaquette',
|
||||
summary='Mission #9 - Kill as many enemies as possible without being killed.',
|
||||
description="""
|
||||
Kill as many monsters as possible without being killed.
|
||||
|
||||
Goal: 20 points
|
||||
- Kill 20 monsters
|
||||
|
||||
Rewards:
|
||||
- Plus 1 point for killing a monster
|
||||
|
||||
Ends when:
|
||||
- Player is dead
|
||||
- Timeout (3 minutes - 6,300 frames)
|
||||
|
||||
Allowed actions:
|
||||
- ALL
|
||||
"""
|
||||
)
|
||||
|
||||
|
||||
# Safety
|
||||
|
||||
# interpretability envs
|
||||
|
@@ -33,8 +33,8 @@ def create_rollout(spec):
|
||||
Returns a bool which indicates whether the new rollout was added to the json file.
|
||||
|
||||
"""
|
||||
# Skip platform-dependent Doom environments
|
||||
if should_skip_env_spec_for_tests(spec) or 'Doom' in spec.id:
|
||||
# Skip platform-dependent
|
||||
if should_skip_env_spec_for_tests(spec):
|
||||
logger.warn("Skipping tests for {}".format(spec.id))
|
||||
return False
|
||||
|
||||
|
3
setup.py
3
setup.py
@@ -11,7 +11,6 @@ extras = {
|
||||
'board_game' : ['pachi-py>=0.0.19'],
|
||||
'box2d': ['box2d-py'],
|
||||
'classic_control': ['PyOpenGL'],
|
||||
'doom': ['doom_py>=0.0.11'],
|
||||
'mujoco': ['mujoco_py>=0.4.3', 'imageio'],
|
||||
'parameter_tuning': ['keras', 'theano'],
|
||||
}
|
||||
@@ -36,6 +35,6 @@ setup(name='gym',
|
||||
'numpy>=1.10.4', 'requests>=2.0', 'six', 'pyglet>=1.2.0',
|
||||
],
|
||||
extras_require=extras,
|
||||
package_data={'gym': ['envs/mujoco/assets/*.xml', 'envs/classic_control/assets/*.png', 'envs/doom/assets/*.cfg']},
|
||||
package_data={'gym': ['envs/mujoco/assets/*.xml', 'envs/classic_control/assets/*.png']},
|
||||
tests_require=['nose2', 'mock'],
|
||||
)
|
||||
|
Reference in New Issue
Block a user