from gym.envs.registration import registry, register, make, spec

# Algorithmic
# ----------------------------------------
# Each call registers one environment id with its entry point
# ("module:ClassName"), a per-episode step limit and a reward threshold.

register(
    id='Copy-v0',
    entry_point='gym.envs.algorithmic:CopyEnv',
    max_episode_steps=200,
    reward_threshold=25.0,
)

register(
    id='RepeatCopy-v0',
    entry_point='gym.envs.algorithmic:RepeatCopyEnv',
    max_episode_steps=200,
    reward_threshold=75.0,
)

# ReversedAddition-v0 and ReversedAddition3-v0 share one entry point and
# differ only in the 'rows' constructor argument.
register(
    id='ReversedAddition-v0',
    entry_point='gym.envs.algorithmic:ReversedAdditionEnv',
    kwargs={'rows': 2},
    max_episode_steps=200,
    reward_threshold=25.0,
)

register(
    id='ReversedAddition3-v0',
    entry_point='gym.envs.algorithmic:ReversedAdditionEnv',
    kwargs={'rows': 3},
    max_episode_steps=200,
    reward_threshold=25.0,
)

register(
    id='DuplicatedInput-v0',
    entry_point='gym.envs.algorithmic:DuplicatedInputEnv',
    max_episode_steps=200,
    reward_threshold=9.0,
)

register(
    id='Reverse-v0',
    entry_point='gym.envs.algorithmic:ReverseEnv',
    max_episode_steps=200,
    reward_threshold=25.0,
)

# Classic
# ----------------------------------------

# CartPole-v0 and CartPole-v1 use the same entry point; they differ in the
# step limit and reward threshold passed to register().
register(
    id='CartPole-v0',
    entry_point='gym.envs.classic_control:CartPoleEnv',
    max_episode_steps=200,
    reward_threshold=195.0,
)

register(
    id='CartPole-v1',
    entry_point='gym.envs.classic_control:CartPoleEnv',
    max_episode_steps=500,
    reward_threshold=475.0,
)

register(
    id='MountainCar-v0',
    entry_point='gym.envs.classic_control:MountainCarEnv',
    max_episode_steps=200,
    reward_threshold=-110.0,
)

register(
    id='MountainCarContinuous-v0',
    entry_point='gym.envs.classic_control:Continuous_MountainCarEnv',
    max_episode_steps=999,
    reward_threshold=90.0,
)

# Pendulum-v0 and Acrobot-v1 are registered without a reward threshold.
register(
    id='Pendulum-v0',
    entry_point='gym.envs.classic_control:PendulumEnv',
    max_episode_steps=200,
)

register(
    id='Acrobot-v1',
    entry_point='gym.envs.classic_control:AcrobotEnv',
    max_episode_steps=500,
)

# Box2d
# ----------------------------------------

register(
    id='LunarLander-v2',
    entry_point='gym.envs.box2d:LunarLander',
    max_episode_steps=1000,
    reward_threshold=200,
)

register(
    id='LunarLanderContinuous-v2',
    entry_point='gym.envs.box2d:LunarLanderContinuous',
    max_episode_steps=1000,
    reward_threshold=200,
)

register(
    id='BipedalWalker-v2',
    entry_point='gym.envs.box2d:BipedalWalker',
    max_episode_steps=1600,
    reward_threshold=300,
)

register(
    id='BipedalWalkerHardcore-v2',
    entry_point='gym.envs.box2d:BipedalWalkerHardcore',
    max_episode_steps=2000,
    reward_threshold=300,
)

register(
    id='CarRacing-v0',
    entry_point='gym.envs.box2d:CarRacing',
    max_episode_steps=1000,
    reward_threshold=900,
)

# Toy Text
# ----------------------------------------

# Blackjack and the generalized Kelly coin-flip task are registered with
# neither a step limit nor a reward threshold.
register(
    id='Blackjack-v0',
    entry_point='gym.envs.toy_text:BlackjackEnv',
)

register(
    id='KellyCoinflip-v0',
    entry_point='gym.envs.toy_text:KellyCoinflipEnv',
    reward_threshold=246.61,
)

register(
    id='KellyCoinflipGeneralized-v0',
    entry_point='gym.envs.toy_text:KellyCoinflipGeneralizedEnv',
)

# Both FrozenLake ids share one entry point; 'map_name' selects the map.
register(
    id='FrozenLake-v0',
    entry_point='gym.envs.toy_text:FrozenLakeEnv',
    kwargs={'map_name': '4x4'},
    max_episode_steps=100,
    reward_threshold=0.78,  # optimum = .8196
)

register(
    id='FrozenLake8x8-v0',
    entry_point='gym.envs.toy_text:FrozenLakeEnv',
    kwargs={'map_name': '8x8'},
    max_episode_steps=200,
    reward_threshold=0.99,  # optimum = 1
)

register(
    id='NChain-v0',
    entry_point='gym.envs.toy_text:NChainEnv',
    max_episode_steps=1000,
)

register(
    id='Roulette-v0',
    entry_point='gym.envs.toy_text:RouletteEnv',
    max_episode_steps=100,
)

# The remaining entry points name the defining submodule explicitly
# (e.g. gym.envs.toy_text.taxi) rather than the toy_text package.
register(
    id='Taxi-v2',
    entry_point='gym.envs.toy_text.taxi:TaxiEnv',
    reward_threshold=8,  # optimum = 8.46
    max_episode_steps=200,
)

register(
    id='GuessingGame-v0',
    entry_point='gym.envs.toy_text.guessing_game:GuessingGame',
    max_episode_steps=200,
)

register(
    id='HotterColder-v0',
    entry_point='gym.envs.toy_text.hotter_colder:HotterColder',
    max_episode_steps=200,
)

# Mujoco
# ----------------------------------------

# 2D

register(
    id='Reacher-v1',
    entry_point='gym.envs.mujoco:ReacherEnv',
    max_episode_steps=50,
    reward_threshold=-3.75,
)

register(
    id='Pusher-v0',
    entry_point='gym.envs.mujoco:PusherEnv',
    max_episode_steps=100,
    reward_threshold=0.0,
)

register(
    id='Thrower-v0',
    entry_point='gym.envs.mujoco:ThrowerEnv',
    max_episode_steps=100,
    reward_threshold=0.0,
)

register(
    id='Striker-v0',
    entry_point='gym.envs.mujoco:StrikerEnv',
    max_episode_steps=100,
    reward_threshold=0.0,
)

register(
    id='InvertedPendulum-v1',
    entry_point='gym.envs.mujoco:InvertedPendulumEnv',
    max_episode_steps=1000,
    reward_threshold=950.0,
)

register(
    id='InvertedDoublePendulum-v1',
    entry_point='gym.envs.mujoco:InvertedDoublePendulumEnv',
    max_episode_steps=1000,
    reward_threshold=9100.0,
)

register(
    id='HalfCheetah-v1',
    entry_point='gym.envs.mujoco:HalfCheetahEnv',
    max_episode_steps=1000,
    reward_threshold=4800.0,
)

register(
    id='Hopper-v1',
    entry_point='gym.envs.mujoco:HopperEnv',
    max_episode_steps=1000,
    reward_threshold=3800.0,
)

register(
    id='Swimmer-v1',
    entry_point='gym.envs.mujoco:SwimmerEnv',
    max_episode_steps=1000,
    reward_threshold=360.0,
)

# Walker2d, Humanoid and HumanoidStandup are registered without a
# reward threshold.
register(
    id='Walker2d-v1',
    max_episode_steps=1000,
    entry_point='gym.envs.mujoco:Walker2dEnv',
)

register(
    id='Ant-v1',
    entry_point='gym.envs.mujoco:AntEnv',
    max_episode_steps=1000,
    reward_threshold=6000.0,
)

register(
    id='Humanoid-v1',
    entry_point='gym.envs.mujoco:HumanoidEnv',
    max_episode_steps=1000,
)

register(
    id='HumanoidStandup-v1',
    entry_point='gym.envs.mujoco:HumanoidStandupEnv',
    max_episode_steps=1000,
)

# Atari
# ----------------------------------------
# For every game and observation type, six ids are registered:
#   <Name>-v0 / <Name>-v4
#   <Name>Deterministic-v0 / <Name>Deterministic-v4
#   <Name>NoFrameskip-v0 / <Name>NoFrameskip-v4
# The -v0 variants pass repeat_action_probability=0.25; the -v4 variants
# do not pass that argument.

# # print ', '.join(["'{}'".format(name.split('.')[0]) for name in atari_py.list_games()])
for game in ['air_raid', 'alien', 'amidar', 'assault', 'asterix', 'asteroids', 'atlantis',
    'bank_heist', 'battle_zone', 'beam_rider', 'berzerk', 'bowling', 'boxing', 'breakout', 'carnival',
    'centipede', 'chopper_command', 'crazy_climber', 'demon_attack', 'double_dunk',
    'elevator_action', 'enduro', 'fishing_derby', 'freeway', 'frostbite', 'gopher', 'gravitar',
    'hero', 'ice_hockey', 'jamesbond', 'journey_escape', 'kangaroo', 'krull', 'kung_fu_master',
    'montezuma_revenge', 'ms_pacman', 'name_this_game', 'phoenix', 'pitfall', 'pong', 'pooyan',
    'private_eye', 'qbert', 'riverraid', 'road_runner', 'robotank', 'seaquest', 'skiing',
    'solaris', 'space_invaders', 'star_gunner', 'tennis', 'time_pilot', 'tutankham', 'up_n_down',
    'venture', 'video_pinball', 'wizard_of_wor', 'yars_revenge', 'zaxxon']:
    for obs_type in ['image', 'ram']:
        # space_invaders should yield SpaceInvaders-v0 and SpaceInvaders-ram-v0
        name = ''.join([g.capitalize() for g in game.split('_')])
        if obs_type == 'ram':
            name = '{}-ram'.format(name)

        nondeterministic = False
        if game == 'elevator_action' and obs_type == 'ram':
            # ElevatorAction-ram-v0 seems to yield slightly
            # non-deterministic observations about 10% of the time. We
            # should track this down eventually, but for now we just
            # mark it as nondeterministic.
            nondeterministic = True

        register(
            id='{}-v0'.format(name),
            entry_point='gym.envs.atari:AtariEnv',
            kwargs={'game': game, 'obs_type': obs_type, 'repeat_action_probability': 0.25},
            max_episode_steps=10000,
            nondeterministic=nondeterministic,
        )

        register(
            id='{}-v4'.format(name),
            entry_point='gym.envs.atari:AtariEnv',
            kwargs={'game': game, 'obs_type': obs_type},
            max_episode_steps=100000,
            nondeterministic=nondeterministic,
        )

        # Standard Deterministic (as in the original DeepMind paper)
        if game == 'space_invaders':
            frameskip = 3
        else:
            frameskip = 4

        # Use a deterministic frame skip.
        register(
            id='{}Deterministic-v0'.format(name),
            entry_point='gym.envs.atari:AtariEnv',
            kwargs={'game': game, 'obs_type': obs_type, 'frameskip': frameskip, 'repeat_action_probability': 0.25},
            max_episode_steps=100000,
            nondeterministic=nondeterministic,
        )

        register(
            id='{}Deterministic-v4'.format(name),
            entry_point='gym.envs.atari:AtariEnv',
            kwargs={'game': game, 'obs_type': obs_type, 'frameskip': frameskip},
            max_episode_steps=100000,
            nondeterministic=nondeterministic,
        )

        # The step limit of the NoFrameskip variants is scaled by the
        # frameskip value chosen above for this game.
        register(
            id='{}NoFrameskip-v0'.format(name),
            entry_point='gym.envs.atari:AtariEnv',
            kwargs={'game': game, 'obs_type': obs_type, 'frameskip': 1, 'repeat_action_probability': 0.25},  # A frameskip of 1 means we get every frame
            max_episode_steps=frameskip * 100000,
            nondeterministic=nondeterministic,
        )

        # No frameskip. (Atari has no entropy source, so these are
        # deterministic environments.)
        register(
            id='{}NoFrameskip-v4'.format(name),
            entry_point='gym.envs.atari:AtariEnv',
            kwargs={'game': game, 'obs_type': obs_type, 'frameskip': 1},  # A frameskip of 1 means we get every frame
            max_episode_steps=frameskip * 100000,
            nondeterministic=nondeterministic,
        )

# Board games
# ----------------------------------------

# The two Go ids share an entry point and differ only in 'board_size'.
register(
    id='Go9x9-v0',
    entry_point='gym.envs.board_game:GoEnv',
    kwargs={
        'player_color': 'black',
        'opponent': 'pachi:uct:_2400',
        'observation_type': 'image3c',
        'illegal_move_mode': 'lose',
        'board_size': 9,
    },
    # The pachi player seems not to be deterministic given a fixed seed.
    # (Reproduce by running 'import gym; h = gym.make('Go9x9-v0'); h.seed(1); h.reset(); h.step(15); h.step(16); h.step(17)' a few times.)
    #
    # This is probably due to a computation time limit.
    nondeterministic=True,
)

register(
    id='Go19x19-v0',
    entry_point='gym.envs.board_game:GoEnv',
    kwargs={
        'player_color': 'black',
        'opponent': 'pachi:uct:_2400',
        'observation_type': 'image3c',
        'illegal_move_mode': 'lose',
        'board_size': 19,
    },
    nondeterministic=True,
)

# Hex uses a 'random' opponent and is not flagged nondeterministic.
register(
    id='Hex9x9-v0',
    entry_point='gym.envs.board_game:HexEnv',
    kwargs={
        'player_color': 'black',
        'opponent': 'random',
        'observation_type': 'numpy3c',
        'illegal_move_mode': 'lose',
        'board_size': 9,
    },
)

# Debugging
# ----------------------------------------
# All four debugging environments are registered with local_only=True.

register(
    id='OneRoundDeterministicReward-v0',
    entry_point='gym.envs.debugging:OneRoundDeterministicRewardEnv',
    local_only=True,
)

register(
    id='TwoRoundDeterministicReward-v0',
    entry_point='gym.envs.debugging:TwoRoundDeterministicRewardEnv',
    local_only=True,
)

register(
    id='OneRoundNondeterministicReward-v0',
    entry_point='gym.envs.debugging:OneRoundNondeterministicRewardEnv',
    local_only=True,
)

register(
    id='TwoRoundNondeterministicReward-v0',
    entry_point='gym.envs.debugging:TwoRoundNondeterministicRewardEnv',
    local_only=True,
)

# Parameter tuning
# ----------------------------------------
# Registered with only an id and an entry point (no step limit or
# reward threshold is passed).

register(
    id='ConvergenceControl-v0',
    entry_point='gym.envs.parameter_tuning:ConvergenceControl',
)

register(
    id='CNNClassifierTraining-v0',
    entry_point='gym.envs.parameter_tuning:CNNClassifierTraining',
)

# Safety
# ----------------------------------------
# Every safety environment below is registered with max_episode_steps=200.

# interpretability envs
register(
    id='PredictActionsCartpole-v0',
    entry_point='gym.envs.safety:PredictActionsCartpoleEnv',
    max_episode_steps=200,
)

register(
    id='PredictObsCartpole-v0',
    entry_point='gym.envs.safety:PredictObsCartpoleEnv',
    max_episode_steps=200,
)

# semi_supervised envs
# probably the easiest:
register(
    id='SemisuperPendulumNoise-v0',
    entry_point='gym.envs.safety:SemisuperPendulumNoiseEnv',
    max_episode_steps=200,
)
# somewhat harder because of higher variance:
register(
    id='SemisuperPendulumRandom-v0',
    entry_point='gym.envs.safety:SemisuperPendulumRandomEnv',
    max_episode_steps=200,
)
# probably the hardest because you only get a constant number of rewards in total:
register(
    id='SemisuperPendulumDecay-v0',
    entry_point='gym.envs.safety:SemisuperPendulumDecayEnv',
    max_episode_steps=200,
)

# off_switch envs
register(
    id='OffSwitchCartpole-v0',
    entry_point='gym.envs.safety:OffSwitchCartpoleEnv',
    max_episode_steps=200,
)

register(
    id='OffSwitchCartpoleProb-v0',
    entry_point='gym.envs.safety:OffSwitchCartpoleProbEnv',
    max_episode_steps=200,
)