2016-09-21 14:55:04 -07:00
import argparse
2016-04-27 08:00:58 -07:00
import logging
2016-09-21 14:55:04 -07:00
import sys
2016-04-27 08:00:58 -07:00
import gym
2016-12-23 16:21:42 -08:00
from gym import wrappers
2016-04-27 08:00:58 -07:00
class RandomAgent(object):
    """The world's simplest agent!

    The agent keeps a reference to an action space and, whenever it is asked
    to act, returns whatever that space's ``sample()`` method produces.
    """

    def __init__(self, action_space):
        """Store the action space that actions will be sampled from.

        Args:
            action_space: Any object exposing a ``sample()`` method.
        """
        self.action_space = action_space

    def act(self, observation, reward, done):
        """Return an action sampled from the stored action space.

        Args:
            observation: Accepted for interface compatibility; not used.
            reward: Accepted for interface compatibility; not used.
            done: Accepted for interface compatibility; not used.

        Returns:
            The value returned by ``self.action_space.sample()``.
        """
        sampled_action = self.action_space.sample()
        return sampled_action
if __name__ == '__main__':
    # Entry point: run a RandomAgent on the selected environment for a fixed
    # number of episodes while recording with the Monitor wrapper, then
    # upload the recorded results.
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('env_id', nargs='?', default='CartPole-v0', help='Select the environment to run')
    args = parser.parse_args()

    # Call `undo_logger_setup` if you want to undo Gym's logger setup
    # and configure things manually. (The default should be fine most
    # of the time.)
    gym.undo_logger_setup()
    logger = logging.getLogger()
    formatter = logging.Formatter('[%(asctime)s] %(message)s')
    handler = logging.StreamHandler(sys.stderr)
    handler.setFormatter(formatter)
    logger.addHandler(handler)

    # You can set the level to logging.DEBUG or logging.WARN if you
    # want to change the amount of output.
    logger.setLevel(logging.INFO)

    env = gym.make(args.env_id)

    # You provide the directory to write to (can be an existing
    # directory, including one with existing data -- all monitor files
    # will be namespaced). You can also dump to a tempdir if you'd
    # like: tempfile.mkdtemp().
    outdir = '/tmp/random-agent-results'
    env = wrappers.Monitor(directory=outdir, force=True)(env)
    env.seed(0)
    agent = RandomAgent(env.action_space)

    episode_count = 100

    try:
        for _episode in range(episode_count):
            # Start every episode from a clean slate so the agent's first
            # act() call never sees the previous episode's final reward or
            # its done=True flag.
            reward = 0
            done = False
            ob = env.reset()
            while not done:
                action = agent.act(ob, reward, done)
                ob, reward, done, _info = env.step(action)
                # Note there's no env.render() here. But the environment still can open window and
                # render if asked by the Monitor wrapper: it calls env.render('rgb_array') to record video.
                # Video is not recorded every episode, see capped_cubic_video_schedule for details.
    finally:
        # Close the env and write monitor result info to disk, even when
        # the rollout above is interrupted by an exception.
        env.close()

    # Upload to the scoreboard. We could also do this from another
    # process if we wanted. This is only reached when the run succeeded.
    logger.info("Successfully ran RandomAgent. Now trying to upload results to the scoreboard. If it breaks, you can always just try re-uploading the same results.")
    gym.upload(outdir)