2016-04-27 08:00:58 -07:00
import logging
2016-05-03 22:27:42 +03:00
import os , sys
2016-04-27 08:00:58 -07:00
import gym
# The world's simplest agent!
class RandomAgent(object):
    """Agent that picks every action by sampling its action space.

    Args:
        action_space: Object exposing a ``sample()`` method. It is stored
            as ``self.action_space`` and queried on every call to ``act``.
    """

    def __init__(self, action_space):
        self.action_space = action_space

    def act(self, observation, reward, done):
        """Return whatever ``self.action_space.sample()`` produces.

        ``observation``, ``reward`` and ``done`` are accepted so the agent
        can be driven by the usual step loop, but none of them is used.
        """
        return self.action_space.sample()
if __name__ == '__main__':
    # You can optionally set up the logger. Also fine to set the level
    # to logging.DEBUG or logging.WARN if you want to change the
    # amount of output.
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # The environment id can be passed as the first command-line argument;
    # without one the script falls back to CartPole-v0.
    env_id = sys.argv[1] if len(sys.argv) > 1 else 'CartPole-v0'
    env = gym.make(env_id)
    agent = RandomAgent(env.action_space)

    # You provide the directory to write to (can be an existing
    # directory, including one with existing data -- all monitor files
    # will be namespaced). You can also dump to a tempdir if you'd
    # like: tempfile.mkdtemp().
    outdir = '/tmp/random-agent-results'
    env.monitor.start(outdir, force=True)

    episode_count = 100
    max_steps = 200

    try:
        for _episode in range(episode_count):
            ob = env.reset()
            # Start each episode with a clean reward/done pair so the first
            # act() call does not see the previous episode's final values.
            reward = 0
            done = False

            for _step in range(max_steps):
                action = agent.act(ob, reward, done)
                ob, reward, done, _info = env.step(action)
                if done:
                    break
                # Note there's no env.render() here. But the environment
                # still can open a window and render if asked by
                # env.monitor: it calls env.render('rgb_array') to record
                # video. Video is not recorded every episode, see
                # capped_cubic_video_schedule for details.
    finally:
        # Dump result info to disk. Done in a finally clause so the monitor
        # is closed even when an episode raises part-way through the run.
        env.monitor.close()

    # Upload to the scoreboard. We could also do this from another
    # process if we wanted. Only reached when every episode completed.
    logger.info("Successfully ran RandomAgent. Now trying to upload results to the scoreboard. If it breaks, you can always just try re-uploading the same results.")
    gym.upload(outdir, algorithm_id='random')