Make agent examples compatible with python 3 (#150)

* make the cem agent example compatible with python 2 and 3

* make the keyboard_agent example compatible with python 2 and 3

Changing `xrange` to `range` should not impact performance unless we're
generating millions of elements (currently only 1000); see the sketch after this list.

* remove algorithm_id from the upload call
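
As a minimal sketch of the performance point above (not part of the commit): Python 3's `range` is a lazy sequence, much like Python 2's `xrange`, and even a fully materialized list of 1000 elements is cheap.

    import sys

    ROLLOUT_TIME = 1000  # the loop bound used in the keyboard agent example

    # Python 3's range() is lazy, so its footprint is constant regardless of length.
    steps = range(ROLLOUT_TIME)
    print(sys.getsizeof(steps))        # a few dozen bytes: just the range object

    # Even eagerly materialized (Python 2's old range() behavior),
    # 1000 elements cost only a few kilobytes.
    print(sys.getsizeof(list(steps)))  # roughly 8-9 KB on CPython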
Maciek
2016-06-01 16:15:18 +02:00
committed by Greg Brockman
parent d167a391a4
commit 43992f4752
4 changed files with 20 additions and 12 deletions

@@ -1,7 +1,13 @@
+from __future__ import print_function
 import gym
 import logging
 import numpy as np
-import json, sys, cPickle, os
+try:
+    import cPickle as pickle
+except ImportError:
+    import pickle
+import json, sys, os
 from os import path
 from _policies import BinaryActionLinearPolicy # Different file so it can be unpickled
 import argparse
@@ -77,10 +83,10 @@ if __name__ == '__main__':
     # Train the agent, and snapshot each stage
     for (i, iterdata) in enumerate(
         cem(noisy_evaluation, np.zeros(env.observation_space.shape[0]+1), **params)):
-        print 'Iteration %2i. Episode mean reward: %7.3f'%(i, iterdata['y_mean'])
+        print('Iteration %2i. Episode mean reward: %7.3f'%(i, iterdata['y_mean']))
         agent = BinaryActionLinearPolicy(iterdata['theta_mean'])
         if args.display: do_rollout(agent, env, 200, render=True)
-        writefile('agent-%.4i.pkl'%i, cPickle.dumps(agent, -1))
+        writefile('agent-%.4i.pkl'%i, str(pickle.dumps(agent, -1)))

     # Write out the env at the end so we store the parameters of this
     # environment.
@@ -89,4 +95,4 @@ if __name__ == '__main__':
     env.monitor.close()
     logger.info("Successfully ran RandomAgent. Now trying to upload results to the scoreboard. If it breaks, you can always just try re-uploading the same results.")
-    gym.upload(outdir, algorithm_id='cem')
+    gym.upload(outdir)
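
A standalone sketch (assumed, not from the commit) of the compatibility pattern introduced above: `cPickle` is Python 2's C-accelerated pickler, while Python 3 folds the C implementation into `pickle` itself, so the fallback import yields one name that works under both interpreters.

    try:
        import cPickle as pickle   # Python 2: C-accelerated implementation
    except ImportError:
        import pickle              # Python 3: the C implementation is built in

    # Hypothetical stand-in for the snapshotted agent object.
    agent_params = {'theta_mean': [0.1, -0.3]}
    blob = pickle.dumps(agent_params, -1)      # -1 selects the highest protocol
    assert pickle.loads(blob) == agent_params  # round-trips under either interpreter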

@@ -1,11 +1,13 @@
 #!/usr/bin/env python
+from __future__ import print_function
 import sys, gym
 #
 # Test yourself as a learning agent! Pass environment name as a command-line argument.
 #
-env = gym.make('LunarLander-v0' if len(sys.argv)<2 else sys.argv[1])
+env = gym.make('LunarLander-v2' if len(sys.argv)<2 else sys.argv[1])

 ACTIONS = env.action_space.n
 ROLLOUT_TIME = 1000
@@ -40,9 +42,9 @@ def rollout(env):
     human_wants_restart = False
     obser = env.reset()
     skip = 0
-    for t in xrange(ROLLOUT_TIME):
+    for t in range(ROLLOUT_TIME):
         if not skip:
-            #print "taking action {}".format(human_agent_action)
+            #print("taking action {}".format(human_agent_action))
             a = human_agent_action
             skip = SKIP_CONTROL
         else:
@@ -57,9 +59,9 @@ def rollout(env):
         import time
         time.sleep(0.1)

-print "ACTIONS={}".format(ACTIONS)
-print "Press keys 1 2 3 ... to take actions 1 2 3 ..."
-print "No keys pressed is taking action 0"
+print("ACTIONS={}".format(ACTIONS))
+print("Press keys 1 2 3 ... to take actions 1 2 3 ...")
+print("No keys pressed is taking action 0")

 while 1:
     rollout(env)
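
For context, a minimal sketch (not from the commit) of what the `__future__` import buys: it makes `print` a function under Python 2 as well, so the converted calls above parse identically under both interpreters.

    from __future__ import print_function  # no-op on Python 3, changes the parser on Python 2

    # Without the import, Python 2 would treat print("a", "b") as printing a tuple;
    # with it, both interpreters print: a b
    print("a", "b")
    print("no trailing newline", end="")  # keyword arguments exist only in the function form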

@@ -55,4 +55,4 @@ if __name__ == '__main__':
     # Upload to the scoreboard. We could also do this from another
     # process if we wanted.
     logger.info("Successfully ran RandomAgent. Now trying to upload results to the scoreboard. If it breaks, you can always just try re-uploading the same results.")
-    gym.upload(outdir, algorithm_id='random')
+    gym.upload(outdir)

@@ -12,7 +12,7 @@ from gym.utils import colorize, seeding
 import pyglet
 from pyglet.gl import *

-from car_dynamics import Car
+from .car_dynamics import Car

 # Easiest continuous control task to learn from pixels, a top-down racing environment.
 # Discreet control is reasonable in this environment as well, on/off discretisation is
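
The last hunk fixes an implicit relative import: Python 2 resolved the bare `from car_dynamics import Car` against the importing module's own directory, but Python 3 treats bare imports as absolute, so a module inside a package needs the explicit leading dot. A self-contained sketch (hypothetical package name and layout) demonstrating the fixed form:

    import os, sys, tempfile

    # Build a tiny throwaway package: racing/{__init__,car_dynamics,car_racing}.py
    pkg_root = tempfile.mkdtemp()
    pkg = os.path.join(pkg_root, 'racing')
    os.mkdir(pkg)
    open(os.path.join(pkg, '__init__.py'), 'w').close()
    with open(os.path.join(pkg, 'car_dynamics.py'), 'w') as f:
        f.write('class Car(object): pass\n')
    with open(os.path.join(pkg, 'car_racing.py'), 'w') as f:
        f.write('from .car_dynamics import Car\n')  # the explicit, Python 3-safe form

    sys.path.insert(0, pkg_root)
    from racing.car_racing import Car  # resolves under Python 3; the bare form would not
    print(Car)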