Mirror of https://github.com/Farama-Foundation/Gymnasium.git, synced 2025-08-22 07:02:19 +00:00
Make agent examples compatible with python 3 (#150)
* make cem agent example compatible with python 2 and 3
* make the keyboard_agent example compatible with python 2 and 3. Changing `xrange` to `range` should not impact performance unless we're generating millions of elements (currently only 1000).
* remove algorithm_id from the upload call
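The core Python 2/3 compatibility trick the cem example adopts below is the try/except import of pickle. A minimal standalone sketch of the pattern (the pickled object here is just a placeholder, not the example's agent):

try:
    import cPickle as pickle   # Python 2: C-accelerated pickle module
except ImportError:
    import pickle              # Python 3: cPickle is gone, plain pickle is already fast

data = {"theta": [0.0, 1.0]}       # placeholder payload
blob = pickle.dumps(data, -1)      # -1 = highest available protocol, valid on both versions
assert pickle.loads(blob) == data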
@@ -1,7 +1,13 @@
+from __future__ import print_function
+
 import gym
 import logging
 import numpy as np
-import json, sys, cPickle, os
+try:
+    import cPickle as pickle
+except ImportError:
+    import pickle
+import json, sys, os
 from os import path
 from _policies import BinaryActionLinearPolicy # Different file so it can be unpickled
 import argparse
@@ -77,10 +83,10 @@ if __name__ == '__main__':
     # Train the agent, and snapshot each stage
     for (i, iterdata) in enumerate(
         cem(noisy_evaluation, np.zeros(env.observation_space.shape[0]+1), **params)):
-        print 'Iteration %2i. Episode mean reward: %7.3f'%(i, iterdata['y_mean'])
+        print('Iteration %2i. Episode mean reward: %7.3f'%(i, iterdata['y_mean']))
         agent = BinaryActionLinearPolicy(iterdata['theta_mean'])
         if args.display: do_rollout(agent, env, 200, render=True)
-        writefile('agent-%.4i.pkl'%i, cPickle.dumps(agent, -1))
+        writefile('agent-%.4i.pkl'%i, str(pickle.dumps(agent, -1)))

     # Write out the env at the end so we store the parameters of this
     # environment.
@@ -89,4 +95,4 @@ if __name__ == '__main__':
     env.monitor.close()

     logger.info("Successfully ran RandomAgent. Now trying to upload results to the scoreboard. If it breaks, you can always just try re-uploading the same results.")
-    gym.upload(outdir, algorithm_id='cem')
+    gym.upload(outdir)
|
@@ -1,11 +1,13 @@
 #!/usr/bin/env python
+from __future__ import print_function
+
 import sys, gym

 #
 # Test yourself as a learning agent! Pass environment name as a command-line argument.
 #

-env = gym.make('LunarLander-v0' if len(sys.argv)<2 else sys.argv[1])
+env = gym.make('LunarLander-v2' if len(sys.argv)<2 else sys.argv[1])

 ACTIONS = env.action_space.n
 ROLLOUT_TIME = 1000
@@ -40,9 +42,9 @@ def rollout(env):
     human_wants_restart = False
     obser = env.reset()
     skip = 0
-    for t in xrange(ROLLOUT_TIME):
+    for t in range(ROLLOUT_TIME):
         if not skip:
-            #print "taking action {}".format(human_agent_action)
+            #print("taking action {}".format(human_agent_action))
             a = human_agent_action
             skip = SKIP_CONTROL
         else:
@@ -57,9 +59,9 @@ def rollout(env):
             import time
             time.sleep(0.1)

-print "ACTIONS={}".format(ACTIONS)
-print "Press keys 1 2 3 ... to take actions 1 2 3 ..."
-print "No keys pressed is taking action 0"
+print("ACTIONS={}".format(ACTIONS))
+print("Press keys 1 2 3 ... to take actions 1 2 3 ...")
+print("No keys pressed is taking action 0")

 while 1:
     rollout(env)
|
@@ -55,4 +55,4 @@ if __name__ == '__main__':
     # Upload to the scoreboard. We could also do this from another
     # process if we wanted.
     logger.info("Successfully ran RandomAgent. Now trying to upload results to the scoreboard. If it breaks, you can always just try re-uploading the same results.")
-    gym.upload(outdir, algorithm_id='random')
+    gym.upload(outdir)
|
@@ -12,7 +12,7 @@ from gym.utils import colorize, seeding
 import pyglet
 from pyglet.gl import *

-from car_dynamics import Car
+from .car_dynamics import Car

 # Easiest continuous control task to learn from pixels, a top-down racing environment.
 # Discreet control is reasonable in this environment as well, on/off discretisation is
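Background on the last hunk, not part of the commit message: Python 3 dropped implicit relative imports, so a module inside a package must name a sibling module explicitly. A minimal sketch with an illustrative, made-up package layout:

# pkg/__init__.py     -- empty file that makes `pkg` a package (illustrative layout)
# pkg/car_dynamics.py -- defines `class Car: ...`
# pkg/racing.py:
from .car_dynamics import Car     # explicit relative import: works on Python 2 and 3
# from car_dynamics import Car    # implicit relative import: ImportError on Python 3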
|