Cleanup, removal of unmaintained code (#836)

* add dtype to Box

* remove board_game, debugging, safety, parameter_tuning environments

* massive set of breaking changes
- remove python logging module
- _step, _reset, _seed, _close => non-underscored methods
- remove benchmark and scoring folder

* Improve render("human"), now resizable, closable window.

* get rid of default step and reset in wrappers, so it doesn’t silently fail for people with underscore methods

* CubeCrash unit test environment

* followup fixes

* MemorizeDigits unit test environment

* refactored spaces a bit
fixed indentation
disabled test_env_semantics

* fix unit tests

* fixes

* CubeCrash, MemorizeDigits tested

* gym backwards compatibility patch

* gym backwards compatibility, followup fixes

* changelist, add spaces to main namespaces

* undo_logger_setup for backwards compat

* remove configuration.py
This commit is contained in:
John Schulman
2018-01-25 18:20:14 -08:00
committed by GitHub
parent 6af4a5b9b2
commit 4c460ba6c8
113 changed files with 943 additions and 8875 deletions

View File

@@ -1,10 +1,12 @@
#!/usr/bin/env python
from __future__ import print_function
import sys, gym
import sys, gym, time
#
# Test yourself as a learning agent! Pass environment name as a command-line argument.
# Test yourself as a learning agent! Pass environment name as a command-line argument, for example:
#
# python keyboard_agent.py SpaceInvadersNoFrameskip-v4
#
env = gym.make('LunarLander-v2' if len(sys.argv)<2 else sys.argv[1])
@@ -12,7 +14,6 @@ env = gym.make('LunarLander-v2' if len(sys.argv)<2 else sys.argv[1])
if not hasattr(env.action_space, 'n'):
raise Exception('Keyboard agent only supports discrete action spaces')
ACTIONS = env.action_space.n
ROLLOUT_TIME = 1000
SKIP_CONTROL = 0 # Use previous control decision SKIP_CONTROL times, that's how you
# can test what skip is still usable.
@@ -44,26 +45,36 @@ def rollout(env):
human_wants_restart = False
obser = env.reset()
skip = 0
for t in range(ROLLOUT_TIME):
total_reward = 0
total_timesteps = 0
while 1:
if not skip:
#print("taking action {}".format(human_agent_action))
a = human_agent_action
total_timesteps += 1
skip = SKIP_CONTROL
else:
skip -= 1
obser, r, done, info = env.step(a)
env.render()
if r != 0:
print("reward %0.3f" % r)
total_reward += r
window_still_open = env.render()
if window_still_open==False: return False
if done: break
if human_wants_restart: break
while human_sets_pause:
env.render()
import time
time.sleep(0.1)
time.sleep(0.1)
print("timesteps %i reward %0.2f" % (total_timesteps, total_reward))
print("ACTIONS={}".format(ACTIONS))
print("Press keys 1 2 3 ... to take actions 1 2 3 ...")
print("No keys pressed is taking action 0")
while 1:
rollout(env)
window_still_open = rollout(env)
if window_still_open==False: break