Cleanup, removal of unmaintained code (#836)

* add dtype to Box * remove board_game, debugging, safety, parameter_tuning environments * massive set of breaking changes - remove python logging module - _step, _reset, _seed, _close => non underscored method - remove benchmark and scoring folder * Improve render("human"), now resizable, closable window. * get rid of default step and reset in wrappers, so it doesn’t silently fail for people with underscore methods * CubeCrash unit test environment * followup fixes * MemorizeDigits unit test environment * refactored spaces a bit fixed indentation disabled test_env_semantics * fix unit tests * fixes * CubeCrash, MemorizeDigits tested * gym backwards compatibility patch * gym backwards compatibility, followup fixes * changelist, add spaces to main namespaces * undo_logger_setup for backwards compat * remove configuration.py
2025-08-20 14:02:03 +00:00 · 2018-01-25 18:20:14 -08:00
parent 6af4a5b9b2
commit 4c460ba6c8
113 changed files with 943 additions and 8875 deletions
--- a/examples/agents/keyboard_agent.py
+++ b/examples/agents/keyboard_agent.py
@@ -1,10 +1,12 @@
 #!/usr/bin/env python
 from __future__ import print_function

-import sys, gym
+import sys, gym, time

 #
-# Test yourself as a learning agent! Pass environment name as a command-line argument.
+# Test yourself as a learning agent! Pass environment name as a command-line argument, for example:
+#
+# python keyboard_agent.py SpaceInvadersNoFrameskip-v4
 #

 env = gym.make('LunarLander-v2' if len(sys.argv)<2 else sys.argv[1])
@@ -12,7 +14,6 @@ env = gym.make('LunarLander-v2' if len(sys.argv)<2 else sys.argv[1])
 if not hasattr(env.action_space, 'n'):
    raise Exception('Keyboard agent only supports discrete action spaces')
 ACTIONS = env.action_space.n
-ROLLOUT_TIME = 1000
 SKIP_CONTROL = 0    # Use previous control decision SKIP_CONTROL times, that's how you
                    # can test what skip is still usable.

@@ -44,26 +45,36 @@ def rollout(env):
    human_wants_restart = False
    obser = env.reset()
    skip = 0
-    for t in range(ROLLOUT_TIME):
+    total_reward = 0
+    total_timesteps = 0
+    while 1:
        if not skip:
            #print("taking action {}".format(human_agent_action))
            a = human_agent_action
+            total_timesteps += 1
            skip = SKIP_CONTROL
        else:
            skip -= 1

        obser, r, done, info = env.step(a)
-        env.render()
+        if r != 0:
+            print("reward %0.3f" % r)
+        total_reward += r
+        window_still_open = env.render()
+        if window_still_open==False: return False
        if done: break
        if human_wants_restart: break
        while human_sets_pause:
            env.render()
-            import time
            time.sleep(0.1)
+        time.sleep(0.1)
+    print("timesteps %i reward %0.2f" % (total_timesteps, total_reward))

 print("ACTIONS={}".format(ACTIONS))
 print("Press keys 1 2 3 ... to take actions 1 2 3 ...")
 print("No keys pressed is taking action 0")

 while 1:
-    rollout(env)
+    window_still_open = rollout(env)
+    if window_still_open==False: break
+