#!/usr/bin/env python
"""Step a gym environment from the command line, rendering every frame.

Usage: <script> ENV [--mode {noop,random,static,human}] [--max_steps N]
                    [--fps F] [--once] [--ignore_done]

Modes:
    noop    step with an all-zero (Box) or 0 (Discrete) action
    random  step with an action sampled from the action space
    static  never step; only render the state produced by reset()
    human   read the action as a digit key via cv2.waitKey (Discrete only)
"""
import argparse
import itertools
import time

import gym
import numpy as np
from gym import spaces, envs

# raw_input only exists on Python 2; Python 3 renamed it to input.  Resolve
# the line reader once so the rest of the script runs on both.
try:
    read_line = raw_input
except NameError:
    read_line = input


def parse_args(argv=None):
    """Parse the command line (``sys.argv[1:]`` when *argv* is None)."""
    parser = argparse.ArgumentParser()
    parser.add_argument("env")
    parser.add_argument("--mode", choices=["noop", "random", "static", "human"],
                        default="random")
    parser.add_argument("--max_steps", type=int, default=0)
    parser.add_argument("--fps", type=float)
    parser.add_argument("--once", action="store_true")
    parser.add_argument("--ignore_done", action="store_true")
    return parser.parse_args(argv)


def noop_action(ac_space):
    """Return the "do nothing" action for *ac_space*.

    Raises:
        NotImplementedError: if the space is neither Box nor Discrete.
    """
    if isinstance(ac_space, spaces.Box):
        return np.zeros(ac_space.shape)
    if isinstance(ac_space, spaces.Discrete):
        return 0
    raise NotImplementedError(
        "noop not implemented for class {}".format(type(ac_space)))


def key_to_action(key, num_actions):
    """Map a key code to a discrete action index.

    The digit key '0' maps to action 0, '1' to action 1, and so on.  Anything
    outside ``[0, num_actions)`` is reported and replaced by action 0.  This
    includes negative results (a key below '0', or a negative key code),
    which previously slipped through to ``env.step``.
    """
    action = key - ord('0')
    if not 0 <= action < num_actions:
        print("WARNING: ignoring illegal action {}.".format(action))
        return 0
    return action


def spec_step_limit(env):
    """Return the step limit declared by the environment's spec, or None.

    NOTE(review): the attribute name differs between gym releases
    (``timestep_limit`` vs. ``max_episode_steps``); both are probed so a
    missing attribute means "no limit" instead of an AttributeError.
    """
    spec = getattr(env, "spec", None)
    for attr in ("max_episode_steps", "timestep_limit"):
        limit = getattr(spec, attr, None)
        if limit is not None:
            return limit
    return None


def run_episode(env, mode, max_steps, fps, ignore_done, wait_key=None):
    """Reset *env*, run one trajectory, and return the number of steps taken.

    Args:
        env: the environment to drive.
        mode: one of "noop", "random", "static", "human".
        max_steps: upper bound on loop iterations; a falsy value (0 or None)
            means run until the environment reports done.
        fps: frame rate used to pace the non-interactive modes.
        ignore_done: keep going even after the environment reports done.
        wait_key: callable returning a key code (``cv2.waitKey``); only used
            in "human" mode.
    """
    ac_space = env.action_space
    env.reset()
    print("Starting a new trajectory")

    steps = range(max_steps) if max_steps else itertools.count()
    # Pre-bind t so that an empty range (negative max_steps) reports 0 steps
    # instead of failing on an unbound loop variable.
    t = -1
    for t in steps:
        done = False
        if mode == "noop":
            _, _, done, _ = env.step(noop_action(ac_space))
            time.sleep(1.0 / fps)
        elif mode == "random":
            _, _, done, _ = env.step(ac_space.sample())
            time.sleep(1.0 / fps)
        elif mode == "static":
            time.sleep(1.0 / fps)
        elif mode == "human":
            if t == 0:
                # Nothing has been rendered yet on the first iteration, so
                # take a default action instead of blocking on a key press.
                action = 0
            else:
                action = key_to_action(wait_key(-1), ac_space.n)
            _, _, done, _ = env.step(action)

        env.render()
        if done and not ignore_done:
            break
    return t + 1


def main():
    """Entry point: build the environment and loop over trajectories."""
    args = parse_args()

    env = envs.make(args.env)
    ac_space = env.action_space

    fps = args.fps or env.metadata.get('video.frames_per_second') or 100
    max_steps = args.max_steps
    if max_steps == 0:
        max_steps = spec_step_limit(env)

    wait_key = None
    if args.mode == "human":
        if not isinstance(ac_space, spaces.Discrete):
            raise ValueError(
                "Can only use human on discrete action space. Got {}".format(
                    type(ac_space)))
        print("Press keys 0-{} to choose the agent's actions".format(
            ac_space.n - 1))
        # Imported lazily so OpenCV is only required for human mode.
        import cv2
        wait_key = cv2.waitKey

    while True:
        num_steps = run_episode(env, args.mode, max_steps, fps,
                                args.ignore_done, wait_key)
        print("Done after {} steps".format(num_steps))
        if args.once:
            break
        read_line("Press enter to continue")


if __name__ == "__main__":
    main()