import numpy as np

import gym
from gym import spaces
from gym.utils import seeding

# Unit test environment for CNNs and CNN+RNN algorithms.
# Looks like this (RGB observations):
#
#  ---------------------------
# |                           |
# |                           |
# |                           |
# |          **               |
# |          **               |
# |                           |
# |                           |
# |                           |
# |                           |
# |                           |
#  ========     ==============
#
# Goal is to go through the hole at the bottom. Agent controls square using
# Left-Nop-Right actions. It falls down automatically, episode length is a
# bit less than FIELD_H.
#
# CubeCrash-v0                   # shaped reward
# CubeCrashSparse-v0             # reward 0 or 1 at the end
# CubeCrashScreenBecomesBlack-v0 # for RNNs
#
# To see how it works, run:
#
# python examples/agents/keyboard_agent.py CubeCrashScreen-v0

FIELD_W = 32
FIELD_H = 40
HOLE_WIDTH = 8

color_black = np.array((0, 0, 0)).astype('float32')
color_white = np.array((255, 255, 255)).astype('float32')
color_green = np.array((0, 255, 0)).astype('float32')


class CubeCrash(gym.Env):
    """Falling-cube unit-test environment with RGB pixel observations.

    A 3x3 cube falls one row per step; the agent shifts it left/right to
    steer it through a hole in the wall at the bottom of the field.
    Hitting a side wall or the bottom wall ends the episode with reward -1;
    passing through the hole ends it with reward +1.  When
    ``use_shaped_reward`` is set, a small per-step shaping term
    (0.01 * decrease in horizontal distance to the hole) is also given.
    """
    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 60,
        'video.res_w': FIELD_W,
        'video.res_h': FIELD_H,
    }

    use_shaped_reward = True
    use_black_screen = False
    use_random_colors = False  # Makes env too hard

    def __init__(self):
        self.seed()
        self.viewer = None
        # Observations are full-field RGB images, actions are {nop, left, right}.
        self.observation_space = spaces.Box(0, 255, (FIELD_H, FIELD_W, 3), dtype=np.uint8)
        self.action_space = spaces.Discrete(3)
        self.reset()

    def seed(self, seed=None):
        """Seed the environment's RNG; returns the list of seeds used."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def random_color(self):
        """Return a uniformly random RGB color as a uint8 array of shape (3,).

        NOTE(review): ``randint(low=0, high=255)`` has an exclusive upper
        bound, so channel value 255 is never produced — kept as-is to
        preserve the original RNG stream.
        """
        return np.array([
            self.np_random.randint(low=0, high=255),
            self.np_random.randint(low=0, high=255),
            self.np_random.randint(low=0, high=255),
        ]).astype('uint8')

    def reset(self):
        """Start a new episode and return the initial observation."""
        self.cube_x = self.np_random.randint(low=3, high=FIELD_W - 3)
        self.cube_y = self.np_random.randint(low=3, high=FIELD_H // 6)
        self.hole_x = self.np_random.randint(low=HOLE_WIDTH, high=FIELD_W - HOLE_WIDTH)
        self.bg_color = self.random_color() if self.use_random_colors else color_black
        self.potential = None
        self.step_n = 0
        # Rejection-sample wall/cube colors until both are visibly distinct
        # from the background.
        while 1:
            self.wall_color = self.random_color() if self.use_random_colors else color_white
            self.cube_color = self.random_color() if self.use_random_colors else color_green
            # BUGFIX: with random colors all three arrays are uint8, and
            # uint8 subtraction wraps around, corrupting the distance check.
            # Cast to float32 before subtracting (a no-op for the default
            # float32 background).
            if (np.linalg.norm(self.wall_color.astype('float32') - self.bg_color.astype('float32')) < 50
                    or np.linalg.norm(self.cube_color.astype('float32') - self.bg_color.astype('float32')) < 50):
                continue
            break
        # Advance one no-op step so the returned observation is rendered.
        return self.step(0)[0]

    def step(self, action):
        """Apply ``action`` (0=nop, 1=left, 2=right) and advance one row.

        Returns the usual gym ``(obs, reward, done, info)`` tuple, where
        ``obs`` is the rendered ``(FIELD_H, FIELD_W, 3)`` uint8 frame.
        """
        if action == 0:
            pass
        elif action == 1:
            self.cube_x -= 1
        elif action == 2:
            self.cube_x += 1
        else:
            assert 0, "Action %i is out of range" % action
        self.cube_y += 1
        self.step_n += 1

        # Render: background, 5-pixel-high bottom wall, hole, then the cube.
        obs = np.zeros((FIELD_H, FIELD_W, 3), dtype=np.uint8)
        obs[:, :, :] = self.bg_color
        obs[FIELD_H - 5:FIELD_H, :, :] = self.wall_color
        obs[FIELD_H - 5:FIELD_H, self.hole_x - HOLE_WIDTH // 2:self.hole_x + HOLE_WIDTH // 2 + 1, :] = self.bg_color
        obs[self.cube_y - 1:self.cube_y + 2, self.cube_x - 1:self.cube_x + 2, :] = self.cube_color
        if self.use_black_screen and self.step_n > 4:
            # RNN variant: blank the screen after a few frames so the agent
            # must remember where the hole is.
            obs[:] = np.zeros((3,), dtype=np.uint8)

        done = False
        reward = 0
        dist = np.abs(self.cube_x - self.hole_x)
        if self.potential is not None and self.use_shaped_reward:
            # Potential-based shaping: reward progress toward the hole.
            reward = (self.potential - dist) * 0.01
        self.potential = dist

        if self.cube_x - 1 < 0 or self.cube_x + 1 >= FIELD_W:
            done = True
            reward = -1
        elif self.cube_y + 1 >= FIELD_H - 5:
            if dist >= HOLE_WIDTH // 2:
                done = True
                reward = -1
            elif self.cube_y == FIELD_H:
                done = True
                reward = +1

        self.last_obs = obs
        return obs, reward, done, {}

    def render(self, mode='human'):
        """Return the last frame (``rgb_array``) or show it in a viewer (``human``)."""
        if mode == 'rgb_array':
            return self.last_obs
        elif mode == 'human':
            from gym.envs.classic_control import rendering
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(self.last_obs)
            return self.viewer.isopen
        else:
            assert 0, "Render mode '%s' is not supported" % mode

    def close(self):
        """Close the viewer window, if one was opened."""
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None


class CubeCrashSparse(CubeCrash):
    """Variant with only the terminal +1/-1 reward (no shaping)."""
    use_shaped_reward = False


class CubeCrashScreenBecomesBlack(CubeCrash):
    """Sparse-reward variant whose screen goes black after a few steps (for RNNs)."""
    use_shaped_reward = False
    use_black_screen = True