Files
Gymnasium/gym/wrappers/monitoring/video_recorder.py

397 lines
14 KiB
Python
Raw Normal View History

2016-04-27 08:00:58 -07:00
import json
import os
2021-07-29 02:26:34 +02:00
import os.path
2016-04-27 08:00:58 -07:00
import subprocess
import tempfile
from io import StringIO
2021-07-29 02:26:34 +02:00
import distutils.spawn
import distutils.version
import numpy as np
from gym import error, logger
2016-04-27 08:00:58 -07:00
2021-07-29 02:26:34 +02:00
2016-04-27 08:00:58 -07:00
def touch(path):
2021-07-29 02:26:34 +02:00
open(path, "a").close()
2016-04-27 08:00:58 -07:00
class VideoRecorder(object):
"""VideoRecorder renders a nice movie of a rollout, frame by frame. It
comes with an `enabled` option so you can still use the same code
on episodes where you don't want to record video.
Note:
You are responsible for calling `close` on a created
VideoRecorder, or else you may leak an encoder process.
Args:
env (Env): Environment to take video of.
path (Optional[str]): Path to the video file; will be randomly chosen if omitted.
base_path (Optional[str]): Alternatively, path to the video file without extension, which will be added.
metadata (Optional[dict]): Contents to save to the metadata file.
enabled (bool): Whether to actually record video, or just no-op (for convenience)
"""
def __init__(self, env, path=None, metadata=None, enabled=True, base_path=None):
2021-07-29 02:26:34 +02:00
modes = env.metadata.get("render.modes", [])
self._async = env.metadata.get("semantics.async")
self.enabled = enabled
# Don't bother setting anything else if not enabled
if not self.enabled:
return
2016-04-27 08:00:58 -07:00
self.ansi_mode = False
2021-07-29 02:26:34 +02:00
if "rgb_array" not in modes:
if "ansi" in modes:
2016-04-27 08:00:58 -07:00
self.ansi_mode = True
else:
2021-07-29 02:26:34 +02:00
logger.info(
2021-07-29 12:42:48 -04:00
'Disabling video recorder because {} neither supports video mode "rgb_array" nor "ansi".'.format(env)
2021-07-29 02:26:34 +02:00
)
# Whoops, turns out we shouldn't be enabled after all
self.enabled = False
return
2016-04-27 08:00:58 -07:00
if path is not None and base_path is not None:
raise error.Error("You can pass at most one of `path` or `base_path`.")
self.last_frame = None
self.env = env
2021-07-29 02:26:34 +02:00
required_ext = ".json" if self.ansi_mode else ".mp4"
2016-04-27 08:00:58 -07:00
if path is None:
if base_path is not None:
# Base path given, append ext
path = base_path + required_ext
else:
# Otherwise, just generate a unique filename
2021-07-29 12:42:48 -04:00
with tempfile.NamedTemporaryFile(suffix=required_ext, delete=False) as f:
2016-04-27 08:00:58 -07:00
path = f.name
self.path = path
path_base, actual_ext = os.path.splitext(self.path)
if actual_ext != required_ext:
2021-07-29 02:26:34 +02:00
hint = (
" HINT: The environment is text-only, therefore we're recording its text output in a structured JSON format."
if self.ansi_mode
else ""
)
2021-07-29 12:42:48 -04:00
raise error.Error("Invalid path given: {} -- must have file extension {}.{}".format(self.path, required_ext, hint))
2016-04-27 08:00:58 -07:00
# Touch the file in any case, so we know it's present. (This
# corrects for platform platform differences. Using ffmpeg on
# OS X, the file is precreated, but not on Linux.
touch(path)
2021-07-29 02:26:34 +02:00
self.frames_per_sec = env.metadata.get("video.frames_per_second", 30)
2021-07-29 12:42:48 -04:00
self.output_frames_per_sec = env.metadata.get("video.output_frames_per_second", self.frames_per_sec)
2021-07-29 02:26:34 +02:00
self.encoder = None # lazily start the process
2016-04-27 08:00:58 -07:00
self.broken = False
# Dump metadata
self.metadata = metadata or {}
2021-07-29 12:42:48 -04:00
self.metadata["content_type"] = "video/vnd.openai.ansivid" if self.ansi_mode else "video/mp4"
2021-07-29 02:26:34 +02:00
self.metadata_path = "{}.meta.json".format(path_base)
2016-04-27 08:00:58 -07:00
self.write_metadata()
2021-07-29 02:26:34 +02:00
logger.info("Starting new video recorder writing to %s", self.path)
2016-04-27 08:00:58 -07:00
self.empty = True
@property
def functional(self):
return self.enabled and not self.broken
def capture_frame(self):
"""Render the given `env` and add the resulting frame to the video."""
2021-07-29 02:26:34 +02:00
if not self.functional:
return
logger.debug("Capturing video frame: path=%s", self.path)
2016-04-27 08:00:58 -07:00
2021-07-29 02:26:34 +02:00
render_mode = "ansi" if self.ansi_mode else "rgb_array"
2016-04-27 08:00:58 -07:00
frame = self.env.render(mode=render_mode)
if frame is None:
2016-11-24 00:48:21 -08:00
if self._async:
return
else:
# Indicates a bug in the environment: don't want to raise
# an error here.
2021-07-29 02:26:34 +02:00
logger.warn(
"Env returned None on render(). Disabling further rendering for video recorder by marking as disabled: path=%s metadata_path=%s",
self.path,
self.metadata_path,
)
2016-11-24 00:48:21 -08:00
self.broken = True
2016-04-27 08:00:58 -07:00
else:
self.last_frame = frame
if self.ansi_mode:
self._encode_ansi_frame(frame)
else:
self._encode_image_frame(frame)
def close(self):
"""Make sure to manually close, or else you'll leak the encoder process"""
if not self.enabled:
return
if self.encoder:
2021-07-29 02:26:34 +02:00
logger.debug("Closing video encoder: path=%s", self.path)
2016-04-27 08:00:58 -07:00
self.encoder.close()
self.encoder = None
else:
# No frames captured. Set metadata, and remove the empty output file.
os.remove(self.path)
if self.metadata is None:
self.metadata = {}
2021-07-29 02:26:34 +02:00
self.metadata["empty"] = True
2016-04-27 08:00:58 -07:00
# If broken, get rid of the output file, otherwise we'd leak it.
if self.broken:
2021-07-29 02:26:34 +02:00
logger.info(
"Cleaning up paths for broken video recorder: path=%s metadata_path=%s",
self.path,
self.metadata_path,
)
2016-04-27 08:00:58 -07:00
# Might have crashed before even starting the output file, don't try to remove in that case.
if os.path.exists(self.path):
os.remove(self.path)
if self.metadata is None:
self.metadata = {}
2021-07-29 02:26:34 +02:00
self.metadata["broken"] = True
2016-04-27 08:00:58 -07:00
self.write_metadata()
def write_metadata(self):
2021-07-29 02:26:34 +02:00
with open(self.metadata_path, "w") as f:
2016-04-27 08:00:58 -07:00
json.dump(self.metadata, f)
def _encode_ansi_frame(self, frame):
if not self.encoder:
self.encoder = TextEncoder(self.path, self.frames_per_sec)
2021-07-29 02:26:34 +02:00
self.metadata["encoder_version"] = self.encoder.version_info
2016-04-27 08:00:58 -07:00
self.encoder.capture_frame(frame)
self.empty = False
def _encode_image_frame(self, frame):
if not self.encoder:
2021-07-29 12:42:48 -04:00
self.encoder = ImageEncoder(self.path, frame.shape, self.frames_per_sec, self.output_frames_per_sec)
2021-07-29 02:26:34 +02:00
self.metadata["encoder_version"] = self.encoder.version_info
2016-04-27 08:00:58 -07:00
try:
self.encoder.capture_frame(frame)
except error.InvalidFrame as e:
2021-07-29 02:26:34 +02:00
logger.warn("Tried to pass invalid video frame, marking as broken: %s", e)
2016-04-27 08:00:58 -07:00
self.broken = True
else:
self.empty = False
class TextEncoder(object):
"""Store a moving picture made out of ANSI frames. Format adapted from
https://github.com/asciinema/asciinema/blob/master/doc/asciicast-v1.md"""
def __init__(self, output_path, frames_per_sec):
self.output_path = output_path
self.frames_per_sec = frames_per_sec
self.frames = []
def capture_frame(self, frame):
string = None
if isinstance(frame, str):
2016-04-27 08:00:58 -07:00
string = frame
elif isinstance(frame, StringIO):
2016-04-27 08:00:58 -07:00
string = frame.getvalue()
else:
2021-07-29 02:26:34 +02:00
raise error.InvalidFrame(
2021-07-29 12:42:48 -04:00
"Wrong type {} for {}: text frame must be a string or StringIO".format(type(frame), frame)
2021-07-29 02:26:34 +02:00
)
frame_bytes = string.encode("utf-8")
if frame_bytes[-1:] != b"\n":
2021-07-29 12:42:48 -04:00
raise error.InvalidFrame('Frame must end with a newline: """{}"""'.format(string))
2021-07-29 02:26:34 +02:00
if b"\r" in frame_bytes:
2021-07-29 12:42:48 -04:00
raise error.InvalidFrame('Frame contains carriage returns (only newlines are allowed: """{}"""'.format(string))
2016-04-27 08:00:58 -07:00
self.frames.append(frame_bytes)
2016-04-27 08:00:58 -07:00
def close(self):
2021-07-29 02:26:34 +02:00
# frame_duration = float(1) / self.frames_per_sec
frame_duration = 0.5
2016-04-27 08:00:58 -07:00
# Turn frames into events: clear screen beforehand
# https://rosettacode.org/wiki/Terminal_control/Clear_the_screen#Python
# https://rosettacode.org/wiki/Terminal_control/Cursor_positioning#Python
clear_code = b"%c[2J\033[1;1H" % (27)
# Decode the bytes as UTF-8 since JSON may only contain UTF-8
2021-07-29 02:26:34 +02:00
events = [
(
frame_duration,
(clear_code + frame.replace(b"\n", b"\r\n")).decode("utf-8"),
)
for frame in self.frames
]
2016-04-27 08:00:58 -07:00
# Calculate frame size from the largest frames.
# Add some padding since we'll get cut off otherwise.
2021-07-29 02:26:34 +02:00
height = max([frame.count(b"\n") for frame in self.frames]) + 1
2021-07-29 12:42:48 -04:00
width = max([max([len(line) for line in frame.split(b"\n")]) for frame in self.frames]) + 2
2016-04-27 08:00:58 -07:00
data = {
"version": 1,
"width": width,
"height": height,
2021-07-29 02:26:34 +02:00
"duration": len(self.frames) * frame_duration,
2016-04-27 08:00:58 -07:00
"command": "-",
"title": "gym VideoRecorder episode",
2021-07-29 02:26:34 +02:00
"env": {}, # could add some env metadata here
2016-04-27 08:00:58 -07:00
"stdout": events,
}
2021-07-29 02:26:34 +02:00
with open(self.output_path, "w") as f:
2016-04-27 08:00:58 -07:00
json.dump(data, f)
@property
def version_info(self):
2021-07-29 02:26:34 +02:00
return {"backend": "TextEncoder", "version": 1}
2016-04-27 08:00:58 -07:00
class ImageEncoder(object):
def __init__(self, output_path, frame_shape, frames_per_sec, output_frames_per_sec):
2016-04-27 08:00:58 -07:00
self.proc = None
self.output_path = output_path
# Frame shape should be lines-first, so w and h are swapped
h, w, pixfmt = frame_shape
if pixfmt != 3 and pixfmt != 4:
2021-07-29 02:26:34 +02:00
raise error.InvalidFrame(
"Your frame has shape {}, but we require (w,h,3) or (w,h,4), i.e., RGB values for a w-by-h image, with an optional alpha channel.".format(
frame_shape
)
)
self.wh = (w, h)
self.includes_alpha = pixfmt == 4
2016-04-27 08:00:58 -07:00
self.frame_shape = frame_shape
self.frames_per_sec = frames_per_sec
self.output_frames_per_sec = output_frames_per_sec
2016-04-27 08:00:58 -07:00
2021-07-29 02:26:34 +02:00
if distutils.spawn.find_executable("avconv") is not None:
self.backend = "avconv"
elif distutils.spawn.find_executable("ffmpeg") is not None:
self.backend = "ffmpeg"
2016-04-27 08:00:58 -07:00
else:
2021-07-29 02:26:34 +02:00
raise error.DependencyNotInstalled(
"""Found neither the ffmpeg nor avconv executables. On OS X, you can install ffmpeg via `brew install ffmpeg`. On most Ubuntu variants, `sudo apt-get install ffmpeg` should do it. On Ubuntu 14.04, however, you'll need to install avconv with `sudo apt-get install libav-tools`."""
)
2016-04-27 08:00:58 -07:00
self.start()
@property
def version_info(self):
return {
2021-07-29 02:26:34 +02:00
"backend": self.backend,
2021-07-29 12:42:48 -04:00
"version": str(subprocess.check_output([self.backend, "-version"], stderr=subprocess.STDOUT)),
2021-07-29 02:26:34 +02:00
"cmdline": self.cmdline,
}
2016-04-27 08:00:58 -07:00
def start(self):
if self.backend == "ffmpeg":
2021-07-29 02:26:34 +02:00
self.cmdline = (
self.backend,
"-nostats",
"-loglevel",
"error", # suppress warnings
"-y",
# input
"-f",
"rawvideo",
"-s:v",
"{}x{}".format(*self.wh),
"-pix_fmt",
("rgb32" if self.includes_alpha else "rgb24"),
"-r",
"%d" % self.frames_per_sec,
"-i",
"-", # this used to be /dev/stdin, which is not Windows-friendly
# output
"-an",
"-r",
"%d" % self.frames_per_sec,
"-vcodec",
"mpeg4",
"-pix_fmt",
"bgr24",
"-r",
"%d" % self.output_frames_per_sec,
self.output_path,
)
else:
2021-07-29 02:26:34 +02:00
self.cmdline = (
self.backend,
"-nostats",
"-loglevel",
"error", # suppress warnings
"-y",
# input
"-f",
"rawvideo",
"-s:v",
"{}x{}".format(*self.wh),
"-pix_fmt",
("rgb32" if self.includes_alpha else "rgb24"),
"-framerate",
"%d" % self.frames_per_sec,
"-i",
"-", # this used to be /dev/stdin, which is not Windows-friendly
# output
"-vf",
"scale=trunc(iw/2)*2:trunc(ih/2)*2",
"-vcodec",
"libx264",
"-pix_fmt",
"yuv420p",
"-r",
"%d" % self.output_frames_per_sec,
self.output_path,
)
logger.debug('Starting %s with "%s"', self.backend, " ".join(self.cmdline))
if hasattr(os, "setsid"): # setsid not present on Windows
2021-07-29 12:42:48 -04:00
self.proc = subprocess.Popen(self.cmdline, stdin=subprocess.PIPE, preexec_fn=os.setsid)
2016-12-27 22:04:47 -05:00
else:
self.proc = subprocess.Popen(self.cmdline, stdin=subprocess.PIPE)
2016-04-27 08:00:58 -07:00
def capture_frame(self, frame):
if not isinstance(frame, (np.ndarray, np.generic)):
2021-07-29 12:42:48 -04:00
raise error.InvalidFrame("Wrong type {} for {} (must be np.ndarray or np.generic)".format(type(frame), frame))
2016-04-27 08:00:58 -07:00
if frame.shape != self.frame_shape:
2021-07-29 02:26:34 +02:00
raise error.InvalidFrame(
"Your frame has shape {}, but the VideoRecorder is configured for shape {}.".format(
frame.shape, self.frame_shape
)
)
2016-04-27 08:00:58 -07:00
if frame.dtype != np.uint8:
2021-07-29 02:26:34 +02:00
raise error.InvalidFrame(
2021-07-29 12:42:48 -04:00
"Your frame has data type {}, but we require uint8 (i.e. RGB values from 0-255).".format(frame.dtype)
2021-07-29 02:26:34 +02:00
)
2016-04-27 08:00:58 -07:00
try:
2021-07-29 12:42:48 -04:00
if distutils.version.LooseVersion(np.__version__) >= distutils.version.LooseVersion("1.9.0"):
self.proc.stdin.write(frame.tobytes())
else:
self.proc.stdin.write(frame.tostring())
except Exception as e:
stdout, stderr = self.proc.communicate()
logger.error("VideoRecorder encoder failed: %s", stderr)
2016-04-27 08:00:58 -07:00
def close(self):
self.proc.stdin.close()
ret = self.proc.wait()
if ret != 0:
logger.error("VideoRecorder encoder exited with status {}".format(ret))