Files
Gymnasium/gym/monitoring/stats_recorder.py

101 lines
3.4 KiB
Python
Raw Normal View History

2016-04-27 08:00:58 -07:00
import json
import os
import time
from gym import error
from gym.utils import atomic_write
2016-04-27 08:00:58 -07:00
class StatsRecorder(object):
    """Accumulate per-episode statistics and write them to a JSON file.

    The recorder is driven by hook calls placed around an environment's
    ``step`` and ``reset``: ``before_step``/``after_step`` and
    ``before_reset``/``after_reset``. For every finished episode it stores
    the step count, the summed reward, a completion timestamp and the
    episode type (``'t'`` for training, ``'e'`` for evaluation).
    ``flush`` serializes everything collected so far to
    ``<directory>/<file_prefix>.stats.json``.
    """

    def __init__(self, directory, file_prefix, autoreset=False, env_id=None):
        """Set up an empty recorder.

        Args:
            directory: Directory the stats file is written into.
            file_prefix: Prefix of the stats file name; the file is named
                ``'<file_prefix>.stats.json'``.
            autoreset: If true, ``after_step`` starts bookkeeping for a new
                episode by itself whenever a step reports ``done``.
            env_id: Identifier interpolated into error messages.
        """
        self.autoreset = autoreset
        self.env_id = env_id

        self.initial_reset_timestamp = None
        self.directory = directory
        self.file_prefix = file_prefix
        self.episode_lengths = []
        self.episode_rewards = []
        self.episode_types = []  # experimental addition
        self._type = 't'
        self.timestamps = []

        # Per-episode running state. ``None`` means "no reset seen yet".
        self.steps = None
        self.rewards = None
        self.done = None
        self.closed = False

        filename = '{}.stats.json'.format(self.file_prefix)
        self.path = os.path.join(self.directory, filename)

    @property
    def type(self):
        """Episode type recorded at the next reset: ``'t'`` or ``'e'``."""
        return self._type

    @type.setter
    def type(self, episode_type):
        """Set the episode type.

        Raises:
            error.Error: If ``episode_type`` is neither ``'t'`` nor ``'e'``.
        """
        if episode_type not in ('t', 'e'):
            # The offending value has to be formatted into the message;
            # passing it as a second exception argument leaves the ``{}``
            # placeholder unfilled.
            raise error.Error('Invalid episode type {}: must be t for training or e for evaluation'.format(episode_type))
        self._type = episode_type

    def before_step(self, action):
        """Check that the environment may be stepped.

        ``action`` is accepted for hook symmetry and is not inspected.

        Raises:
            error.ResetNeeded: If the current episode is already done, or
                if no reset has happened yet.
        """
        assert not self.closed

        if self.done:
            raise error.ResetNeeded("Trying to step environment which is currently done. While the monitor is active for {}, you cannot step beyond the end of an episode. Call 'env.reset()' to start the next episode.".format(self.env_id))
        elif self.steps is None:
            raise error.ResetNeeded("Trying to step an environment before reset. While the monitor is active for {}, you must call 'env.reset()' before taking an initial step.".format(self.env_id))

    def after_step(self, observation, reward, done, info):
        """Record the outcome of one environment step.

        Increments the step counter and adds ``reward`` to the running
        total. When ``done`` is true the episode is finalized via
        ``save_complete``; with ``autoreset`` enabled, bookkeeping for the
        next episode then starts immediately, using ``observation`` as the
        reset observation. ``info`` is not used.
        """
        self.steps += 1
        self.rewards += reward

        if done:
            # Store the finished episode before any autoreset zeroes the
            # running counters.
            self.save_complete()
            self.done = True
            if self.autoreset:
                self.before_reset()
                self.after_reset(observation)

    def before_reset(self):
        """Check that a reset is allowed and mark a new episode as running.

        Also records the wall-clock time of the very first reset.

        Raises:
            error.Error: If an episode is in progress and not yet done.
        """
        assert not self.closed

        # ``done is None`` means no episode was ever started, which is fine.
        if self.done is not None and not self.done:
            raise error.Error("Tried to reset environment which is not done. While the monitor is active for {}, you cannot call reset() unless the episode is over.".format(self.env_id))

        self.done = False
        if self.initial_reset_timestamp is None:
            self.initial_reset_timestamp = time.time()

    def after_reset(self, observation):
        """Zero the per-episode counters and log the episode type.

        ``observation`` is not used.
        """
        self.steps = 0
        self.rewards = 0
        # We write the type at the beginning of the episode. If a user
        # changes the type, it's more natural for it to apply next
        # time the user calls reset().
        self.episode_types.append(self._type)

    def save_complete(self):
        """Append the current episode's totals to the recorded history.

        Does nothing if no reset has happened yet (``steps`` is ``None``).
        """
        if self.steps is not None:
            self.episode_lengths.append(self.steps)
            # Coerce to a builtin float so that reward totals accumulated
            # from non-builtin numeric types (e.g. NumPy scalars) remain
            # serializable by ``json.dump`` in ``flush``.
            self.episode_rewards.append(float(self.rewards))
            self.timestamps.append(time.time())

    def close(self):
        """Write the stats file one final time and mark the recorder closed."""
        self.flush()
        self.closed = True

    def flush(self):
        """Write all recorded statistics to ``self.path`` as JSON.

        The file is written through ``atomic_write.atomic_write``. Calling
        this on an already closed recorder is a no-op.
        """
        if self.closed:
            return

        with atomic_write.atomic_write(self.path) as f:
            json.dump({
                'initial_reset_timestamp': self.initial_reset_timestamp,
                'timestamps': self.timestamps,
                'episode_lengths': self.episode_lengths,
                'episode_rewards': self.episode_rewards,
                'episode_types': self.episode_types,
            }, f)